-rw-r--r--  .gitignore  4
-rw-r--r--  Documentation/ABI/testing/sysfs-ata  171
-rw-r--r--  Documentation/ABI/testing/sysfs-block-device  58
-rw-r--r--  Documentation/ABI/testing/sysfs-class-scsi_host  89
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-platform-dock  39
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-system-cpu  77
-rw-r--r--  Documentation/ABI/testing/sysfs-platform-dptf  40
-rw-r--r--  Documentation/PCI/pci.txt  4
-rw-r--r--  Documentation/accelerators/ocxl.rst  5
-rw-r--r--  Documentation/atomic_bitops.txt  7
-rw-r--r--  Documentation/bpf/bpf_devel_QA.txt  12
-rw-r--r--  Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt (renamed from Documentation/devicetree/bindings/misc/arm-charlcd.txt)  0
-rw-r--r--  Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt  1
-rw-r--r--  Documentation/devicetree/bindings/dma/mv-xor-v2.txt  6
-rw-r--r--  Documentation/devicetree/bindings/eeprom/at24.txt  2
-rw-r--r--  Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt  1
-rw-r--r--  Documentation/devicetree/bindings/net/dsa/marvell.txt  59
-rw-r--r--  Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt  23
-rw-r--r--  Documentation/devicetree/bindings/net/macb.txt  1
-rw-r--r--  Documentation/devicetree/bindings/net/renesas,ravb.txt  7
-rw-r--r--  Documentation/devicetree/bindings/net/sff,sfp.txt  5
-rw-r--r--  Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt  1
-rw-r--r--  Documentation/devicetree/bindings/net/ti,dp83867.txt  2
-rw-r--r--  Documentation/devicetree/bindings/power/mti,mips-cpc.txt  8
-rw-r--r--  Documentation/devicetree/bindings/power/wakeup-source.txt  2
-rw-r--r--  Documentation/devicetree/bindings/thermal/imx-thermal.txt  25
-rw-r--r--  Documentation/devicetree/bindings/usb/dwc2.txt  2
-rw-r--r--  Documentation/devicetree/bindings/usb/renesas_usb3.txt  1
-rw-r--r--  Documentation/devicetree/bindings/usb/renesas_usbhs.txt  1
-rw-r--r--  Documentation/devicetree/bindings/usb/usb-xhci.txt  1
-rw-r--r--  Documentation/features/sched/membarrier-sync-core/arch-support.txt  62
-rw-r--r--  Documentation/gpu/tve200.rst  2
-rw-r--r--  Documentation/i2c/busses/i2c-i801  2
-rw-r--r--  Documentation/ia64/serial.txt  2
-rw-r--r--  Documentation/locking/mutex-design.txt  49
-rw-r--r--  Documentation/media/dmx.h.rst.exceptions  14
-rw-r--r--  Documentation/media/uapi/dvb/dmx-qbuf.rst  7
-rw-r--r--  Documentation/networking/ice.txt  39
-rw-r--r--  Documentation/networking/ip-sysctl.txt  13
-rw-r--r--  Documentation/networking/msg_zerocopy.rst  5
-rw-r--r--  Documentation/networking/net_dim.txt  174
-rw-r--r--  Documentation/networking/packet_mmap.txt  22
-rw-r--r--  Documentation/networking/segmentation-offloads.txt  52
-rw-r--r--  Documentation/networking/tls.txt  66
-rw-r--r--  Documentation/ptp/ptp.txt  5
-rw-r--r--  Documentation/sphinx/kerneldoc.py  3
-rw-r--r--  Documentation/sysctl/net.txt  12
-rw-r--r--  Documentation/virtual/kvm/api.txt  40
-rw-r--r--  Documentation/virtual/kvm/cpuid.txt  4
-rw-r--r--  Documentation/virtual/kvm/msr.txt  3
-rw-r--r--  Documentation/x86/intel_rdt_ui.txt  2
-rw-r--r--  Documentation/x86/topology.txt  2
-rw-r--r--  MAINTAINERS  56
-rw-r--r--  Makefile  28
-rw-r--r--  arch/alpha/include/asm/cmpxchg.h  6
-rw-r--r--  arch/alpha/include/asm/xchg.h  38
-rw-r--r--  arch/arc/Kconfig  1
-rw-r--r--  arch/arc/boot/dts/axs101.dts  2
-rw-r--r--  arch/arc/boot/dts/axs10x_mb.dtsi  4
-rw-r--r--  arch/arc/boot/dts/haps_hs_idu.dts  2
-rw-r--r--  arch/arc/boot/dts/nsim_700.dts  2
-rw-r--r--  arch/arc/boot/dts/nsim_hs.dts  2
-rw-r--r--  arch/arc/boot/dts/nsim_hs_idu.dts  2
-rw-r--r--  arch/arc/boot/dts/nsimosci.dts  2
-rw-r--r--  arch/arc/boot/dts/nsimosci_hs.dts  2
-rw-r--r--  arch/arc/boot/dts/nsimosci_hs_idu.dts  2
-rw-r--r--  arch/arc/include/asm/bug.h  3
-rw-r--r--  arch/arc/include/asm/entry-arcv2.h  2
-rw-r--r--  arch/arc/kernel/mcip.c  74
-rw-r--r--  arch/arc/kernel/setup.c  4
-rw-r--r--  arch/arc/kernel/smp.c  50
-rw-r--r--  arch/arc/kernel/unwind.c  2
-rw-r--r--  arch/arc/mm/cache.c  5
-rw-r--r--  arch/arm/boot/dts/armada-370-rd.dts  32
-rw-r--r--  arch/arm/boot/dts/bcm11351.dtsi  2
-rw-r--r--  arch/arm/boot/dts/bcm21664.dtsi  2
-rw-r--r--  arch/arm/boot/dts/bcm2835.dtsi  6
-rw-r--r--  arch/arm/boot/dts/bcm2836.dtsi  12
-rw-r--r--  arch/arm/boot/dts/bcm2837.dtsi  2
-rw-r--r--  arch/arm/boot/dts/bcm283x.dtsi  2
-rw-r--r--  arch/arm/boot/dts/bcm958625hr.dts  2
-rw-r--r--  arch/arm/boot/dts/gemini-dlink-dns-313.dts  2
-rw-r--r--  arch/arm/boot/dts/imx6dl-icore-rqs.dts  2
-rw-r--r--  arch/arm/boot/dts/logicpd-som-lv.dtsi  9
-rw-r--r--  arch/arm/boot/dts/logicpd-torpedo-som.dtsi  8
-rw-r--r--  arch/arm/boot/dts/omap5-uevm.dts  2
-rw-r--r--  arch/arm/boot/dts/rk3036.dtsi  4
-rw-r--r--  arch/arm/boot/dts/rk322x.dtsi  6
-rw-r--r--  arch/arm/boot/dts/rk3288-phycore-som.dtsi  20
-rw-r--r--  arch/arm/boot/dts/zx296702.dtsi  20
-rw-r--r--  arch/arm/configs/omap2plus_defconfig  2
-rw-r--r--  arch/arm/kernel/time.c  2
-rw-r--r--  arch/arm/kvm/hyp/Makefile  5
-rw-r--r--  arch/arm/kvm/hyp/banked-sr.c  4
-rw-r--r--  arch/arm/mach-clps711x/board-dt.c  2
-rw-r--r--  arch/arm/mach-davinci/board-dm355-evm.c  2
-rw-r--r--  arch/arm/mach-davinci/board-dm355-leopard.c  2
-rw-r--r--  arch/arm/mach-davinci/board-dm365-evm.c  2
-rw-r--r--  arch/arm/mach-mvebu/Kconfig  4
-rw-r--r--  arch/arm/mach-omap1/clock.c  6
-rw-r--r--  arch/arm/mach-omap2/omap-wakeupgen.c  4
-rw-r--r--  arch/arm/mach-omap2/omap_hwmod.c  6
-rw-r--r--  arch/arm/mach-omap2/pm.c  4
-rw-r--r--  arch/arm/mach-omap2/timer.c  19
-rw-r--r--  arch/arm/mach-orion5x/Kconfig  3
-rw-r--r--  arch/arm/mach-orion5x/dns323-setup.c  53
-rw-r--r--  arch/arm/mach-orion5x/tsx09-common.c  49
-rw-r--r--  arch/arm/mach-ux500/cpu-db8500.c  35
-rw-r--r--  arch/arm/plat-orion/common.c  23
-rw-r--r--  arch/arm64/boot/dts/amlogic/meson-axg.dtsi  4
-rw-r--r--  arch/arm64/boot/dts/amlogic/meson-gx.dtsi  10
-rw-r--r--  arch/arm64/boot/dts/amlogic/meson-gxl.dtsi  1
-rw-r--r--  arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi  3
-rw-r--r--  arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts  2
-rw-r--r--  arch/arm64/boot/dts/mediatek/mt8173.dtsi  2
-rw-r--r--  arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi  6
-rw-r--r--  arch/arm64/boot/dts/qcom/msm8996.dtsi  6
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328-rock64.dts  7
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328.dtsi  6
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3368.dtsi  2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi  2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399.dtsi  4
-rw-r--r--  arch/arm64/include/asm/cputype.h  2
-rw-r--r--  arch/arm64/include/asm/hugetlb.h  2
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h  26
-rw-r--r--  arch/arm64/include/asm/mmu_context.h  4
-rw-r--r--  arch/arm64/include/asm/pgalloc.h  44
-rw-r--r--  arch/arm64/include/asm/pgtable.h  23
-rw-r--r--  arch/arm64/include/asm/stacktrace.h  2
-rw-r--r--  arch/arm64/include/asm/uaccess.h  12
-rw-r--r--  arch/arm64/kernel/armv8_deprecated.c  4
-rw-r--r--  arch/arm64/kernel/cpu_errata.c  13
-rw-r--r--  arch/arm64/kernel/cpufeature.c  6
-rw-r--r--  arch/arm64/kernel/efi.c  2
-rw-r--r--  arch/arm64/kernel/hibernate.c  148
-rw-r--r--  arch/arm64/kernel/perf_event.c  4
-rw-r--r--  arch/arm64/kernel/process.c  11
-rw-r--r--  arch/arm64/kernel/ptrace.c  2
-rw-r--r--  arch/arm64/kernel/stacktrace.c  5
-rw-r--r--  arch/arm64/kernel/sys_compat.c  2
-rw-r--r--  arch/arm64/kernel/time.c  2
-rw-r--r--  arch/arm64/kernel/traps.c  10
-rw-r--r--  arch/arm64/kvm/guest.c  3
-rw-r--r--  arch/arm64/kvm/hyp/switch.c  4
-rw-r--r--  arch/arm64/mm/dump.c  54
-rw-r--r--  arch/arm64/mm/fault.c  44
-rw-r--r--  arch/arm64/mm/hugetlbpage.c  94
-rw-r--r--  arch/arm64/mm/kasan_init.c  70
-rw-r--r--  arch/arm64/mm/mmu.c  310
-rw-r--r--  arch/arm64/mm/pageattr.c  32
-rw-r--r--  arch/arm64/mm/proc.S  14
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c  5
-rw-r--r--  arch/cris/include/arch-v10/arch/bug.h  11
-rw-r--r--  arch/h8300/include/asm/byteorder.h  1
-rw-r--r--  arch/ia64/include/asm/atomic.h  69
-rw-r--r--  arch/ia64/include/asm/bug.h  6
-rw-r--r--  arch/ia64/kernel/Makefile  1
-rw-r--r--  arch/ia64/kernel/err_inject.c  4
-rw-r--r--  arch/ia64/scripts/unwcheck.py  16
-rw-r--r--  arch/m68k/include/asm/bug.h  3
-rw-r--r--  arch/m68k/mac/config.c  4
-rw-r--r--  arch/microblaze/Kconfig  1
-rw-r--r--  arch/microblaze/Kconfig.platform  2
-rw-r--r--  arch/microblaze/include/asm/setup.h  1
-rw-r--r--  arch/microblaze/lib/fastcopy.S  4
-rw-r--r--  arch/microblaze/mm/init.c  64
-rw-r--r--  arch/mips/ath25/board.c  2
-rw-r--r--  arch/mips/boot/Makefile  1
-rw-r--r--  arch/mips/cavium-octeon/octeon-irq.c  2
-rw-r--r--  arch/mips/include/asm/compat.h  1
-rw-r--r--  arch/mips/kernel/mips-cpc.c  13
-rw-r--r--  arch/mips/kernel/setup.c  16
-rw-r--r--  arch/mips/kernel/smp-bmips.c  10
-rw-r--r--  arch/mips/loongson64/Kconfig  6
-rw-r--r--  arch/parisc/include/asm/cacheflush.h  1
-rw-r--r--  arch/parisc/include/asm/processor.h  2
-rw-r--r--  arch/parisc/kernel/cache.c  88
-rw-r--r--  arch/parisc/kernel/head.S  18
-rw-r--r--  arch/parisc/kernel/pacache.S  22
-rw-r--r--  arch/parisc/kernel/smp.c  7
-rw-r--r--  arch/parisc/kernel/time.c  11
-rw-r--r--  arch/parisc/mm/init.c  7
-rw-r--r--  arch/powerpc/boot/Makefile  3
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h  1
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-4k.h  3
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-64k.h  16
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h  13
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgalloc.h  16
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h  4
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h  2
-rw-r--r--  arch/powerpc/include/asm/firmware.h  2
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h  12
-rw-r--r--  arch/powerpc/include/asm/kexec.h  6
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgtable.h  1
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h  1
-rw-r--r--  arch/powerpc/include/asm/topology.h  13
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c  3
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S  2
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S  6
-rw-r--r--  arch/powerpc/kernel/prom_init.c  1
-rw-r--r--  arch/powerpc/kernel/sysfs.c  6
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c  69
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c  17
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S  10
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c  2
-rw-r--r--  arch/powerpc/kvm/powerpc.c  4
-rw-r--r--  arch/powerpc/mm/drmem.c  14
-rw-r--r--  arch/powerpc/mm/hash64_4k.c  4
-rw-r--r--  arch/powerpc/mm/hash64_64k.c  8
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c  1
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c  10
-rw-r--r--  arch/powerpc/mm/init-common.c  4
-rw-r--r--  arch/powerpc/mm/numa.c  5
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c  117
-rw-r--r--  arch/powerpc/mm/pgtable_64.c  4
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c  9
-rw-r--r--  arch/powerpc/net/bpf_jit_comp.c  3
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c  1
-rw-r--r--  arch/powerpc/platforms/powernv/opal-imc.c  6
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c  2
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c  4
-rw-r--r--  arch/powerpc/platforms/powernv/vas-window.c  16
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c  4
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c  31
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c  3
-rw-r--r--  arch/powerpc/sysdev/xive/spapr.c  16
-rw-r--r--  arch/riscv/Kconfig  3
-rw-r--r--  arch/riscv/include/asm/barrier.h  6
-rw-r--r--  arch/riscv/kernel/entry.S  5
-rw-r--r--  arch/riscv/kernel/head.S  2
-rw-r--r--  arch/riscv/kernel/setup.c  2
-rw-r--r--  arch/s390/include/asm/mmu_context.h  1
-rw-r--r--  arch/s390/kernel/entry.S  10
-rw-r--r--  arch/s390/kernel/nospec-branch.c  4
-rw-r--r--  arch/s390/kvm/intercept.c  51
-rw-r--r--  arch/s390/kvm/interrupt.c  123
-rw-r--r--  arch/s390/kvm/kvm-s390.c  81
-rw-r--r--  arch/s390/kvm/kvm-s390.h  7
-rw-r--r--  arch/s390/kvm/priv.c  192
-rw-r--r--  arch/s390/kvm/vsie.c  20
-rw-r--r--  arch/sh/boot/dts/Makefile  4
-rw-r--r--  arch/sparc/Kconfig  2
-rw-r--r--  arch/sparc/include/asm/bug.h  6
-rw-r--r--  arch/sparc/mm/tlb.c  19
-rw-r--r--  arch/x86/.gitignore  1
-rw-r--r--  arch/x86/Kconfig  89
-rw-r--r--  arch/x86/Kconfig.cpu  4
-rw-r--r--  arch/x86/Makefile  7
-rw-r--r--  arch/x86/boot/compressed/eboot.c  4
-rw-r--r--  arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c  10
-rw-r--r--  arch/x86/entry/calling.h  121
-rw-r--r--  arch/x86/entry/entry_32.S  3
-rw-r--r--  arch/x86/entry/entry_64.S  221
-rw-r--r--  arch/x86/entry/entry_64_compat.S  65
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl  38
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c  16
-rw-r--r--  arch/x86/events/intel/core.c  2
-rw-r--r--  arch/x86/events/intel/lbr.c  2
-rw-r--r--  arch/x86/events/intel/p6.c  2
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c  2
-rw-r--r--  arch/x86/ia32/sys_ia32.c  74
-rw-r--r--  arch/x86/include/asm/acpi.h  2
-rw-r--r--  arch/x86/include/asm/apm.h  6
-rw-r--r--  arch/x86/include/asm/asm-prototypes.h  3
-rw-r--r--  arch/x86/include/asm/barrier.h  2
-rw-r--r--  arch/x86/include/asm/bitops.h  29
-rw-r--r--  arch/x86/include/asm/bug.h  19
-rw-r--r--  arch/x86/include/asm/cpufeature.h  79
-rw-r--r--  arch/x86/include/asm/cpufeatures.h  3
-rw-r--r--  arch/x86/include/asm/efi.h  17
-rw-r--r--  arch/x86/include/asm/kvm_host.h  6
-rw-r--r--  arch/x86/include/asm/microcode.h  10
-rw-r--r--  arch/x86/include/asm/mmu_context.h  1
-rw-r--r--  arch/x86/include/asm/nospec-branch.h  176
-rw-r--r--  arch/x86/include/asm/page_64.h  4
-rw-r--r--  arch/x86/include/asm/paravirt.h  21
-rw-r--r--  arch/x86/include/asm/paravirt_types.h  7
-rw-r--r--  arch/x86/include/asm/percpu.h  2
-rw-r--r--  arch/x86/include/asm/pgtable.h  8
-rw-r--r--  arch/x86/include/asm/pgtable_32.h  3
-rw-r--r--  arch/x86/include/asm/pgtable_64.h  1
-rw-r--r--  arch/x86/include/asm/pgtable_types.h  12
-rw-r--r--  arch/x86/include/asm/processor.h  8
-rw-r--r--  arch/x86/include/asm/refcount.h  6
-rw-r--r--  arch/x86/include/asm/rmwcc.h  16
-rw-r--r--  arch/x86/include/asm/sections.h  1
-rw-r--r--  arch/x86/include/asm/smp.h  1
-rw-r--r--  arch/x86/include/asm/sys_ia32.h  48
-rw-r--r--  arch/x86/include/asm/tlbflush.h  27
-rw-r--r--  arch/x86/include/asm/vmx.h  1
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h  18
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h  1
-rw-r--r--  arch/x86/include/uapi/asm/mce.h  1
-rw-r--r--  arch/x86/kernel/amd_nb.c  2
-rw-r--r--  arch/x86/kernel/apic/apic.c  6
-rw-r--r--  arch/x86/kernel/apic/io_apic.c  2
-rw-r--r--  arch/x86/kernel/apic/vector.c  25
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c  15
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c  2
-rw-r--r--  arch/x86/kernel/cpu/amd.c  28
-rw-r--r--  arch/x86/kernel/cpu/bugs.c  46
-rw-r--r--  arch/x86/kernel/cpu/centaur.c  4
-rw-r--r--  arch/x86/kernel/cpu/common.c  40
-rw-r--r--  arch/x86/kernel/cpu/cyrix.c  2
-rw-r--r--  arch/x86/kernel/cpu/intel.c  41
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.c  2
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_rdtgroup.c  1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h  15
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c  45
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c  44
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c  181
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c  68
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c  2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c  4
-rw-r--r--  arch/x86/kernel/cpu/proc.c  8
-rw-r--r--  arch/x86/kernel/head_32.S  4
-rw-r--r--  arch/x86/kernel/head_64.S  2
-rw-r--r--  arch/x86/kernel/ioport.c  2
-rw-r--r--  arch/x86/kernel/kprobes/core.c  10
-rw-r--r--  arch/x86/kernel/kvm.c  20
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c  1
-rw-r--r--  arch/x86/kernel/module.c  1
-rw-r--r--  arch/x86/kernel/mpparse.c  2
-rw-r--r--  arch/x86/kernel/paravirt.c  6
-rw-r--r--  arch/x86/kernel/setup.c  17
-rw-r--r--  arch/x86/kernel/setup_percpu.c  17
-rw-r--r--  arch/x86/kernel/signal_compat.c  65
-rw-r--r--  arch/x86/kernel/smpboot.c  12
-rw-r--r--  arch/x86/kernel/traps.c  2
-rw-r--r--  arch/x86/kernel/unwind_orc.c  3
-rw-r--r--  arch/x86/kernel/vm86_32.c  3
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S  2
-rw-r--r--  arch/x86/kvm/cpuid.c  3
-rw-r--r--  arch/x86/kvm/lapic.c  11
-rw-r--r--  arch/x86/kvm/mmu.c  16
-rw-r--r--  arch/x86/kvm/svm.c  90
-rw-r--r--  arch/x86/kvm/vmx.c  44
-rw-r--r--  arch/x86/kvm/x86.c  107
-rw-r--r--  arch/x86/lib/Makefile  1
-rw-r--r--  arch/x86/lib/cpu.c  2
-rw-r--r--  arch/x86/lib/error-inject.c  1
-rw-r--r--  arch/x86/lib/retpoline.S  56
-rw-r--r--  arch/x86/mm/cpu_entry_area.c  6
-rw-r--r--  arch/x86/mm/fault.c  10
-rw-r--r--  arch/x86/mm/init_32.c  15
-rw-r--r--  arch/x86/mm/init_64.c  66
-rw-r--r--  arch/x86/mm/ioremap.c  2
-rw-r--r--  arch/x86/mm/kmmio.c  2
-rw-r--r--  arch/x86/mm/mem_encrypt_boot.S  2
-rw-r--r--  arch/x86/mm/pgtable.c  48
-rw-r--r--  arch/x86/mm/pgtable_32.c  2
-rw-r--r--  arch/x86/mm/pti.c  2
-rw-r--r--  arch/x86/mm/tlb.c  6
-rw-r--r--  arch/x86/net/bpf_jit_comp.c  246
-rw-r--r--  arch/x86/oprofile/nmi_int.c  2
-rw-r--r--  arch/x86/platform/intel-mid/intel-mid.c  2
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c  2
-rw-r--r--  arch/x86/realmode/rm/trampoline_64.S  2
-rw-r--r--  arch/x86/tools/relocs.c  3
-rw-r--r--  arch/x86/xen/enlighten_pv.c  6
-rw-r--r--  arch/x86/xen/mmu_pv.c  6
-rw-r--r--  arch/x86/xen/smp.c  2
-rw-r--r--  arch/x86/xen/suspend.c  16
-rw-r--r--  arch/xtensa/kernel/pci-dma.c  40
-rw-r--r--  arch/xtensa/mm/init.c  70
-rw-r--r--  block/blk-cgroup.c  11
-rw-r--r--  block/blk-core.c  2
-rw-r--r--  block/blk-mq.c  5
-rw-r--r--  block/genhd.c  48
-rw-r--r--  block/ioctl.c  2
-rw-r--r--  block/kyber-iosched.c  1
-rw-r--r--  block/mq-deadline.c  16
-rw-r--r--  block/partition-generic.c  6
-rw-r--r--  block/sed-opal.c  2
-rw-r--r--  certs/blacklist_nohashes.c  2
-rw-r--r--  crypto/asymmetric_keys/pkcs7_trust.c  1
-rw-r--r--  crypto/asymmetric_keys/pkcs7_verify.c  12
-rw-r--r--  crypto/asymmetric_keys/public_key.c  4
-rw-r--r--  crypto/asymmetric_keys/restrict.c  21
-rw-r--r--  crypto/sha3_generic.c  218
-rw-r--r--  drivers/acpi/acpi_watchdog.c  4
-rw-r--r--  drivers/acpi/battery.c  48
-rw-r--r--  drivers/acpi/bus.c  75
-rw-r--r--  drivers/acpi/ec.c  6
-rw-r--r--  drivers/acpi/nfit/core.c  10
-rw-r--r--  drivers/acpi/numa.c  10
-rw-r--r--  drivers/acpi/property.c  4
-rw-r--r--  drivers/acpi/spcr.c  1
-rw-r--r--  drivers/android/binder.c  29
-rw-r--r--  drivers/ata/ahci.c  4
-rw-r--r--  drivers/ata/libahci.c  10
-rw-r--r--  drivers/ata/libahci_platform.c  2
-rw-r--r--  drivers/ata/libata-core.c  26
-rw-r--r--  drivers/ata/libata-eh.c  3
-rw-r--r--  drivers/ata/libata-scsi.c  12
-rw-r--r--  drivers/ata/sata_rcar.c  62
-rw-r--r--  drivers/atm/idt77252.c  12
-rw-r--r--  drivers/auxdisplay/img-ascii-lcd.c  6
-rw-r--r--  drivers/auxdisplay/panel.c  6
-rw-r--r--  drivers/base/core.c  3
-rw-r--r--  drivers/base/power/wakeirq.c  6
-rw-r--r--  drivers/base/property.c  5
-rw-r--r--  drivers/bcma/Kconfig  2
-rw-r--r--  drivers/bcma/driver_chipcommon_pmu.c  2
-rw-r--r--  drivers/bcma/host_pci.c  1
-rw-r--r--  drivers/block/amiflop.c  2
-rw-r--r--  drivers/block/ataflop.c  2
-rw-r--r--  drivers/block/brd.c  2
-rw-r--r--  drivers/block/floppy.c  2
-rw-r--r--  drivers/block/loop.c  4
-rw-r--r--  drivers/block/nbd.c  2
-rw-r--r--  drivers/block/pktcdvd.c  2
-rw-r--r--  drivers/block/swim.c  2
-rw-r--r--  drivers/block/xen-blkfront.c  17
-rw-r--r--  drivers/block/z2ram.c  2
-rw-r--r--  drivers/bluetooth/Kconfig  12
-rw-r--r--  drivers/bluetooth/Makefile  2
-rw-r--r--  drivers/bluetooth/ath3k.c  28
-rw-r--r--  drivers/bluetooth/btmrvl_main.c  2
-rw-r--r--  drivers/bluetooth/btrsi.c  188
-rw-r--r--  drivers/bluetooth/btrtl.c  119
-rw-r--r--  drivers/bluetooth/btusb.c  37
-rw-r--r--  drivers/bluetooth/hci_ath.c  4
-rw-r--r--  drivers/bluetooth/hci_bcm.c  20
-rw-r--r--  drivers/bluetooth/hci_ll.c  2
-rw-r--r--  drivers/bus/ti-sysc.c  2
-rw-r--r--  drivers/char/hw_random/via-rng.c  2
-rw-r--r--  drivers/char/tpm/st33zp24/st33zp24.c  4
-rw-r--r--  drivers/char/tpm/tpm-interface.c  4
-rw-r--r--  drivers/char/tpm/tpm2-cmd.c  4
-rw-r--r--  drivers/char/tpm/tpm_i2c_infineon.c  5
-rw-r--r--  drivers/char/tpm/tpm_i2c_nuvoton.c  8
-rw-r--r--  drivers/char/tpm/tpm_tis_core.c  5
-rw-r--r--  drivers/clk/bcm/clk-bcm2835.c  12
-rw-r--r--  drivers/clk/clk-aspeed.c  28
-rw-r--r--  drivers/clk/clk.c  46
-rw-r--r--  drivers/clk/hisilicon/clk-hi3660-stub.c  2
-rw-r--r--  drivers/clk/imx/clk-imx51-imx53.c  20
-rw-r--r--  drivers/clk/qcom/apcs-msm8916.c  5
-rw-r--r--  drivers/clk/sunxi-ng/ccu-sun6i-a31.c  6
-rw-r--r--  drivers/clk/ti/clk-33xx.c  2
-rw-r--r--  drivers/clk/ti/clk-43xx.c  2
-rw-r--r--  drivers/clk/ti/clkctrl.c  2
-rw-r--r--  drivers/clocksource/Kconfig  1
-rw-r--r--  drivers/clocksource/arc_timer.c  11
-rw-r--r--  drivers/clocksource/fsl_ftm_timer.c  2
-rw-r--r--  drivers/clocksource/mips-gic-timer.c  6
-rw-r--r--  drivers/clocksource/timer-sun5i.c  2
-rw-r--r--  drivers/cpufreq/Kconfig.arm  6
-rw-r--r--  drivers/cpufreq/acpi-cpufreq.c  2
-rw-r--r--  drivers/cpufreq/longhaul.c  6
-rw-r--r--  drivers/cpufreq/p4-clockmod.c  2
-rw-r--r--  drivers/cpufreq/powernow-k7.c  2
-rw-r--r--  drivers/cpufreq/s3c24xx-cpufreq.c  8
-rw-r--r--  drivers/cpufreq/scpi-cpufreq.c  16
-rw-r--r--  drivers/cpufreq/speedstep-centrino.c  4
-rw-r--r--  drivers/cpufreq/speedstep-lib.c  6
-rw-r--r--  drivers/crypto/caam/ctrl.c  8
-rw-r--r--  drivers/crypto/ccp/psp-dev.c  8
-rw-r--r--  drivers/crypto/padlock-aes.c  2
-rw-r--r--  drivers/crypto/s5p-sss.c  12
-rw-r--r--  drivers/crypto/sunxi-ss/sun4i-ss-prng.c  6
-rw-r--r--  drivers/crypto/talitos.c  4
-rw-r--r--  drivers/dax/super.c  6
-rw-r--r--  drivers/dma/mv_xor_v2.c  25
-rw-r--r--  drivers/dma/sh/rcar-dmac.c  2
-rw-r--r--  drivers/edac/amd64_edac.c  2
-rw-r--r--  drivers/edac/sb_edac.c  2
-rw-r--r--  drivers/extcon/extcon-axp288.c  36
-rw-r--r--  drivers/extcon/extcon-intel-int3496.c  3
-rw-r--r--  drivers/firmware/dcdbas.c  2
-rw-r--r--  drivers/firmware/efi/libstub/tpm.c  4
-rw-r--r--  drivers/gpio/gpio-rcar.c  38
-rw-r--r--  drivers/gpio/gpiolib-of.c  15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c  3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c  89
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  35
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c  6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c  13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c  4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_encoders.h  5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c  8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c  8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c  10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c  8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.c  22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.c  50
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  174
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c  6
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c  6
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc.c  6
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_link.c  3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_resource.c  3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_stream.c  82
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc.h  3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc_stream.h  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h  16
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c  38
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h  3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce/dce_opp.c  9
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c  1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c  91
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c  18
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c  1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c  69
-rw-r--r--  drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h  3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h  3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c  3
-rw-r--r--  drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h  5
-rw-r--r--  drivers/gpu/drm/amd/display/include/signal_types.h  5
-rw-r--r--  drivers/gpu/drm/amd/powerplay/amd_powerplay.c  2
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c  18
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c  11
-rw-r--r--  drivers/gpu/drm/ast/ast_tables.h  4
-rw-r--r--  drivers/gpu/drm/cirrus/cirrus_mode.c  40
-rw-r--r--  drivers/gpu/drm/drm_atomic_helper.c  15
-rw-r--r--  drivers/gpu/drm/drm_edid.c  21
-rw-r--r--  drivers/gpu/drm/drm_framebuffer.c  11
-rw-r--r--  drivers/gpu/drm/drm_mm.c  21
-rw-r--r--  drivers/gpu/drm/drm_probe_helper.c  20
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_g2d.c  12
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_rotator.h  19
-rw-r--r--  drivers/gpu/drm/exynos/exynos_hdmi.c  7
-rw-r--r--  drivers/gpu/drm/exynos/regs-fimc.h  2
-rw-r--r--  drivers/gpu/drm/exynos/regs-hdmi.h  2
-rw-r--r--  drivers/gpu/drm/i915/gvt/cmd_parser.c  8
-rw-r--r--  drivers/gpu/drm/i915/gvt/kvmgt.c  51
-rw-r--r--  drivers/gpu/drm/i915/gvt/mmio_context.c  3
-rw-r--r--  drivers/gpu/drm/i915/gvt/scheduler.c  71
-rw-r--r--  drivers/gpu/drm/i915/gvt/scheduler.h  5
-rw-r--r--  drivers/gpu/drm/i915/gvt/trace.h  2
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c  14
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h  2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  22
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_context.c  2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  4
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.c  4
-rw-r--r--  drivers/gpu/drm/i915/i915_oa_cflgt3.c  4
-rw-r--r--  drivers/gpu/drm/i915/i915_oa_cnl.c  4
-rw-r--r--  drivers/gpu/drm/i915/i915_perf.c  40
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c  231
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.h  6
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h  4
-rw-r--r--  drivers/gpu/drm/i915/i915_sysfs.c  10
-rw-r--r--  drivers/gpu/drm/i915/intel_audio.c  6
-rw-r--r--  drivers/gpu/drm/i915/intel_bios.c  105
-rw-r--r--  drivers/gpu/drm/i915/intel_breadcrumbs.c  29
-rw-r--r--  drivers/gpu/drm/i915/intel_cdclk.c  8
-rw-r--r--  drivers/gpu/drm/i915/intel_ddi.c  7
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c  10
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c  24
-rw-r--r--  drivers/gpu/drm/i915/intel_hangcheck.c  4
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c  5
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h  14
-rw-r--r--  drivers/gpu/drm/imx/ipuv3-crtc.c  5
-rw-r--r--  drivers/gpu/drm/imx/ipuv3-plane.c  10
-rw-r--r--  drivers/gpu/drm/meson/meson_crtc.c  6
-rw-r--r--  drivers/gpu/drm/meson/meson_drv.h  3
-rw-r--r--  drivers/gpu/drm/meson/meson_plane.c  7
-rw-r--r--  drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c  2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_backlight.c  14
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_connector.c  18
-rw-r--r--  drivers/gpu/drm/nouveau/nv50_display.c  1
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c  2
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c  6
-rw-r--r--  drivers/gpu/drm/radeon/cik.c  31
-rw-r--r--  drivers/gpu/drm/radeon/radeon_connectors.c  105
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c  4
-rw-r--r--  drivers/gpu/drm/radeon/radeon_gem.c  2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_object.c  2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_pm.c  6
-rw-r--r--  drivers/gpu/drm/scheduler/gpu_scheduler.c  2
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_crtc.c  4
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_dotclock.c  5
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_drv.c  3
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c  6
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_rgb.c  2
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_tcon.c  96
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_tcon.h  1
-rw-r--r--  drivers/gpu/drm/tegra/dc.c  16
-rw-r--r--  drivers/gpu/drm/tegra/drm.c  1
-rw-r--r--  drivers/gpu/drm/tegra/dsi.c  1
-rw-r--r--  drivers/gpu/drm/tegra/plane.c  9
-rw-r--r--  drivers/gpu/drm/udl/udl_fb.c  9
-rw-r--r--  drivers/gpu/drm/virtio/virtgpu_ioctl.c  17
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c  13
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.h  1
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_kms.c  39
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_kms.h  13
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c  5
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c  5
-rw-r--r--  drivers/gpu/ipu-v3/ipu-common.c  4
-rw-r--r--  drivers/gpu/ipu-v3/ipu-cpmem.c  2
-rw-r--r--  drivers/gpu/ipu-v3/ipu-csi.c  2
-rw-r--r--  drivers/gpu/ipu-v3/ipu-pre.c  3
-rw-r--r--  drivers/gpu/ipu-v3/ipu-prg.c  15
-rw-r--r--  drivers/hid/hid-ids.h  3
-rw-r--r--  drivers/hid/hid-quirks.c  3
-rw-r--r--  drivers/hwmon/coretemp.c  6
-rw-r--r--  drivers/hwmon/hwmon-vid.c  2
-rw-r--r--  drivers/hwmon/k10temp.c  7
-rw-r--r--  drivers/hwmon/k8temp.c  2
-rw-r--r--  drivers/i2c/busses/Kconfig  2
-rw-r--r--  drivers/i2c/busses/i2c-bcm2835.c  21
-rw-r--r--  drivers/i2c/busses/i2c-designware-master.c  4
-rw-r--r--  drivers/i2c/busses/i2c-i801.c  1
-rw-r--r--  drivers/i2c/busses/i2c-octeon-core.c  1
-rw-r--r--  drivers/i2c/busses/i2c-octeon-core.h  2
-rw-r--r--  drivers/i2c/busses/i2c-sirf.c  4
-rw-r--r--  drivers/ide/ide-probe.c  2
-rw-r--r--  drivers/iio/adc/aspeed_adc.c  7
-rw-r--r--  drivers/iio/adc/stm32-adc.c  7
-rw-r--r--  drivers/iio/imu/adis_trigger.c  7
-rw-r--r--  drivers/iio/industrialio-buffer.c  2
-rw-r--r--  drivers/iio/proximity/Kconfig  2
-rw-r--r--  drivers/infiniband/core/addr.c  15
-rw-r--r--  drivers/infiniband/core/cma.c  17
-rw-r--r--  drivers/infiniband/core/core_priv.h  7
-rw-r--r--  drivers/infiniband/core/cq.c  21
-rw-r--r--  drivers/infiniband/core/device.c  6
-rw-r--r--  drivers/infiniband/core/rdma_core.c  38
-rw-r--r--  drivers/infiniband/core/restrack.c  23
-rw-r--r--  drivers/infiniband/core/sa_query.c  7
-rw-r--r--  drivers/infiniband/core/ucma.c  42
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c  50
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c  3
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl_merge.c  18
-rw-r--r--  drivers/infiniband/core/uverbs_main.c  29
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c  12
-rw-r--r--  drivers/infiniband/core/verbs.c  3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h  6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c  76
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h  5
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c  24
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c  134
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h  16
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c  9
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h  1
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c  17
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h  25
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c  1
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c  4
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c  11
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile  1
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c  74
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c  192
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.h  72
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c  430
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  41
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c  19
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c  61
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c  15
-rw-r--r--  drivers/infiniband/hw/qedr/main.c  4
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_iw_cm.c  19
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c  13
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c  28
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c  4
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c  4
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c  4
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.c  10
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_fs.c  2
-rw-r--r--  drivers/input/keyboard/matrix_keypad.c  4
-rw-r--r--  drivers/input/mouse/synaptics.c  1
-rw-r--r--  drivers/input/touchscreen/mms114.c  15
-rw-r--r--  drivers/iommu/intel-svm.c  2
-rw-r--r--  drivers/irqchip/irq-bcm7038-l1.c  3
-rw-r--r--  drivers/irqchip/irq-bcm7120-l2.c  3
-rw-r--r--  drivers/irqchip/irq-brcmstb-l2.c  3
-rw-r--r--  drivers/irqchip/irq-gic-v2m.c  46
-rw-r--r--  drivers/irqchip/irq-gic-v3-its-pci-msi.c  2
-rw-r--r--  drivers/irqchip/irq-gic-v3-its-platform-msi.c  2
-rw-r--r--  drivers/irqchip/irq-gic-v3-its.c  15
-rw-r--r--  drivers/irqchip/irq-gic-v3.c  4
-rw-r--r--  drivers/irqchip/irq-imx-gpcv2.c  14
-rw-r--r--  drivers/irqchip/irq-mips-gic.c  2
-rw-r--r--  drivers/macintosh/macio_asic.c  1
-rw-r--r--  drivers/md/bcache/request.c  2
-rw-r--r--  drivers/md/bcache/super.c  29
-rw-r--r--  drivers/md/dm-bufio.c  16
-rw-r--r--  drivers/md/dm-mpath.c  77
-rw-r--r--  drivers/md/dm-raid.c  7
-rw-r--r--  drivers/md/dm-table.c  16
-rw-r--r--  drivers/md/dm.c  38
-rw-r--r--  drivers/md/md-multipath.c  2
-rw-r--r--  drivers/md/md.c  53
-rw-r--r--  drivers/md/md.h  2
-rw-r--r--  drivers/md/raid1.c  13
-rw-r--r--  drivers/md/raid1.h  12
-rw-r--r--  drivers/md/raid10.c  18
-rw-r--r--  drivers/md/raid10.h  13
-rw-r--r--  drivers/md/raid5-log.h  3
-rw-r--r--  drivers/md/raid5-ppl.c  10
-rw-r--r--  drivers/md/raid5.c  19
-rw-r--r--  drivers/md/raid5.h  12
-rw-r--r--  drivers/media/Kconfig  2
-rw-r--r--  drivers/media/common/videobuf2/Kconfig  3
-rw-r--r--  drivers/media/common/videobuf2/Makefile  9
-rw-r--r--  drivers/media/common/videobuf2/vb2-trace.c (renamed from drivers/media/v4l2-core/vb2-trace.c)  0
-rw-r--r--  drivers/media/dvb-core/Makefile  2
-rw-r--r--  drivers/media/dvb-core/dmxdev.c  115
-rw-r--r--  drivers/media/dvb-core/dvb_demux.c  112
-rw-r--r--  drivers/media/dvb-core/dvb_net.c  5
-rw-r--r--  drivers/media/dvb-core/dvb_vb2.c  31
-rw-r--r--  drivers/media/dvb-frontends/m88ds3103.c  7
-rw-r--r--  drivers/media/i2c/tvp5150.c  88
-rw-r--r--  drivers/media/pci/ttpci/av7110.c  5
-rw-r--r--  drivers/media/pci/ttpci/av7110_av.c  6
-rw-r--r--  drivers/media/usb/au0828/Kconfig  2
-rw-r--r--  drivers/media/usb/ttusb-dec/ttusb_dec.c  10
-rw-r--r--  drivers/media/v4l2-core/Kconfig  1
-rw-r--r--  drivers/media/v4l2-core/Makefile  3
-rw-r--r--  drivers/memory/brcmstb_dpfe.c  74
-rw-r--r--  drivers/message/fusion/mptctl.c  2
-rw-r--r--  drivers/misc/mei/bus.c  6
-rw-r--r--  drivers/misc/mei/client.c  6
-rw-r--r--  drivers/misc/mei/hw-me-regs.h  5
-rw-r--r--  drivers/misc/mei/pci-me.c  5
-rw-r--r--  drivers/misc/ocxl/file.c  35
-rw-r--r--  drivers/mmc/core/block.c  19
-rw-r--r--  drivers/mmc/core/card.h  1
-rw-r--r--  drivers/mmc/core/mmc_ops.c  4
-rw-r--r--  drivers/mmc/core/quirks.h  6
-rw-r--r--  drivers/mmc/host/bcm2835.c  3
-rw-r--r--  drivers/mmc/host/dw_mmc-exynos.c  9
-rw-r--r--  drivers/mmc/host/dw_mmc-k3.c  4
-rw-r--r--  drivers/mmc/host/dw_mmc-rockchip.c  1
-rw-r--r--  drivers/mmc/host/dw_mmc-zx.c  1
-rw-r--r--  drivers/mmc/host/dw_mmc.c  99
-rw-r--r--  drivers/mmc/host/dw_mmc.h  2
-rw-r--r--  drivers/mmc/host/meson-gx-mmc.c  19
-rw-r--r--  drivers/mmc/host/sdhci-acpi.c  2
-rw-r--r--  drivers/mmc/host/sdhci-pci-core.c  35
-rw-r--r--  drivers/mtd/nand/Kconfig  2
-rw-r--r--  drivers/mtd/nand/vf610_nfc.c  6
-rw-r--r--  drivers/net/Kconfig  4
-rw-r--r--  drivers/net/Space.c  6
-rw-r--r--  drivers/net/bonding/bond_procfs.c  2
-rw-r--r--  drivers/net/bonding/bond_sysfs.c  73
-rw-r--r--  drivers/net/bonding/bond_sysfs_slave.c  4
-rw-r--r--  drivers/net/caif/caif_serial.c  32
-rw-r--r--  drivers/net/caif/caif_spi.c  16
-rw-r--r--  drivers/net/caif/caif_virtio.c  16
-rw-r--r--  drivers/net/can/at91_can.c  3
-rw-r--r--  drivers/net/can/cc770/cc770.c  104
-rw-r--r--  drivers/net/can/cc770/cc770.h  2
-rw-r--r--  drivers/net/can/cc770/cc770_isa.c  16
-rw-r--r--  drivers/net/can/grcan.c  4
-rw-r--r--  drivers/net/can/ifi_canfd/ifi_canfd.c  75
-rw-r--r--  drivers/net/can/janz-ican3.c  6
-rw-r--r--  drivers/net/can/m_can/m_can.c  7
-rw-r--r--  drivers/net/can/peak_canfd/peak_canfd.c  25
-rw-r--r--  drivers/net/can/peak_canfd/peak_pciefd_main.c  8
-rw-r--r--  drivers/net/can/sja1000/sja1000_isa.c  14
-rw-r--r--  drivers/net/can/softing/softing_main.c  4
-rw-r--r--  drivers/net/can/spi/mcp251x.c  2
-rw-r--r--  drivers/net/can/usb/esd_usb2.c  6
-rw-r--r--  drivers/net/can/vcan.c  2
-rw-r--r--  drivers/net/dsa/Makefile  5
-rw-r--r--  drivers/net/dsa/b53/b53_common.c  6
-rw-r--r--  drivers/net/dsa/b53/b53_priv.h  2
-rw-r--r--  drivers/net/dsa/dsa_loop.c  2
-rw-r--r--  drivers/net/dsa/lan9303-core.c  2
-rw-r--r--  drivers/net/dsa/microchip/ksz_common.c  2
-rw-r--r--  drivers/net/dsa/mt7530.c  2
-rw-r--r--  drivers/net/dsa/mv88e6xxx/Kconfig  10
-rw-r--r--  drivers/net/dsa/mv88e6xxx/Makefile  4
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.c  401
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.h  130
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2.c  43
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2.h  115
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2_avb.c  193
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2_scratch.c  291
-rw-r--r--  drivers/net/dsa/mv88e6xxx/hwtstamp.c  576
-rw-r--r--  drivers/net/dsa/mv88e6xxx/hwtstamp.h  172
-rw-r--r--  drivers/net/dsa/mv88e6xxx/ptp.c  381
-rw-r--r--  drivers/net/dsa/mv88e6xxx/ptp.h  108
-rw-r--r--  drivers/net/dsa/mv88e6xxx/serdes.c  106
-rw-r--r--  drivers/net/dsa/mv88e6xxx/serdes.h  6
-rw-r--r--  drivers/net/dsa/qca8k.c  2
-rw-r--r--  drivers/net/ethernet/8390/Kconfig  2
-rw-r--r--  drivers/net/ethernet/8390/Makefile  6
-rw-r--r--  drivers/net/ethernet/8390/apne.c  2
-rw-r--r--  drivers/net/ethernet/8390/ax88796.c  3
-rw-r--r--  drivers/net/ethernet/8390/axnet_cs.c  2
-rw-r--r--  drivers/net/ethernet/8390/etherh.c  17
-rw-r--r--  drivers/net/ethernet/8390/hydra.c  4
-rw-r--r--  drivers/net/ethernet/8390/lib8390.c  4
-rw-r--r--  drivers/net/ethernet/8390/mac8390.c  171
-rw-r--r--  drivers/net/ethernet/8390/mcf8390.c  4
-rw-r--r--  drivers/net/ethernet/8390/ne.c  4
-rw-r--r--  drivers/net/ethernet/8390/ne2k-pci.c  2
-rw-r--r--  drivers/net/ethernet/8390/pcnet_cs.c  4
-rw-r--r--  drivers/net/ethernet/8390/smc-ultra.c  2
-rw-r--r--  drivers/net/ethernet/8390/stnic.c  2
-rw-r--r--  drivers/net/ethernet/8390/wd.c  4
-rw-r--r--  drivers/net/ethernet/8390/zorro8390.c  5
-rw-r--r--  drivers/net/ethernet/altera/altera_tse_main.c  6
-rw-r--r--  drivers/net/ethernet/amazon/ena/ena_com.c  8
-rw-r--r--  drivers/net/ethernet/amazon/ena/ena_eth_com.h  8
-rw-r--r--  drivers/net/ethernet/amazon/ena/ena_netdev.c  5
-rw-r--r--  drivers/net/ethernet/amd/amd8111e.c  2
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-drv.c  10
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-main.c  2
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-pci.c  2
-rw-r--r--  drivers/net/ethernet/apple/macmace.c  25
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_cfg.h  2
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_nic.c  22
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_nic.h  1
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c  29
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_ring.c  7
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_ring.h  2
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_vec.c  11
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile  2
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c  68
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h  1
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/ver.h  2
-rw-r--r--  drivers/net/ethernet/arc/emac_rockchip.c  6
-rw-r--r--  drivers/net/ethernet/broadcom/bcmsysport.c  174
-rw-r--r--  drivers/net/ethernet/broadcom/bcmsysport.h  16
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2.c  2
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x.h  12
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  5
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h  4
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c  5
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  18
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  4
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c  182
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.h  10
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c  33
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmgenet.c  109
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmgenet.h  12
-rw-r--r--  drivers/net/ethernet/broadcom/sb1250-mac.c  10
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c  39
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.h  5
-rw-r--r--  drivers/net/ethernet/brocade/bna/bnad.c  2
-rw-r--r--  drivers/net/ethernet/brocade/bna/bnad_debugfs.c  10
-rw-r--r--  drivers/net/ethernet/cadence/macb_main.c  84
-rw-r--r--  drivers/net/ethernet/cavium/common/cavium_ptp.c  2
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_core.c  122
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_ethtool.c  24
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_main.c  415
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_vf_main.c  255
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/liquidio_common.h  22
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_device.h  2
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_droq.c  83
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_droq.h  11
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c  5
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_network.h  72
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/request_manager.c  5
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/response_manager.c  6
-rw-r--r--  drivers/net/ethernet/cavium/thunder/nicvf_main.c  112
-rw-r--r--  drivers/net/ethernet/cavium/thunder/nicvf_queues.c  11
-rw-r--r--  drivers/net/ethernet/cavium/thunder/nicvf_queues.h  4
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c  6
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb3/t3_hw.c  8
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/Makefile  2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c  88
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  23
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c  2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c  122
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c  24
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c  6
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c  326
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c  3
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h  6
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/sched.h  4
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/srq.c  138
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/srq.h  65
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_hw.c  225
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_msg.h  71
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h  61
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c  61
-rw-r--r--  drivers/net/ethernet/cirrus/mac89x0.c  158
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic.h  3
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_ethtool.c  36
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_main.c  86
-rw-r--r--  drivers/net/ethernet/cisco/enic/vnic_dev.c  22
-rw-r--r--  drivers/net/ethernet/cisco/enic/vnic_dev.h  3
-rw-r--r--  drivers/net/ethernet/cisco/enic/vnic_devcmd.h  5
-rw-r--r--  drivers/net/ethernet/cisco/enic/vnic_nic.h  1
-rw-r--r--  drivers/net/ethernet/cortina/gemini.c  1
-rw-r--r--  drivers/net/ethernet/ec_bhf.c  2
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_cmds.c  2
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_cmds.h  2
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_main.c  6
-rw-r--r--  drivers/net/ethernet/freescale/dpaa/dpaa_eth.c  73
-rw-r--r--  drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c  4
-rw-r--r--  drivers/net/ethernet/freescale/fec_main.c  2
-rw-r--r--  drivers/net/ethernet/freescale/fman/Kconfig  1
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_dtsec.c  21
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_dtsec.h  1
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_memac.c  32
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_memac.h  1
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_tgec.c  33
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_tgec.h  1
-rw-r--r--  drivers/net/ethernet/freescale/fman/mac.c  4
-rw-r--r--  drivers/net/ethernet/freescale/fman/mac.h  2
-rw-r--r--  drivers/net/ethernet/freescale/gianfar.c  30
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c  2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c  2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c  2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_ethtool.c  4
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h  18
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hnae3.h  18
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c  394
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h  18
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c  153
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  4
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c  16
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c  559
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h  27
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c  94
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c  6
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c  76
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h  8
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c  6
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h  2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  475
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h  35
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c  95
-rw-r--r--  drivers/net/ethernet/ibm/ehea/ehea_main.c  7
-rw-r--r--  drivers/net/ethernet/ibm/ibmveth.c  2
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.c  645
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.h  16
-rw-r--r--  drivers/net/ethernet/intel/Kconfig  14
-rw-r--r--  drivers/net/ethernet/intel/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/e100.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_ethtool.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_hw.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_hw.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_main.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_osdep.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_param.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/80003es2lan.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/80003es2lan.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/82571.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/82571.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/defines.h  22
-rw-r--r--  drivers/net/ethernet/intel/e1000e/e1000.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ethtool.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/hw.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ich8lan.c  43
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ich8lan.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/mac.c  26
-rw-r--r--  drivers/net/ethernet/intel/e1000e/mac.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/manage.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/manage.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c  38
-rw-r--r--  drivers/net/ethernet/intel/e1000e/nvm.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/nvm.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/param.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/phy.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/phy.h  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ptp.c  1
-rw-r--r--  drivers/net/ethernet/intel/e1000e/regs.h  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k.h  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_common.c  6
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_common.h  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_iov.c  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_main.c  5
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_mbx.c  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_mbx.h  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_netdev.c  11
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_pci.c  14
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_pf.c  5
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_pf.h  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_tlv.c  8
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_tlv.h  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_type.h  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_vf.c  1
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_vf.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e.h  79
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_adminq.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_adminq.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h  38
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_alloc.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_client.c  24
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_client.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_common.c  64
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_dcb.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_dcb.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_debugfs.c  41
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_devids.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_diag.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_diag.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ethtool.c  58
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_fcoe.c  1571
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_fcoe.h  127
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_hmc.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_hmc.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c  411
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_nvm.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_osdep.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_prototype.h  5
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ptp.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_register.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_status.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_trace.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c  121
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.h  5
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_type.h  6
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c  1091
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h  21
-rw-r--r--  drivers/net/ethernet/intel/i40evf/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_adminq.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_adminq.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_alloc.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_common.c  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_devids.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_hmc.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_osdep.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_prototype.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_register.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_status.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_trace.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.c  3
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_type.h  1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf.h  76
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c  7
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_main.c  912
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c  315
-rw-r--r--  drivers/net/ethernet/intel/ice/Makefile  17
-rw-r--r--  drivers/net/ethernet/intel/ice/ice.h  312
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_adminq_cmd.h  1352
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_common.c  2233
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_common.h  86
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_controlq.c  1066
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_controlq.h  94
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_devids.h  19
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ethtool.c  940
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_hw_autogen.h  266
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h  473
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_main.c  5495
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_nvm.c  236
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_osdep.h  73
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sched.c  1659
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sched.h  43
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_status.h  32
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_switch.c  1883
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_switch.h  161
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.c  1782
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.h  192
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_type.h  394
-rw-r--r--  drivers/net/ethernet/intel/igb/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_82575.c  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_82575.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_defines.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_hw.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_i210.c  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_i210.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_mac.c  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_mac.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_mbx.c  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_mbx.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_nvm.c  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_nvm.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_phy.c  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_phy.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_regs.h  1
-rw-r--r--  drivers/net/ethernet/intel/igb/igb.h  2
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_ethtool.c  1
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_hwmon.c  3
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c  36
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_ptp.c  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/defines.h  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/ethtool.c  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/igbvf.h  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/mbx.c  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/mbx.h  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/netdev.c  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/regs.h  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/vf.c  1
-rw-r--r--  drivers/net/ethernet/intel/igbvf/vf.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgb/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/ixgb/ixgb.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgb/ixgb_ee.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgb/ixgb_hw.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgb/ixgb_ids.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgb/ixgb_osdep.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_common.c  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_common.h  2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c  5
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c  69
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h  2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  90
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_model.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c  6
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c  2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_type.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c  4
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/Makefile  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/defines.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/ethtool.c  116
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/ixgbevf.h  101
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c  1214
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/mbx.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/regs.h  1
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/vf.h  1
-rw-r--r--  drivers/net/ethernet/marvell/mvneta.c  8
-rw-r--r--  drivers/net/ethernet/marvell/mvpp2.c  1253
-rw-r--r--  drivers/net/ethernet/marvell/skge.c  2
-rw-r--r--  drivers/net/ethernet/marvell/sky2.c  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_ethtool.c  14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_netdev.c  8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_port.c  38
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_rx.c  31
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/main.c  16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h  10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c  59
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h  98
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/alloc.c  37
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c  4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cq.c  113
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/dev.c  8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c  11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h  6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h  4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c  323
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h  24
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c  39
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h  5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c  60
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c  87
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c  60
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c  3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_stats.c  126
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_stats.h  6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c  15
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tx.c  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c  92
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  62
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  55
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c  134
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c  1290
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h  76
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  217
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h  72
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  156
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fw.c  3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/health.c  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c  1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c  51
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h  28
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/port.c  64
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/vport.c  26
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/wq.c  18
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/wq.h  22
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/Kconfig  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/Makefile  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c  30
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h  10
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h  20
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c  10
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/pci.c  9
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/reg.h  230
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.c  606
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.h  44
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c  57
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c  34
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c  5
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c  12
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c  174
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h  15
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c  189
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c  292
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h  9
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c  105
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c  206
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c  175
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h  1
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c  824
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h  107
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c  45
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/switchx2.c  8
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/trap.h  1
-rw-r--r--  drivers/net/ethernet/microchip/Kconfig  10
-rw-r--r--  drivers/net/ethernet/microchip/Makefile  3
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_main.c  2771
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_main.h  597
-rw-r--r--  drivers/net/ethernet/myricom/myri10ge/myri10ge.c  32
-rw-r--r--drivers/net/ethernet/natsemi/Kconfig6
-rw-r--r--drivers/net/ethernet/natsemi/Makefile2
-rw-r--r--drivers/net/ethernet/natsemi/jazzsonic.c32
-rw-r--r--drivers/net/ethernet/natsemi/macsonic.c244
-rw-r--r--drivers/net/ethernet/natsemi/sonic.c99
-rw-r--r--drivers/net/ethernet/natsemi/sonic.h2
-rw-r--r--drivers/net/ethernet/natsemi/xtsonic.c30
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/Makefile2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/Makefile2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h31
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c93
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c49
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h280
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/Makefile2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nic/Makefile2
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c14
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.c5
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c10
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c19
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c8
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ptp.c2
-rw-r--r--drivers/net/ethernet/qlogic/qla3xxx.c5
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c2
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c30
-rw-r--r--drivers/net/ethernet/qlogic/qlge/qlge.h16
-rw-r--r--drivers/net/ethernet/qlogic/qlge/qlge_main.c3
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-mac.c23
-rw-r--r--drivers/net/ethernet/qualcomm/qca_debug.c2
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c1
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c139
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h2
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c12
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h8
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c9
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c5
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h8
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c4
-rw-r--r--drivers/net/ethernet/realtek/r8169.c1154
-rw-r--r--drivers/net/ethernet/renesas/ravb.h1
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c39
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c234
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.h23
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c4
-rw-r--r--drivers/net/ethernet/sfc/ef10.c796
-rw-r--r--drivers/net/ethernet/sfc/efx.c91
-rw-r--r--drivers/net/ethernet/sfc/efx.h19
-rw-r--r--drivers/net/ethernet/sfc/ethtool.c218
-rw-r--r--drivers/net/ethernet/sfc/falcon/enum.h1
-rw-r--r--drivers/net/ethernet/sfc/farch.c91
-rw-r--r--drivers/net/ethernet/sfc/filter.h7
-rw-r--r--drivers/net/ethernet/sfc/mcdi_mon.c2
-rw-r--r--drivers/net/ethernet/sfc/mcdi_pcol.h2822
-rw-r--r--drivers/net/ethernet/sfc/mcdi_port.c150
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h68
-rw-r--r--drivers/net/ethernet/sfc/nic.h7
-rw-r--r--drivers/net/ethernet/sfc/rx.c119
-rw-r--r--drivers/net/ethernet/sfc/siena.c27
-rw-r--r--drivers/net/ethernet/smsc/Kconfig2
-rw-r--r--drivers/net/ethernet/smsc/smsc911x.c4
-rw-r--r--drivers/net/ethernet/socionext/sni_ave.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c208
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c67
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c34
-rw-r--r--drivers/net/ethernet/sun/niu.c10
-rw-r--r--drivers/net/ethernet/sun/sunvnet.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw.c70
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.c2
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.h2
-rw-r--r--drivers/net/hamradio/bpqether.c3
-rw-r--r--drivers/net/hamradio/yam.c2
-rw-r--r--drivers/net/hyperv/Makefile2
-rw-r--r--drivers/net/hyperv/hyperv_net.h3
-rw-r--r--drivers/net/hyperv/netvsc.c136
-rw-r--r--drivers/net/hyperv/netvsc_drv.c349
-rw-r--r--drivers/net/hyperv/netvsc_trace.c7
-rw-r--r--drivers/net/hyperv/netvsc_trace.h182
-rw-r--r--drivers/net/hyperv/rndis_filter.c97
-rw-r--r--drivers/net/ieee802154/Kconfig11
-rw-r--r--drivers/net/ieee802154/Makefile1
-rw-r--r--drivers/net/ieee802154/at86rf230.c2
-rw-r--r--drivers/net/ieee802154/mcr20a.c1413
-rw-r--r--drivers/net/ieee802154/mcr20a.h498
-rw-r--r--drivers/net/ipvlan/ipvlan.h7
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c103
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c118
-rw-r--r--drivers/net/macsec.c5
-rw-r--r--drivers/net/macvlan.c4
-rw-r--r--drivers/net/phy/aquantia.c20
-rw-r--r--drivers/net/phy/bcm-phy-lib.c4
-rw-r--r--drivers/net/phy/bcm7xxx.c2
-rw-r--r--drivers/net/phy/cortina.c18
-rw-r--r--drivers/net/phy/dp83867.c19
-rw-r--r--drivers/net/phy/intel-xway.c44
-rw-r--r--drivers/net/phy/marvell.c6
-rw-r--r--drivers/net/phy/marvell10g.c13
-rw-r--r--drivers/net/phy/mdio-mux-mmioreg.c5
-rw-r--r--drivers/net/phy/micrel.c27
-rw-r--r--drivers/net/phy/phy-c45.c28
-rw-r--r--drivers/net/phy/phy-core.c4
-rw-r--r--drivers/net/phy/phy.c147
-rw-r--r--drivers/net/phy/phy_device.c54
-rw-r--r--drivers/net/phy/phylink.c45
-rw-r--r--drivers/net/phy/realtek.c2
-rw-r--r--drivers/net/phy/sfp-bus.c162
-rw-r--r--drivers/net/phy/sfp.c150
-rw-r--r--drivers/net/phy/spi_ks8995.c2
-rw-r--r--drivers/net/phy/teranetics.c32
-rw-r--r--drivers/net/ppp/ppp_generic.c37
-rw-r--r--drivers/net/ppp/pppoe.c2
-rw-r--r--drivers/net/team/team.c20
-rw-r--r--drivers/net/thunderbolt.c19
-rw-r--r--drivers/net/tun.c130
-rw-r--r--drivers/net/usb/ax88179_178a.c118
-rw-r--r--drivers/net/usb/cdc_eem.c5
-rw-r--r--drivers/net/usb/cdc_ether.c6
-rw-r--r--drivers/net/usb/cdc_ncm.c12
-rw-r--r--drivers/net/usb/hso.c8
-rw-r--r--drivers/net/usb/kalmia.c14
-rw-r--r--drivers/net/usb/lg-vl600.c6
-rw-r--r--drivers/net/usb/r8152.c2
-rw-r--r--drivers/net/usb/smsc75xx.c7
-rw-r--r--drivers/net/usb/usbnet.c10
-rw-r--r--drivers/net/virtio_net.c112
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c16
-rw-r--r--drivers/net/vmxnet3/vmxnet3_int.h7
-rw-r--r--drivers/net/vrf.c12
-rw-r--r--drivers/net/wan/hdlc_ppp.c5
-rw-r--r--drivers/net/wimax/i2400m/usb-rx.c3
-rw-r--r--drivers/net/wireless/admtek/Kconfig4
-rw-r--r--drivers/net/wireless/ath/Kconfig4
-rw-r--r--drivers/net/wireless/ath/ath9k/htc_drv_init.c1
-rw-r--r--drivers/net/wireless/ath/wil6210/cfg80211.c3
-rw-r--r--drivers/net/wireless/atmel/Kconfig4
-rw-r--r--drivers/net/wireless/broadcom/Kconfig4
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h1
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c3
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c3
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c119
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c10
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c3
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c24
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c20
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c95
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c35
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c1
-rw-r--r--drivers/net/wireless/cisco/Kconfig4
-rw-r--r--drivers/net/wireless/intel/Kconfig4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/Kconfig1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/debugfs.h18
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/init.c12
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/runtime.h7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c49
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs.c28
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c39
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.c33
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/time-event.c6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/tx.c13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx.c2
-rw-r--r--drivers/net/wireless/intersil/Kconfig4
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c98
-rw-r--r--drivers/net/wireless/mac80211_hwsim.h9
-rw-r--r--drivers/net/wireless/marvell/Kconfig4
-rw-r--r--drivers/net/wireless/marvell/mwifiex/11n.c30
-rw-r--r--drivers/net/wireless/marvell/mwifiex/cfg80211.c40
-rw-r--r--drivers/net/wireless/marvell/mwifiex/cmdevt.c3
-rw-r--r--drivers/net/wireless/marvell/mwifiex/decl.h17
-rw-r--r--drivers/net/wireless/marvell/mwifiex/fw.h7
-rw-r--r--drivers/net/wireless/marvell/mwifiex/main.c24
-rw-r--r--drivers/net/wireless/marvell/mwifiex/main.h29
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sta_cmd.c22
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c19
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sta_ioctl.c56
-rw-r--r--drivers/net/wireless/mediatek/Kconfig4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mac80211.c68
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76.h3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2.h1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c12
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2_init.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2_mac.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2_main.c36
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2_phy.c55
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2_regs.h2
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/eeprom.c26
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/initvals.h1
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/mac.c26
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/mac.h1
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/main.c3
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/mcu.c7
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/mt7601u.h3
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/usb.c52
-rw-r--r--drivers/net/wireless/quantenna/Kconfig4
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/bus.h3
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c418
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h1
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h1
-rw-r--r--drivers/net/wireless/ralink/Kconfig4
-rw-r--r--drivers/net/wireless/realtek/Kconfig4
-rw-r--r--drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/base.c34
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c55
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h25
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c22
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/efuse.c13
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/efuse.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/pci.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rc.c55
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/wifi.h39
-rw-r--r--drivers/net/wireless/rsi/Kconfig13
-rw-r--r--drivers/net/wireless/rsi/Makefile1
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_coex.c179
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_core.c20
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_hal.c56
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_main.c129
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_mgmt.c2
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_sdio.c58
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_sdio_ops.c65
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_usb.c186
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_usb_ops.c31
-rw-r--r--drivers/net/wireless/rsi/rsi_coex.h37
-rw-r--r--drivers/net/wireless/rsi/rsi_common.h5
-rw-r--r--drivers/net/wireless/rsi/rsi_hal.h22
-rw-r--r--drivers/net/wireless/rsi/rsi_main.h21
-rw-r--r--drivers/net/wireless/rsi/rsi_mgmt.h3
-rw-r--r--drivers/net/wireless/rsi/rsi_sdio.h8
-rw-r--r--drivers/net/wireless/rsi/rsi_usb.h17
-rw-r--r--drivers/net/wireless/st/Kconfig4
-rw-r--r--drivers/net/wireless/ti/Kconfig4
-rw-r--r--drivers/net/wireless/ti/wl1251/main.c102
-rw-r--r--drivers/net/wireless/ti/wl1251/tx.c4
-rw-r--r--drivers/net/wireless/zydas/Kconfig4
-rw-r--r--drivers/net/wireless/zydas/zd1211rw/zd_mac.c1
-rw-r--r--drivers/net/xen-netback/rx.c2
-rw-r--r--drivers/net/xen-netback/xenbus.c4
-rw-r--r--drivers/net/xen-netfront.c13
-rw-r--r--drivers/nvdimm/blk.c3
-rw-r--r--drivers/nvdimm/btt.c3
-rw-r--r--drivers/nvdimm/pfn_devs.c2
-rw-r--r--drivers/nvdimm/pmem.c3
-rw-r--r--drivers/nvdimm/region_devs.c17
-rw-r--r--drivers/nvme/host/core.c59
-rw-r--r--drivers/nvme/host/fabrics.c7
-rw-r--r--drivers/nvme/host/fabrics.h9
-rw-r--r--drivers/nvme/host/fc.c184
-rw-r--r--drivers/nvme/host/multipath.c43
-rw-r--r--drivers/nvme/host/nvme.h11
-rw-r--r--drivers/nvme/host/pci.c59
-rw-r--r--drivers/nvme/host/rdma.c20
-rw-r--r--drivers/nvme/target/core.c9
-rw-r--r--drivers/nvme/target/io-cmd.c7
-rw-r--r--drivers/nvme/target/loop.c4
-rw-r--r--drivers/of/property.c4
-rw-r--r--drivers/opp/cpu.c2
-rw-r--r--drivers/pci/dwc/pcie-designware-host.c2
-rw-r--r--drivers/pci/quirks.c41
-rw-r--r--drivers/pci/setup-res.c4
-rw-r--r--drivers/perf/arm_pmu.c138
-rw-r--r--drivers/perf/arm_pmu_acpi.c61
-rw-r--r--drivers/perf/arm_pmu_platform.c37
-rw-r--r--drivers/phy/qualcomm/phy-qcom-ufs.c5
-rw-r--r--drivers/pinctrl/meson/pinctrl-meson-axg.c4
-rw-r--r--drivers/platform/chrome/chromeos_laptop.c22
-rw-r--r--drivers/platform/x86/Kconfig28
-rw-r--r--drivers/platform/x86/Makefile5
-rw-r--r--drivers/platform/x86/dell-laptop.c20
-rw-r--r--drivers/platform/x86/dell-smbios-base.c (renamed from drivers/platform/x86/dell-smbios.c)31
-rw-r--r--drivers/platform/x86/dell-smbios-smm.c18
-rw-r--r--drivers/platform/x86/dell-smbios-wmi.c14
-rw-r--r--drivers/platform/x86/dell-smbios.h27
-rw-r--r--drivers/platform/x86/dell-wmi.c2
-rw-r--r--drivers/platform/x86/ideapad-laptop.c2
-rw-r--r--drivers/platform/x86/intel-hid.c1
-rw-r--r--drivers/platform/x86/intel-vbtn.c47
-rw-r--r--drivers/platform/x86/wmi.c8
-rw-r--r--drivers/regulator/core.c2
-rw-r--r--drivers/regulator/stm32-vrefbuf.c2
-rw-r--r--drivers/s390/block/dasd.c21
-rw-r--r--drivers/s390/cio/device_fsm.c7
-rw-r--r--drivers/s390/cio/device_ops.c72
-rw-r--r--drivers/s390/cio/io_sch.h1
-rw-r--r--drivers/s390/net/qeth_core.h8
-rw-r--r--drivers/s390/net/qeth_core_main.c90
-rw-r--r--drivers/s390/net/qeth_l2_main.c36
-rw-r--r--drivers/s390/net/qeth_l3.h68
-rw-r--r--drivers/s390/net/qeth_l3_main.c538
-rw-r--r--drivers/s390/net/qeth_l3_sys.c51
-rw-r--r--drivers/s390/virtio/virtio_ccw.c29
-rw-r--r--drivers/scsi/Makefile1
-rw-r--r--drivers/scsi/aacraid/linit.c4
-rw-r--r--drivers/scsi/aic7xxx/aiclib.c34
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_io.c1
-rw-r--r--drivers/scsi/csiostor/csio_lnode.c2
-rw-r--r--drivers/scsi/device_handler/scsi_dh_alua.c5
-rw-r--r--drivers/scsi/hosts.c3
-rw-r--r--drivers/scsi/ibmvscsi/ibmvfc.h2
-rw-r--r--drivers/scsi/libsas/sas_scsi_host.c33
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fusion.c42
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c13
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.h3
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c23
-rw-r--r--drivers/scsi/qedi/qedi_fw.c5
-rw-r--r--drivers/scsi/qedi/qedi_main.c55
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h5
-rw-r--r--drivers/scsi/qla2xxx/qla_gs.c5
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c75
-rw-r--r--drivers/scsi/qla2xxx/qla_iocb.c7
-rw-r--r--drivers/scsi/qla2xxx/qla_isr.c6
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c75
-rw-r--r--drivers/scsi/qla2xxx/qla_target.c19
-rw-r--r--drivers/scsi/qla4xxx/ql4_def.h2
-rw-r--r--drivers/scsi/qla4xxx/ql4_os.c46
-rw-r--r--drivers/scsi/scsi_error.c5
-rw-r--r--drivers/scsi/scsi_lib.c4
-rw-r--r--drivers/scsi/sd.c3
-rw-r--r--drivers/scsi/sd_zbc.c35
-rw-r--r--drivers/scsi/storvsc_drv.c5
-rw-r--r--drivers/scsi/sym53c8xx_2/sym_hipd.c2
-rw-r--r--drivers/scsi/ufs/ufshcd.c2
-rw-r--r--drivers/soc/fsl/qbman/qman.c28
-rw-r--r--drivers/soc/imx/gpc.c12
-rw-r--r--drivers/ssb/Kconfig2
-rw-r--r--drivers/ssb/main.c4
-rw-r--r--drivers/staging/android/ashmem.c32
-rw-r--r--drivers/staging/android/ion/ion_cma_heap.c17
-rw-r--r--drivers/staging/comedi/drivers.c3
-rw-r--r--drivers/staging/fsl-mc/bus/Kconfig2
-rw-r--r--drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c2
-rw-r--r--drivers/staging/iio/adc/ad7192.c27
-rw-r--r--drivers/staging/iio/impedance-analyzer/ad5933.c4
-rw-r--r--drivers/tty/n_tty.c6
-rw-r--r--drivers/tty/serial/8250/8250_pci.c21
-rw-r--r--drivers/tty/serial/atmel_serial.c1
-rw-r--r--drivers/tty/serial/earlycon.c3
-rw-r--r--drivers/tty/serial/imx.c2
-rw-r--r--drivers/tty/serial/serial_core.c2
-rw-r--r--drivers/tty/serial/sh-sci.c2
-rw-r--r--drivers/tty/tty_io.c9
-rw-r--r--drivers/usb/Kconfig6
-rw-r--r--drivers/usb/class/cdc-acm.c9
-rw-r--r--drivers/usb/core/message.c4
-rw-r--r--drivers/usb/core/quirks.c6
-rw-r--r--drivers/usb/dwc2/gadget.c26
-rw-r--r--drivers/usb/dwc2/params.c6
-rw-r--r--drivers/usb/dwc3/core.c88
-rw-r--r--drivers/usb/dwc3/core.h21
-rw-r--r--drivers/usb/dwc3/dwc3-of-simple.c1
-rw-r--r--drivers/usb/dwc3/dwc3-omap.c16
-rw-r--r--drivers/usb/dwc3/ep0.c7
-rw-r--r--drivers/usb/dwc3/gadget.c2
-rw-r--r--drivers/usb/gadget/function/f_eem.c1
-rw-r--r--drivers/usb/gadget/function/f_fs.c45
-rw-r--r--drivers/usb/gadget/function/f_uac2.c2
-rw-r--r--drivers/usb/gadget/udc/Kconfig1
-rw-r--r--drivers/usb/gadget/udc/bdc/bdc_pci.c1
-rw-r--r--drivers/usb/gadget/udc/core.c2
-rw-r--r--drivers/usb/gadget/udc/fsl_udc_core.c4
-rw-r--r--drivers/usb/gadget/udc/renesas_usb3.c2
-rw-r--r--drivers/usb/host/Kconfig8
-rw-r--r--drivers/usb/host/ehci-hub.c4
-rw-r--r--drivers/usb/host/ehci-q.c12
-rw-r--r--drivers/usb/host/ohci-hcd.c13
-rw-r--r--drivers/usb/host/ohci-hub.c4
-rw-r--r--drivers/usb/host/ohci-q.c17
-rw-r--r--drivers/usb/host/pci-quirks.c109
-rw-r--r--drivers/usb/host/pci-quirks.h5
-rw-r--r--drivers/usb/host/xhci-dbgcap.c20
-rw-r--r--drivers/usb/host/xhci-dbgtty.c20
-rw-r--r--drivers/usb/host/xhci-debugfs.c4
-rw-r--r--drivers/usb/host/xhci-hub.c25
-rw-r--r--drivers/usb/host/xhci-pci.c14
-rw-r--r--drivers/usb/host/xhci-plat.c11
-rw-r--r--drivers/usb/host/xhci-rcar.c4
-rw-r--r--drivers/usb/host/xhci.c13
-rw-r--r--drivers/usb/host/xhci.h25
-rw-r--r--drivers/usb/misc/ldusb.c6
-rw-r--r--drivers/usb/mon/mon_text.c126
-rw-r--r--drivers/usb/musb/musb_core.c7
-rw-r--r--drivers/usb/musb/musb_host.c8
-rw-r--r--drivers/usb/phy/phy-mxs-usb.c3
-rw-r--r--drivers/usb/renesas_usbhs/fifo.c5
-rw-r--r--drivers/usb/serial/option.c7
-rw-r--r--drivers/usb/storage/uas.c2
-rw-r--r--drivers/usb/storage/unusual_devs.h7
-rw-r--r--drivers/usb/typec/fusb302/fusb302.c3
-rw-r--r--drivers/usb/typec/tcpm.c163
-rw-r--r--drivers/usb/usbip/stub_dev.c3
-rw-r--r--drivers/usb/usbip/vhci_hcd.c2
-rw-r--r--drivers/usb/usbip/vudc_sysfs.c8
-rw-r--r--drivers/vfio/vfio_iommu_type1.c18
-rw-r--r--drivers/vhost/net.c8
-rw-r--r--drivers/vhost/vsock.c4
-rw-r--r--drivers/video/fbdev/geode/video_gx.c2
-rw-r--r--drivers/video/fbdev/sbuslib.c4
-rw-r--r--drivers/virtio/virtio_ring.c2
-rw-r--r--drivers/watchdog/Kconfig4
-rw-r--r--drivers/watchdog/f71808e_wdt.c3
-rw-r--r--drivers/watchdog/hpwdt.c501
-rw-r--r--drivers/watchdog/sbsa_gwdt.c3
-rw-r--r--drivers/watchdog/wdat_wdt.c2
-rw-r--r--drivers/xen/events/events_base.c4
-rw-r--r--drivers/xen/pvcalls-back.c2
-rw-r--r--drivers/xen/pvcalls-front.c208
-rw-r--r--drivers/xen/tmem.c4
-rw-r--r--drivers/xen/xenbus/xenbus.h1
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c1
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c5
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c3
-rw-r--r--fs/aio.c44
-rw-r--r--fs/block_dev.c49
-rw-r--r--fs/btrfs/backref.c12
-rw-r--r--fs/btrfs/ctree.h7
-rw-r--r--fs/btrfs/delayed-ref.c3
-rw-r--r--fs/btrfs/extent-tree.c4
-rw-r--r--fs/btrfs/inode-item.c44
-rw-r--r--fs/btrfs/inode.c52
-rw-r--r--fs/btrfs/qgroup.c9
-rw-r--r--fs/btrfs/raid56.c1
-rw-r--r--fs/btrfs/relocation.c18
-rw-r--r--fs/btrfs/send.c3
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/tree-log.c146
-rw-r--r--fs/btrfs/volumes.c12
-rw-r--r--fs/ceph/caps.c26
-rw-r--r--fs/ceph/dir.c28
-rw-r--r--fs/ceph/super.c27
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/dcache.c21
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/efivarfs/file.c6
-rw-r--r--fs/gfs2/bmap.c40
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/namei.c5
-rw-r--r--fs/nfs/callback_proc.c14
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs4client.c6
-rw-r--r--fs/nfs/pnfs.c13
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfs/write.c83
-rw-r--r--fs/nfsd/nfs4state.c62
-rw-r--r--fs/nsfs.c1
-rw-r--r--fs/overlayfs/Kconfig14
-rw-r--r--fs/overlayfs/export.c216
-rw-r--r--fs/overlayfs/inode.c58
-rw-r--r--fs/overlayfs/namei.c6
-rw-r--r--fs/overlayfs/overlayfs.h1
-rw-r--r--fs/overlayfs/super.c1
-rw-r--r--fs/proc/kcore.c4
-rw-r--r--fs/signalfd.c15
-rw-r--r--fs/sysfs/symlink.c1
-rw-r--r--fs/xfs/scrub/agheader.c3
-rw-r--r--fs/xfs/xfs_iomap.c42
-rw-r--r--fs/xfs/xfs_refcount_item.c9
-rw-r--r--fs/xfs/xfs_rmap_item.c4
-rw-r--r--fs/xfs/xfs_super.c2
-rw-r--r--include/asm-generic/bitops/lock.h3
-rw-r--r--include/asm-generic/bug.h1
-rw-r--r--include/asm-generic/pgtable.h10
-rw-r--r--include/drm/drm_atomic.h9
-rw-r--r--include/drm/drm_crtc_helper.h1
-rw-r--r--include/drm/drm_drv.h1
-rw-r--r--include/dt-bindings/net/ti-dp83867.h14
-rw-r--r--include/kvm/arm_vgic.h1
-rw-r--r--include/linux/acpi.h4
-rw-r--r--include/linux/atalk.h2
-rw-r--r--include/linux/avf/virtchnl.h107
-rw-r--r--include/linux/bio.h4
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/bpf-cgroup.h2
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/cgroup-defs.h4
-rw-r--r--include/linux/compat.h30
-rw-r--r--include/linux/compiler-clang.h5
-rw-r--r--include/linux/compiler-gcc.h26
-rw-r--r--include/linux/compiler.h9
-rw-r--r--include/linux/cpuidle.h2
-rw-r--r--include/linux/cpumask.h2
-rw-r--r--include/linux/dma-mapping.h2
-rw-r--r--include/linux/ethtool.h5
-rw-r--r--include/linux/filter.h19
-rw-r--r--include/linux/fs.h3
-rw-r--r--include/linux/fwnode.h4
-rw-r--r--include/linux/genhd.h4
-rw-r--r--include/linux/ieee80211.h14
-rw-r--r--include/linux/if_tun.h4
-rw-r--r--include/linux/if_vlan.h66
-rw-r--r--include/linux/init.h8
-rw-r--r--include/linux/irqchip/arm-gic-v3.h1
-rw-r--r--include/linux/irqchip/arm-gic.h1
-rw-r--r--include/linux/jump_label.h3
-rw-r--r--include/linux/kconfig.h9
-rw-r--r--include/linux/kcore.h1
-rw-r--r--include/linux/kernel.h1
-rw-r--r--include/linux/kvm_host.h6
-rw-r--r--include/linux/memblock.h1
-rw-r--r--include/linux/memcontrol.h24
-rw-r--r--include/linux/mlx5/accel.h144
-rw-r--r--include/linux/mlx5/cq.h14
-rw-r--r--include/linux/mlx5/device.h4
-rw-r--r--include/linux/mlx5/driver.h98
-rw-r--r--include/linux/mlx5/eswitch.h58
-rw-r--r--include/linux/mlx5/fs.h12
-rw-r--r--include/linux/mlx5/fs_helpers.h134
-rw-r--r--include/linux/mlx5/mlx5_ifc.h125
-rw-r--r--include/linux/mlx5/mlx5_ifc_fpga.h92
-rw-r--r--include/linux/mlx5/port.h6
-rw-r--r--include/linux/mlx5/vport.h3
-rw-r--r--include/linux/mm_inline.h6
-rw-r--r--include/linux/mroute.h117
-rw-r--r--include/linux/mroute6.h70
-rw-r--r--include/linux/mroute_base.h474
-rw-r--r--include/linux/mutex.h5
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/netdevice.h118
-rw-r--r--include/linux/netfilter/x_tables.h2
-rw-r--r--include/linux/nospec.h18
-rw-r--r--include/linux/of_pci.h17
-rw-r--r--include/linux/percpu-refcount.h18
-rw-r--r--include/linux/perf/arm_pmu.h26
-rw-r--r--include/linux/phy.h14
-rw-r--r--include/linux/property.h2
-rw-r--r--include/linux/ptp_classify.h4
-rw-r--r--include/linux/ptr_ring.h9
-rw-r--r--include/linux/rhashtable.h4
-rw-r--r--include/linux/rtnetlink.h3
-rw-r--r--include/linux/sched/mm.h13
-rw-r--r--include/linux/sched/user.h4
-rw-r--r--include/linux/semaphore.h2
-rw-r--r--include/linux/sfp.h18
-rw-r--r--include/linux/skbuff.h62
-rw-r--r--include/linux/socket.h3
-rw-r--r--include/linux/swap.h2
-rw-r--r--include/linux/tty.h1
-rw-r--r--include/linux/u64_stats_sync.h22
-rw-r--r--include/linux/usb/quirks.h3
-rw-r--r--include/linux/workqueue.h2
-rw-r--r--include/media/demux.h21
-rw-r--r--include/media/dmxdev.h2
-rw-r--r--include/media/dvb_demux.h4
-rw-r--r--include/media/dvb_vb2.h20
-rw-r--r--include/net/Space.h2
-rw-r--r--include/net/act_api.h20
-rw-r--r--include/net/addrconf.h4
-rw-r--r--include/net/ax25.h2
-rw-r--r--include/net/cfg80211.h110
-rw-r--r--include/net/devlink.h24
-rw-r--r--include/net/dsa.h22
-rw-r--r--include/net/dst.h1
-rw-r--r--include/net/dst_cache.h4
-rw-r--r--include/net/ethoc.h1
-rw-r--r--include/net/fib_rules.h45
-rw-r--r--include/net/flow.h18
-rw-r--r--include/net/gre.h3
-rw-r--r--include/net/ieee80211_radiotap.h2
-rw-r--r--include/net/inet_connection_sock.h10
-rw-r--r--include/net/ip.h36
-rw-r--r--include/net/ip6_fib.h29
-rw-r--r--include/net/ip6_route.h18
-rw-r--r--include/net/ip_fib.h32
-rw-r--r--include/net/ip_tunnels.h18
-rw-r--r--include/net/ipv6.h21
-rw-r--r--include/net/lwtunnel.h15
-rw-r--r--include/net/mac80211.h25
-rw-r--r--include/net/net_namespace.h36
-rw-r--r--include/net/netevent.h3
-rw-r--r--include/net/netns/ipv4.h6
-rw-r--r--include/net/netns/ipv6.h8
-rw-r--r--include/net/pkt_cls.h8
-rw-r--r--include/net/regulatory.h2
-rw-r--r--include/net/route.h8
-rw-r--r--include/net/rsi_91x.h56
-rw-r--r--include/net/sch_generic.h21
-rw-r--r--include/net/sctp/auth.h21
-rw-r--r--include/net/sctp/command.h1
-rw-r--r--include/net/sctp/sctp.h11
-rw-r--r--include/net/sctp/sm.h3
-rw-r--r--include/net/sctp/structs.h20
-rw-r--r--include/net/sock.h15
-rw-r--r--include/net/tcp.h9
-rw-r--r--include/net/tcp_states.h26
-rw-r--r--include/net/tls.h61
-rw-r--r--include/net/udplite.h1
-rw-r--r--include/net/xfrm.h14
-rw-r--r--include/rdma/ib_verbs.h8
-rw-r--r--include/rdma/restrack.h4
-rw-r--r--include/rdma/uverbs_ioctl.h43
-rw-r--r--include/scsi/scsi_cmnd.h3
-rw-r--r--include/scsi/scsi_host.h2
-rw-r--r--include/soc/arc/mcip.h5
-rw-r--r--include/sound/ac97/regs.h2
-rw-r--r--include/trace/events/mmc.h4
-rw-r--r--include/trace/events/xen.h2
-rw-r--r--include/uapi/asm-generic/siginfo.h4
-rw-r--r--include/uapi/drm/virtgpu_drm.h1
-rw-r--r--include/uapi/linux/batadv_packet.h15
-rw-r--r--include/uapi/linux/batman_adv.h84
-rw-r--r--include/uapi/linux/blktrace_api.h2
-rw-r--r--include/uapi/linux/bpf.h48
-rw-r--r--include/uapi/linux/bpf_perf_event.h1
-rw-r--r--include/uapi/linux/dvb/dmx.h35
-rw-r--r--include/uapi/linux/ethtool.h36
-rw-r--r--include/uapi/linux/fib_rules.h11
-rw-r--r--include/uapi/linux/if_ether.h8
-rw-r--r--include/uapi/linux/if_link.h39
-rw-r--r--include/uapi/linux/kvm.h2
-rw-r--r--include/uapi/linux/libc-compat.h6
-rw-r--r--include/uapi/linux/ncsi.h115
-rw-r--r--include/uapi/linux/nl80211.h90
-rw-r--r--include/uapi/linux/pkt_cls.h4
-rw-r--r--include/uapi/linux/psp-sev.h2
-rw-r--r--include/uapi/linux/ptrace.h4
-rw-r--r--include/uapi/linux/rds.h8
-rw-r--r--include/uapi/linux/sctp.h43
-rw-r--r--include/uapi/linux/tc_ematch/tc_em_ipt.h20
-rw-r--r--include/uapi/linux/tcp.h3
-rw-r--r--include/uapi/linux/tipc.h102
-rw-r--r--include/uapi/linux/tipc_netlink.h21
-rw-r--r--include/uapi/linux/tipc_sockets_diag.h17
-rw-r--r--include/uapi/linux/tls.h2
-rw-r--r--include/uapi/misc/ocxl.h17
-rw-r--r--include/uapi/rdma/rdma_user_ioctl.h4
-rw-r--r--init/main.c2
-rw-r--r--kernel/bpf/arraymap.c33
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/cpumap.c2
-rw-r--r--kernel/bpf/inode.c3
-rw-r--r--kernel/bpf/lpm_trie.c14
-rw-r--r--kernel/bpf/sockmap.c736
-rw-r--r--kernel/bpf/stackmap.c257
-rw-r--r--kernel/bpf/syscall.c16
-rw-r--r--kernel/bpf/verifier.c51
-rw-r--r--kernel/cgroup/cgroup.c10
-rw-r--r--kernel/compat.c19
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/extable.c2
-rw-r--r--kernel/fail_function.c10
-rw-r--r--kernel/fork.c15
-rw-r--r--kernel/irq/irqdomain.c18
-rw-r--r--kernel/irq/matrix.c23
-rw-r--r--kernel/jump_label.c28
-rw-r--r--kernel/kprobes.c178
-rw-r--r--kernel/locking/qspinlock.c21
-rw-r--r--kernel/locking/rtmutex.c5
-rw-r--r--kernel/memremap.c16
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/panic.c2
-rw-r--r--kernel/printk/printk.c3
-rw-r--r--kernel/relay.c2
-rw-r--r--kernel/sched/core.c42
-rw-r--r--kernel/sched/cpufreq_schedutil.c2
-rw-r--r--kernel/sched/deadline.c6
-rw-r--r--kernel/sched/rt.c3
-rw-r--r--kernel/seccomp.c6
-rw-r--r--kernel/time/timer.c6
-rw-r--r--kernel/trace/bpf_trace.c90
-rw-r--r--kernel/user.c3
-rw-r--r--kernel/workqueue.c26
-rw-r--r--lib/Kconfig.debug1
-rw-r--r--lib/btree.c10
-rw-r--r--lib/bug.c4
-rw-r--r--lib/dma-debug.c10
-rw-r--r--lib/dma-direct.c5
-rw-r--r--lib/idr.c15
-rw-r--r--lib/ioremap.c6
-rw-r--r--lib/kobject_uevent.c96
-rw-r--r--lib/percpu-refcount.c2
-rw-r--r--lib/radix-tree.c2
-rw-r--r--lib/rhashtable.c4
-rw-r--r--lib/test_bpf.c99
-rw-r--r--lib/test_kmod.c2
-rw-r--r--lib/test_rhashtable.c134
-rw-r--r--lib/vsprintf.c2
-rw-r--r--mm/gup.c7
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/hugetlb.c9
-rw-r--r--mm/khugepaged.c15
-rw-r--r--mm/memblock.c28
-rw-r--r--mm/memory-failure.c2
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mempolicy.c3
-rw-r--r--mm/mlock.c6
-rw-r--r--mm/page_alloc.c21
-rw-r--r--mm/percpu-km.c8
-rw-r--r--mm/percpu-vm.c18
-rw-r--r--mm/percpu.c67
-rw-r--r--mm/shmem.c31
-rw-r--r--mm/swap.c84
-rw-r--r--mm/vmalloc.c10
-rw-r--r--mm/vmscan.c90
-rw-r--r--mm/zpool.c2
-rw-r--r--mm/zswap.c6
-rw-r--r--net/8021q/vlan_core.c4
-rw-r--r--net/8021q/vlanproc.c6
-rw-r--r--net/9p/trans_virtio.c3
-rw-r--r--net/appletalk/atalk_proc.c8
-rw-r--r--net/atm/atm_sysfs.c12
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/atm/proc.c2
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.c2
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c28
-rw-r--r--net/batman-adv/bat_iv_ogm.h2
-rw-r--r--net/batman-adv/bat_v.c4
-rw-r--r--net/batman-adv/bat_v.h2
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_elp.h2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bat_v_ogm.h2
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bitarray.h2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c24
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c2
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c158
-rw-r--r--net/batman-adv/distributed-arp-table.h10
-rw-r--r--net/batman-adv/fragmentation.c5
-rw-r--r--net/batman-adv/fragmentation.h2
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c11
-rw-r--r--net/batman-adv/hard-interface.h2
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h2
-rw-r--r--net/batman-adv/icmp_socket.c3
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/log.c3
-rw-r--r--net/batman-adv/log.h2
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h16
-rw-r--r--net/batman-adv/multicast.c299
-rw-r--r--net/batman-adv/multicast.h20
-rw-r--r--net/batman-adv/netlink.c90
-rw-r--r--net/batman-adv/netlink.h2
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c6
-rw-r--r--net/batman-adv/originator.h6
-rw-r--r--net/batman-adv/routing.c27
-rw-r--r--net/batman-adv/routing.h2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/send.h2
-rw-r--r--net/batman-adv/soft-interface.c10
-rw-r--r--net/batman-adv/soft-interface.h2
-rw-r--r--net/batman-adv/sysfs.c2
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/tp_meter.h2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h13
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/rfcomm/tty.c4
-rw-r--r--net/bluetooth/smp.c8
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_device.c2
-rw-r--r--net/bridge/br_if.c26
-rw-r--r--net/bridge/br_netfilter_hooks.c4
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/br_sysfs_br.c2
-rw-r--r--net/bridge/br_sysfs_if.c39
-rw-r--r--net/bridge/br_vlan.c2
-rw-r--r--net/bridge/netfilter/ebt_among.c65
-rw-r--r--net/bridge/netfilter/ebt_limit.c4
-rw-r--r--net/bridge/netfilter/ebtables.c44
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/gw.c2
-rw-r--r--net/ceph/ceph_common.c9
-rw-r--r--net/core/dev.c96
-rw-r--r--net/core/dev_ioctl.c7
-rw-r--r--net/core/devlink.c106
-rw-r--r--net/core/dst_cache.c4
-rw-r--r--net/core/ethtool.c75
-rw-r--r--net/core/fib_rules.c104
-rw-r--r--net/core/filter.c343
-rw-r--r--net/core/flow_dissector.c16
-rw-r--r--net/core/gen_estimator.c1
-rw-r--r--net/core/net-procfs.c6
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/net_namespace.c108
-rw-r--r--net/core/pktgen.c15
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/core/skbuff.c96
-rw-r--r--net/core/sock.c106
-rw-r--r--net/core/sock_diag.c12
-rw-r--r--net/core/sysctl_net_core.c12
-rw-r--r--net/dccp/proto.c5
-rw-r--r--net/decnet/af_decnet.c64
-rw-r--r--net/decnet/dn_dev.c2
-rw-r--r--net/decnet/dn_neigh.c2
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dsa/dsa.c36
-rw-r--r--net/dsa/legacy.c2
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/slave.c61
-rw-r--r--net/ieee802154/6lowpan/core.c13
-rw-r--r--net/ipv4/Kconfig5
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/fib_rules.c19
-rw-r--r--net/ipv4/fib_semantics.c21
-rw-r--r--net/ipv4/fib_trie.c11
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_diag.c3
-rw-r--r--net/ipv4/inet_fragment.c3
-rw-r--r--net/ipv4/inetpeer.c3
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_gre.c18
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_sockglue.c47
-rw-r--r--net/ipv4/ip_tunnel.c61
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipmr.c698
-rw-r--r--net/ipv4/ipmr_base.c365
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c7
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c37
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c12
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c4
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c6
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c3
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c7
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c158
-rw-r--r--net/ipv4/sysctl_net_ipv4.c34
-rw-r--r--net/ipv4/tcp.c70
-rw-r--r--net/ipv4/tcp_bbr.c38
-rw-r--r--net/ipv4/tcp_illinois.c2
-rw-r--r--net/ipv4/tcp_input.c31
-rw-r--r--net/ipv4/tcp_ipv4.c28
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_output.c83
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c93
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c3
-rw-r--r--net/ipv4/xfrm4_output.c3
-rw-r--r--net/ipv4/xfrm4_policy.c7
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c91
-rw-r--r--net/ipv6/anycast.c14
-rw-r--r--net/ipv6/datagram.c26
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/fib6_rules.c35
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_checksum.c5
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c33
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/ip6_tunnel.c30
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ip6mr.c1111
-rw-r--r--net/ipv6/ipv6_sockglue.c11
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c7
-rw-r--r--net/ipv6/netfilter.c9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c4
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c4
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c12
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c6
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c15
-rw-r--r--net/ipv6/proc.c7
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c376
-rw-r--r--net/ipv6/seg6_iptunnel.c7
-rw-r--r--net/ipv6/seg6_local.c4
-rw-r--r--net/ipv6/sit.c12
-rw-r--r--net/ipv6/sysctl_net_ipv6.c27
-rw-r--r--net/ipv6/tcp_ipv6.c13
-rw-r--r--net/ipv6/udp.c52
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c3
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/ipv6/xfrm6_policy.c5
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/iucv/af_iucv.c4
-rw-r--r--net/kcm/kcmproc.c4
-rw-r--r--net/kcm/kcmsock.c34
-rw-r--r--net/l2tp/l2tp_core.c188
-rw-r--r--net/l2tp/l2tp_core.h26
-rw-r--r--net/l2tp/l2tp_ip.c10
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/l2tp/l2tp_ppp.c62
-rw-r--r--net/llc/llc_proc.c4
-rw-r--r--net/llc/llc_sap.c7
-rw-r--r--net/mac80211/agg-rx.c18
-rw-r--r--net/mac80211/cfg.c3
-rw-r--r--net/mac80211/debugfs.c2
-rw-r--r--net/mac80211/debugfs_sta.c10
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/iface.c3
-rw-r--r--net/mac80211/mesh.c17
-rw-r--r--net/mac80211/michael.c2
-rw-r--r--net/mac80211/mlme.c21
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c8
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c8
-rw-r--r--net/mac80211/rx.c190
-rw-r--r--net/mac80211/spectmgmt.c7
-rw-r--r--net/mac80211/sta_info.c9
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c11
-rw-r--r--net/mac80211/tx.c19
-rw-r--r--net/mac80211/vht.c9
-rw-r--r--net/mac80211/wpa.c8
-rw-r--r--net/mpls/af_mpls.c2
-rw-r--r--net/ncsi/Makefile2
-rw-r--r--net/ncsi/internal.h3
-rw-r--r--net/ncsi/ncsi-manage.c30
-rw-r--r--net/ncsi/ncsi-netlink.c427
-rw-r--r--net/ncsi/ncsi-netlink.h20
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c2
-rw-r--r--net/netfilter/nf_conntrack_snmp.c2
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_log.c2
-rw-r--r--net/netfilter/nf_nat_proto_common.c7
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c26
-rw-r--r--net/netfilter/nfnetlink_queue.c5
-rw-r--r--net/netfilter/nft_set_hash.c2
-rw-r--r--net/netfilter/x_tables.c104
-rw-r--r--net/netfilter/xt_AUDIT.c4
-rw-r--r--net/netfilter/xt_CHECKSUM.c8
-rw-r--r--net/netfilter/xt_CONNSECMARK.c10
-rw-r--r--net/netfilter/xt_CT.c25
-rw-r--r--net/netfilter/xt_DSCP.c4
-rw-r--r--net/netfilter/xt_HL.c13
-rw-r--r--net/netfilter/xt_HMARK.c27
-rw-r--r--net/netfilter/xt_IDLETIMER.c11
-rw-r--r--net/netfilter/xt_LED.c16
-rw-r--r--net/netfilter/xt_NFQUEUE.c8
-rw-r--r--net/netfilter/xt_SECMARK.c18
-rw-r--r--net/netfilter/xt_TCPMSS.c10
-rw-r--r--net/netfilter/xt_TPROXY.c6
-rw-r--r--net/netfilter/xt_addrtype.c33
-rw-r--r--net/netfilter/xt_bpf.c4
-rw-r--r--net/netfilter/xt_cgroup.c8
-rw-r--r--net/netfilter/xt_cluster.c8
-rw-r--r--net/netfilter/xt_connbytes.c4
-rw-r--r--net/netfilter/xt_connlabel.c7
-rw-r--r--net/netfilter/xt_connmark.c8
-rw-r--r--net/netfilter/xt_conntrack.c4
-rw-r--r--net/netfilter/xt_dscp.c4
-rw-r--r--net/netfilter/xt_ecn.c4
-rw-r--r--net/netfilter/xt_hashlimit.c42
-rw-r--r--net/netfilter/xt_helper.c4
-rw-r--r--net/netfilter/xt_ipcomp.c2
-rw-r--r--net/netfilter/xt_ipvs.c3
-rw-r--r--net/netfilter/xt_l2tp.c22
-rw-r--r--net/netfilter/xt_limit.c4
-rw-r--r--net/netfilter/xt_nat.c5
-rw-r--r--net/netfilter/xt_nfacct.c6
-rw-r--r--net/netfilter/xt_physdev.c4
-rw-r--r--net/netfilter/xt_policy.c23
-rw-r--r--net/netfilter/xt_recent.c24
-rw-r--r--net/netfilter/xt_set.c50
-rw-r--r--net/netfilter/xt_socket.c10
-rw-r--r--net/netfilter/xt_state.c4
-rw-r--r--net/netfilter/xt_time.c6
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/netlink/genetlink.c2
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/nfc/llcp_commands.c4
-rw-r--r--net/nfc/netlink.c3
-rw-r--r--net/openvswitch/meter.c12
-rw-r--r--net/openvswitch/vport.c8
-rw-r--r--net/qrtr/smd.c1
-rw-r--r--net/rds/af_rds.c9
-rw-r--r--net/rds/connection.c9
-rw-r--r--net/rds/ib.c3
-rw-r--r--net/rds/message.c163
-rw-r--r--net/rds/rds.h31
-rw-r--r--net/rds/recv.c42
-rw-r--r--net/rds/send.c54
-rw-r--r--net/rds/tcp.c108
-rw-r--r--net/rds/tcp_listen.c14
-rw-r--r--net/rose/af_rose.c8
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/output.c2
-rw-r--r--net/rxrpc/recvmsg.c7
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c199
-rw-r--r--net/sched/act_bpf.c12
-rw-r--r--net/sched/act_connmark.c11
-rw-r--r--net/sched/act_csum.c15
-rw-r--r--net/sched/act_gact.c24
-rw-r--r--net/sched/act_ife.c10
-rw-r--r--net/sched/act_ipt.c29
-rw-r--r--net/sched/act_mirred.c25
-rw-r--r--net/sched/act_nat.c11
-rw-r--r--net/sched/act_pedit.c12
-rw-r--r--net/sched/act_police.c13
-rw-r--r--net/sched/act_sample.c13
-rw-r--r--net/sched/act_simple.c12
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_skbmod.c15
-rw-r--r--net/sched/act_tunnel_key.c20
-rw-r--r--net/sched/act_vlan.c17
-rw-r--r--net/sched/cls_api.c38
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/cls_u32.c24
-rw-r--r--net/sched/em_ipt.c257
-rw-r--r--net/sched/sch_api.c7
-rw-r--r--net/sched/sch_generic.c22
-rw-r--r--net/sched/sch_netem.c2
-rw-r--r--net/sched/sch_prio.c45
-rw-r--r--net/sched/sch_tbf.c3
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/auth.c146
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/debug.c6
-rw-r--r--net/sctp/diag.c (renamed from net/sctp/sctp_diag.c)31
-rw-r--r--net/sctp/endpointola.c8
-rw-r--r--net/sctp/input.c26
-rw-r--r--net/sctp/inqueue.c2
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/offload.c2
-rw-r--r--net/sctp/output.c18
-rw-r--r--net/sctp/proc.c90
-rw-r--r--net/sctp/protocol.c59
-rw-r--r--net/sctp/sm_make_chunk.c33
-rw-r--r--net/sctp/sm_sideeffect.c13
-rw-r--r--net/sctp/sm_statefuns.c56
-rw-r--r--net/sctp/socket.c828
-rw-r--r--net/sctp/stream.c2
-rw-r--r--net/sctp/stream_interleave.c16
-rw-r--r--net/smc/af_smc.c212
-rw-r--r--net/smc/smc.h9
-rw-r--r--net/smc/smc_cdc.c2
-rw-r--r--net/smc/smc_clc.c214
-rw-r--r--net/smc/smc_clc.h22
-rw-r--r--net/smc/smc_close.c25
-rw-r--r--net/smc/smc_core.c103
-rw-r--r--net/smc/smc_core.h16
-rw-r--r--net/smc/smc_ib.c10
-rw-r--r--net/smc/smc_llc.c410
-rw-r--r--net/smc/smc_llc.h41
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/socket.c21
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/debugfs.c6
-rw-r--r--net/sunrpc/rpc_pipe.c42
-rw-r--r--net/tipc/Kconfig8
-rw-r--r--net/tipc/Makefile7
-rw-r--r--net/tipc/addr.c159
-rw-r--r--net/tipc/addr.h47
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/bearer.c242
-rw-r--r--net/tipc/bearer.h6
-rw-r--r--net/tipc/core.c6
-rw-r--r--net/tipc/core.h22
-rw-r--r--net/tipc/diag.c114
-rw-r--r--net/tipc/discover.c392
-rw-r--r--net/tipc/discover.h8
-rw-r--r--net/tipc/group.c3
-rw-r--r--net/tipc/link.c36
-rw-r--r--net/tipc/link.h4
-rw-r--r--net/tipc/msg.c2
-rw-r--r--net/tipc/msg.h23
-rw-r--r--net/tipc/name_distr.c76
-rw-r--r--net/tipc/name_distr.h2
-rw-r--r--net/tipc/name_table.c285
-rw-r--r--net/tipc/name_table.h56
-rw-r--r--net/tipc/net.c95
-rw-r--r--net/tipc/net.h6
-rw-r--r--net/tipc/netlink_compat.c43
-rw-r--r--net/tipc/node.c125
-rw-r--r--net/tipc/node.h9
-rw-r--r--net/tipc/server.c710
-rw-r--r--net/tipc/socket.c183
-rw-r--r--net/tipc/socket.h10
-rw-r--r--net/tipc/subscr.c361
-rw-r--r--net/tipc/subscr.h66
-rw-r--r--net/tipc/topsrv.c703
-rw-r--r--net/tipc/topsrv.h (renamed from net/tipc/server.h)57
-rw-r--r--net/tipc/udp_media.c14
-rw-r--r--net/tls/Kconfig1
-rw-r--r--net/tls/tls_main.c135
-rw-r--r--net/tls/tls_sw.c713
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/wireless/Kconfig13
-rw-r--r--net/wireless/mesh.c25
-rw-r--r--net/wireless/nl80211.c203
-rw-r--r--net/wireless/rdev-ops.h15
-rw-r--r--net/wireless/sme.c2
-rw-r--r--net/wireless/trace.h23
-rw-r--r--net/wireless/util.c5
-rw-r--r--net/wireless/wext-proc.c2
-rw-r--r--net/x25/x25_proc.c12
-rw-r--r--net/x25/x25_subr.c3
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_ipcomp.c2
-rw-r--r--net/xfrm/xfrm_policy.c13
-rw-r--r--net/xfrm/xfrm_proc.c2
-rw-r--r--net/xfrm/xfrm_replay.c2
-rw-r--r--net/xfrm/xfrm_state.c5
-rw-r--r--net/xfrm/xfrm_user.c21
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/bpf_load.c8
-rw-r--r--samples/bpf/cpustat_kern.c281
-rw-r--r--samples/bpf/cpustat_user.c219
-rw-r--r--samples/bpf/tcbpf2_kern.c6
-rwxr-xr-xsamples/bpf/test_cgrp2_sock.sh1
-rwxr-xr-xsamples/bpf/test_cgrp2_sock2.sh3
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh5
-rw-r--r--samples/bpf/trace_event_kern.c4
-rw-r--r--samples/bpf/trace_event_user.c15
-rw-r--r--samples/bpf/xdp_redirect_user.c7
-rw-r--r--samples/seccomp/Makefile10
-rw-r--r--samples/sockmap/Makefile2
-rw-r--r--samples/sockmap/sockmap_kern.c197
-rwxr-xr-xsamples/sockmap/sockmap_test.sh450
-rw-r--r--samples/sockmap/sockmap_user.c302
-rw-r--r--scripts/Makefile.build8
-rw-r--r--scripts/Makefile.lib8
-rw-r--r--scripts/basic/fixdep.c15
-rwxr-xr-xscripts/bloat-o-meter2
-rw-r--r--scripts/coccinelle/api/memdup.cocci4
-rw-r--r--scripts/kallsyms.c2
-rw-r--r--scripts/kconfig/confdata.c2
-rw-r--r--scripts/kconfig/kxgettext.c2
-rw-r--r--scripts/kconfig/lkc.h1
-rwxr-xr-xscripts/kconfig/lxdialog/check-lxdialog.sh3
-rw-r--r--scripts/kconfig/menu.c1
-rw-r--r--scripts/kconfig/symbol.c4
-rw-r--r--scripts/kconfig/util.c11
-rw-r--r--scripts/kconfig/zconf.l12
-rw-r--r--scripts/kconfig/zconf.y3
-rwxr-xr-xscripts/link-vmlinux.sh2
-rw-r--r--security/integrity/digsig.c1
-rw-r--r--security/keys/big_key.c110
-rw-r--r--sound/ac97/Kconfig1
-rw-r--r--sound/core/control.c2
-rw-r--r--sound/core/oss/pcm_oss.c10
-rw-r--r--sound/core/seq/seq_clientmgr.c30
-rw-r--r--sound/core/seq/seq_fifo.c2
-rw-r--r--sound/core/seq/seq_memory.c14
-rw-r--r--sound/core/seq/seq_memory.h3
-rw-r--r--sound/core/seq/seq_prioq.c28
-rw-r--r--sound/core/seq/seq_prioq.h6
-rw-r--r--sound/core/seq/seq_queue.c28
-rw-r--r--sound/pci/hda/hda_intel.c39
-rw-r--r--sound/pci/hda/patch_conexant.c2
-rw-r--r--sound/pci/hda/patch_realtek.c116
-rw-r--r--sound/soc/amd/acp-pcm-dma.c16
-rw-r--r--sound/soc/amd/acp.h2
-rw-r--r--sound/soc/codecs/hdmi-codec.c7
-rw-r--r--sound/soc/codecs/rt5651.c1
-rw-r--r--sound/soc/codecs/sgtl5000.c26
-rw-r--r--sound/soc/codecs/wm_adsp.c14
-rw-r--r--sound/soc/sunxi/sun4i-i2s.c2
-rw-r--r--sound/usb/mixer.c18
-rw-r--r--sound/usb/pcm.c9
-rw-r--r--sound/usb/quirks-table.h47
-rw-r--r--sound/usb/quirks.c7
-rw-r--r--sound/x86/intel_hdmi_audio.c37
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h2
-rw-r--r--tools/arch/s390/include/uapi/asm/unistd.h412
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h2
-rw-r--r--tools/bpf/Makefile78
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst18
-rw-r--r--tools/bpf/bpftool/Makefile6
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool13
-rw-r--r--tools/bpf/bpftool/cfg.c514
-rw-r--r--tools/bpf/bpftool/cfg.h43
-rw-r--r--tools/bpf/bpftool/common.c4
-rw-r--r--tools/bpf/bpftool/main.c106
-rw-r--r--tools/bpf/bpftool/prog.c306
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c338
-rw-r--r--tools/bpf/bpftool/xlated_dumper.h64
-rw-r--r--tools/cgroup/Makefile1
-rw-r--r--tools/gpio/Makefile2
-rw-r--r--tools/hv/Makefile1
-rw-r--r--tools/iio/Makefile2
-rw-r--r--tools/include/uapi/drm/i915_drm.h77
-rw-r--r--tools/include/uapi/linux/bpf.h47
-rw-r--r--tools/include/uapi/linux/if_link.h1
-rw-r--r--tools/include/uapi/linux/kvm.h92
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat503
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt4
-rw-r--r--tools/laptop/freefall/Makefile1
-rw-r--r--tools/leds/Makefile1
-rw-r--r--tools/lib/bpf/libbpf.c6
-rw-r--r--tools/objtool/builtin-check.c6
-rw-r--r--tools/objtool/builtin-orc.c6
-rw-r--r--tools/objtool/builtin.h5
-rw-r--r--tools/objtool/check.c146
-rw-r--r--tools/objtool/check.h4
-rw-r--r--tools/perf/Documentation/perf-data.txt4
-rw-r--r--tools/perf/Documentation/perf-kallsyms.txt2
-rw-r--r--tools/perf/Makefile.perf6
-rw-r--r--tools/perf/arch/s390/Makefile10
-rwxr-xr-xtools/perf/arch/s390/entry/syscalls/mksyscalltbl18
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl390
-rw-r--r--tools/perf/builtin-c2c.c4
-rw-r--r--tools/perf/builtin-record.c9
-rw-r--r--tools/perf/builtin-report.c3
-rw-r--r--tools/perf/builtin-stat.c2
-rw-r--r--tools/perf/builtin-top.c150
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/perf.h1
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json27
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json22
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json27
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json22
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/other.json32
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json52
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv1
-rw-r--r--tools/perf/tests/backward-ring-buffer.c7
-rwxr-xr-xtools/perf/tests/shell/trace+probe_libc_inet_pton.sh23
-rw-r--r--tools/perf/ui/browsers/annotate.c25
-rw-r--r--tools/perf/ui/browsers/hists.c38
-rw-r--r--tools/perf/ui/browsers/hists.h3
-rw-r--r--tools/perf/util/auxtrace.c15
-rw-r--r--tools/perf/util/evlist.c17
-rw-r--r--tools/perf/util/evlist.h4
-rw-r--r--tools/perf/util/evsel.c12
-rw-r--r--tools/perf/util/evsel.h14
-rw-r--r--tools/perf/util/hist.h6
-rw-r--r--tools/perf/util/mmap.c141
-rw-r--r--tools/perf/util/mmap.h10
-rw-r--r--tools/perf/util/record.c8
-rw-r--r--tools/perf/util/trigger.h9
-rw-r--r--tools/perf/util/util.c24
-rw-r--r--tools/power/acpi/Makefile.config3
-rw-r--r--tools/scripts/Makefile.include18
-rw-r--r--tools/spi/Makefile2
-rw-r--r--tools/testing/radix-tree/idr-test.c52
-rw-r--r--tools/testing/radix-tree/linux.c11
-rw-r--r--tools/testing/radix-tree/linux/compiler_types.h0
-rw-r--r--tools/testing/radix-tree/linux/gfp.h1
-rw-r--r--tools/testing/radix-tree/linux/slab.h6
-rw-r--r--tools/testing/selftests/android/Makefile8
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile17
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h10
-rw-r--r--tools/testing/selftests/bpf/bpf_rlimit.h28
-rw-r--r--tools/testing/selftests/bpf/sockmap_parse_prog.c15
-rw-r--r--tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c33
-rw-r--r--tools/testing/selftests/bpf/sockmap_verdict_prog.c7
-rw-r--r--tools/testing/selftests/bpf/test_align.c6
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c6
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c14
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c6
-rw-r--r--tools/testing/selftests/bpf/test_maps.c64
-rw-r--r--tools/testing/selftests/bpf/test_progs.c169
-rw-r--r--tools/testing/selftests/bpf/test_stacktrace_build_id.c60
-rw-r--r--tools/testing/selftests/bpf/test_tag.c4
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_kern.c1
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c2
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c384
-rw-r--r--tools/testing/selftests/bpf/test_verifier_log.c8
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c22
-rw-r--r--tools/testing/selftests/futex/Makefile6
-rw-r--r--tools/testing/selftests/memfd/Makefile1
-rw-r--r--tools/testing/selftests/memfd/config1
-rw-r--r--tools/testing/selftests/memory-hotplug/Makefile5
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/config5
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh467
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh664
-rw-r--r--tools/testing/selftests/net/forwarding/.gitignore1
-rw-r--r--tools/testing/selftests/net/forwarding/README56
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh88
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh86
-rw-r--r--tools/testing/selftests/net/forwarding/config12
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample35
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh577
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh125
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh376
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh202
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh122
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh196
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh122
-rwxr-xr-xtools/testing/selftests/net/in_netns.sh23
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c131
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh471
-rw-r--r--tools/testing/selftests/net/psock_fanout.c35
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh6
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests4
-rw-r--r--tools/testing/selftests/networking/timestamping/txtimestamp.c21
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/subpage_prot.c14
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c2
-rw-r--r--tools/testing/selftests/pstore/config1
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c61
-rw-r--r--tools/testing/selftests/sync/Makefile2
-rw-r--r--tools/testing/selftests/tc-testing/README173
-rw-r--r--tools/testing/selftests/tc-testing/TODO.txt25
-rw-r--r--tools/testing/selftests/tc-testing/TdcPlugin.py74
-rw-r--r--tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt104
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt35
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS27
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py141
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py19
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py142
-rw-r--r--tools/testing/selftests/tc-testing/plugins/__init__.py0
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json289
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json410
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json71
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json192
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json144
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json168
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json26
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json410
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py575
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_batch.py8
-rw-r--r--tools/testing/selftests/tc-testing/tdc_helper.py15
-rw-r--r--tools/testing/selftests/vDSO/Makefile14
-rw-r--r--tools/testing/selftests/vm/.gitignore1
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests25
-rw-r--r--tools/testing/selftests/x86/Makefile24
-rw-r--r--tools/testing/selftests/x86/entry_from_vm86.c32
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c32
-rw-r--r--tools/testing/selftests/x86/protection_keys.c28
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c5
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c4
-rw-r--r--tools/testing/selftests/x86/test_vdso.c55
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c22
-rw-r--r--tools/usb/Makefile1
-rw-r--r--tools/vm/Makefile1
-rw-r--r--tools/wmi/Makefile1
-rw-r--r--virt/kvm/arm/arch_timer.c122
-rw-r--r--virt/kvm/arm/arm.c9
-rw-r--r--virt/kvm/arm/hyp/vgic-v3-sr.c3
-rw-r--r--virt/kvm/arm/mmu.c6
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c3
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c11
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c9
-rw-r--r--virt/kvm/arm/vgic/vgic.c87
-rw-r--r--virt/kvm/arm/vgic/vgic.h3
-rw-r--r--virt/kvm/kvm_main.c3
2606 files changed, 89602 insertions, 29379 deletions
diff --git a/.gitignore b/.gitignore
index 705e09913dc2..1be78fd8163b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,7 @@ all.config
# Kdevelop4
*.kdev4
+
+#Automatically generated by ASN.1 compiler
+net/ipv4/netfilter/nf_nat_snmp_basic-asn1.c
+net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h
diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata
index aa4296498859..9ab0ef1dd1c7 100644
--- a/Documentation/ABI/testing/sysfs-ata
+++ b/Documentation/ABI/testing/sysfs-ata
@@ -1,110 +1,139 @@
What: /sys/class/ata_...
-Date: August 2008
-Contact: Gwendal Grignou<[email protected]>
Description:
-
-Provide a place in sysfs for storing the ATA topology of the system. This allows
-retrieving various information about ATA objects.
+ Provide a place in sysfs for storing the ATA topology of the
+ system. This allows retrieving various information about ATA
+ objects.
Files under /sys/class/ata_port
-------------------------------
- For each port, a directory ataX is created where X is the ata_port_id of
- the port. The device parent is the ata host device.
+For each port, a directory ataX is created where X is the ata_port_id of the
+port. The device parent is the ata host device.
-idle_irq (read)
- Number of IRQ received by the port while idle [some ata HBA only].
+What: /sys/class/ata_port/ataX/nr_pmp_links
+What: /sys/class/ata_port/ataX/idle_irq
+Date: May, 2010
+KernelVersion: v2.6.37
+Contact: Gwendal Grignou <[email protected]>
+Description:
+ nr_pmp_links: (RO) If a SATA Port Multiplier (PM) is
+ connected, the number of links behind it.
-nr_pmp_links (read)
+ idle_irq: (RO) Number of IRQ received by the port while
+ idle [some ata HBA only].
- If a SATA Port Multiplier (PM) is connected, number of link behind it.
+
+What: /sys/class/ata_port/ataX/port_no
+Date: May, 2013
+KernelVersion: v3.11
+Contact: Gwendal Grignou <[email protected]>
+Description:
+ (RO) Host local port number. While registering host controller,
+ port numbers are tracked based upon number of ports available on
+ the controller. This attribute is needed by udev for composing
+ persistent links in /dev/disk/by-path.
Files under /sys/class/ata_link
-------------------------------
- Behind each port, there is a ata_link. If there is a SATA PM in the
- topology, 15 ata_link objects are created.
-
- If a link is behind a port, the directory name is linkX, where X is
- ata_port_id of the port.
- If a link is behind a PM, its name is linkX.Y where X is ata_port_id
- of the parent port and Y the PM port.
+Behind each port, there is a ata_link. If there is a SATA PM in the topology, 15
+ata_link objects are created.
-hw_sata_spd_limit
+If a link is behind a port, the directory name is linkX, where X is ata_port_id
+of the port. If a link is behind a PM, its name is linkX.Y where X is
+ata_port_id of the parent port and Y the PM port.
- Maximum speed supported by the connected SATA device.
-sata_spd_limit
+What: /sys/class/ata_link/linkX[.Y]/hw_sata_spd_limit
+What: /sys/class/ata_link/linkX[.Y]/sata_spd_limit
+What: /sys/class/ata_link/linkX[.Y]/sata_spd
+Date: May, 2010
+KernelVersion: v2.6.37
+Contact: Gwendal Grignou <[email protected]>
+Description:
+ hw_sata_spd_limit: (RO) Maximum speed supported by the
+ connected SATA device.
- Maximum speed imposed by libata.
+ sata_spd_limit: (RO) Maximum speed imposed by libata.
-sata_spd
+ sata_spd: (RO) Current speed of the link
+ e.g. 1.5, 3 Gbps, etc.
- Current speed of the link [1.5, 3Gps,...].
Files under /sys/class/ata_device
---------------------------------
- Behind each link, up to two ata device are created.
- The name of the directory is devX[.Y].Z where:
- - X is ata_port_id of the port where the device is connected,
- - Y the port of the PM if any, and
- - Z the device id: for PATA, there is usually 2 devices [0,1],
- only 1 for SATA.
-
-class
- Device class. Can be "ata" for disk, "atapi" for packet device,
- "pmp" for PM, or "none" if no device was found behind the link.
-
-dma_mode
+Behind each link, up to two ata devices are created.
+The name of the directory is devX[.Y].Z where:
+- X is ata_port_id of the port where the device is connected,
+- Y the port of the PM if any, and
+- Z the device id: for PATA, there is usually 2 devices [0,1], only 1 for SATA.
+
+
+What: /sys/class/ata_device/devX[.Y].Z/spdn_cnt
+What: /sys/class/ata_device/devX[.Y].Z/gscr
+What: /sys/class/ata_device/devX[.Y].Z/ering
+What: /sys/class/ata_device/devX[.Y].Z/id
+What: /sys/class/ata_device/devX[.Y].Z/pio_mode
+What: /sys/class/ata_device/devX[.Y].Z/xfer_mode
+What: /sys/class/ata_device/devX[.Y].Z/dma_mode
+What: /sys/class/ata_device/devX[.Y].Z/class
+Date: May, 2010
+KernelVersion: v2.6.37
+Contact: Gwendal Grignou <[email protected]>
+Description:
+ spdn_cnt: (RO) Number of times libata decided to lower the
+ speed of the link due to errors.
- Transfer modes supported by the device when in DMA mode.
- Mostly used by PATA device.
+ gscr: (RO) Cached result of the dump of PM GSCR
+ register. Valid registers are:
-pio_mode
+ 0: SATA_PMP_GSCR_PROD_ID,
+ 1: SATA_PMP_GSCR_REV,
+ 2: SATA_PMP_GSCR_PORT_INFO,
+ 32: SATA_PMP_GSCR_ERROR,
+ 33: SATA_PMP_GSCR_ERROR_EN,
+ 64: SATA_PMP_GSCR_FEAT,
+ 96: SATA_PMP_GSCR_FEAT_EN,
+ 130: SATA_PMP_GSCR_SII_GPIO
- Transfer modes supported by the device when in PIO mode.
- Mostly used by PATA device.
+ Only valid if the device is a PM.
-xfer_mode
+ ering: (RO) Formatted output of the error ring of the
+ device.
- Current transfer mode.
+ id: (RO) Cached result of IDENTIFY command, as
+ described in ATA8 7.16 and 7.17. Only valid if
+ the device is not a PM.
-id
+ pio_mode: (RO) Transfer modes supported by the device when
+ in PIO mode. Mostly used by PATA device.
- Cached result of IDENTIFY command, as described in ATA8 7.16 and 7.17.
- Only valid if the device is not a PM.
+ xfer_mode: (RO) Current transfer mode
-gscr
+ dma_mode: (RO) Transfer modes supported by the device when
+ in DMA mode. Mostly used by PATA device.
- Cached result of the dump of PM GSCR register.
- Valid registers are:
- 0: SATA_PMP_GSCR_PROD_ID,
- 1: SATA_PMP_GSCR_REV,
- 2: SATA_PMP_GSCR_PORT_INFO,
- 32: SATA_PMP_GSCR_ERROR,
- 33: SATA_PMP_GSCR_ERROR_EN,
- 64: SATA_PMP_GSCR_FEAT,
- 96: SATA_PMP_GSCR_FEAT_EN,
- 130: SATA_PMP_GSCR_SII_GPIO
- Only valid if the device is a PM.
+ class: (RO) Device class. Can be "ata" for disk,
+ "atapi" for packet device, "pmp" for PM, or
+ "none" if no device was found behind the link.
-trim
- Shows the DSM TRIM mode currently used by the device. Valid
- values are:
- unsupported: Drive does not support DSM TRIM
- unqueued: Drive supports unqueued DSM TRIM only
- queued: Drive supports queued DSM TRIM
- forced_unqueued: Drive's queued DSM support is known to be
- buggy and only unqueued TRIM commands
- are sent
+What: /sys/class/ata_device/devX[.Y].Z/trim
+Date: May, 2015
+KernelVersion: v4.10
+Contact: Gwendal Grignou <[email protected]>
+Description:
+ (RO) Shows the DSM TRIM mode currently used by the device. Valid
+ values are:
-spdn_cnt
+ unsupported: Drive does not support DSM TRIM
- Number of time libata decided to lower the speed of link due to errors.
+ unqueued: Drive supports unqueued DSM TRIM only
-ering
+ queued: Drive supports queued DSM TRIM
- Formatted output of the error ring of the device.
+ forced_unqueued: Drive's queued DSM support is known to
+ be buggy and only unqueued TRIM commands
+ are sent
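As an illustration of how these read-only attributes are consumed, here is a
minimal user-space sketch that prints the DSM TRIM mode; the dev1.0 path is a
hypothetical example and depends on the local ATA topology:

#include <stdio.h>

/* Print the DSM TRIM mode of a hypothetical device dev1.0.
 * The devX[.Y].Z component depends on the local topology. */
int main(void)
{
	char mode[32];
	FILE *f = fopen("/sys/class/ata_device/dev1.0/trim", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(mode, sizeof(mode), f))
		printf("DSM TRIM mode: %s", mode);
	fclose(f);
	return 0;
}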
diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device
new file mode 100644
index 000000000000..82ef6eab042d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block-device
@@ -0,0 +1,58 @@
+What: /sys/block/*/device/sw_activity
+Date: Jun, 2008
+KernelVersion: v2.6.27
+Description:
+ (RW) Used by drivers which support software controlled activity
+ LEDs.
+
+ It has the following valid values:
+
+ 0 OFF - the LED is not activated on activity
+ 1 BLINK_ON - the LED blinks on every 10ms when activity is
+ detected.
+ 2 BLINK_OFF - the LED is on when idle, and blinks off
+ every 10ms when activity is detected.
+
+ Note that the user must turn sw_activity OFF if they wish to
+ control the activity LED via the em_message file.
+
+
+What: /sys/block/*/device/unload_heads
+Date: Sep, 2008
+KernelVersion: v2.6.28
+Description:
+ (RW) Hard disk shock protection
+
+ Writing an integer value to this file will take the heads of the
+ respective drive off the platter and block all I/O operations
+ for the specified number of milliseconds.
+
+ - If the device does not support the unload heads feature,
+ access is denied with -EOPNOTSUPP.
+ - The maximal value accepted for a timeout is 30000
+ milliseconds.
+ - A previously set timeout can be cancelled and disk can resume
+ normal operation immediately by specifying a timeout of 0.
+ - Some hard drives only comply with an earlier version of the
+ ATA standard, but support the unload feature nonetheless.
+ There is no safe way Linux can detect these devices, so this
+ is not enabled by default. If it is known that your device
+ does support the unload feature, then you can tell the kernel
+ to enable it by writing -1. It can be disabled again by
+ writing -2.
+ - Values below -2 are rejected with -EINVAL
+
+ For more information, see
+ Documentation/laptops/disk-shock-protection.txt
+
+
+What: /sys/block/*/device/ncq_prio_enable
+Date: Oct, 2016
+KernelVersion: v4.10
+Description:
+ (RW) Write to the file to turn the SATA NCQ (native command
+ queueing) priority support on or off. By default this feature is
+ turned off.
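To make the unload_heads semantics concrete, here is a minimal sketch (sda is
a placeholder device) that parks the heads for two seconds:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Park the heads of a placeholder disk for 2000 ms via sysfs.
 * Writing 0 instead would cancel a pending timeout; writing -1
 * enables the feature on drives libata cannot detect. */
int main(void)
{
	const char *path = "/sys/block/sda/device/unload_heads";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "2000", strlen("2000")) < 0)
		perror("write");	/* e.g. EOPNOTSUPP if unsupported */
	close(fd);
	return 0;
}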
diff --git a/Documentation/ABI/testing/sysfs-class-scsi_host b/Documentation/ABI/testing/sysfs-class-scsi_host
index 0eb255e7db12..bafc59fd7b69 100644
--- a/Documentation/ABI/testing/sysfs-class-scsi_host
+++ b/Documentation/ABI/testing/sysfs-class-scsi_host
@@ -27,3 +27,92 @@ Description: This file contains the current status of the "SSD Smart Path"
the direct i/o path to physical devices. This setting is
controller wide, affecting all configured logical drives on the
controller. This file is readable and writable.
+
+What: /sys/class/scsi_host/hostX/link_power_management_policy
+Date: Oct, 2007
+KernelVersion: v2.6.24
+Description:
+ (RW) This parameter allows the user to read and set the link
+ (interface) power management.
+
+ There are four possible options:
+
+ min_power: Tell the controller to make the link use as
+ little power as possible. This may sacrifice some
+ performance due to increased latency when coming out of lower
+ power states.
+
+ max_performance: Generally, this means no power management.
+ Tell the controller to have performance be a priority over power
+ management.
+
+ medium_power: Tell the controller to enter a lower power state
+ when possible, but do not enter the lowest power state, thus
+ improving latency over min_power setting.
+
+ med_power_with_dipm: Identical to the existing medium_power
+ setting except that it enables dipm (device initiated power
+ management) on top, which makes it match the Windows IRST (Intel
+ Rapid Storage Technology) driver settings. This setting is also
+ close to min_power, except that:
+ a) It does not use host-initiated slumber mode, but it does
+ allow device-initiated slumber
+ b) It does not enable low power device sleep mode (DevSlp).
+
+What: /sys/class/scsi_host/hostX/em_message
+What: /sys/class/scsi_host/hostX/em_message_type
+Date: Jun, 2008
+KernelVersion: v2.6.27
+Description:
+ em_message: (RW) Enclosure management support. For the LED
+ protocol, writes and reads correspond to the LED message format
+ as defined in the AHCI spec.
+
+ The user must turn sw_activity (under /sys/block/*/device/) OFF
+ if they wish to control the activity LED via the em_message
+ file.
+
+ em_message_type: (RO) Displays the current enclosure management
+ protocol that is being used by the driver (e.g. LED, SAF-TE,
+ SES-2, SGPIO, etc.).
+
+What: /sys/class/scsi_host/hostX/ahci_port_cmd
+What: /sys/class/scsi_host/hostX/ahci_host_caps
+What: /sys/class/scsi_host/hostX/ahci_host_cap2
+Date: Mar, 2010
+KernelVersion: v2.6.35
+Description:
+ [to be documented]
+
+What: /sys/class/scsi_host/hostX/ahci_host_version
+Date: Mar, 2010
+KernelVersion: v2.6.35
+Description:
+ (RO) Display the version of the AHCI spec implemented by the
+ host.
+
+What: /sys/class/scsi_host/hostX/em_buffer
+Date: Apr, 2010
+KernelVersion: v2.6.35
+Description:
+ (RW) Allows access to AHCI EM (enclosure management) buffer
+ directly if the host supports EM.
+
+ For example, the AHCI driver supports SGPIO EM messages, but the
+ SATA/AHCI specs do not define the SGPIO message format of the EM
+ buffer. Different hardware (HW) vendors may have different
+ definitions. With the em_buffer attribute, this issue can be
+ solved by allowing HW vendors to provide userland drivers and
+ tools for their SGPIO initiators.
+
+What: /sys/class/scsi_host/hostX/em_message_supported
+Date: Oct, 2009
+KernelVersion: v2.6.39
+Description:
+ (RO) Displays supported enclosure management message types.
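A minimal sketch of selecting the med_power_with_dipm policy described above
(host0 is a placeholder; any of the four policy strings may be written):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Select the med_power_with_dipm link power management policy
 * for a hypothetical host0. */
int main(void)
{
	const char *p = "/sys/class/scsi_host/host0/link_power_management_policy";
	const char *policy = "med_power_with_dipm";
	int fd = open(p, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, policy, strlen(policy)) < 0)
		perror("write");
	close(fd);
	return 0;
}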
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-dock b/Documentation/ABI/testing/sysfs-devices-platform-dock
new file mode 100644
index 000000000000..1d8c18f905c7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-dock
@@ -0,0 +1,39 @@
+What: /sys/devices/platform/dock.N/docked
+Date: Dec, 2006
+KernelVersion: 2.6.19
+Description:
+ (RO) Value 1 or 0 indicates whether the software believes the
+ laptop is docked in a docking station.
+
+What: /sys/devices/platform/dock.N/undock
+Date: Dec, 2006
+KernelVersion: 2.6.19
+Description:
+ (WO) Writing to this file causes the software to initiate an
+ undock request to the firmware.
+
+What: /sys/devices/platform/dock.N/uid
+Date: Feb, 2007
+KernelVersion: v2.6.21
+Description:
+ (RO) Displays the docking station the laptop is docked to.
+
+What: /sys/devices/platform/dock.N/flags
+Date: May, 2007
+KernelVersion: v2.6.21
+Description:
+ (RO) Show dock station flags, useful for checking if undock
+ request has been made by the user (from the immediate_undock
+ option).
+
+What: /sys/devices/platform/dock.N/type
+Date: Aug, 2008
+KernelVersion: v2.6.27
+Description:
+ (RO) Display the dock station type - dock_station, ata_bay or
+ battery_bay.
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index bfd29bc8d37a..4ed63b6cfb15 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -108,6 +108,8 @@ Description: CPU topology files that describe a logical CPU's relationship
What: /sys/devices/system/cpu/cpuidle/current_driver
/sys/devices/system/cpu/cpuidle/current_governer_ro
+ /sys/devices/system/cpu/cpuidle/available_governors
+ /sys/devices/system/cpu/cpuidle/current_governor
Date: September 2007
Contact: Linux kernel mailing list <[email protected]>
Description: Discover cpuidle policy and mechanism
@@ -119,13 +121,84 @@ Description: Discover cpuidle policy and mechanism
Idle policy (governor) is differentiated from idle mechanism
(driver)
- current_driver: displays current idle mechanism
+ current_driver: (RO) displays current idle mechanism
- current_governor_ro: displays current idle policy
+ current_governor_ro: (RO) displays current idle policy
+
+ With the cpuidle_sysfs_switch boot option enabled (meant for
+ developer testing), the following three attributes are visible
+ instead:
+
+ current_driver: same as described above
+
+ available_governors: (RO) displays a space separated list of
+ available governors
+
+ current_governor: (RW) displays current idle policy. Users can
+ switch the governor at runtime by writing to this file.
See files in Documentation/cpuidle/ for more information.
+What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/name
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/power
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/time
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
+Date: September 2007
+KernelVersion: v2.6.24
+Contact: Linux power management list <[email protected]>
+Description:
+ The directory /sys/devices/system/cpu/cpuX/cpuidle contains per
+ logical CPU specific cpuidle information for each online cpu X.
+ The processor idle states which are available for use have the
+ following attributes:
+
+ name: (RO) Name of the idle state (string).
+
+ latency: (RO) The latency to exit out of this idle state (in
+ microseconds).
+
+ power: (RO) The power consumed while in this idle state (in
+ milliwatts).
+
+ time: (RO) The total time spent in this idle state (in microseconds).
+
+ usage: (RO) Number of times this state was entered (a count).
+
+
+What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
+Date: February 2008
+KernelVersion: v2.6.25
+Contact: Linux power management list <[email protected]>
+Description:
+ (RO) A small description about the idle state (string).
+
+
+What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
+Date: March 2012
+KernelVersion: v3.10
+Contact: Linux power management list <[email protected]>
+Description:
+ (RW) Option to disable this idle state (bool). The behavior and
+ the effect of the disable variable depends on the implementation
+ of a particular governor. In the ladder governor, for example,
+ it is not coherent, i.e. if one is disabling a light state, then
+ all deeper states are disabled as well, but the disable variable
+ does not reflect it. Likewise, if one enables a deep state but a
+ lighter state still is disabled, then this has no effect.
+
+
+What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
+Date: March 2014
+KernelVersion: v3.15
+Contact: Linux power management list <[email protected]>
+Description:
+ (RO) Display the target residency i.e. the minimum amount of
+ time (in microseconds) this cpu should spend in this idle state
+ to make the transition worth the effort.
+
+
What: /sys/devices/system/cpu/cpu#/cpufreq/*
Date: pre-git history
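To show how the per-state cpuidle attributes above fit together, here is a
small user-space sketch (assuming cpu0 exposes a state1 directory) that prints
a few of them:

#include <stdio.h>

/* Print a few attributes of a hypothetical idle state of cpu0.
 * Real systems expose directories state0..stateN. */
static void show(const char *attr)
{
	char buf[64], path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu0/cpuidle/state1/%s", attr);
	f = fopen(path, "r");
	if (!f)
		return;
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", attr, buf);
	fclose(f);
}

int main(void)
{
	show("name");
	show("latency");
	show("residency");
	return 0;
}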
diff --git a/Documentation/ABI/testing/sysfs-platform-dptf b/Documentation/ABI/testing/sysfs-platform-dptf
new file mode 100644
index 000000000000..325dc0667dbb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dptf
@@ -0,0 +1,40 @@
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/charger_type
+Date: Jul, 2016
+KernelVersion: v4.10
+Description:
+ (RO) The charger type - Traditional, Hybrid or NVDC.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/adapter_rating_mw
+Date: Jul, 2016
+KernelVersion: v4.10
+Description:
+ (RO) Adapter rating in milliwatts (the maximum adapter power).
+ Must be 0 if no AC adapter is plugged in.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/max_platform_power_mw
+Date: Jul, 2016
+KernelVersion: v4.10
+Description:
+ (RO) Maximum platform power that can be supported by the battery
+ in milliwatts.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source
+Date: Jul, 2016
+KernelVersion: v4.10
+Description:
+ (RO) Display the platform power source
+ 0x00 = DC
+ 0x01 = AC
+ 0x02 = USB
+ 0x03 = Wireless Charger
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
+Date: Jul, 2016
+KernelVersion: v4.10
+Description:
+ (RO) The maximum sustained power for the battery, in milliwatts.
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 611a75e4366e..badb26ac33dc 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -570,7 +570,9 @@ your driver if they're helpful, or just use plain hex constants.
The device IDs are arbitrary hex numbers (vendor controlled) and normally used
only in a single location, the pci_device_id table.
-Please DO submit new vendor/device IDs to http://pciids.sourceforge.net/.
+Please DO submit new vendor/device IDs to http://pci-ids.ucw.cz/.
+There are mirrors of the pci.ids file at http://pciids.sourceforge.net/
+and https://github.com/pciutils/pciids.
diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst
index 4f7af841d935..ddcc58d01cfb 100644
--- a/Documentation/accelerators/ocxl.rst
+++ b/Documentation/accelerators/ocxl.rst
@@ -152,6 +152,11 @@ OCXL_IOCTL_IRQ_SET_FD:
Associate an event fd to an AFU interrupt so that the user process
can be notified when the AFU sends an interrupt.
+OCXL_IOCTL_GET_METADATA:
+
+ Obtains configuration information from the card, such as the size of
+ MMIO areas, the AFU version, and the PASID for the current context.
+
mmap
----
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
index 5550bfdcce5f..be70b32c95d9 100644
--- a/Documentation/atomic_bitops.txt
+++ b/Documentation/atomic_bitops.txt
@@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is:
- RMW operations that have a return value are fully ordered.
-Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+ - RMW operations that are conditional are unordered on FAILURE,
+ otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+ if the bit in memory is unchanged by the operation then it is deemed to have
+ failed.
+
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
clear_bit_unlock() which has RELEASE semantics.
Since a platform only has a single means of achieving atomic operations
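For illustration, the ACQUIRE/RELEASE pair above is what makes a simple bit
lock correct; a minimal kernel-style sketch (MY_LOCK_BIT and my_flags are
illustrative names, not existing kernel symbols) is:

#include <linux/bitops.h>
#include <asm/processor.h>	/* cpu_relax() */

#define MY_LOCK_BIT	0		/* illustrative bit number */

static unsigned long my_flags;		/* illustrative lock word */

static void my_locked_operation(void)
{
	/* A successful test_and_set_bit_lock() has ACQUIRE semantics,
	 * so the critical section cannot leak above it. */
	while (test_and_set_bit_lock(MY_LOCK_BIT, &my_flags))
		cpu_relax();

	/* ... critical section ... */

	/* RELEASE semantics: stores above are visible before another
	 * CPU can observe the bit clear and take the lock. */
	clear_bit_unlock(MY_LOCK_BIT, &my_flags);
}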
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
index 84cbb302f2b5..1a0b704e1a38 100644
--- a/Documentation/bpf/bpf_devel_QA.txt
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -539,6 +539,18 @@ A: Although LLVM IR generation and optimization try to stay architecture
The clang option "-fno-jump-tables" can be used to disable
switch table generation.
+ - For clang -target bpf, it is guaranteed that pointer or long /
+ unsigned long types will always have a width of 64 bit, no matter
+ whether underlying clang binary or default target (or kernel) is
+ 32 bit. However, when native clang target is used, then it will
+ compile these types based on the underlying architecture's conventions,
+ meaning in case of 32 bit architecture, pointer or long / unsigned
+ long types e.g. in BPF context structure will have width of 32 bit
+ while the BPF LLVM back end still operates in 64 bit. The native
+ target is mostly needed in tracing for the case of walking pt_regs
+ or other kernel structures where CPU's register width matters.
+ Otherwise, clang -target bpf is generally recommended.
+
You should use default target when:
- Your program includes a header file, e.g., ptrace.h, which eventually
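A short illustration of the width difference described above (the struct is
illustrative, not a real kernel context structure):

#include <linux/types.h>

/* With clang -target bpf both members are 8 bytes wide; with a
 * native 32-bit target, 'cookie' shrinks to 4 bytes and the layout
 * seen by the BPF program changes. */
struct example_ctx {
	unsigned long cookie;	/* width depends on the target */
	__u64 stable_cookie;	/* always 64 bit */
};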
diff --git a/Documentation/devicetree/bindings/misc/arm-charlcd.txt b/Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt
index e28e2aac47f1..e28e2aac47f1 100644
--- a/Documentation/devicetree/bindings/misc/arm-charlcd.txt
+++ b/Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt b/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt
index 6394ea9e3b9e..58b12e25bbb1 100644
--- a/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt
+++ b/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt
@@ -16,6 +16,7 @@ Required properties:
- ddc: phandle to the hdmi ddc node
- phy: phandle to the hdmi phy node
- samsung,syscon-phandle: phandle for system controller node for PMU.
+- #sound-dai-cells: should be 0.
Required properties for Exynos 4210, 4212, 5420 and 5433:
- clocks: list of clock IDs from SoC clock driver.
diff --git a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
index 217a90eaabe7..9c38bbe7e6d7 100644
--- a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
+++ b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
@@ -11,7 +11,11 @@ Required properties:
interrupts.
Optional properties:
-- clocks: Optional reference to the clock used by the XOR engine.
+- clocks: Optional reference to the clocks used by the XOR engine.
+- clock-names: mandatory if there is a second clock, in which case the
+ name must be "core" for the first clock and "reg" for the second
+ one.
+
Example:
diff --git a/Documentation/devicetree/bindings/eeprom/at24.txt b/Documentation/devicetree/bindings/eeprom/at24.txt
index 1812c848e369..abfae1beca2b 100644
--- a/Documentation/devicetree/bindings/eeprom/at24.txt
+++ b/Documentation/devicetree/bindings/eeprom/at24.txt
@@ -38,9 +38,9 @@ Required properties:
"catalyst",
"microchip",
+ "nxp",
"ramtron",
"renesas",
- "nxp",
"st",
Some vendors use different model names for chips which are just
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
index 33c9a10fdc91..20f121daa910 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
@@ -14,6 +14,7 @@ Required properties:
- "renesas,irqc-r8a7794" (R-Car E2)
- "renesas,intc-ex-r8a7795" (R-Car H3)
- "renesas,intc-ex-r8a7796" (R-Car M3-W)
+ - "renesas,intc-ex-r8a77965" (R-Car M3-N)
- "renesas,intc-ex-r8a77970" (R-Car V3M)
- "renesas,intc-ex-r8a77995" (R-Car D3)
- #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in
diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 1d4d0f49c9d0..60d50a2b0323 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -13,9 +13,18 @@ placed as a child node of an mdio device.
The properties described here are those specific to Marvell devices.
Additional required and optional properties can be found in dsa.txt.
+The compatibility string is used only to find an identification register,
+which is at a different MDIO base address in different switch families.
+- "marvell,mv88e6085" : Switch has base address 0x10. Use with models:
+ 6085, 6095, 6097, 6123, 6131, 6141, 6161, 6165,
+ 6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
+ 6341, 6350, 6351, 6352
+- "marvell,mv88e6190" : Switch has base address 0x00. Use with models:
+ 6190, 6190X, 6191, 6290, 6390, 6390X
+
Required properties:
- compatible : Should be one of "marvell,mv88e6085" or
- "marvell,mv88e6190"
+ "marvell,mv88e6190" as indicated above
- reg : Address on the MII bus for the switch.
Optional properties:
@@ -50,14 +59,15 @@ Example:
compatible = "marvell,mv88e6085";
reg = <0>;
reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
- };
- mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- switch1phy0: switch1phy0@0 {
- reg = <0>;
- interrupt-parent = <&switch0>;
- interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ switch1phy0: switch1phy0@0 {
+ reg = <0>;
+ interrupt-parent = <&switch0>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ };
};
};
};
@@ -74,23 +84,24 @@ Example:
compatible = "marvell,mv88e6390";
reg = <0>;
reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
- };
- mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- switch1phy0: switch1phy0@0 {
- reg = <0>;
- interrupt-parent = <&switch0>;
- interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ switch1phy0: switch1phy0@0 {
+ reg = <0>;
+ interrupt-parent = <&switch0>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ };
};
- };
- mdio1 {
- compatible = "marvell,mv88e6xxx-mdio-external";
- #address-cells = <1>;
- #size-cells = <0>;
- switch1phy9: switch1phy0@9 {
- reg = <9>;
+ mdio1 {
+ compatible = "marvell,mv88e6xxx-mdio-external";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ switch1phy9: switch1phy0@9 {
+ reg = <9>;
+ };
};
};
};
diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644
index 000000000000..2aaef567c5be
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+ - compatible: should be "nxp,mcr20a"
+ - spi-max-frequency: maximal bus speed, should be set to a frequency
+ lower than 9000000, depending on sync or async operation mode
+ - reg: the chipselect index
+ - interrupts: the interrupt generated by the device. Non high-level
+ interrupts can cause deadlocks while handling the ISR.
+
+Optional properties:
+ - rst_b-gpio: GPIO spec for the RST_B pin
+
+Example:
+
+ mcr20a@0 {
+ compatible = "nxp,mcr20a";
+ spi-max-frequency = <9000000>;
+ reg = <0>;
+ interrupts = <17 2>;
+ interrupt-parent = <&gpio>;
+ rst_b-gpio = <&gpio 27 1>;
+ };
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 27966ae741e0..457d5ae16f23 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -29,6 +29,7 @@ Optional properties for PHY child node:
- reset-gpios : Should specify the gpio for phy reset
- magic-packet : If present, indicates that the hardware supports waking
up via magic packet.
+- phy-handle : see ethernet.txt file in the same directory
Examples:
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index c902261893b9..b4dc455eb155 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -18,6 +18,7 @@ Required properties:
- "renesas,etheravb-r8a7795" for the R8A7795 SoC.
- "renesas,etheravb-r8a7796" for the R8A7796 SoC.
- "renesas,etheravb-r8a77970" for the R8A77970 SoC.
+ - "renesas,etheravb-r8a77980" for the R8A77980 SoC.
- "renesas,etheravb-r8a77995" for the R8A77995 SoC.
- "renesas,etheravb-rcar-gen3" as a fallback for the above
R-Car Gen3 devices.
@@ -26,7 +27,11 @@ Required properties:
SoC-specific version corresponding to the platform first followed by
the generic version.
-- reg: offset and length of (1) the register block and (2) the stream buffer.
+- reg: Offset and length of (1) the register block and (2) the stream buffer.
+ The region for the register block is mandatory.
+ The region for the stream buffer is optional, as it is only present on
+ R-Car Gen2 and RZ/G1 SoCs, and on R-Car H3 (R8A7795), M3-W (R8A7796),
+ and M3-N (R8A77965).
- interrupts: A list of interrupt-specifiers, one for each entry in
interrupt-names.
If interrupt-names is not present, an interrupt specifier
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index f1c441bedf68..929591d52ed6 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -33,6 +33,10 @@ Optional Properties:
Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
high Tx rate. Must not be present for SFF modules
+- maximum-power-milliwatt : Maximum module power consumption
+ Specifies the maximum power consumption allowable by a module in the
+ slot, in milliwatts. Presently, modules can be up to 1W, 1.5W or 2W.
+
Example #1: Direct serdes to SFP connection
sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
i2c-bus = <&sfp_1g_i2c>;
los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+ maximum-power-milliwatt = <1000>;
pinctrl-names = "default";
pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
index 270ea4efff13..96398cc2982f 100644
--- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
@@ -9,6 +9,7 @@ Required properties:
- "socionext,uniphier-pxs2-ave4" : for PXs2 SoC
- "socionext,uniphier-ld11-ave4" : for LD11 SoC
- "socionext,uniphier-ld20-ave4" : for LD20 SoC
+ - "socionext,uniphier-pxs3-ave4" : for PXs3 SoC
- reg: Address where registers are mapped and size of region.
- interrupts: Should contain the MAC interrupt.
- phy-mode: See ethernet.txt in the same directory. Allow to choose
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 02c4353b5cf2..9ef9338aaee1 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -25,6 +25,8 @@ Optional property:
software needs to take when this pin is
strapped in these modes. See data manual
for details.
+ - ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
+ for applicable values.
Note: ti,min-output-impedance and ti,max-output-impedance are mutually
exclusive. When both properties are present ti,max-output-impedance
diff --git a/Documentation/devicetree/bindings/power/mti,mips-cpc.txt b/Documentation/devicetree/bindings/power/mti,mips-cpc.txt
new file mode 100644
index 000000000000..c6b82511ae8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/mti,mips-cpc.txt
@@ -0,0 +1,8 @@
+Binding for MIPS Cluster Power Controller (CPC).
+
+This binding allows a system to specify where the CPC registers are
+located.
+
+Required properties:
+compatible : Should be "mti,mips-cpc".
+reg: Should describe the address & size of the CPC register region.
diff --git a/Documentation/devicetree/bindings/power/wakeup-source.txt b/Documentation/devicetree/bindings/power/wakeup-source.txt
index 3c81f78b5c27..5d254ab13ebf 100644
--- a/Documentation/devicetree/bindings/power/wakeup-source.txt
+++ b/Documentation/devicetree/bindings/power/wakeup-source.txt
@@ -60,7 +60,7 @@ Examples
#size-cells = <0>;
button@1 {
- debounce_interval = <50>;
+ debounce-interval = <50>;
wakeup-source;
linux,code = <116>;
label = "POWER";
diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
index 28be51afdb6a..379eb763073e 100644
--- a/Documentation/devicetree/bindings/thermal/imx-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
@@ -22,7 +22,32 @@ Optional properties:
- clocks : thermal sensor's clock source.
Example:
+ocotp: ocotp@21bc000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,imx6sx-ocotp", "syscon";
+ reg = <0x021bc000 0x4000>;
+ clocks = <&clks IMX6SX_CLK_OCOTP>;
+ tempmon_calib: calib@38 {
+ reg = <0x38 4>;
+ };
+
+ tempmon_temp_grade: temp-grade@20 {
+ reg = <0x20 4>;
+ };
+};
+
+tempmon: tempmon {
+ compatible = "fsl,imx6sx-tempmon", "fsl,imx6q-tempmon";
+ interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
+ fsl,tempmon = <&anatop>;
+ nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>;
+ nvmem-cell-names = "calib", "temp_grade";
+ clocks = <&clks IMX6SX_CLK_PLL3_USB_OTG>;
+};
+
+Legacy method (Deprecated):
tempmon {
compatible = "fsl,imx6q-tempmon";
fsl,tempmon = <&anatop>;
diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
index e64d903bcbe8..46da5f184460 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -19,7 +19,7 @@ Required properties:
configured in FS mode;
- "st,stm32f4x9-hsotg": The DWC2 USB HS controller instance in STM32F4x9 SoCs
configured in HS mode;
- - "st,stm32f7xx-hsotg": The DWC2 USB HS controller instance in STM32F7xx SoCs
+ - "st,stm32f7-hsotg": The DWC2 USB HS controller instance in STM32F7 SoCs
configured in HS mode;
- reg : Should contain 1 register range (address and length)
- interrupts : Should contain 1 interrupt
diff --git a/Documentation/devicetree/bindings/usb/renesas_usb3.txt b/Documentation/devicetree/bindings/usb/renesas_usb3.txt
index 87a45e2f9b7f..2c071bb5801e 100644
--- a/Documentation/devicetree/bindings/usb/renesas_usb3.txt
+++ b/Documentation/devicetree/bindings/usb/renesas_usb3.txt
@@ -4,6 +4,7 @@ Required properties:
- compatible: Must contain one of the following:
- "renesas,r8a7795-usb3-peri"
- "renesas,r8a7796-usb3-peri"
+ - "renesas,r8a77965-usb3-peri"
- "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 compatible
device
diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
index d060172f1529..43960faf5a88 100644
--- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
+++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
@@ -12,6 +12,7 @@ Required properties:
- "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device
- "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device
- "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device
+ - "renesas,usbhs-r8a77965" for r8a77965 (R-Car M3-N) compatible device
- "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device
- "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device
- "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices
diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt
index e2ea59bbca93..1651483a7048 100644
--- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -13,6 +13,7 @@ Required properties:
- "renesas,xhci-r8a7793" for r8a7793 SoC
- "renesas,xhci-r8a7795" for r8a7795 SoC
- "renesas,xhci-r8a7796" for r8a7796 SoC
+ - "renesas,xhci-r8a77965" for r8a77965 SoC
- "renesas,rcar-gen2-xhci" for a generic R-Car Gen2 or RZ/G1 compatible
device
- "renesas,rcar-gen3-xhci" for a generic R-Car Gen3 compatible device
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
new file mode 100644
index 000000000000..2c815a7f1ba7
--- /dev/null
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -0,0 +1,62 @@
+#
+# Feature name: membarrier-sync-core
+# Kconfig: ARCH_HAS_MEMBARRIER_SYNC_CORE
+# description: arch supports core serializing membarrier
+#
+# Architecture requirements
+#
+# * arm64
+#
+# Rely on eret context synchronization when returning from IPI handler, and
+# when returning to user-space.
+#
+# * x86
+#
+# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it uses both IRET and SYSEXIT to go back to user-space. The IRET
+# instruction is core serializing, but not SYSEXIT.
+#
+# x86-64 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it can return to user-space through either SYSRETL (compat code),
+# SYSRETQ, or IRET.
+#
+# Given that neither SYSRET{L,Q}, nor SYSEXIT, are core serializing, we rely
+# instead on write_cr3() performed by switch_mm() to provide core serialization
+# after changing the current mm, and deal with the special case of kthread ->
+# uthread (temporarily keeping the current mm in active_mm) by issuing a
+# sync_core_before_usermode() in that specific case.
+#
+ -----------------------
+ | arch |status|
+ -----------------------
+ | alpha: | TODO |
+ | arc: | TODO |
+ | arm: | TODO |
+ | arm64: | ok |
+ | blackfin: | TODO |
+ | c6x: | TODO |
+ | cris: | TODO |
+ | frv: | TODO |
+ | h8300: | TODO |
+ | hexagon: | TODO |
+ | ia64: | TODO |
+ | m32r: | TODO |
+ | m68k: | TODO |
+ | metag: | TODO |
+ | microblaze: | TODO |
+ | mips: | TODO |
+ | mn10300: | TODO |
+ | nios2: | TODO |
+ | openrisc: | TODO |
+ | parisc: | TODO |
+ | powerpc: | TODO |
+ | s390: | TODO |
+ | score: | TODO |
+ | sh: | TODO |
+ | sparc: | TODO |
+ | tile: | TODO |
+ | um: | TODO |
+ | unicore32: | TODO |
+ | x86: | ok |
+ | xtensa: | TODO |
+ -----------------------
diff --git a/Documentation/gpu/tve200.rst b/Documentation/gpu/tve200.rst
index 69b17b324e12..152ea9398f7e 100644
--- a/Documentation/gpu/tve200.rst
+++ b/Documentation/gpu/tve200.rst
@@ -3,4 +3,4 @@
==================================
.. kernel-doc:: drivers/gpu/drm/tve200/tve200_drv.c
- :doc: Faraday TV Encoder 200
+ :doc: Faraday TV Encoder TVE200 DRM Driver
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index d47702456926..65514c251318 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -28,8 +28,10 @@ Supported adapters:
* Intel Wildcat Point (PCH)
* Intel Wildcat Point-LP (PCH)
* Intel BayTrail (SOC)
+ * Intel Braswell (SOC)
* Intel Sunrise Point-H (PCH)
* Intel Sunrise Point-LP (PCH)
+ * Intel Kaby Lake-H (PCH)
* Intel DNV (SOC)
* Intel Broxton (SOC)
* Intel Lewisburg (PCH)
diff --git a/Documentation/ia64/serial.txt b/Documentation/ia64/serial.txt
index 6869c73de4e2..a63d2c54329b 100644
--- a/Documentation/ia64/serial.txt
+++ b/Documentation/ia64/serial.txt
@@ -111,7 +111,7 @@ TROUBLESHOOTING SERIAL CONSOLE PROBLEMS
- If you don't have an HCDP, the kernel doesn't know where
your console lives until the driver discovers serial
- devices. Use "console=uart, io,0x3f8" (or appropriate
+ devices. Use "console=uart,io,0x3f8" (or appropriate
address for your machine).
Kernel and init script output works fine, but no "login:" prompt:
diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt
index 60c482df1a38..818aca19612f 100644
--- a/Documentation/locking/mutex-design.txt
+++ b/Documentation/locking/mutex-design.txt
@@ -21,37 +21,23 @@ Implementation
--------------
Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
- 1: unlocked
- 0: locked, no waiters
- negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
+and implemented in kernel/locking/mutex.c. These locks use an atomic variable
+(->owner) to keep track of the lock state during its lifetime. The owner
+field actually contains a 'struct task_struct *' pointer to the current lock
+owner, and is therefore NULL if the lock is not currently owned. Since
+task_struct pointers are aligned to at least L1_CACHE_BYTES, the low (3) bits
+are used to store extra state (e.g., if the waiter list is non-empty). In its
+most basic form it also includes a
+wait-queue and a spinlock that serializes access to it. Furthermore,
+CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
+below in (ii).
When acquiring a mutex, there are three possible paths that can be
taken, depending on the state of the lock:
-(i) fastpath: tries to atomically acquire the lock by decrementing the
- counter. If it was already taken by another task it goes to the next
- possible path. This logic is architecture specific. On x86-64, the
- locking fastpath is 2 instructions:
-
- 0000000000000e10 <mutex_lock>:
- e21: f0 ff 0b lock decl (%rbx)
- e24: 79 08 jns e2e <mutex_lock+0x1e>
-
- the unlocking fastpath is equally tight:
-
- 0000000000000bc0 <mutex_unlock>:
- bc8: f0 ff 07 lock incl (%rdi)
- bcb: 7f 0a jg bd7 <mutex_unlock+0x17>
-
+(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
+ the current task. This only works in the uncontended case (cmpxchg() checks
+ against 0UL, so all 3 state bits above have to be 0). If the lock is
+ contended it goes to the next possible path.
(ii) midpath: aka optimistic spinning, tries to spin for acquisition
while the lock owner is running and there are no other tasks ready
@@ -143,11 +129,10 @@ Test if the mutex is taken:
Disadvantages
-------------
-Unlike its original design and purpose, 'struct mutex' is larger than
-most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
-as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
-for the largest lock in the kernel. Larger structure sizes mean more
-CPU cache and memory footprint.
+Unlike its original design and purpose, 'struct mutex' is among the largest
+locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
+is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
+cache and memory footprint.
When to use mutexes
-------------------
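Conceptually, the fastpath in (i) behaves like the following sketch, modelled
on the scheme described above (simplified; the real code in
kernel/locking/mutex.c also handles the low state bits and the slowpath
hand-off):

#include <linux/atomic.h>
#include <linux/sched.h>	/* current */

/* Simplified model of the lock/unlock fastpaths; 'owner' stands in
 * for the atomic ->owner field of struct mutex. */
static inline bool my_mutex_trylock_fast(atomic_long_t *owner)
{
	unsigned long zero = 0UL;

	/* Succeeds only if owner == 0UL, i.e. unlocked and all three
	 * low state bits clear; on success we become the owner. */
	return atomic_long_try_cmpxchg_acquire(owner, &zero,
					       (unsigned long)current);
}

static inline bool my_mutex_unlock_fast(atomic_long_t *owner)
{
	unsigned long curr = (unsigned long)current;

	/* Succeeds only if we own the lock and no state bits are set
	 * (e.g. no waiters); otherwise the slowpath must run. */
	return atomic_long_try_cmpxchg_release(owner, &curr, 0UL);
}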
diff --git a/Documentation/media/dmx.h.rst.exceptions b/Documentation/media/dmx.h.rst.exceptions
index 63f55a9ae2b1..a8c4239ed95b 100644
--- a/Documentation/media/dmx.h.rst.exceptions
+++ b/Documentation/media/dmx.h.rst.exceptions
@@ -50,9 +50,15 @@ replace typedef dmx_filter_t :c:type:`dmx_filter`
replace typedef dmx_pes_type_t :c:type:`dmx_pes_type`
replace typedef dmx_input_t :c:type:`dmx_input`
-ignore symbol DMX_OUT_DECODER
-ignore symbol DMX_OUT_TAP
-ignore symbol DMX_OUT_TS_TAP
-ignore symbol DMX_OUT_TSDEMUX_TAP
+replace symbol DMX_BUFFER_FLAG_HAD_CRC32_DISCARD :c:type:`dmx_buffer_flags`
+replace symbol DMX_BUFFER_FLAG_TEI :c:type:`dmx_buffer_flags`
+replace symbol DMX_BUFFER_PKT_COUNTER_MISMATCH :c:type:`dmx_buffer_flags`
+replace symbol DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED :c:type:`dmx_buffer_flags`
+replace symbol DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR :c:type:`dmx_buffer_flags`
+
+replace symbol DMX_OUT_DECODER :c:type:`dmx_output`
+replace symbol DMX_OUT_TAP :c:type:`dmx_output`
+replace symbol DMX_OUT_TS_TAP :c:type:`dmx_output`
+replace symbol DMX_OUT_TSDEMUX_TAP :c:type:`dmx_output`
replace ioctl DMX_DQBUF dmx_qbuf
diff --git a/Documentation/media/uapi/dvb/dmx-qbuf.rst b/Documentation/media/uapi/dvb/dmx-qbuf.rst
index b48c4931658e..be5a4c6f1904 100644
--- a/Documentation/media/uapi/dvb/dmx-qbuf.rst
+++ b/Documentation/media/uapi/dvb/dmx-qbuf.rst
@@ -51,9 +51,10 @@ out to disk. Buffers remain locked until dequeued, until the
the device is closed.
Applications call the ``DMX_DQBUF`` ioctl to dequeue a filled
-(capturing) buffer from the driver's outgoing queue. They just set the ``reserved`` field array to zero. When ``DMX_DQBUF`` is called with a
-pointer to this structure, the driver fills the remaining fields or
-returns an error code.
+(capturing) buffer from the driver's outgoing queue.
+They just set the ``index`` field with the buffer ID to be dequeued.
+When ``DMX_DQBUF`` is called with a pointer to struct :c:type:`dmx_buffer`,
+the driver fills the remaining fields or returns an error code.
By default ``DMX_DQBUF`` blocks when no buffer is in the outgoing
queue. When the ``O_NONBLOCK`` flag was given to the
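A hedged user-space sketch of the dequeue step described above (the demux
device path is an example, and in a real application DMX_REQBUFS/DMX_QBUF and
mmap() setup must have happened first):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/dvb/dmx.h>

/* Dequeue one filled buffer from a hypothetical demux device. */
int main(void)
{
	struct dmx_buffer buf;
	int fd = open("/dev/dvb/adapter0/demux0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&buf, 0, sizeof(buf));
	if (ioctl(fd, DMX_DQBUF, &buf) < 0)
		perror("DMX_DQBUF");
	else
		printf("dequeued index %u, %u bytes\n",
		       buf.index, buf.bytesused);
	return 0;
}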
diff --git a/Documentation/networking/ice.txt b/Documentation/networking/ice.txt
new file mode 100644
index 000000000000..6261c46378e1
--- /dev/null
+++ b/Documentation/networking/ice.txt
@@ -0,0 +1,39 @@
+Intel(R) Ethernet Connection E800 Series Linux Driver
+===================================================================
+
+Intel ice Linux driver.
+Copyright(c) 2018 Intel Corporation.
+
+Contents
+========
+- Enabling the driver
+- Support
+
+The driver in this release supports Intel's E800 Series of products. For
+more information, visit Intel's support page at http://support.intel.com.
+
+Enabling the driver
+===================
+
+The driver is enabled via the standard kernel configuration system,
+using the make command:
+
+ make oldconfig/silentoldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at:
+
+ -> Device Drivers
+ -> Network device support (NETDEVICES [=y])
+ -> Ethernet driver support
+ -> Intel devices
+ -> Intel(R) Ethernet Connection E800 Series Support
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+If an issue is identified with the released source code, please email
+the maintainer listed in the MAINTAINERS file.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a553d4e4a0fb..1d1120753ae8 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -755,13 +755,13 @@ udp_rmem_min - INTEGER
Minimal size of receive buffer used by UDP sockets in moderation.
Each UDP socket is able to use the size for receiving data, even if
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
- Default: 1 page
+ Default: 4K
udp_wmem_min - INTEGER
Minimal size of send buffer used by UDP sockets in moderation.
Each UDP socket is able to use the size for sending data, even if
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
- Default: 1 page
+ Default: 4K
CIPSOv4 Variables:
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
FALSE: disabled
Default: FALSE
+fib_multipath_hash_policy - INTEGER
+ Controls which hash policy to use for multipath routes.
+ Default: 0 (Layer 3)
+ Possible values:
+ 0 - Layer 3 (source and destination addresses plus flow label)
+ 1 - Layer 4 (standard 5-tuple)
+
anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6
echo reply
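For example, layer 4 hashing can be selected by writing 1 to the corresponding
/proc entry; the path below assumes the IPv6 knob documented above:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Select layer-4 (5-tuple) multipath hashing. The path assumes the
 * IPv6 knob described above; adjust for your tree if needed. */
int main(void)
{
	int fd = open("/proc/sys/net/ipv6/fib_multipath_hash_policy",
		      O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) < 0)
		perror("write");
	close(fd);
	return 0;
}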
@@ -2094,7 +2101,7 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
It is guaranteed to each SCTP socket (but not association) even
under moderate memory pressure.
- Default: 1 page
+ Default: 4K
sctp_wmem - vector of 3 INTEGERs: min, default, max
Currently this tunable has no effect.
diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
index 291a01264967..fe46d4867e2d 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -72,11 +72,6 @@ this flag, a process must first signal intent by setting a socket option:
if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
error(1, errno, "setsockopt zerocopy");
-Setting the socket option only works when the socket is in its initial
-(TCP_CLOSED) state. Trying to set the option for a socket returned by accept(),
-for example, will lead to an EBUSY error. In this case, the option should be set
-to the listening socket and it will be inherited by the accepted sockets.
-
Transmission
------------
diff --git a/Documentation/networking/net_dim.txt b/Documentation/networking/net_dim.txt
new file mode 100644
index 000000000000..9cb31c5e2dcd
--- /dev/null
+++ b/Documentation/networking/net_dim.txt
@@ -0,0 +1,174 @@
+Net DIM - Generic Network Dynamic Interrupt Moderation
+======================================================
+
+Author:
+ Tal Gilboa <[email protected]>
+
+
+Contents
+=========
+
+- Assumptions
+- Introduction
+- The Net DIM Algorithm
+- Registering a Network Device to DIM
+- Example
+
+Part 0: Assumptions
+======================
+
+This document assumes the reader has basic knowledge in network drivers
+and in general interrupt moderation.
+
+
+Part I: Introduction
+======================
+
+Dynamic Interrupt Moderation (DIM) (in networking) refers to changing the
+interrupt moderation configuration of a channel in order to optimize packet
+processing. The mechanism includes an algorithm which decides if and how to
+change moderation parameters for a channel, usually by performing an analysis on
+runtime data sampled from the system. Net DIM is such a mechanism. In each
+iteration of the algorithm, it analyses a given sample of the data, compares it
+to the previous sample and if required, it can decide to change some of the
+interrupt moderation configuration fields. The data sample is composed of data
+bandwidth, the number of packets and the number of events. The time between
+samples is also measured. Net DIM compares the current and the previous data and
+returns an adjusted interrupt moderation configuration object. In some cases,
+the algorithm might decide not to change anything. The configuration fields are
+the minimum duration (microseconds) allowed between events and the maximum
+number of wanted packets per event. The Net DIM algorithm ascribes more
+importance to increasing bandwidth than to reducing the interrupt rate.
+
+
+Part II: The Net DIM Algorithm
+===============================
+
+Each iteration of the Net DIM algorithm follows these steps:
+1. Calculates a new data sample.
+2. Compares it to the previous sample.
+3. Makes a decision - suggests interrupt moderation configuration fields.
+4. Schedules a work function, which applies the suggested configuration.
+
+The first two steps are straightforward, both the new and the previous data are
+supplied by the driver registered to Net DIM. The previous data is the new data
+supplied to the previous iteration. The comparison step checks the difference
+between the new and previous data and decides on the result of the last step.
+A step is deemed "better" if bandwidth increases and "worse" if bandwidth
+decreases. If there is no change in bandwidth, the packet rate is
+compared in a similar fashion - increase == "better" and decrease == "worse".
+In case there is no change in the packet rate as well, the interrupt rate is
+compared. Here the algorithm tries to optimize for lower interrupt rate so an
+increase in the interrupt rate is considered "worse" and a decrease is
+considered "better". Step #2 has an optimization for avoiding false results: it
+only considers a difference between samples as valid if it is greater than a
+certain percentage. Also, since Net DIM does not measure anything by itself, it
+assumes the data provided by the driver is valid.
+
+Step #3 decides on the suggested configuration based on the result from step #2
+and the internal state of the algorithm. The states reflect the "direction" of
+the algorithm: is it going left (reducing moderation), right (increasing
+moderation) or standing still. Another optimization is that if a decision
+to stay still is made multiple times, the interval between iterations of the
+algorithm would increase in order to reduce calculation overhead. Also, after
+"parking" on one of the most left or most right decisions, the algorithm may
+decide to verify this decision by taking a step in the other direction. This is
+done in order to avoid getting stuck in a "deep sleep" scenario. Once a
+decision is made, an interrupt moderation configuration is selected from
+the predefined profiles.
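+
+For illustration, such a profile table might look like the following. The
+values here are made up for this sketch; the real profiles live in the Net DIM
+header and the registered driver:
+
+struct my_moder_profile {
+	u16 usec;	/* minimum time between events */
+	u16 pkts;	/* maximum packets per event */
+};
+
+/* Left to right == increasing moderation */
+static const struct my_moder_profile my_rx_profiles[] = {
+	{ 1, 256 }, { 8, 128 }, { 64, 64 }, { 128, 32 }, { 256, 16 },
+};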
+
+The last step is to notify the registered driver that it should apply the
+suggested configuration. This is done by scheduling a work function, defined by
+the Net DIM API and provided by the registered driver.
+
+As you can see, Net DIM itself does not actively interact with the system. It
+would have trouble making the correct decisions if the wrong data is supplied to
+it, and it would be useless if the work function did not apply the suggested
+configuration. This does, however, allow the registered driver some room for
+manoeuvre as it may provide partial data or ignore the algorithm suggestion
+under some conditions.
+
+
+Part III: Registering a Network Device to DIM
+==============================================
+
+The Net DIM API exposes the main function net_dim(struct net_dim *dim,
+struct net_dim_sample end_sample). This function is the entry point to the Net
+DIM algorithm and has to be called every time the driver would like to check if
+it should change interrupt moderation parameters. The driver should provide two
+data structures: struct net_dim and struct net_dim_sample. Struct net_dim
+describes the state of DIM for a specific object (RX queue, TX queue,
+other queues, etc.). This includes the current selected profile, previous data
+samples, the callback function provided by the driver and more.
+Struct net_dim_sample describes a data sample, which will be compared to the
+data sample stored in struct net_dim in order to decide on the algorithm's next
+step. The sample should include bytes, packets and interrupts, measured by
+the driver.
+
+In order to use Net DIM from a networking driver, the driver needs to call the
+main net_dim() function. The recommended method is to call net_dim() on each
+interrupt. Since Net DIM has built-in moderation and might decide to skip
+iterations under certain conditions, there is no need to moderate the net_dim()
+calls as well. As mentioned above, the driver needs to provide an object of type
+struct net_dim to the net_dim() function call. It is advised for each entity
+using Net DIM to hold a struct net_dim as part of its data structure and use it
+as the main Net DIM API object. The struct net_dim_sample should hold the latest
+bytes, packets and interrupts count. There is no need to perform any
+calculations; just include the raw data.
+
+The net_dim() call itself does not return anything. Instead Net DIM relies on
+the driver to provide a callback function, which is called when the algorithm
+decides to make a change in the interrupt moderation parameters. This callback
+will be scheduled and run in a separate thread in order not to add overhead to
+the data flow. After the work is done, the Net DIM algorithm needs to be set to
+the proper state in order to move to the next iteration.
+
+
+Part IV: Example
+=================
+
+The following code demonstrates how to register a driver to Net DIM. The
+example is not complete, but it should make the outline of the usage clear.
+
+my_driver.c:
+
+#include <linux/net_dim.h>
+
+/* Callback for net DIM to schedule on a decision to change moderation */
+void my_driver_do_dim_work(struct work_struct *work)
+{
+ /* Get struct net_dim from struct work_struct */
+ struct net_dim *dim = container_of(work, struct net_dim,
+ work);
+ /* Do interrupt moderation related stuff */
+ ...
+
+ /* Signal net DIM work is done and it should move to next iteration */
+ dim->state = NET_DIM_START_MEASURE;
+}
+
+/* My driver's interrupt handler */
+int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...)
+{
+ ...
+ /* A struct to hold current measured data */
+ struct net_dim_sample dim_sample;
+ ...
+ /* Initiate data sample struct with current data */
+ net_dim_sample(my_entity->events,
+ my_entity->packets,
+ my_entity->bytes,
+ &dim_sample);
+ /* Call net DIM */
+ net_dim(&my_entity->dim, dim_sample);
+ ...
+}
+
+/* My entity's initialization function (my_entity was already allocated) */
+int my_driver_init_my_entity(struct my_driver_entity *my_entity, ...)
+{
+ ...
+ /* Initiate struct work_struct with my driver's callback function */
+ INIT_WORK(&my_entity->dim.work, my_driver_do_dim_work);
+ ...
+}
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index bf654845556e..999eb41da81d 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -7,15 +7,12 @@ socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
i) capture network traffic with utilities like tcpdump, ii) transmit network
traffic, or any other that needs raw access to network interface.
-You can find the latest version of this document at:
- http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap
-
Howto can be found at:
- http://wiki.gnu-log.net (packet_mmap)
+ https://sites.google.com/site/packetmmap/
Please send your comments to
Ulisses Alonso Camaró <[email protected]>
- Johann Baudy <[email protected]>
+ Johann Baudy
-------------------------------------------------------------------------------
+ Why use PACKET_MMAP
@@ -51,17 +48,8 @@ From the user standpoint, you should use the higher level libpcap library, which
is a de facto standard, portable across nearly all operating systems
including Win32.
-Said that, at time of this writing, official libpcap 0.8.1 is out and doesn't include
-support for PACKET_MMAP, and also probably the libpcap included in your distribution.
-
-I'm aware of two implementations of PACKET_MMAP in libpcap:
-
- http://wiki.ipxwarzone.com/ (by Simon Patarin, based on libpcap 0.6.2)
- http://public.lanl.gov/cpw/ (by Phil Wood, based on lastest libpcap)
-
-The rest of this document is intended for people who want to understand
-the low level details or want to improve libpcap by including PACKET_MMAP
-support.
+Packet MMAP support was integrated into libpcap around the time of version 1.3.0;
+TPACKET_V3 support was added in version 1.5.0.
--------------------------------------------------------------------------------
+ How to use mmap() directly to improve capture process
@@ -174,7 +162,7 @@ As capture, each frame contains two parts:
/* bind socket to eth0 */
bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
- A complete tutorial is available at: http://wiki.gnu-log.net/
+ A complete tutorial is available at: https://sites.google.com/site/packetmmap/
By default, the user should put data at :
frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt
index 2f09455a993a..aca542ec125c 100644
--- a/Documentation/networking/segmentation-offloads.txt
+++ b/Documentation/networking/segmentation-offloads.txt
@@ -13,14 +13,15 @@ The following technologies are described:
* Generic Segmentation Offload - GSO
* Generic Receive Offload - GRO
* Partial Generic Segmentation Offload - GSO_PARTIAL
+ * SCTP acceleration with GSO - GSO_BY_FRAGS
TCP Segmentation Offload
========================
TCP segmentation allows a device to segment a single frame into multiple
frames with a data payload size specified in skb_shinfo()->gso_size.
-When TCP segmentation requested the bit for either SKB_GSO_TCP or
-SKB_GSO_TCP6 should be set in skb_shinfo()->gso_type and
+When TCP segmentation is requested the bit for either SKB_GSO_TCPV4 or
+SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and
skb_shinfo()->gso_size should be set to a non-zero value.
TCP segmentation is dependent on support for the use of partial checksum
@@ -49,6 +50,10 @@ datagram into multiple IPv4 fragments. Many of the requirements for UDP
fragmentation offload are the same as TSO. However the IPv4 ID for
fragments should not increment as a single IPv4 datagram is fragmented.
+UFO is deprecated: modern kernels will no longer generate UFO skbs, but can
+still receive them from tuntap and similar devices. Offload of UDP-based
+tunnel protocols is still supported.
+
IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads
========================================================
@@ -83,10 +88,10 @@ SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the
fact that the outer header also requests to have a non-zero checksum
included in the outer header.
-Finally there is SKB_GSO_REMCSUM which indicates that a given tunnel header
-has requested a remote checksum offload. In this case the inner headers
-will be left with a partial checksum and only the outer header checksum
-will be computed.
+Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel
+header has requested a remote checksum offload. In this case the inner
+headers will be left with a partial checksum and only the outer header
+checksum will be computed.
Generic Segmentation Offload
============================
@@ -128,3 +133,38 @@ values for if the header was simply duplicated. The one exception to this
is the outer IPv4 ID field. It is up to the device drivers to guarantee
that the IPv4 ID field is incremented in the case that a given header does
not have the DF bit set.
+
+SCTP acceleration with GSO
+===========================
+
+SCTP - despite the lack of hardware support - can still take advantage of
+GSO to pass one large packet through the network stack, rather than
+multiple small packets.
+
+This requires a different approach than other offloads, as SCTP packets
+cannot simply be segmented to (P)MTU. Rather, the chunks must be contained in
+IP segments, padding respected. So unlike regular GSO, SCTP can't just
+generate a big skb, set gso_size to the fragmentation point and deliver it
+to IP layer.
+
+Instead, the SCTP protocol layer builds an skb with the segments correctly
+padded and stored as chained skbs, and skb_segment() splits based on those.
+To signal this, gso_size is set to the special value GSO_BY_FRAGS.
+
+Therefore, any code in the core networking stack must be aware of the
+possibility that gso_size will be GSO_BY_FRAGS and handle that case
+appropriately.
+
+There are some helpers to make this easier:
+
+ - skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if
+ an skb is an SCTP GSO skb.
+
+ - For size checks, the skb_gso_validate_*_len family of helpers correctly
+ considers GSO_BY_FRAGS.
+
+ - For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size
+ will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs.
+
+This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
+set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
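+
+As an illustrative sketch, code that must special-case SCTP GSO skbs can be
+guarded as follows:
+
+	if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) {
+		/* gso_size is GSO_BY_FRAGS: segment boundaries come from
+		 * the chained skbs, not from a fixed gso_size value.
+		 */
+	}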
diff --git a/Documentation/networking/tls.txt b/Documentation/networking/tls.txt
index 77ed00631c12..58b5ef75f1b7 100644
--- a/Documentation/networking/tls.txt
+++ b/Documentation/networking/tls.txt
@@ -48,6 +48,9 @@ the transmit and the receive into the kernel.
setsockopt(sock, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info));
+Transmit and receive are set separately, but the setup is the same, using either
+TLS_TX or TLS_RX.
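+
+For example, to enable decryption of received records (assuming crypto_info
+was populated the same way as for the transmit path):
+
+ setsockopt(sock, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info));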
+
Sending TLS application data
----------------------------
@@ -79,6 +82,28 @@ for memory), or the encryption will always succeed. If send() returns
-ENOMEM and some data was left on the socket buffer from a previous
call using MSG_MORE, the MSG_MORE data is left on the socket buffer.
+Receiving TLS application data
+------------------------------
+
+After setting the TLS_RX socket option, all recv family socket calls
+are decrypted using the TLS parameters provided. A full TLS record must
+be received before decryption can happen.
+
+ char buffer[16384];
+ recv(sock, buffer, 16384, 0);
+
+Received data is decrypted directly into the user buffer if it is
+large enough, and no additional allocations occur. If the userspace
+buffer is too small, data is decrypted in the kernel and copied to
+userspace.
+
+EINVAL is returned if the TLS version in the received message does not
+match the version passed in setsockopt.
+
+EMSGSIZE is returned if the received message is too big.
+
+EBADMSG is returned if decryption failed for any other reason.
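+
+These cases can be told apart via errno after a failed call, e.g. (sketch):
+
+ int ret = recv(sock, buffer, sizeof(buffer), 0);
+
+ if (ret < 0 && errno == EBADMSG) {
+   /* Decryption/authentication failed; the data cannot be trusted. */
+ }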
+
Send TLS control messages
-------------------------
@@ -118,6 +143,43 @@ using a record of type @record_type.
Control message data should be provided unencrypted, and will be
encrypted by the kernel.
+Receiving TLS control messages
+------------------------------
+
+TLS control messages are passed in the userspace buffer, with message
+type passed via cmsg. If no cmsg buffer is provided, an error is
+returned if a control message is received. Data messages may be
+received without a cmsg buffer set.
+
+ char buffer[16384];
+ char cmsg_buf[CMSG_SPACE(sizeof(unsigned char))];
+ struct msghdr msg = {0};
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct iovec msg_iov;
+ msg_iov.iov_base = buffer;
+ msg_iov.iov_len = 16384;
+
+ msg.msg_iov = &msg_iov;
+ msg.msg_iovlen = 1;
+
+ int ret = recvmsg(sock, &msg, 0 /* flags */);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg->cmsg_level == SOL_TLS &&
+ cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
+ int record_type = *((unsigned char *)CMSG_DATA(cmsg));
+ // Do something with record_type, and control message data in
+ // buffer.
+ //
+ // Note that record_type may equal application data (23).
+ } else {
+ // Buffer contains application data.
+ }
+
+recv will never return data from mixed types of TLS records.
+
Integrating in to userspace TLS library
---------------------------------------
@@ -126,10 +188,10 @@ layer of a userspace TLS library.
A patchset to OpenSSL to use ktls as the record layer is here:
-https://github.com/Mellanox/tls-openssl
+https://github.com/Mellanox/openssl/commits/tls_rx2
An example of calling send directly after a handshake using
gnutls. Since it doesn't implement a full record layer, control
messages are not supported:
-https://github.com/Mellanox/tls-af_ktls_tool
+https://github.com/ktls/af_ktls-tool/commits/RX
diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt
index ae8fef86b832..11e904ee073f 100644
--- a/Documentation/ptp/ptp.txt
+++ b/Documentation/ptp/ptp.txt
@@ -18,7 +18,6 @@
- Adjust clock frequency
+ Ancillary clock features
- - One short or periodic alarms, with signal delivery to user program
- Time stamp external events
- Period output signals configurable from user space
- Synchronization of the Linux system time via the PPS subsystem
@@ -48,9 +47,7 @@
User space programs may control the clock using standardized
ioctls. A program may query, enable, configure, and disable the
ancillary clock features. User space can receive time stamped
- events via blocking read() and poll(). One shot and periodic
- signals may be configured via the POSIX timer_settime() system
- call.
+ events via blocking read() and poll().
** Writing clock drivers
diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py
index 39aa9e8697cc..fbedcc39460b 100644
--- a/Documentation/sphinx/kerneldoc.py
+++ b/Documentation/sphinx/kerneldoc.py
@@ -36,8 +36,7 @@ import glob
from docutils import nodes, statemachine
from docutils.statemachine import ViewList
-from docutils.parsers.rst import directives
-from sphinx.util.compat import Directive
+from docutils.parsers.rst import directives, Directive
from sphinx.ext.autodoc import AutodocReporter
__version__ = '1.0'
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 35c62f522754..5992602469d8 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -270,6 +270,18 @@ optmem_max
Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
of struct cmsghdr structures with appended data.
+fb_tunnels_only_for_init_net
+----------------------------
+
+Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
+sit0, ip6tnl0, ip6gre0) are automatically created when a new
+network namespace is created, provided the corresponding tunnel is present
+in the initial network namespace.
+If set to 1, these devices are not automatically created, and
+user space is responsible for creating them if needed.
+
+Default : 0 (for compatibility reasons)
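+
+For example, to disable automatic fallback tunnel creation in new namespaces:
+
+ # echo 1 > /proc/sys/net/core/fb_tunnels_only_for_init_net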
+
2. /proc/sys/net/unix - Parameters for Unix domain sockets
-------------------------------------------------------
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 792fa8717d13..d6b3ff51a14f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -123,14 +123,15 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
flag KVM_VM_MIPS_VZ.
-4.3 KVM_GET_MSR_INDEX_LIST
+4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
-Capability: basic
+Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
Architectures: x86
-Type: system
+Type: system ioctl
Parameters: struct kvm_msr_list (in/out)
Returns: 0 on success; -1 on error
Errors:
+ EFAULT: the msr index list cannot be read from or written to
E2BIG: the msr index list is too big to fit in the array specified by
the user.
@@ -139,16 +140,23 @@ struct kvm_msr_list {
__u32 indices[0];
};
-This ioctl returns the guest msrs that are supported. The list varies
-by kvm version and host processor, but does not change otherwise. The
-user fills in the size of the indices array in nmsrs, and in return
-kvm adjusts nmsrs to reflect the actual number of msrs and fills in
-the indices array with their numbers.
+The user fills in the size of the indices array in nmsrs, and in return
+kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
+indices array with their numbers.
+
+KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported. The list
+varies by kvm version and host processor, but does not change otherwise.
Note: if kvm indicates support for MCE (KVM_CAP_MCE), then the MCE bank MSRs are
not returned in the MSR list, as different vcpus can have a different number
of banks, as set via the KVM_X86_SETUP_MCE ioctl.
+KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed
+to the KVM_GET_MSRS system ioctl. This lets userspace probe host capabilities
+and processor features that are exposed via MSRs (e.g., VMX capabilities).
+This list also varies by kvm version and host processor, but does not change
+otherwise.
+
4.4 KVM_CHECK_EXTENSION
@@ -475,14 +483,22 @@ Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
4.18 KVM_GET_MSRS
-Capability: basic
+Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
Architectures: x86
-Type: vcpu ioctl
+Type: system ioctl, vcpu ioctl
Parameters: struct kvm_msrs (in/out)
-Returns: 0 on success, -1 on error
+Returns: number of msrs successfully returned;
+ -1 on error
+
+When used as a system ioctl:
+Reads the values of MSR-based features that are available for the VM. This
+is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values.
+The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST
+in a system ioctl.
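+
+As an illustrative sketch (hypothetical wrapper struct, single entry), reading
+one MSR-based feature value from the system fd could look like:
+
+ struct { struct kvm_msrs hdr; struct kvm_msr_entry entry; } m = { 0 };
+
+ m.hdr.nmsrs = 1;
+ m.entry.index = ...;  /* taken from KVM_GET_MSR_FEATURE_INDEX_LIST */
+ ret = ioctl(kvm_fd, KVM_GET_MSRS, &m);  /* kvm_fd refers to /dev/kvm */
+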
+When used as a vcpu ioctl:
Reads model-specific registers from the vcpu. Supported msr indices can
-be obtained using KVM_GET_MSR_INDEX_LIST.
+be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.
struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index dcab6dc11e3b..87a7506f31c2 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit
|| || before enabling paravirtualized
|| || tlb flush.
------------------------------------------------------------------------------
+KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
+ || || can be enabled by setting bit 2
+ || || when writing to msr 0x4b564d02
+------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 1ebecc115dc6..f3f0d57ced8e 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
when asynchronous page faults are enabled on the vcpu 0 when
disabled. Bit 1 is 1 if asynchronous page faults can be injected
when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
- are delivered to L1 as #PF vmexits.
+ are delivered to L1 as #PF vmexits. Bit 2 can be set only if
+ KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
First 4 byte of 64 byte memory location will be written to by
the hypervisor at the time of asynchronous page fault (APF)
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 756fd76b78a6..71c30984e94d 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -671,7 +671,7 @@ occupancy of the real time threads on these cores.
# mkdir p1
Move the cpus 4-7 over to p1
-# echo f0 > p0/cpus
+# echo f0 > p1/cpus
View the llc occupancy snapshot
diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt
index f3e9d7e9ed6c..2953e3ec9a02 100644
--- a/Documentation/x86/topology.txt
+++ b/Documentation/x86/topology.txt
@@ -108,7 +108,7 @@ The topology of a system is described in the units of:
The number of online threads is also printed in /proc/cpuinfo "siblings."
- - topology_sibling_mask():
+ - topology_sibling_cpumask():
The cpumask contains all online threads in the core to which a thread
belongs.
diff --git a/MAINTAINERS b/MAINTAINERS
index 3bdc260e36b7..9107d9241564 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1238,7 +1238,7 @@ F: drivers/clk/at91
ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
M: Nicolas Ferre <[email protected]>
-M: Alexandre Belloni <[email protected]>
+M: Alexandre Belloni <[email protected]>
L: [email protected] (moderated for non-subscribers)
W: http://www.linux4sam.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/nferre/linux-at91.git
@@ -1590,7 +1590,7 @@ ARM/Marvell Dove/MV78xx0/Orion SOC support
M: Jason Cooper <[email protected]>
M: Andrew Lunn <[email protected]>
M: Sebastian Hesselbarth <[email protected]>
-M: Gregory Clement <[email protected]>
+M: Gregory Clement <[email protected]>
L: [email protected] (moderated for non-subscribers)
S: Maintained
F: Documentation/devicetree/bindings/soc/dove/
@@ -1604,7 +1604,7 @@ F: arch/arm/boot/dts/orion5x*
ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support
M: Jason Cooper <[email protected]>
M: Andrew Lunn <[email protected]>
-M: Gregory Clement <[email protected]>
+M: Gregory Clement <[email protected]>
M: Sebastian Hesselbarth <[email protected]>
L: [email protected] (moderated for non-subscribers)
S: Maintained
@@ -1999,8 +1999,10 @@ M: Maxime Coquelin <[email protected]>
M: Alexandre Torgue <[email protected]>
L: [email protected] (moderated for non-subscribers)
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/mcoquelin/stm32.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git stm32-next
N: stm32
+F: arch/arm/boot/dts/stm32*
+F: arch/arm/mach-stm32/
F: drivers/clocksource/armv7m_systick.c
ARM/TANGO ARCHITECTURE
@@ -7061,6 +7063,7 @@ F: Documentation/networking/ixgbe.txt
F: Documentation/networking/ixgbevf.txt
F: Documentation/networking/i40e.txt
F: Documentation/networking/i40evf.txt
+F: Documentation/networking/ice.txt
F: drivers/net/ethernet/intel/
F: drivers/net/ethernet/intel/*/
F: include/linux/avf/virtchnl.h
@@ -7600,8 +7603,10 @@ F: mm/kasan/
F: scripts/Makefile.kasan
KCONFIG
+M: Masahiro Yamada <[email protected]>
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig
-S: Orphan
+S: Maintained
F: Documentation/kbuild/kconfig-language.txt
F: scripts/kconfig/
@@ -7909,7 +7914,6 @@ S: Maintained
F: scripts/leaking_addresses.pl
LED SUBSYSTEM
-M: Richard Purdie <[email protected]>
M: Jacek Anaszewski <[email protected]>
M: Pavel Machek <[email protected]>
@@ -8593,6 +8597,15 @@ S: Maintained
F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
F: drivers/iio/potentiometer/mcp4531.c
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M: Xue Liu <[email protected]>
+W: https://github.com/xueliu/mcr20a-linux
+S: Maintained
+F: drivers/net/ieee802154/mcr20a.c
+F: drivers/net/ieee802154/mcr20a.h
+F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
M: William Breathitt Gray <[email protected]>
@@ -9149,6 +9162,13 @@ F: drivers/net/dsa/microchip/*
F: include/linux/platform_data/microchip-ksz.h
F: Documentation/devicetree/bindings/net/dsa/ksz.txt
+MICROCHIP LAN743X ETHERNET DRIVER
+M: Bryan Whitehead <[email protected]>
+M: Microchip Linux Driver Support <[email protected]>
+S: Maintained
+F: drivers/net/ethernet/microchip/lan743x_*
+
MICROCHIP USB251XB DRIVER
M: Richard Leitner <[email protected]>
@@ -9206,6 +9226,7 @@ MIPS GENERIC PLATFORM
M: Paul Burton <[email protected]>
S: Supported
+F: Documentation/devicetree/bindings/power/mti,mips-cpc.txt
F: arch/mips/generic/
F: arch/mips/tools/generic-board-config.sh
@@ -9921,6 +9942,13 @@ F: Documentation/ABI/stable/sysfs-bus-nvmem
F: include/linux/nvmem-consumer.h
F: include/linux/nvmem-provider.h
+NXP SGTL5000 DRIVER
+M: Fabio Estevam <[email protected]>
+L: [email protected] (moderated for non-subscribers)
+S: Maintained
+F: Documentation/devicetree/bindings/sound/sgtl5000.txt
+F: sound/soc/codecs/sgtl5000*
+
NXP TDA998X DRM DRIVER
M: Russell King <[email protected]>
S: Supported
@@ -9945,6 +9973,7 @@ F: drivers/nfc/nxp-nci
OBJTOOL
M: Josh Poimboeuf <[email protected]>
+M: Peter Zijlstra <[email protected]>
S: Supported
F: tools/objtool/
@@ -10322,7 +10351,7 @@ F: drivers/oprofile/
F: include/linux/oprofile.h
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
-M: Mark Fasheh <[email protected]>
+M: Mark Fasheh <[email protected]>
M: Joel Becker <[email protected]>
L: [email protected] (moderated for non-subscribers)
W: http://ocfs2.wiki.kernel.org
@@ -10832,6 +10861,7 @@ F: drivers/platform/x86/peaq-wmi.c
PER-CPU MEMORY ALLOCATOR
M: Tejun Heo <[email protected]>
M: Christoph Lameter <[email protected]>
+M: Dennis Zhou <[email protected]>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
S: Maintained
F: include/linux/percpu*.h
@@ -10925,6 +10955,17 @@ L: [email protected]
S: Supported
F: drivers/pinctrl/pinctrl-at91-pio4.*
+PIN CONTROLLER - FREESCALE
+M: Dong Aisheng <[email protected]>
+M: Fabio Estevam <[email protected]>
+M: Shawn Guo <[email protected]>
+M: Stefan Agner <[email protected]>
+R: Pengutronix Kernel Team <[email protected]>
+S: Maintained
+F: drivers/pinctrl/freescale/
+F: Documentation/devicetree/bindings/pinctrl/fsl,*
+
PIN CONTROLLER - INTEL
M: Mika Westerberg <[email protected]>
M: Heikki Krogerus <[email protected]>
@@ -12091,6 +12132,7 @@ M: Sylwester Nawrocki <[email protected]>
L: [email protected] (moderated for non-subscribers)
S: Supported
F: sound/soc/samsung/
+F: Documentation/devicetree/bindings/sound/samsung*
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
M: Krzysztof Kozlowski <[email protected]>
diff --git a/Makefile b/Makefile
index 79ad2bfa24b6..486db374d1c1 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 16
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc6
NAME = Fearless Coyote
# *DOCUMENTATION*
@@ -388,7 +388,7 @@ PYTHON = python
CHECK = sparse
CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
- -Wbitwise -Wno-return-void $(CF)
+ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF)
NOSTDINC_FLAGS =
CFLAGS_MODULE =
AFLAGS_MODULE =
@@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
endif
+RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+export RETPOLINE_CFLAGS
+
ifeq ($(config-targets),1)
# ===========================================================================
# *config targets only - make sure prerequisites are updated, and descend
@@ -579,10 +584,9 @@ ifeq ($(KBUILD_EXTMOD),)
# To avoid any implicit rule to kick in, define an empty command
$(KCONFIG_CONFIG) include/config/auto.conf.cmd: ;
-# If .config is newer than include/config/auto.conf, someone tinkered
-# with it and forgot to run make oldconfig.
-# if auto.conf.cmd is missing then we are probably in a cleaned tree so
-# we execute the config step to be sure to catch updated Kconfig files
+# The actual configuration files used during the build are stored in
+# include/generated/ and include/config/. Update them if .config is newer than
+# include/config/auto.conf (which mirrors .config).
include/config/%.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd
$(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig
else
@@ -822,6 +826,15 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
+# clang sets -fmerge-all-constants by default as an optimization, but this
+# is non-conforming behavior for C and in fact breaks the kernel, so we
+# need to disable it here generally.
+KBUILD_CFLAGS += $(call cc-option,-fno-merge-all-constants)
+
+# for gcc, -fno-merge-all-constants disables everything, but it is fine
+# to have actual conforming behavior enabled.
+KBUILD_CFLAGS += $(call cc-option,-fmerge-constants)
+
# Make sure -fstack-check isn't enabled (like gentoo apparently did)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
@@ -857,8 +870,7 @@ KBUILD_AFLAGS += $(ARCH_AFLAGS) $(KAFLAGS)
KBUILD_CFLAGS += $(ARCH_CFLAGS) $(KCFLAGS)
# Use --build-id when available.
-LDFLAGS_BUILD_ID := $(patsubst -Wl$(comma)%,%,\
- $(call cc-ldoption, -Wl$(comma)--build-id,))
+LDFLAGS_BUILD_ID := $(call ld-option, --build-id)
KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 46ebf14aed4e..8a2b331e43fe 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -6,7 +6,6 @@
* Atomic exchange routines.
*/
-#define __ASM__MB
#define ____xchg(type, args...) __xchg ## type ## _local(args)
#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args)
#include <asm/xchg.h>
@@ -33,10 +32,6 @@
cmpxchg_local((ptr), (o), (n)); \
})
-#ifdef CONFIG_SMP
-#undef __ASM__MB
-#define __ASM__MB "\tmb\n"
-#endif
#undef ____xchg
#undef ____cmpxchg
#define ____xchg(type, args...) __xchg ##type(args)
@@ -64,7 +59,6 @@
cmpxchg((ptr), (o), (n)); \
})
-#undef __ASM__MB
#undef ____cmpxchg
#endif /* _ALPHA_CMPXCHG_H */
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index 68dfb3cb7145..e2b59fac5257 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -12,6 +12,10 @@
* Atomic exchange.
* Since it can be used to implement critical sections
* it must clobber "memory" (also for interrupts in UP).
+ *
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
*/
static inline unsigned long
@@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)
{
unsigned long ret, tmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" insbl %1,%4,%1\n"
@@ -28,12 +33,12 @@ ____xchg(_u8, volatile char *m, unsigned long val)
" or %1,%2,%2\n"
" stq_c %2,0(%3)\n"
" beq %2,2f\n"
- __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
: "r" ((long)m), "1" (val) : "memory");
+ smp_mb();
return ret;
}
@@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)
{
unsigned long ret, tmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" inswl %1,%4,%1\n"
@@ -52,12 +58,12 @@ ____xchg(_u16, volatile short *m, unsigned long val)
" or %1,%2,%2\n"
" stq_c %2,0(%3)\n"
" beq %2,2f\n"
- __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
: "r" ((long)m), "1" (val) : "memory");
+ smp_mb();
return ret;
}
@@ -67,17 +73,18 @@ ____xchg(_u32, volatile int *m, unsigned long val)
{
unsigned long dummy;
+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%4\n"
" bis $31,%3,%1\n"
" stl_c %1,%2\n"
" beq %1,2f\n"
- __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
: "=&r" (val), "=&r" (dummy), "=m" (*m)
: "rI" (val), "m" (*m) : "memory");
+ smp_mb();
return val;
}
@@ -87,17 +94,18 @@ ____xchg(_u64, volatile long *m, unsigned long val)
{
unsigned long dummy;
+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%4\n"
" bis $31,%3,%1\n"
" stq_c %1,%2\n"
" beq %1,2f\n"
- __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
: "=&r" (val), "=&r" (dummy), "=m" (*m)
: "rI" (val), "m" (*m) : "memory");
+ smp_mb();
return val;
}
@@ -128,10 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*
- * The memory barrier should be placed in SMP only when we actually
- * make the change. If we don't change anything (so if the returned
- * prev is equal to old) then we aren't acquiring anything new and
- * we don't need any memory barrier as far I can tell.
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
+ * The trailing memory barrier is placed in SMP unconditionally, in
+ * order to guarantee that dependency ordering is preserved when a
+ * dependency is headed by an unsuccessful operation.
*/
static inline unsigned long
@@ -139,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
{
unsigned long prev, tmp, cmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" insbl %1,%5,%1\n"
@@ -150,13 +161,13 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
" or %1,%2,%2\n"
" stq_c %2,0(%4)\n"
" beq %2,3f\n"
- __ASM__MB
"2:\n"
".subsection 2\n"
"3: br 1b\n"
".previous"
: "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
: "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+ smp_mb();
return prev;
}
@@ -166,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
{
unsigned long prev, tmp, cmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" inswl %1,%5,%1\n"
@@ -177,13 +189,13 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
" or %1,%2,%2\n"
" stq_c %2,0(%4)\n"
" beq %2,3f\n"
- __ASM__MB
"2:\n"
".subsection 2\n"
"3: br 1b\n"
".previous"
: "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
: "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+ smp_mb();
return prev;
}
@@ -193,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
{
unsigned long prev, cmp;
+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%5\n"
" cmpeq %0,%3,%1\n"
@@ -200,13 +213,13 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
" mov %4,%1\n"
" stl_c %1,%2\n"
" beq %1,3f\n"
- __ASM__MB
"2:\n"
".subsection 2\n"
"3: br 1b\n"
".previous"
: "=&r"(prev), "=&r"(cmp), "=m"(*m)
: "r"((long) old), "r"(new), "m"(*m) : "memory");
+ smp_mb();
return prev;
}
@@ -216,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
{
unsigned long prev, cmp;
+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%5\n"
" cmpeq %0,%3,%1\n"
@@ -223,13 +237,13 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
" mov %4,%1\n"
" stq_c %1,%2\n"
" beq %1,3f\n"
- __ASM__MB
"2:\n"
".subsection 2\n"
"3: br 1b\n"
".previous"
: "=&r"(prev), "=&r"(cmp), "=m"(*m)
: "r"((long) old), "r"(new), "m"(*m) : "memory");
+ smp_mb();
return prev;
}
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index f3a80cf164cc..d76bf4a83740 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -484,7 +484,6 @@ config ARC_CURR_IN_REG
config ARC_EMUL_UNALIGNED
bool "Emulate unaligned memory access (userspace only)"
- default N
select SYSCTL_ARCH_UNALIGN_NO_WARN
select SYSCTL_ARCH_UNALIGN_ALLOW
depends on ISA_ARCOMPACT
diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts
index 70aec7d6ca60..626b694c7be7 100644
--- a/arch/arc/boot/dts/axs101.dts
+++ b/arch/arc/boot/dts/axs101.dts
@@ -17,6 +17,6 @@
compatible = "snps,axs101", "snps,arc-sdp";
chosen {
- bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60";
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60 print-fatal-signals=1";
};
};
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index 74d070cd3c13..47b74fbc403c 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -214,13 +214,13 @@
};
eeprom@0x54{
- compatible = "24c01";
+ compatible = "atmel,24c01";
reg = <0x54>;
pagesize = <0x8>;
};
eeprom@0x57{
- compatible = "24c04";
+ compatible = "atmel,24c04";
reg = <0x57>;
pagesize = <0x8>;
};
diff --git a/arch/arc/boot/dts/haps_hs_idu.dts b/arch/arc/boot/dts/haps_hs_idu.dts
index 215cddd0b63b..0c603308aeb3 100644
--- a/arch/arc/boot/dts/haps_hs_idu.dts
+++ b/arch/arc/boot/dts/haps_hs_idu.dts
@@ -22,7 +22,7 @@
};
chosen {
- bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug";
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
};
aliases {
diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts
index 5ee96b067c08..ff2f2c70c545 100644
--- a/arch/arc/boot/dts/nsim_700.dts
+++ b/arch/arc/boot/dts/nsim_700.dts
@@ -17,7 +17,7 @@
interrupt-parent = <&core_intc>;
chosen {
- bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+ bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
};
aliases {
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index 8d787b251f73..8e2489b16b0a 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -24,7 +24,7 @@
};
chosen {
- bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+ bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
};
aliases {
diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts
index 4f98ebf71fd8..ed12f494721d 100644
--- a/arch/arc/boot/dts/nsim_hs_idu.dts
+++ b/arch/arc/boot/dts/nsim_hs_idu.dts
@@ -15,7 +15,7 @@
interrupt-parent = <&core_intc>;
chosen {
- bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+ bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
};
aliases {
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index 3c391ba565ed..7842e5eb4ab5 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,7 +20,7 @@
/* this is for console on PGU */
/* bootargs = "console=tty0 consoleblank=0"; */
/* this is for console on serial */
- bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24";
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1";
};
aliases {
diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts
index 14a727cbf4c9..b8838cf2b4ec 100644
--- a/arch/arc/boot/dts/nsimosci_hs.dts
+++ b/arch/arc/boot/dts/nsimosci_hs.dts
@@ -20,7 +20,7 @@
/* this is for console on PGU */
/* bootargs = "console=tty0 consoleblank=0"; */
/* this is for console on serial */
- bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24";
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1";
};
aliases {
diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts
index 5052917d4a99..72a2c723f1f7 100644
--- a/arch/arc/boot/dts/nsimosci_hs_idu.dts
+++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts
@@ -18,7 +18,7 @@
chosen {
/* this is for console on serial */
- bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24";
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24 print-fatal-signals=1";
};
aliases {
diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h
index ea022d47896c..21ec82466d62 100644
--- a/arch/arc/include/asm/bug.h
+++ b/arch/arc/include/asm/bug.h
@@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address);
#define BUG() do { \
pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
- dump_stack(); \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
} while (0)
#define HAVE_ARCH_BUG
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 257a68f3c2fe..309f4e6721b3 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -184,7 +184,7 @@
.macro FAKE_RET_FROM_EXCPN
lr r9, [status32]
bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK)
- or r9, r9, (STATUS_L_MASK|STATUS_IE_MASK)
+ or r9, r9, STATUS_IE_MASK
kflag r9
.endm
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index f61a52b01625..5fe84e481654 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -22,10 +22,79 @@ static DEFINE_RAW_SPINLOCK(mcip_lock);
static char smp_cpuinfo_buf[128];
+/*
+ * Set mask to halt GFRC if any online core in SMP cluster is halted.
+ * Only works for ARC HS v3.0+; on earlier versions it has no effect.
+ */
+static void mcip_update_gfrc_halt_mask(int cpu)
+{
+ struct bcr_generic gfrc;
+ unsigned long flags;
+ u32 gfrc_halt_mask;
+
+ READ_BCR(ARC_REG_GFRC_BUILD, gfrc);
+
+ /*
+ * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in
+ * GFRC 0x3 version.
+ */
+ if (gfrc.ver < 0x3)
+ return;
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+
+ __mcip_cmd(CMD_GFRC_READ_CORE, 0);
+ gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
+ gfrc_halt_mask |= BIT(cpu);
+ __mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask);
+
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void mcip_update_debug_halt_mask(int cpu)
+{
+ u32 mcip_mask = 0;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+
+ /*
+ * mcip_mask is the same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK
+ * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK
+ * and CMD_DEBUG_READ_SELECT.
+ */
+ __mcip_cmd(CMD_DEBUG_READ_SELECT, 0);
+ mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
+
+ mcip_mask |= BIT(cpu);
+
+ __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask);
+ /*
+ * The parameter specifies the halt cause:
+ * STATUS32[H]/actionpoint/breakpoint/self-halt
+ * We choose all of them (0xF).
+ */
+ __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask);
+
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
static void mcip_setup_per_cpu(int cpu)
{
+ struct mcip_bcr mp;
+
+ READ_BCR(ARC_REG_MCIP_BCR, mp);
+
smp_ipi_irq_setup(cpu, IPI_IRQ);
smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ);
+
+ /* Update GFRC halt mask as new CPU came online */
+ if (mp.gfrc)
+ mcip_update_gfrc_halt_mask(cpu);
+
+ /* Update MCIP debug mask as new CPU came online */
+ if (mp.dbg)
+ mcip_update_debug_halt_mask(cpu);
}
static void mcip_ipi_send(int cpu)
@@ -101,11 +170,6 @@ static void mcip_probe_n_setup(void)
IS_AVAIL1(mp.gfrc, "GFRC"));
cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
-
- if (mp.dbg) {
- __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
- __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
- }
}
struct plat_smp_ops plat_smp_ops = {
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 9d27331fe69a..b2cae79a25d7 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -51,7 +51,7 @@ static const struct id_to_str arc_cpu_rel[] = {
{ 0x51, "R2.0" },
{ 0x52, "R2.1" },
{ 0x53, "R3.0" },
- { 0x54, "R4.0" },
+ { 0x54, "R3.10a" },
#endif
{ 0x00, NULL }
};
@@ -373,7 +373,7 @@ static void arc_chk_core_config(void)
{
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
int saved = 0, present = 0;
- char *opt_nm = NULL;;
+ char *opt_nm = NULL;
if (!cpu->extn.timer0)
panic("Timer0 is not present!\n");
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index efe8b4200a67..21d86c36692b 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -24,6 +24,7 @@
#include <linux/reboot.h>
#include <linux/irqdomain.h>
#include <linux/export.h>
+#include <linux/of_fdt.h>
#include <asm/processor.h>
#include <asm/setup.h>
@@ -47,6 +48,42 @@ void __init smp_prepare_boot_cpu(void)
{
}
+static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask)
+{
+ unsigned long dt_root = of_get_flat_dt_root();
+ const char *buf;
+
+ buf = of_get_flat_dt_prop(dt_root, name, NULL);
+ if (!buf)
+ return -EINVAL;
+
+ if (cpulist_parse(buf, cpumask))
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Read from DeviceTree and set up the cpu possible mask. If there is no
+ * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist.
+ */
+static void __init arc_init_cpu_possible(void)
+{
+ struct cpumask cpumask;
+
+ if (arc_get_cpu_map("possible-cpus", &cpumask)) {
+ pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n",
+ NR_CPUS);
+
+ cpumask_setall(&cpumask);
+ }
+
+ if (!cpumask_test_cpu(0, &cpumask))
+ panic("Master cpu (cpu[0]) is missed in cpu possible mask!");
+
+ init_cpu_possible(&cpumask);
+}
+
/*
* Called from setup_arch() before calling setup_processor()
*
@@ -58,10 +95,7 @@ void __init smp_prepare_boot_cpu(void)
*/
void __init smp_init_cpus(void)
{
- unsigned int i;
-
- for (i = 0; i < NR_CPUS; i++)
- set_cpu_possible(i, true);
+ arc_init_cpu_possible();
if (plat_smp_ops.init_early_smp)
plat_smp_ops.init_early_smp();
@@ -70,16 +104,12 @@ void __init smp_init_cpus(void)
/* called from init ( ) => process 1 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- int i;
-
/*
* if platform didn't set the present map already, do it now
* boot cpu is set to present already by init/main.c
*/
- if (num_present_cpus() <= 1) {
- for (i = 0; i < max_cpus; i++)
- set_cpu_present(i, true);
- }
+ if (num_present_cpus() <= 1)
+ init_cpu_present(cpu_possible_mask);
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 333daab7def0..183391d4d33a 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -366,7 +366,7 @@ static void init_unwind_hdr(struct unwind_table *table,
return;
ret_err:
- panic("Attention !!! Dwarf FDE parsing errors\n");;
+ panic("Attention !!! Dwarf FDE parsing errors\n");
}
#ifdef CONFIG_MODULES
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index eee924dfffa6..2072f3451e9c 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -780,7 +780,10 @@ noinline static void slc_entire_op(const int op)
write_aux_reg(r, ctrl);
- write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
+ if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */
+ write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1);
+ else
+ write_aux_reg(ARC_REG_SLC_FLUSH, 0x1);
/* Make sure "busy" bit reports correct status, see STAR 9001165532 */
read_aux_reg(r);
diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts
index 8b2fa9a49967..c28afb242393 100644
--- a/arch/arm/boot/dts/armada-370-rd.dts
+++ b/arch/arm/boot/dts/armada-370-rd.dts
@@ -56,6 +56,7 @@
/dts-v1/;
#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/gpio/gpio.h>
#include "armada-370.dtsi"
@@ -243,6 +244,8 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x10>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
ports {
#address-cells = <1>;
@@ -278,6 +281,35 @@
};
};
};
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switchphy0: switchphy@0 {
+ reg = <0>;
+ interrupt-parent = <&switch>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy1: switchphy@1 {
+ reg = <1>;
+ interrupt-parent = <&switch>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy2: switchphy@2 {
+ reg = <2>;
+ interrupt-parent = <&switch>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy3: switchphy@3 {
+ reg = <3>;
+ interrupt-parent = <&switch>;
+ interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
};
};
diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi
index 18045c38bcf1..db7cded1b7ad 100644
--- a/arch/arm/boot/dts/bcm11351.dtsi
+++ b/arch/arm/boot/dts/bcm11351.dtsi
@@ -55,7 +55,7 @@
<0x3ff00100 0x100>;
};
- smc@0x3404c000 {
+ smc@3404c000 {
compatible = "brcm,bcm11351-smc", "brcm,kona-smc";
reg = <0x3404c000 0x400>; /* 1 KiB in SRAM */
};
diff --git a/arch/arm/boot/dts/bcm21664.dtsi b/arch/arm/boot/dts/bcm21664.dtsi
index 6dde95f21cef..266f2611dc22 100644
--- a/arch/arm/boot/dts/bcm21664.dtsi
+++ b/arch/arm/boot/dts/bcm21664.dtsi
@@ -55,7 +55,7 @@
<0x3ff00100 0x100>;
};
- smc@0x3404e000 {
+ smc@3404e000 {
compatible = "brcm,bcm21664-smc", "brcm,kona-smc";
reg = <0x3404e000 0x400>; /* 1 KiB in SRAM */
};
diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi
index 0e3d2a5ff208..a5c3824c8056 100644
--- a/arch/arm/boot/dts/bcm2835.dtsi
+++ b/arch/arm/boot/dts/bcm2835.dtsi
@@ -18,10 +18,10 @@
soc {
ranges = <0x7e000000 0x20000000 0x02000000>;
dma-ranges = <0x40000000 0x00000000 0x20000000>;
+ };
- arm-pmu {
- compatible = "arm,arm1176-pmu";
- };
+ arm-pmu {
+ compatible = "arm,arm1176-pmu";
};
};
diff --git a/arch/arm/boot/dts/bcm2836.dtsi b/arch/arm/boot/dts/bcm2836.dtsi
index 1dfd76442777..c933e8413884 100644
--- a/arch/arm/boot/dts/bcm2836.dtsi
+++ b/arch/arm/boot/dts/bcm2836.dtsi
@@ -9,19 +9,19 @@
<0x40000000 0x40000000 0x00001000>;
dma-ranges = <0xc0000000 0x00000000 0x3f000000>;
- local_intc: local_intc {
+ local_intc: local_intc@40000000 {
compatible = "brcm,bcm2836-l1-intc";
reg = <0x40000000 0x100>;
interrupt-controller;
#interrupt-cells = <2>;
interrupt-parent = <&local_intc>;
};
+ };
- arm-pmu {
- compatible = "arm,cortex-a7-pmu";
- interrupt-parent = <&local_intc>;
- interrupts = <9 IRQ_TYPE_LEVEL_HIGH>;
- };
+ arm-pmu {
+ compatible = "arm,cortex-a7-pmu";
+ interrupt-parent = <&local_intc>;
+ interrupts = <9 IRQ_TYPE_LEVEL_HIGH>;
};
timer {
diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi
index efa7d3387ab2..7704bb029605 100644
--- a/arch/arm/boot/dts/bcm2837.dtsi
+++ b/arch/arm/boot/dts/bcm2837.dtsi
@@ -8,7 +8,7 @@
<0x40000000 0x40000000 0x00001000>;
dma-ranges = <0xc0000000 0x00000000 0x3f000000>;
- local_intc: local_intc {
+ local_intc: local_intc@40000000 {
compatible = "brcm,bcm2836-l1-intc";
reg = <0x40000000 0x100>;
interrupt-controller;
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 18db25a5a66e..9d293decf8d3 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -465,7 +465,7 @@
status = "disabled";
};
- aux: aux@0x7e215000 {
+ aux: aux@7e215000 {
compatible = "brcm,bcm2835-aux";
#clock-cells = <1>;
reg = <0x7e215000 0x8>;
diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts
index 6a44b8021702..f0e2008f7490 100644
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -49,7 +49,7 @@
memory {
device_type = "memory";
- reg = <0x60000000 0x80000000>;
+ reg = <0x60000000 0x20000000>;
};
gpio-restart {
diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts
index 08568ce24d06..da8bb9d60f99 100644
--- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts
+++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts
@@ -269,7 +269,7 @@
sata: sata@46000000 {
/* The ROM uses this muxmode */
- cortina,gemini-ata-muxmode = <3>;
+ cortina,gemini-ata-muxmode = <0>;
cortina,gemini-enable-sata-bridge;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6dl-icore-rqs.dts b/arch/arm/boot/dts/imx6dl-icore-rqs.dts
index cf42c2f5cdc7..1281bc39b7ab 100644
--- a/arch/arm/boot/dts/imx6dl-icore-rqs.dts
+++ b/arch/arm/boot/dts/imx6dl-icore-rqs.dts
@@ -42,7 +42,7 @@
/dts-v1/;
-#include "imx6q.dtsi"
+#include "imx6dl.dtsi"
#include "imx6qdl-icore-rqs.dtsi"
/ {
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index c1aa7a4518fb..a30ee9fcb3ae 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -71,6 +71,8 @@
};
&i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_pins>;
clock-frequency = <2600000>;
twl: twl@48 {
@@ -189,7 +191,12 @@
>;
};
-
+ i2c1_pins: pinmux_i2c1_pins {
+ pinctrl-single,pins = <
+ OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */
+ OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */
+ >;
+ };
};
&omap3_pmx_wkup {
diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index b50b796e15c7..47915447a826 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -66,6 +66,8 @@
};
&i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_pins>;
clock-frequency = <2600000>;
twl: twl@48 {
@@ -136,6 +138,12 @@
OMAP3_CORE1_IOPAD(0x21b8, PIN_INPUT | MUX_MODE0) /* hsusb0_data7.hsusb0_data7 */
>;
};
+ i2c1_pins: pinmux_i2c1_pins {
+ pinctrl-single,pins = <
+ OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */
+ OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */
+ >;
+ };
};
&uart2 {
diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts
index ec2c8baef62a..592e17fd4eeb 100644
--- a/arch/arm/boot/dts/omap5-uevm.dts
+++ b/arch/arm/boot/dts/omap5-uevm.dts
@@ -47,7 +47,7 @@
gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; /* gpio3_83 */
wakeup-source;
autorepeat;
- debounce_interval = <50>;
+ debounce-interval = <50>;
};
};
diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi
index 3b704cfed69a..a97458112ff6 100644
--- a/arch/arm/boot/dts/rk3036.dtsi
+++ b/arch/arm/boot/dts/rk3036.dtsi
@@ -280,7 +280,7 @@
max-frequency = <37500000>;
clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
<&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
fifo-depth = <0x100>;
interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
resets = <&cru SRST_SDIO>;
@@ -298,7 +298,7 @@
max-frequency = <37500000>;
clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
<&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
default-sample-phase = <158>;
disable-wp;
dmas = <&pdma 12>;
diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi
index 780ec3a99b21..341deaf62ff6 100644
--- a/arch/arm/boot/dts/rk322x.dtsi
+++ b/arch/arm/boot/dts/rk322x.dtsi
@@ -621,7 +621,7 @@
interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
<&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
fifo-depth = <0x100>;
pinctrl-names = "default";
pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>;
@@ -634,7 +634,7 @@
interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
<&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
fifo-depth = <0x100>;
pinctrl-names = "default";
pinctrl-0 = <&sdio_clk &sdio_cmd &sdio_bus4>;
@@ -649,7 +649,7 @@
max-frequency = <37500000>;
clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
<&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
bus-width = <8>;
default-sample-phase = <158>;
fifo-depth = <0x100>;
diff --git a/arch/arm/boot/dts/rk3288-phycore-som.dtsi b/arch/arm/boot/dts/rk3288-phycore-som.dtsi
index 99cfae875e12..5eae4776ffde 100644
--- a/arch/arm/boot/dts/rk3288-phycore-som.dtsi
+++ b/arch/arm/boot/dts/rk3288-phycore-som.dtsi
@@ -110,26 +110,6 @@
};
};
-&cpu0 {
- cpu0-supply = <&vdd_cpu>;
- operating-points = <
- /* KHz uV */
- 1800000 1400000
- 1608000 1350000
- 1512000 1300000
- 1416000 1200000
- 1200000 1100000
- 1008000 1050000
- 816000 1000000
- 696000 950000
- 600000 900000
- 408000 900000
- 312000 900000
- 216000 900000
- 126000 900000
- >;
-};
-
&emmc {
status = "okay";
bus-width = <8>;
diff --git a/arch/arm/boot/dts/zx296702.dtsi b/arch/arm/boot/dts/zx296702.dtsi
index 8a74efdb6360..240e7a23d81f 100644
--- a/arch/arm/boot/dts/zx296702.dtsi
+++ b/arch/arm/boot/dts/zx296702.dtsi
@@ -56,7 +56,7 @@
clocks = <&topclk ZX296702_A9_PERIPHCLK>;
};
- l2cc: l2-cache-controller@0x00c00000 {
+ l2cc: l2-cache-controller@c00000 {
compatible = "arm,pl310-cache";
reg = <0x00c00000 0x1000>;
cache-unified;
@@ -67,30 +67,30 @@
arm,double-linefill-incr = <0>;
};
- pcu: pcu@0xa0008000 {
+ pcu: pcu@a0008000 {
compatible = "zte,zx296702-pcu";
reg = <0xa0008000 0x1000>;
};
- topclk: topclk@0x09800000 {
+ topclk: topclk@9800000 {
compatible = "zte,zx296702-topcrm-clk";
reg = <0x09800000 0x1000>;
#clock-cells = <1>;
};
- lsp1clk: lsp1clk@0x09400000 {
+ lsp1clk: lsp1clk@9400000 {
compatible = "zte,zx296702-lsp1crpm-clk";
reg = <0x09400000 0x1000>;
#clock-cells = <1>;
};
- lsp0clk: lsp0clk@0x0b000000 {
+ lsp0clk: lsp0clk@b000000 {
compatible = "zte,zx296702-lsp0crpm-clk";
reg = <0x0b000000 0x1000>;
#clock-cells = <1>;
};
- uart0: serial@0x09405000 {
+ uart0: serial@9405000 {
compatible = "zte,zx296702-uart";
reg = <0x09405000 0x1000>;
interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
@@ -98,7 +98,7 @@
status = "disabled";
};
- uart1: serial@0x09406000 {
+ uart1: serial@9406000 {
compatible = "zte,zx296702-uart";
reg = <0x09406000 0x1000>;
interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
@@ -106,7 +106,7 @@
status = "disabled";
};
- mmc0: mmc@0x09408000 {
+ mmc0: mmc@9408000 {
compatible = "snps,dw-mshc";
#address-cells = <1>;
#size-cells = <0>;
@@ -119,7 +119,7 @@
status = "disabled";
};
- mmc1: mmc@0x0b003000 {
+ mmc1: mmc@b003000 {
compatible = "snps,dw-mshc";
#address-cells = <1>;
#size-cells = <0>;
@@ -132,7 +132,7 @@
status = "disabled";
};
- sysctrl: sysctrl@0xa0007000 {
+ sysctrl: sysctrl@a0007000 {
compatible = "zte,sysctrl", "syscon";
reg = <0xa0007000 0x1000>;
};
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 2f145c4af93a..92674f247a12 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -319,7 +319,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y
CONFIG_RC_CORE=m
CONFIG_MEDIA_CONTROLLER=y
CONFIG_VIDEO_V4L2_SUBDEV_API=y
-CONFIG_LIRC=m
+CONFIG_LIRC=y
CONFIG_RC_DEVICES=y
CONFIG_IR_RX51=m
CONFIG_V4L_PLATFORM_DRIVERS=y
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 629f8e9981f1..cf2701cb0de8 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -83,7 +83,7 @@ static void dummy_clock_access(struct timespec64 *ts)
}
static clock_access_fn __read_persistent_clock = dummy_clock_access;
-static clock_access_fn __read_boot_clock = dummy_clock_access;;
+static clock_access_fn __read_boot_clock = dummy_clock_access;
void read_persistent_clock64(struct timespec64 *ts)
{
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 5638ce0c9524..63d6b404d88e 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
KVM=../../../../virt/kvm
+CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve)
+
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
@@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
+CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE)
+
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
obj-$(CONFIG_KVM_ARM_HOST) += switch.o
+CFLAGS_switch.o += $(CFLAGS_ARMV7VE)
obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c
index 111bda8cdebd..be4b8b0a40ad 100644
--- a/arch/arm/kvm/hyp/banked-sr.c
+++ b/arch/arm/kvm/hyp/banked-sr.c
@@ -20,6 +20,10 @@
#include <asm/kvm_hyp.h>
+/*
+ * gcc before 4.9 doesn't understand -march=armv7ve, so we have to
+ * trick the assembler.
+ */
__asm__(".arch_extension virt");
void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
diff --git a/arch/arm/mach-clps711x/board-dt.c b/arch/arm/mach-clps711x/board-dt.c
index ee1f83b1a332..4c89a8e9a2e3 100644
--- a/arch/arm/mach-clps711x/board-dt.c
+++ b/arch/arm/mach-clps711x/board-dt.c
@@ -69,7 +69,7 @@ static void clps711x_restart(enum reboot_mode mode, const char *cmd)
soft_restart(0);
}
-static const char *clps711x_compat[] __initconst = {
+static const char *const clps711x_compat[] __initconst = {
"cirrus,ep7209",
NULL
};
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index e457f299cd44..d6b11907380c 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -368,7 +368,7 @@ static struct spi_eeprom at25640a = {
.flags = EE_ADDR2,
};
-static struct spi_board_info dm355_evm_spi_info[] __initconst = {
+static const struct spi_board_info dm355_evm_spi_info[] __initconst = {
{
.modalias = "at25",
.platform_data = &at25640a,
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index be997243447b..fad9a5611a5d 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -217,7 +217,7 @@ static struct spi_eeprom at25640a = {
.flags = EE_ADDR2,
};
-static struct spi_board_info dm355_leopard_spi_info[] __initconst = {
+static const struct spi_board_info dm355_leopard_spi_info[] __initconst = {
{
.modalias = "at25",
.platform_data = &at25640a,
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index e75741fb2c1d..e3780986d2a3 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -726,7 +726,7 @@ static struct spi_eeprom at25640 = {
.flags = EE_ADDR2,
};
-static struct spi_board_info dm365_evm_spi_info[] __initconst = {
+static const struct spi_board_info dm365_evm_spi_info[] __initconst = {
{
.modalias = "at25",
.platform_data = &at25640,
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 6b32dc527edc..2c20599cc350 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -41,7 +41,7 @@ config MACH_ARMADA_375
depends on ARCH_MULTI_V7
select ARMADA_370_XP_IRQ
select ARM_ERRATA_720789
- select ARM_ERRATA_753970
+ select PL310_ERRATA_753970
select ARM_GIC
select ARMADA_375_CLK
select HAVE_ARM_SCU
@@ -57,7 +57,7 @@ config MACH_ARMADA_38X
bool "Marvell Armada 380/385 boards"
depends on ARCH_MULTI_V7
select ARM_ERRATA_720789
- select ARM_ERRATA_753970
+ select PL310_ERRATA_753970
select ARM_GIC
select ARM_GLOBAL_TIMER
select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c
index 43e3e188f521..fa512413a471 100644
--- a/arch/arm/mach-omap1/clock.c
+++ b/arch/arm/mach-omap1/clock.c
@@ -1011,17 +1011,17 @@ static int clk_debugfs_register_one(struct clk *c)
return -ENOMEM;
c->dent = d;
- d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount);
+ d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount);
if (!d) {
err = -ENOMEM;
goto err_out;
}
- d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate);
+ d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate);
if (!d) {
err = -ENOMEM;
goto err_out;
}
- d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags);
+ d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags);
if (!d) {
err = -ENOMEM;
goto err_out;
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index 4bb6751864a5..fc5fb776a710 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -299,8 +299,6 @@ static void irq_save_context(void)
if (soc_is_dra7xx())
return;
- if (!sar_base)
- sar_base = omap4_get_sar_ram_base();
if (wakeupgen_ops && wakeupgen_ops->save_context)
wakeupgen_ops->save_context();
}
@@ -598,6 +596,8 @@ static int __init wakeupgen_init(struct device_node *node,
irq_hotplug_init();
irq_pm_init();
+ sar_base = omap4_get_sar_ram_base();
+
return 0;
}
IRQCHIP_DECLARE(ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init);
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 124f9af34a15..34156eca8e23 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -977,6 +977,9 @@ static int _enable_clocks(struct omap_hwmod *oh)
pr_debug("omap_hwmod: %s: enabling clocks\n", oh->name);
+ if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
+ _enable_optional_clocks(oh);
+
if (oh->_clk)
clk_enable(oh->_clk);
@@ -985,9 +988,6 @@ static int _enable_clocks(struct omap_hwmod *oh)
clk_enable(os->_clk);
}
- if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
- _enable_optional_clocks(oh);
-
/* The opt clocks are controlled by the device driver. */
return 0;
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 366158a54fcd..6f68576e5695 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -186,7 +186,7 @@ static void omap_pm_end(void)
cpu_idle_poll_ctrl(false);
}
-static void omap_pm_finish(void)
+static void omap_pm_wake(void)
{
if (soc_is_omap34xx())
omap_prcm_irq_complete();
@@ -196,7 +196,7 @@ static const struct platform_suspend_ops omap_pm_ops = {
.begin = omap_pm_begin,
.end = omap_pm_end,
.enter = omap_pm_enter,
- .finish = omap_pm_finish,
+ .wake = omap_pm_wake,
.valid = suspend_valid_only_mem,
};
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index ece09c9461f7..d61fbd7a2840 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -156,12 +156,6 @@ static struct clock_event_device clockevent_gpt = {
.tick_resume = omap2_gp_timer_shutdown,
};
-static struct property device_disabled = {
- .name = "status",
- .length = sizeof("disabled"),
- .value = "disabled",
-};
-
static const struct of_device_id omap_timer_match[] __initconst = {
{ .compatible = "ti,omap2420-timer", },
{ .compatible = "ti,omap3430-timer", },
@@ -203,8 +197,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id *
of_get_property(np, "ti,timer-secure", NULL)))
continue;
- if (!of_device_is_compatible(np, "ti,omap-counter32k"))
- of_add_property(np, &device_disabled);
+ if (!of_device_is_compatible(np, "ti,omap-counter32k")) {
+ struct property *prop;
+
+ prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+ if (!prop)
+ return NULL;
+ prop->name = "status";
+ prop->value = "disabled";
+ prop->length = strlen(prop->value);
+ of_add_property(np, prop);
+ }
return np;
}
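
Reviewer note on the timer.c hunk above: of_add_property() links the property object itself into the node's property list, so a single static device_disabled instance can only safely be attached to one node, and this helper can run more than once (clockevent and clocksource setup); each match therefore now gets its own kzalloc()ed property. A toy model of why sharing one list node between two lists goes wrong (add_property() is a hypothetical stand-in for the linking behaviour):

    #include <stdio.h>

    struct property {
        const char *name;
        struct property *next;
    };
    struct node {
        const char *name;
        struct property *props;
    };

    static void add_property(struct node *n, struct property *p)
    {
        p->next = n->props;        /* the list links through *p itself */
        n->props = p;
    }

    static void dump(const struct node *n)
    {
        printf("%s:", n->name);
        for (const struct property *p = n->props; p; p = p->next)
            printf(" %s", p->name);
        printf("\n");
    }

    int main(void)
    {
        static struct property disabled = { .name = "status" };
        struct property clocks = { .name = "clock-names" };
        struct node a = { .name = "timer@0" }, b = { .name = "timer@1" };

        add_property(&a, &clocks);
        add_property(&a, &disabled);
        dump(&a);                    /* timer@0: status clock-names */

        add_property(&b, &disabled); /* relinks the shared object...     */
        dump(&a);                    /* timer@0: status -- clock-names lost */
        return 0;
    }
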
diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig
index 2a7bb6ccdcb7..a810f4dd34b1 100644
--- a/arch/arm/mach-orion5x/Kconfig
+++ b/arch/arm/mach-orion5x/Kconfig
@@ -58,7 +58,6 @@ config MACH_KUROBOX_PRO
config MACH_DNS323
bool "D-Link DNS-323"
- select GENERIC_NET_UTILS
select I2C_BOARDINFO if I2C
help
Say 'Y' here if you want your kernel to support the
@@ -66,7 +65,6 @@ config MACH_DNS323
config MACH_TS209
bool "QNAP TS-109/TS-209"
- select GENERIC_NET_UTILS
help
Say 'Y' here if you want your kernel to support the
QNAP TS-109/TS-209 platform.
@@ -101,7 +99,6 @@ config MACH_LINKSTATION_LS_HGL
config MACH_TS409
bool "QNAP TS-409"
- select GENERIC_NET_UTILS
help
Say 'Y' here if you want your kernel to support the
QNAP TS-409 platform.
diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c
index cd483bfb5ca8..d13344b2ddcd 100644
--- a/arch/arm/mach-orion5x/dns323-setup.c
+++ b/arch/arm/mach-orion5x/dns323-setup.c
@@ -173,10 +173,42 @@ static struct mv643xx_eth_platform_data dns323_eth_data = {
.phy_addr = MV643XX_ETH_PHY_ADDR(8),
};
+/* dns323_parse_hex_*() taken from tsx09-common.c; should a common copy of these
+ * functions be kept somewhere?
+ */
+static int __init dns323_parse_hex_nibble(char n)
+{
+ if (n >= '0' && n <= '9')
+ return n - '0';
+
+ if (n >= 'A' && n <= 'F')
+ return n - 'A' + 10;
+
+ if (n >= 'a' && n <= 'f')
+ return n - 'a' + 10;
+
+ return -1;
+}
+
+static int __init dns323_parse_hex_byte(const char *b)
+{
+ int hi;
+ int lo;
+
+ hi = dns323_parse_hex_nibble(b[0]);
+ lo = dns323_parse_hex_nibble(b[1]);
+
+ if (hi < 0 || lo < 0)
+ return -1;
+
+ return (hi << 4) | lo;
+}
+
static int __init dns323_read_mac_addr(void)
{
u_int8_t addr[6];
- void __iomem *mac_page;
+ int i;
+ char *mac_page;
/* MAC address is stored as a regular ol' string in /dev/mtdblock4
* (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80).
@@ -185,8 +217,23 @@ static int __init dns323_read_mac_addr(void)
if (!mac_page)
return -ENOMEM;
- if (!mac_pton((__force const char *) mac_page, addr))
- goto error_fail;
+ /* Sanity check the string we're looking at */
+ for (i = 0; i < 5; i++) {
+ if (*(mac_page + (i * 3) + 2) != ':') {
+ goto error_fail;
+ }
+ }
+
+ for (i = 0; i < 6; i++) {
+ int byte;
+
+ byte = dns323_parse_hex_byte(mac_page + (i * 3));
+ if (byte < 0) {
+ goto error_fail;
+ }
+
+ addr[i] = byte;
+ }
iounmap(mac_page);
printk("DNS-323: Found ethernet MAC address: %pM\n", addr);
diff --git a/arch/arm/mach-orion5x/tsx09-common.c b/arch/arm/mach-orion5x/tsx09-common.c
index 89774985d380..905d4f2dd0b8 100644
--- a/arch/arm/mach-orion5x/tsx09-common.c
+++ b/arch/arm/mach-orion5x/tsx09-common.c
@@ -53,12 +53,53 @@ struct mv643xx_eth_platform_data qnap_tsx09_eth_data = {
.phy_addr = MV643XX_ETH_PHY_ADDR(8),
};
+static int __init qnap_tsx09_parse_hex_nibble(char n)
+{
+ if (n >= '0' && n <= '9')
+ return n - '0';
+
+ if (n >= 'A' && n <= 'F')
+ return n - 'A' + 10;
+
+ if (n >= 'a' && n <= 'f')
+ return n - 'a' + 10;
+
+ return -1;
+}
+
+static int __init qnap_tsx09_parse_hex_byte(const char *b)
+{
+ int hi;
+ int lo;
+
+ hi = qnap_tsx09_parse_hex_nibble(b[0]);
+ lo = qnap_tsx09_parse_hex_nibble(b[1]);
+
+ if (hi < 0 || lo < 0)
+ return -1;
+
+ return (hi << 4) | lo;
+}
+
static int __init qnap_tsx09_check_mac_addr(const char *addr_str)
{
u_int8_t addr[6];
+ int i;
- if (!mac_pton(addr_str, addr))
- return -1;
+ for (i = 0; i < 6; i++) {
+ int byte;
+
+ /*
+ * Enforce "xx:xx:xx:xx:xx:xx\n" format.
+ */
+ if (addr_str[(i * 3) + 2] != ((i < 5) ? ':' : '\n'))
+ return -1;
+
+ byte = qnap_tsx09_parse_hex_byte(addr_str + (i * 3));
+ if (byte < 0)
+ return -1;
+ addr[i] = byte;
+ }
printk(KERN_INFO "tsx09: found ethernet mac address %pM\n", addr);
@@ -77,12 +118,12 @@ void __init qnap_tsx09_find_mac_addr(u32 mem_base, u32 size)
unsigned long addr;
for (addr = mem_base; addr < (mem_base + size); addr += 1024) {
- void __iomem *nor_page;
+ char *nor_page;
int ret = 0;
nor_page = ioremap(addr, 1024);
if (nor_page != NULL) {
- ret = qnap_tsx09_check_mac_addr((__force const char *)nor_page);
+ ret = qnap_tsx09_check_mac_addr(nor_page);
iounmap(nor_page);
}
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index 57058ac46f49..7e5d7a083707 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -23,7 +23,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
-#include <linux/perf/arm_pmu.h>
#include <linux/regulator/machine.h>
#include <asm/outercache.h>
@@ -112,37 +111,6 @@ static void ux500_restart(enum reboot_mode mode, const char *cmd)
prcmu_system_reset(0);
}
-/*
- * The PMU IRQ lines of two cores are wired together into a single interrupt.
- * Bounce the interrupt to the other core if it's not ours.
- */
-static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler)
-{
- irqreturn_t ret = handler(irq, dev);
- int other = !smp_processor_id();
-
- if (ret == IRQ_NONE && cpu_online(other))
- irq_set_affinity(irq, cpumask_of(other));
-
- /*
- * We should be able to get away with the amount of IRQ_NONEs we give,
- * while still having the spurious IRQ detection code kick in if the
- * interrupt really starts hitting spuriously.
- */
- return ret;
-}
-
-static struct arm_pmu_platdata db8500_pmu_platdata = {
- .handle_irq = db8500_pmu_handler,
- .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD,
-};
-
-static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = {
- /* Requires call-back bindings. */
- OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata),
- {},
-};
-
static struct of_dev_auxdata u8540_auxdata_lookup[] __initdata = {
OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu", NULL),
{},
@@ -165,9 +133,6 @@ static void __init u8500_init_machine(void)
if (of_machine_is_compatible("st-ericsson,u8540"))
of_platform_populate(NULL, u8500_local_bus_nodes,
u8540_auxdata_lookup, NULL);
- else
- of_platform_populate(NULL, u8500_local_bus_nodes,
- u8500_auxdata_lookup, NULL);
}
static const char * stericsson_dt_platform_compat[] = {
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index aff6994950ba..a2399fd66e97 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -472,28 +472,27 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
/*****************************************************************************
* Ethernet switch
****************************************************************************/
-static __initconst const char *orion_ge00_mvmdio_bus_name = "orion-mii";
-static __initdata struct mdio_board_info
- orion_ge00_switch_board_info;
+static __initdata struct mdio_board_info orion_ge00_switch_board_info = {
+ .bus_id = "orion-mii",
+ .modalias = "mv88e6085",
+};
void __init orion_ge00_switch_init(struct dsa_chip_data *d)
{
- struct mdio_board_info *bd;
unsigned int i;
if (!IS_BUILTIN(CONFIG_PHYLIB))
return;
- for (i = 0; i < ARRAY_SIZE(d->port_names); i++)
- if (!strcmp(d->port_names[i], "cpu"))
+ for (i = 0; i < ARRAY_SIZE(d->port_names); i++) {
+ if (!strcmp(d->port_names[i], "cpu")) {
+ d->netdev[i] = &orion_ge00.dev;
break;
+ }
+ }
- bd = &orion_ge00_switch_board_info;
- bd->bus_id = orion_ge00_mvmdio_bus_name;
- bd->mdio_addr = d->sw_addr;
- d->netdev[i] = &orion_ge00.dev;
- strcpy(bd->modalias, "mv88e6085");
- bd->platform_data = d;
+ orion_ge00_switch_board_info.mdio_addr = d->sw_addr;
+ orion_ge00_switch_board_info.platform_data = d;
mdiobus_register_board_info(&orion_ge00_switch_board_info, 1);
}
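
Reviewer note: besides moving the compile-time-constant bus_id and modalias into a designated initializer, the rewrite closes what looks like a latent out-of-bounds store: the old code assigned d->netdev[i] after the loop, so when no "cpu" port matched, i equalled ARRAY_SIZE(d->port_names) and the write landed past the array. A sketch of the safe pattern (struct board_info only loosely mirrors struct mdio_board_info):

    #include <stdio.h>
    #include <string.h>

    struct board_info {              /* loose stand-in, not the real layout */
        const char *bus_id;
        char modalias[32];
        int mdio_addr;
    };

    static struct board_info info = {  /* constants set at build time */
        .bus_id   = "orion-mii",
        .modalias = "mv88e6085",
    };

    int main(void)
    {
        const char *port_names[4] = { "lan1", "lan2", "lan3", "lan4" };
        void *netdev[4] = { 0 };
        unsigned int i;

        for (i = 0; i < 4; i++) {
            if (!strcmp(port_names[i], "cpu")) {
                netdev[i] = &info;   /* write only inside the match */
                break;
            }
        }
        /* old pattern: netdev[i] = &info here, even with i == 4 */

        info.mdio_addr = 4;          /* runtime-only detail */
        printf("%s via %s @%d\n", info.modalias, info.bus_id, info.mdio_addr);
        return 0;
    }
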
diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
index a80632641b39..70c776ef7aa7 100644
--- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
@@ -165,14 +165,14 @@
uart_A: serial@24000 {
compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart";
- reg = <0x0 0x24000 0x0 0x14>;
+ reg = <0x0 0x24000 0x0 0x18>;
interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
};
uart_B: serial@23000 {
compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart";
- reg = <0x0 0x23000 0x0 0x14>;
+ reg = <0x0 0x23000 0x0 0x18>;
interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
index 6cb3c2a52baf..4ee2e7951482 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
@@ -235,14 +235,14 @@
uart_A: serial@84c0 {
compatible = "amlogic,meson-gx-uart";
- reg = <0x0 0x84c0 0x0 0x14>;
+ reg = <0x0 0x84c0 0x0 0x18>;
interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
};
uart_B: serial@84dc {
compatible = "amlogic,meson-gx-uart";
- reg = <0x0 0x84dc 0x0 0x14>;
+ reg = <0x0 0x84dc 0x0 0x18>;
interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
};
@@ -287,7 +287,7 @@
uart_C: serial@8700 {
compatible = "amlogic,meson-gx-uart";
- reg = <0x0 0x8700 0x0 0x14>;
+ reg = <0x0 0x8700 0x0 0x18>;
interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
};
@@ -404,14 +404,14 @@
uart_AO: serial@4c0 {
compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart";
- reg = <0x0 0x004c0 0x0 0x14>;
+ reg = <0x0 0x004c0 0x0 0x18>;
interrupts = <GIC_SPI 193 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
};
uart_AO_B: serial@4e0 {
compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart";
- reg = <0x0 0x004e0 0x0 0x14>;
+ reg = <0x0 0x004e0 0x0 0x18>;
interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index 4f355f17eed6..c8514110b9da 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -631,6 +631,7 @@
internal_phy: ethernet-phy@8 {
compatible = "ethernet-phy-id0181.4400", "ethernet-phy-ieee802.3-c22";
+ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
reg = <8>;
max-speed = <100>;
};
diff --git a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi
index 4220fbdcb24a..ff5c4c47b22b 100644
--- a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi
+++ b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi
@@ -98,7 +98,7 @@
clock-output-names = "clk125mhz";
};
- pci {
+ pcie@30000000 {
compatible = "pci-host-ecam-generic";
device_type = "pci";
#interrupt-cells = <1>;
@@ -118,6 +118,7 @@
ranges =
<0x02000000 0 0x40000000 0 0x40000000 0 0x20000000
0x43000000 0x40 0x00000000 0x40 0x00000000 0x20 0x00000000>;
+ bus-range = <0 0xff>;
interrupt-map-mask = <0 0 0 7>;
interrupt-map =
/* addr pin ic icaddr icintr */
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index e94fa1a53192..047641fe294c 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -51,7 +51,7 @@
#size-cells = <2>;
ranges;
- ramoops@0x21f00000 {
+ ramoops@21f00000 {
compatible = "ramoops";
reg = <0x0 0x21f00000 0x0 0x00100000>;
record-size = <0x00020000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index 9fbe4705ee88..94597e33c806 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -341,7 +341,7 @@
reg = <0 0x10005000 0 0x1000>;
};
- pio: pinctrl@0x10005000 {
+ pio: pinctrl@10005000 {
compatible = "mediatek,mt8173-pinctrl";
reg = <0 0x1000b000 0 0x1000>;
mediatek,pctl-regmap = <&syscfg_pctl_a>;
diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
index 492a011f14f6..1c8f1b86472d 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
@@ -140,16 +140,16 @@
};
agnoc@0 {
- qcom,pcie@00600000 {
+ qcom,pcie@600000 {
perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>;
};
- qcom,pcie@00608000 {
+ qcom,pcie@608000 {
status = "okay";
perst-gpio = <&msmgpio 130 GPIO_ACTIVE_LOW>;
};
- qcom,pcie@00610000 {
+ qcom,pcie@610000 {
status = "okay";
perst-gpio = <&msmgpio 114 GPIO_ACTIVE_LOW>;
};
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 4b2afcc4fdf4..0a6f7952bbb1 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -840,7 +840,7 @@
#size-cells = <1>;
ranges;
- pcie0: qcom,pcie@00600000 {
+ pcie0: qcom,pcie@600000 {
compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
status = "disabled";
power-domains = <&gcc PCIE0_GDSC>;
@@ -893,7 +893,7 @@
};
- pcie1: qcom,pcie@00608000 {
+ pcie1: qcom,pcie@608000 {
compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
power-domains = <&gcc PCIE1_GDSC>;
bus-range = <0x00 0xff>;
@@ -946,7 +946,7 @@
"bus_slave";
};
- pcie2: qcom,pcie@00610000 {
+ pcie2: qcom,pcie@610000 {
compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
power-domains = <&gcc PCIE2_GDSC>;
bus-range = <0x00 0xff>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index 3890468678ce..28257724a56e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -132,17 +132,16 @@
assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
clock_in_out = "input";
- /* shows instability at 1GBit right now */
- max-speed = <100>;
phy-supply = <&vcc_io>;
phy-mode = "rgmii";
pinctrl-names = "default";
pinctrl-0 = <&rgmiim1_pins>;
+ snps,force_thresh_dma_mode;
snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>;
snps,reset-active-low;
snps,reset-delays-us = <0 10000 50000>;
- tx_delay = <0x26>;
- rx_delay = <0x11>;
+ tx_delay = <0x24>;
+ rx_delay = <0x18>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index a037ee56fead..cae341554486 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -730,7 +730,7 @@
interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
<&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
fifo-depth = <0x100>;
status = "disabled";
};
@@ -741,7 +741,7 @@
interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
<&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
fifo-depth = <0x100>;
status = "disabled";
};
@@ -752,7 +752,7 @@
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
<&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
fifo-depth = <0x100>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
index aa4d07046a7b..03458ac44201 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
@@ -257,7 +257,7 @@
max-frequency = <150000000>;
clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>,
<&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>;
- clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+ clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
fifo-depth = <0x100>;
interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
resets = <&cru SRST_SDIO0>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi
index 0f873c897d0d..ce592a4c0c4c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi
@@ -457,7 +457,7 @@
assigned-clocks = <&cru SCLK_PCIEPHY_REF>;
assigned-clock-parents = <&cru SCLK_PCIEPHY_REF100M>;
assigned-clock-rates = <100000000>;
- ep-gpios = <&gpio3 RK_PB5 GPIO_ACTIVE_HIGH>;
+ ep-gpios = <&gpio2 RK_PA4 GPIO_ACTIVE_HIGH>;
num-lanes = <4>;
pinctrl-names = "default";
pinctrl-0 = <&pcie_clkreqn_cpm>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index 7aa2144e0d47..2605118d4b4c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -1739,8 +1739,8 @@
compatible = "rockchip,rk3399-edp";
reg = <0x0 0xff970000 0x0 0x8000>;
interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH 0>;
- clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>;
- clock-names = "dp", "pclk";
+ clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>, <&cru PCLK_VIO_GRF>;
+ clock-names = "dp", "pclk", "grf";
pinctrl-names = "default";
pinctrl-0 = <&edp_hpd>;
power-domains = <&power RK3399_PD_EDP>;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index be7bd19c87ec..350c76a1d15b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -20,7 +20,7 @@
#define MPIDR_UP_BITMASK (0x1 << 30)
#define MPIDR_MT_BITMASK (0x1 << 24)
-#define MPIDR_HWID_BITMASK 0xff00ffffff
+#define MPIDR_HWID_BITMASK UL(0xff00ffffff)
#define MPIDR_LEVEL_BITS_SHIFT 3
#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT)
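
Reviewer note: an unsuffixed hex literal takes the first of int, unsigned int, long, ... that can represent it, so 0xff00ffffff was a signed long on an LP64 target; wrapping it in UL() makes the constant explicitly unsigned long in C while still expanding to a bare literal when pulled into assembly. A quick C11 probe of the difference (the TYPE_OF macro is just an illustrative stand-in for what Sparse complains about):

    #include <stdio.h>

    #define TYPE_OF(x) _Generic((x),                        \
            int: "int", unsigned int: "unsigned int",       \
            long: "long", unsigned long: "unsigned long",   \
            long long: "long long", default: "other")

    int main(void)
    {
        /* On an LP64 target: */
        printf("0xff00ffffff   -> %s\n", TYPE_OF(0xff00ffffff));   /* long */
        printf("0xff00ffffffUL -> %s\n", TYPE_OF(0xff00ffffffUL)); /* unsigned long */
        return 0;
    }
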
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 1dca41bea16a..e73f68569624 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -22,7 +22,7 @@
static inline pte_t huge_ptep_get(pte_t *ptep)
{
- return *ptep;
+ return READ_ONCE(*ptep);
}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 9679067a1574..7faed6e48b46 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
return pmd;
}
-static inline void kvm_set_s2pte_readonly(pte_t *pte)
+static inline void kvm_set_s2pte_readonly(pte_t *ptep)
{
pteval_t old_pteval, pteval;
- pteval = READ_ONCE(pte_val(*pte));
+ pteval = READ_ONCE(pte_val(*ptep));
do {
old_pteval = pteval;
pteval &= ~PTE_S2_RDWR;
pteval |= PTE_S2_RDONLY;
- pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+ pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
} while (pteval != old_pteval);
}
-static inline bool kvm_s2pte_readonly(pte_t *pte)
+static inline bool kvm_s2pte_readonly(pte_t *ptep)
{
- return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+ return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
}
-static inline bool kvm_s2pte_exec(pte_t *pte)
+static inline bool kvm_s2pte_exec(pte_t *ptep)
{
- return !(pte_val(*pte) & PTE_S2_XN);
+ return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
}
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
{
- kvm_set_s2pte_readonly((pte_t *)pmd);
+ kvm_set_s2pte_readonly((pte_t *)pmdp);
}
-static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
{
- return kvm_s2pte_readonly((pte_t *)pmd);
+ return kvm_s2pte_readonly((pte_t *)pmdp);
}
-static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
{
- return !(pmd_val(*pmd) & PMD_S2_XN);
+ return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
static inline bool kvm_page_empty(void *ptr)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 8d3331985d2e..39ec0b8a689e 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
-static inline void cpu_replace_ttbr1(pgd_t *pgd)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
- phys_addr_t pgd_phys = virt_to_phys(pgd);
+ phys_addr_t pgd_phys = virt_to_phys(pgdp);
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index e9d9f1b006ef..2e05bcd944c8 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
return (pmd_t *)__get_free_page(PGALLOC_GFP);
}
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
{
- BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
- free_page((unsigned long)pmd);
+ BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+ free_page((unsigned long)pmdp);
}
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
- set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
+ set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
}
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+ __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
}
#else
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
BUILD_BUG();
}
@@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
return (pud_t *)__get_free_page(PGALLOC_GFP);
}
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
{
- BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
- free_page((unsigned long)pud);
+ BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
+ free_page((unsigned long)pudp);
}
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
{
- set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
+ set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
}
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
{
- __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+ __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
}
#else
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
{
BUILD_BUG();
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
/*
* Free a PTE table.
*/
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
{
- if (pte)
- free_page((unsigned long)pte);
+ if (ptep)
+ free_page((unsigned long)ptep);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
__free_page(pte);
}
-static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
pmdval_t prot)
{
- set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
+ set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
}
/*
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 094374c82db0..7e2c27e63cd8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
static inline void set_pte(pte_t *ptep, pte_t pte)
{
- *ptep = pte;
+ WRITE_ONCE(*ptep, pte);
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
+ pte_t old_pte;
+
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte, addr);
@@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
- if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+ old_pte = READ_ONCE(*ptep);
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
(mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(*ptep), pte_val(pte));
- VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+ __func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(*ptep), pte_val(pte));
+ __func__, pte_val(old_pte), pte_val(pte));
}
set_pte(ptep, pte);
@@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
dsb(ishst);
isb();
}
@@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
static inline void set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
dsb(ishst);
isb();
}
@@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
/* Find an entry in the second-level page table. */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
@@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- *pgdp = pgd;
+ WRITE_ONCE(*pgdp, pgd);
dsb(ishst);
}
@@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
/* Find an entry in the frst-level page table. */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
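
Reviewer note spanning this and the neighbouring arm64 hunks (hugetlb.h, kvm_mmu.h, efi.c, hibernate.c, mm/dump.c, mm/fault.c): live page-table entries are read and written concurrently by other CPUs and by the hardware walker, so plain *ptep accesses are converted to READ_ONCE()/WRITE_ONCE() to get single, untorn loads and stores and to stop the compiler from re-reading an entry between dependent checks. A minimal userspace model of the idiom (the real macros live in <linux/compiler.h> and are considerably more elaborate):

    #include <stdint.h>
    #include <stdio.h>

    /* Volatile accesses forbid the compiler from tearing, fusing or
     * repeating the load/store (GCC/Clang __typeof__ extension). */
    #define READ_ONCE(x)     (*(const volatile __typeof__(x) *)&(x))
    #define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

    typedef uint64_t pteval_t;

    static pteval_t pte;   /* imagine a hardware walker updates this too */

    int main(void)
    {
        WRITE_ONCE(pte, 0x1235ULL);        /* one untorn store */
        pteval_t snap = READ_ONCE(pte);    /* one untorn load  */

        /* Every later test uses the snapshot, so two checks can never
         * disagree the way two plain dereferences of a racing pte can. */
        printf("valid=%d raw=%#llx\n",
               (int)(snap & 1), (unsigned long long)snap);
        return 0;
    }
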
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 472ef944e932..902f9edacbea 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -28,7 +28,7 @@ struct stackframe {
unsigned long fp;
unsigned long pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- unsigned int graph;
+ int graph;
#endif
};
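
Reviewer note: struct stackframe's graph field becomes signed here because the unwinder (see the stacktrace.c hunk further down) now tests frame->graph == -1 and frame->graph < -1; with an unsigned field the usual arithmetic conversions make the second comparison misfire:

    #include <stdio.h>

    int main(void)
    {
        unsigned int ug = 3;   /* a perfectly ordinary graph index */
        int          sg = 3;

        /* In the unsigned comparison, -1 is converted to UINT_MAX, so
         * every ordinary index satisfies "graph < -1" (compilers warn
         * about this with -Wsign-compare): */
        printf("unsigned: 3 < -1 ? %d\n", ug < -1 ? 1 : 0);  /* 1 (!) */
        printf("signed:   3 < -1 ? %d\n", sg < -1 ? 1 : 0);  /* 0     */
        return 0;
    }
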
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 543e11f0f657..e66b0fca99c2 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -72,15 +72,15 @@ static inline void set_fs(mm_segment_t fs)
* This is equivalent to the following test:
* (u65)addr + (u65)size <= (u65)current->addr_limit + 1
*/
-static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
+static inline unsigned long __range_ok(const void __user *addr, unsigned long size)
{
- unsigned long limit = current_thread_info()->addr_limit;
+ unsigned long ret, limit = current_thread_info()->addr_limit;
__chk_user_ptr(addr);
asm volatile(
// A + B <= C + 1 for all A,B,C, in four easy steps:
// 1: X = A + B; X' = X % 2^64
- " adds %0, %0, %2\n"
+ " adds %0, %3, %2\n"
// 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4
" csel %1, xzr, %1, hi\n"
// 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X'
@@ -92,9 +92,9 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
// testing X' - C == 0, subject to the previous adjustments.
" sbcs xzr, %0, %1\n"
" cset %0, ls\n"
- : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc");
+ : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc");
- return addr;
+ return ret;
}
/*
@@ -104,7 +104,7 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
*/
#define untagged_addr(addr) sign_extend64(addr, 55)
-#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size)
+#define access_ok(type, addr, size) __range_ok(addr, size)
#define user_addr_max get_fs
#define _ASM_EXTABLE(from, to) \
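
Reviewer note: two things change in __range_ok(): it now takes the user pointer directly (keeping __chk_user_ptr() meaningful), and the asm constraints are fixed so the result lands in a separate early-clobber output ("=&r" (ret) tied to the "0" (addr) input) instead of mutating the caller's value through "+r". The test itself is the comment's 65-bit inequality addr + size <= limit + 1, which plain 64-bit arithmetic cannot evaluate once addr + size wraps; a portable sketch using a 128-bit intermediate (GCC/Clang __int128 extension, standing in for the hand-written carry chain):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool range_ok(uint64_t addr, uint64_t size, uint64_t limit)
    {
        /* 65 bits of headroom: the sum can never wrap past the test. */
        unsigned __int128 sum = (unsigned __int128)addr + size;

        return sum <= (unsigned __int128)limit + 1;
    }

    int main(void)
    {
        uint64_t limit = 0x0000ffffffffffffULL;   /* sample addr_limit */

        printf("%d\n", range_ok(0x1000, 0x1000, limit));   /* 1: in range    */
        printf("%d\n", range_ok(UINT64_MAX, 2, limit));    /* 0: sum wrapped */
        return 0;
    }
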
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index c33b5e4010ab..68450e954d47 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -370,6 +370,7 @@ static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
static int swp_handler(struct pt_regs *regs, u32 instr)
{
u32 destreg, data, type, address = 0;
+ const void __user *user_ptr;
int rn, rt2, res = 0;
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
@@ -401,7 +402,8 @@ static int swp_handler(struct pt_regs *regs, u32 instr)
aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data);
/* Check access in reasonable access range for both SWP and SWPB */
- if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+ user_ptr = (const void __user *)(unsigned long)(address & ~3);
+ if (!access_ok(VERIFY_WRITE, user_ptr, 4)) {
pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n",
address);
goto fault;
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 07823595b7f0..b5a28336c077 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -178,7 +178,7 @@ static int enable_smccc_arch_workaround_1(void *data)
case PSCI_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if (res.a0)
+ if ((int)res.a0 < 0)
return 0;
cb = call_hvc_arch_workaround_1;
smccc_start = __smccc_workaround_1_hvc_start;
@@ -188,7 +188,7 @@ static int enable_smccc_arch_workaround_1(void *data)
case PSCI_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if (res.a0)
+ if ((int)res.a0 < 0)
return 0;
cb = call_smc_arch_workaround_1;
smccc_start = __smccc_workaround_1_smc_start;
@@ -408,6 +408,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ .enable = qcom_enable_link_stack_sanitization,
+ },
+ {
+ .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
+ MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
.enable = enable_smccc_arch_workaround_1,
},
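
Reviewer note: res.a0 is an unsigned long, and for the ARM_SMCCC_ARCH_FEATURES query a negative value appears to mean "not implemented" while zero or positive means the workaround is available, so the old if (res.a0) also rejected positive success values. Casting to int recovers the sign (implementation-defined in the abstract, but -1 on the usual two's-complement ABIs):

    #include <stdio.h>

    int main(void)
    {
        unsigned long a0;

        a0 = (unsigned long)-1;   /* firmware: not supported */
        printf("old rejects: %d   new rejects: %d\n", a0 != 0, (int)a0 < 0);

        a0 = 1;                   /* firmware: supported, with extra info */
        printf("old rejects: %d   new rejects: %d\n", a0 != 0, (int)a0 < 0);
        return 0;
    }
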
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 29b1f873e337..2985a067fc13 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -199,9 +199,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_ctr[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index f85ac58d08a3..a8bf1c892b90 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
- pte_t pte = *ptep;
+ pte_t pte = READ_ONCE(*ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index f20cf7e99249..1ec5f28c39fc 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,
gfp_t mask)
{
int rc = 0;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
unsigned long dst = (unsigned long)allocator(mask);
if (!dst) {
@@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,
memcpy((void *)dst, src_start, length);
flush_icache_range(dst, dst + length);
- pgd = pgd_offset_raw(allocator(mask), dst_addr);
- if (pgd_none(*pgd)) {
- pud = allocator(mask);
- if (!pud) {
+ pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+ if (pgd_none(READ_ONCE(*pgdp))) {
+ pudp = allocator(mask);
+ if (!pudp) {
rc = -ENOMEM;
goto out;
}
- pgd_populate(&init_mm, pgd, pud);
+ pgd_populate(&init_mm, pgdp, pudp);
}
- pud = pud_offset(pgd, dst_addr);
- if (pud_none(*pud)) {
- pmd = allocator(mask);
- if (!pmd) {
+ pudp = pud_offset(pgdp, dst_addr);
+ if (pud_none(READ_ONCE(*pudp))) {
+ pmdp = allocator(mask);
+ if (!pmdp) {
rc = -ENOMEM;
goto out;
}
- pud_populate(&init_mm, pud, pmd);
+ pud_populate(&init_mm, pudp, pmdp);
}
- pmd = pmd_offset(pud, dst_addr);
- if (pmd_none(*pmd)) {
- pte = allocator(mask);
- if (!pte) {
+ pmdp = pmd_offset(pudp, dst_addr);
+ if (pmd_none(READ_ONCE(*pmdp))) {
+ ptep = allocator(mask);
+ if (!ptep) {
rc = -ENOMEM;
goto out;
}
- pmd_populate_kernel(&init_mm, pmd, pte);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
}
- pte = pte_offset_kernel(pmd, dst_addr);
- set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+ ptep = pte_offset_kernel(pmdp, dst_addr);
+ set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
/*
* Load our new page tables. A strict BBM approach requires that we
@@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
+ write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys((void *)dst);
@@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)
return ret;
}
-static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
- pte_t pte = *src_pte;
+ pte_t pte = READ_ONCE(*src_ptep);
if (pte_valid(pte)) {
/*
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
- set_pte(dst_pte, pte_mkwrite(pte));
+ set_pte(dst_ptep, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
@@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
- set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
+ set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
}
}
-static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
unsigned long end)
{
- pte_t *src_pte;
- pte_t *dst_pte;
+ pte_t *src_ptep;
+ pte_t *dst_ptep;
unsigned long addr = start;
- dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pte)
+ dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_ptep)
return -ENOMEM;
- pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
- dst_pte = pte_offset_kernel(dst_pmd, start);
+ pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
+ dst_ptep = pte_offset_kernel(dst_pmdp, start);
- src_pte = pte_offset_kernel(src_pmd, start);
+ src_ptep = pte_offset_kernel(src_pmdp, start);
do {
- _copy_pte(dst_pte, src_pte, addr);
- } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+ _copy_pte(dst_ptep, src_ptep, addr);
+ } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
return 0;
}
-static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
unsigned long end)
{
- pmd_t *src_pmd;
- pmd_t *dst_pmd;
+ pmd_t *src_pmdp;
+ pmd_t *dst_pmdp;
unsigned long next;
unsigned long addr = start;
- if (pud_none(*dst_pud)) {
- dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pmd)
+ if (pud_none(READ_ONCE(*dst_pudp))) {
+ dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pmdp)
return -ENOMEM;
- pud_populate(&init_mm, dst_pud, dst_pmd);
+ pud_populate(&init_mm, dst_pudp, dst_pmdp);
}
- dst_pmd = pmd_offset(dst_pud, start);
+ dst_pmdp = pmd_offset(dst_pudp, start);
- src_pmd = pmd_offset(src_pud, start);
+ src_pmdp = pmd_offset(src_pudp, start);
do {
+ pmd_t pmd = READ_ONCE(*src_pmdp);
+
next = pmd_addr_end(addr, end);
- if (pmd_none(*src_pmd))
+ if (pmd_none(pmd))
continue;
- if (pmd_table(*src_pmd)) {
- if (copy_pte(dst_pmd, src_pmd, addr, next))
+ if (pmd_table(pmd)) {
+ if (copy_pte(dst_pmdp, src_pmdp, addr, next))
return -ENOMEM;
} else {
- set_pmd(dst_pmd,
- __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+ set_pmd(dst_pmdp,
+ __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
}
- } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
return 0;
}
-static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
unsigned long end)
{
- pud_t *dst_pud;
- pud_t *src_pud;
+ pud_t *dst_pudp;
+ pud_t *src_pudp;
unsigned long next;
unsigned long addr = start;
- if (pgd_none(*dst_pgd)) {
- dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pud)
+ if (pgd_none(READ_ONCE(*dst_pgdp))) {
+ dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pudp)
return -ENOMEM;
- pgd_populate(&init_mm, dst_pgd, dst_pud);
+ pgd_populate(&init_mm, dst_pgdp, dst_pudp);
}
- dst_pud = pud_offset(dst_pgd, start);
+ dst_pudp = pud_offset(dst_pgdp, start);
- src_pud = pud_offset(src_pgd, start);
+ src_pudp = pud_offset(src_pgdp, start);
do {
+ pud_t pud = READ_ONCE(*src_pudp);
+
next = pud_addr_end(addr, end);
- if (pud_none(*src_pud))
+ if (pud_none(pud))
continue;
- if (pud_table(*(src_pud))) {
- if (copy_pmd(dst_pud, src_pud, addr, next))
+ if (pud_table(pud)) {
+ if (copy_pmd(dst_pudp, src_pudp, addr, next))
return -ENOMEM;
} else {
- set_pud(dst_pud,
- __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+ set_pud(dst_pudp,
+ __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
}
- } while (dst_pud++, src_pud++, addr = next, addr != end);
+ } while (dst_pudp++, src_pudp++, addr = next, addr != end);
return 0;
}
-static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
unsigned long end)
{
unsigned long next;
unsigned long addr = start;
- pgd_t *src_pgd = pgd_offset_k(start);
+ pgd_t *src_pgdp = pgd_offset_k(start);
- dst_pgd = pgd_offset_raw(dst_pgd, start);
+ dst_pgdp = pgd_offset_raw(dst_pgdp, start);
do {
next = pgd_addr_end(addr, end);
- if (pgd_none(*src_pgd))
+ if (pgd_none(READ_ONCE(*src_pgdp)))
continue;
- if (copy_pud(dst_pgd, src_pgd, addr, next))
+ if (copy_pud(dst_pgdp, src_pgdp, addr, next))
return -ENOMEM;
- } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+ } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
return 0;
}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 75b220ba73a3..85a251b6dfa8 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -908,9 +908,9 @@ static void __armv8pmu_probe_pmu(void *info)
int pmuver;
dfr0 = read_sysreg(id_aa64dfr0_el1);
- pmuver = cpuid_feature_extract_signed_field(dfr0,
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
ID_AA64DFR0_PMUVER_SHIFT);
- if (pmuver < 1)
+ if (pmuver == 0xf || pmuver == 0)
return;
probe->present = true;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index ad8aeb098b31..c0da6efe5465 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -220,8 +220,15 @@ void __show_regs(struct pt_regs *regs)
show_regs_print_info(KERN_DEFAULT);
print_pstate(regs);
- printk("pc : %pS\n", (void *)regs->pc);
- printk("lr : %pS\n", (void *)lr);
+
+ if (!user_mode(regs)) {
+ printk("pc : %pS\n", (void *)regs->pc);
+ printk("lr : %pS\n", (void *)lr);
+ } else {
+ printk("pc : %016llx\n", regs->pc);
+ printk("lr : %016llx\n", lr);
+ }
+
printk("sp : %016llx\n", sp);
i = top_reg;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6618036ae6d4..9ae31f7e2243 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1419,7 +1419,7 @@ static int compat_ptrace_hbp_get(unsigned int note_type,
u64 addr = 0;
u32 ctrl = 0;
- int err, idx = compat_ptrace_hbp_num_to_idx(num);;
+ int err, idx = compat_ptrace_hbp_num_to_idx(num);
if (num & 1) {
err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr);
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 76809ccd309c..d5718a060672 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -59,6 +59,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
(frame->pc == (unsigned long)return_to_handler)) {
+ if (WARN_ON_ONCE(frame->graph == -1))
+ return -EINVAL;
+ if (frame->graph < -1)
+ frame->graph += FTRACE_NOTRACE_DEPTH;
+
/*
* This is a case where function graph tracer has
* modified a return address (LR) in a stack frame
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 8b8bbd3eaa52..a382b2a1b84e 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -57,7 +57,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)
if (end < start || flags)
return -EINVAL;
- if (!access_ok(VERIFY_READ, start, end - start))
+ if (!access_ok(VERIFY_READ, (const void __user *)start, end - start))
return -EFAULT;
return __do_compat_cache_op(start, end);
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index a4391280fba9..f258636273c9 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -52,7 +52,7 @@ unsigned long profile_pc(struct pt_regs *regs)
frame.fp = regs->regs[29];
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = -1; /* no task info */
+ frame.graph = current->curr_ret_stack;
#endif
do {
int ret = unwind_frame(NULL, &frame);
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index bbb0fde2780e..eb2d15147e8d 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -57,7 +57,7 @@ static const char *handler[]= {
"Error"
};
-int show_unhandled_signals = 1;
+int show_unhandled_signals = 0;
static void dump_backtrace_entry(unsigned long where)
{
@@ -526,14 +526,6 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs)
}
#endif
- if (show_unhandled_signals_ratelimited()) {
- pr_info("%s[%d]: syscall %d\n", current->comm,
- task_pid_nr(current), regs->syscallno);
- dump_instr("", regs);
- if (user_mode(regs))
- __show_regs(regs);
- }
-
return sys_ni_syscall();
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index d7e3299a7734..959e50d2588c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -363,8 +363,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
int ret = 0;
- vcpu_load(vcpu);
-
trace_kvm_set_guest_debug(vcpu, dbg->control);
if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
@@ -386,7 +384,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
}
out:
- vcpu_put(vcpu);
return ret;
}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 116252a8d3a5..870f4b1587f9 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -407,8 +407,10 @@ again:
u32 midr = read_cpuid_id();
/* Apply BTAC predictors mitigation to all Falkor chips */
- if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
+ if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+ ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
__qcom_hyp_sanitize_btac_predictors();
+ }
}
fp_enabled = __fpsimd_enabled();
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 7b60d62ac593..65dfc8571bf8 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
}
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
{
- pte_t *pte = pte_offset_kernel(pmd, 0UL);
+ pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
addr = start + i * PAGE_SIZE;
- note_page(st, addr, 4, pte_val(*pte));
+ note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
}
}
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
{
- pmd_t *pmd = pmd_offset(pud, 0UL);
+ pmd_t *pmdp = pmd_offset(pudp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+ for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+ pmd_t pmd = READ_ONCE(*pmdp);
+
addr = start + i * PMD_SIZE;
- if (pmd_none(*pmd) || pmd_sect(*pmd)) {
- note_page(st, addr, 3, pmd_val(*pmd));
+ if (pmd_none(pmd) || pmd_sect(pmd)) {
+ note_page(st, addr, 3, pmd_val(pmd));
} else {
- BUG_ON(pmd_bad(*pmd));
- walk_pte(st, pmd, addr);
+ BUG_ON(pmd_bad(pmd));
+ walk_pte(st, pmdp, addr);
}
}
}
-static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
{
- pud_t *pud = pud_offset(pgd, 0UL);
+ pud_t *pudp = pud_offset(pgdp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+ pud_t pud = READ_ONCE(*pudp);
+
addr = start + i * PUD_SIZE;
- if (pud_none(*pud) || pud_sect(*pud)) {
- note_page(st, addr, 2, pud_val(*pud));
+ if (pud_none(pud) || pud_sect(pud)) {
+ note_page(st, addr, 2, pud_val(pud));
} else {
- BUG_ON(pud_bad(*pud));
- walk_pmd(st, pud, addr);
+ BUG_ON(pud_bad(pud));
+ walk_pmd(st, pudp, addr);
}
}
}
@@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
unsigned long start)
{
- pgd_t *pgd = pgd_offset(mm, 0UL);
+ pgd_t *pgdp = pgd_offset(mm, 0UL);
unsigned i;
unsigned long addr;
- for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+ for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+ pgd_t pgd = READ_ONCE(*pgdp);
+
addr = start + i * PGDIR_SIZE;
- if (pgd_none(*pgd)) {
- note_page(st, addr, 1, pgd_val(*pgd));
+ if (pgd_none(pgd)) {
+ note_page(st, addr, 1, pgd_val(pgd));
} else {
- BUG_ON(pgd_bad(*pgd));
- walk_pud(st, pgd, addr);
+ BUG_ON(pgd_bad(pgd));
+ walk_pud(st, pgdp, addr);
}
}
}
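
Throughout dump.c, and in the files that follow, dereferences of live
page-table entries are replaced with READ_ONCE() snapshots: the entry
is loaded exactly once into a local variable and every later test runs
on that copy, so a concurrent update cannot yield a torn or
inconsistent view within one loop iteration. A stand-alone sketch of
the pattern, with a volatile load standing in for the kernel's
READ_ONCE() and placeholder types throughout:

    typedef unsigned long entry_t;

    static inline entry_t entry_read_once(const entry_t *p)
    {
            return *(const volatile entry_t *)p;    /* single load */
    }

    static void handle_entry(unsigned long addr, entry_t e)
    {
            (void)addr;
            (void)e;        /* report or record the entry here */
    }

    static void walk_level(entry_t *table, unsigned long nentries,
                           unsigned long start, unsigned long stride)
    {
            unsigned long i;

            for (i = 0; i < nentries; i++) {
                    entry_t e = entry_read_once(&table[i]);

                    /* every decision below uses the local copy */
                    if (e)
                            handle_entry(start + i * stride, e);
            }
    }
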
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f76bb2c3c943..bff11553eb05 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)
void show_pte(unsigned long addr)
{
struct mm_struct *mm;
- pgd_t *pgd;
+ pgd_t *pgdp;
+ pgd_t pgd;
if (addr < TASK_SIZE) {
/* TTBR0 */
@@ -149,33 +150,37 @@ void show_pte(unsigned long addr)
return;
}
- pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+ pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
VA_BITS, mm->pgd);
- pgd = pgd_offset(mm, addr);
- pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
+ pgdp = pgd_offset(mm, addr);
+ pgd = READ_ONCE(*pgdp);
+ pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
do {
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
- if (pgd_none(*pgd) || pgd_bad(*pgd))
+ if (pgd_none(pgd) || pgd_bad(pgd))
break;
- pud = pud_offset(pgd, addr);
- pr_cont(", *pud=%016llx", pud_val(*pud));
- if (pud_none(*pud) || pud_bad(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ pr_cont(", pud=%016llx", pud_val(pud));
+ if (pud_none(pud) || pud_bad(pud))
break;
- pmd = pmd_offset(pud, addr);
- pr_cont(", *pmd=%016llx", pmd_val(*pmd));
- if (pmd_none(*pmd) || pmd_bad(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ pr_cont(", pmd=%016llx", pmd_val(pmd));
+ if (pmd_none(pmd) || pmd_bad(pmd))
break;
- pte = pte_offset_map(pmd, addr);
- pr_cont(", *pte=%016llx", pte_val(*pte));
- pte_unmap(pte);
+ ptep = pte_offset_map(pmdp, addr);
+ pte = READ_ONCE(*ptep);
+ pr_cont(", pte=%016llx", pte_val(pte));
+ pte_unmap(ptep);
} while(0);
pr_cont("\n");
@@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
pte_t entry, int dirty)
{
pteval_t old_pteval, pteval;
+ pte_t pte = READ_ONCE(*ptep);
- if (pte_same(*ptep, entry))
+ if (pte_same(pte, entry))
return 0;
/* only preserve the access flags and write permission */
@@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
* (calculated as: a & b == ~(~a | ~b)).
*/
pte_val(entry) ^= PTE_RDONLY;
- pteval = READ_ONCE(pte_val(*ptep));
+ pteval = pte_val(pte);
do {
old_pteval = pteval;
pteval ^= PTE_RDONLY;
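
In the same spirit, ptep_set_access_flags() above snapshots the PTE
once, returns early if nothing would change, and then feeds the
snapshot into its compare-and-exchange loop so a racing hardware or
software update is never silently overwritten. A simplified sketch of
that read-modify-cmpxchg shape in C11 atomics (the kernel's version
additionally XOR-encodes PTE_RDONLY to preserve hardware-managed
bits):

    #include <stdatomic.h>

    static void set_flags(_Atomic unsigned long *entry, unsigned long flags)
    {
            unsigned long old = atomic_load(entry);
            unsigned long new;

            do {
                    new = old | flags;
                    /* on failure, 'old' is refreshed with the current value */
            } while (!atomic_compare_exchange_weak(entry, &old, new));
    }
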
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 6cb0fa92a651..ecc6818191df 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
- pgd_t *pgd = pgd_offset(mm, addr);
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp = pgd_offset(mm, addr);
+ pud_t *pudp;
+ pmd_t *pmdp;
*pgsize = PAGE_SIZE;
- pud = pud_offset(pgd, addr);
- pmd = pmd_offset(pud, addr);
- if ((pte_t *)pmd == ptep) {
+ pudp = pud_offset(pgdp, addr);
+ pmdp = pmd_offset(pudp, addr);
+ if ((pte_t *)pmdp == ptep) {
*pgsize = PMD_SIZE;
return CONT_PMDS;
}
@@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
clear_flush(mm, addr, ptep, pgsize, ncontig);
- for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
- pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
- pte_val(pfn_pte(pfn, hugeprot)));
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
- }
}
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
- pgd_t *pgd;
- pud_t *pud;
- pte_t *pte = NULL;
-
- pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
- pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
- if (!pud)
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep = NULL;
+
+ pgdp = pgd_offset(mm, addr);
+ pudp = pud_alloc(mm, pgdp, addr);
+ if (!pudp)
return NULL;
if (sz == PUD_SIZE) {
- pte = (pte_t *)pud;
+ ptep = (pte_t *)pudp;
} else if (sz == (PAGE_SIZE * CONT_PTES)) {
- pmd_t *pmd = pmd_alloc(mm, pud, addr);
+ pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
/*
@@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
* will be no pte_unmap() to correspond with this
* pte_alloc_map().
*/
- pte = pte_alloc_map(mm, pmd, addr);
+ ptep = pte_alloc_map(mm, pmdp, addr);
} else if (sz == PMD_SIZE) {
if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
- pud_none(*pud))
- pte = huge_pmd_share(mm, addr, pud);
+ pud_none(READ_ONCE(*pudp)))
+ ptep = huge_pmd_share(mm, addr, pudp);
else
- pte = (pte_t *)pmd_alloc(mm, pud, addr);
+ ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
} else if (sz == (PMD_SIZE * CONT_PMDS)) {
- pmd_t *pmd;
-
- pmd = pmd_alloc(mm, pud, addr);
+ pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
- return (pte_t *)pmd;
+ return (pte_t *)pmdp;
}
- pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
- sz, pte, pte_val(*pte));
- return pte;
+ return ptep;
}
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
- pgd = pgd_offset(mm, addr);
- pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
- if (!pgd_present(*pgd))
+ pgdp = pgd_offset(mm, addr);
+ if (!pgd_present(READ_ONCE(*pgdp)))
return NULL;
- pud = pud_offset(pgd, addr);
- if (sz != PUD_SIZE && pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (sz != PUD_SIZE && pud_none(pud))
return NULL;
/* hugepage or swap? */
- if (pud_huge(*pud) || !pud_present(*pud))
- return (pte_t *)pud;
+ if (pud_huge(pud) || !pud_present(pud))
+ return (pte_t *)pudp;
/* table; check the next level */
if (sz == CONT_PMD_SIZE)
addr &= CONT_PMD_MASK;
- pmd = pmd_offset(pud, addr);
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
- pmd_none(*pmd))
+ pmd_none(pmd))
return NULL;
- if (pmd_huge(*pmd) || !pmd_present(*pmd))
- return (pte_t *)pmd;
+ if (pmd_huge(pmd) || !pmd_present(pmd))
+ return (pte_t *)pmdp;
- if (sz == CONT_PTE_SIZE) {
- pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
- return pte;
- }
+ if (sz == CONT_PTE_SIZE)
+ return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
return NULL;
}
@@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
size_t pgsize;
pte_t pte;
- if (!pte_cont(*ptep)) {
+ if (!pte_cont(READ_ONCE(*ptep))) {
ptep_set_wrprotect(mm, addr, ptep);
return;
}
@@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
size_t pgsize;
int ncontig;
- if (!pte_cont(*ptep)) {
+ if (!pte_cont(READ_ONCE(*ptep))) {
ptep_clear_flush(vma, addr, ptep);
return;
}
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 6e02e6fb4c7b..dabfc1ecda3d 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
return __pa(p);
}
-static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
bool early)
{
- if (pmd_none(*pmd)) {
+ if (pmd_none(READ_ONCE(*pmdp))) {
phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
: kasan_alloc_zeroed_page(node);
- __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+ __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
}
- return early ? pte_offset_kimg(pmd, addr)
- : pte_offset_kernel(pmd, addr);
+ return early ? pte_offset_kimg(pmdp, addr)
+ : pte_offset_kernel(pmdp, addr);
}
-static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
bool early)
{
- if (pud_none(*pud)) {
+ if (pud_none(READ_ONCE(*pudp))) {
phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
: kasan_alloc_zeroed_page(node);
- __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
+ __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
}
- return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
+ return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
}
-static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
+static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
bool early)
{
- if (pgd_none(*pgd)) {
+ if (pgd_none(READ_ONCE(*pgdp))) {
phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
: kasan_alloc_zeroed_page(node);
- __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
+ __pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
}
- return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
+ return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
}
-static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
+ pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
do {
phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
: kasan_alloc_zeroed_page(node);
next = addr + PAGE_SIZE;
- set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
- } while (pte++, addr = next, addr != end && pte_none(*pte));
+ set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+ } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
}
-static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
+ pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
do {
next = pmd_addr_end(addr, end);
- kasan_pte_populate(pmd, addr, next, node, early);
- } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+ kasan_pte_populate(pmdp, addr, next, node, early);
+ } while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
}
-static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
+ pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
do {
next = pud_addr_end(addr, end);
- kasan_pmd_populate(pud, addr, next, node, early);
- } while (pud++, addr = next, addr != end && pud_none(*pud));
+ kasan_pmd_populate(pudp, addr, next, node, early);
+ } while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
}
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
int node, bool early)
{
unsigned long next;
- pgd_t *pgd;
+ pgd_t *pgdp;
- pgd = pgd_offset_k(addr);
+ pgdp = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- kasan_pud_populate(pgd, addr, next, node, early);
- } while (pgd++, addr = next, addr != end);
+ kasan_pud_populate(pgdp, addr, next, node, early);
+ } while (pgdp++, addr = next, addr != end);
}
/* The early shadow maps everything to a single page of zeroes */
@@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
*/
void __init kasan_copy_shadow(pgd_t *pgdir)
{
- pgd_t *pgd, *pgd_new, *pgd_end;
+ pgd_t *pgdp, *pgdp_new, *pgdp_end;
- pgd = pgd_offset_k(KASAN_SHADOW_START);
- pgd_end = pgd_offset_k(KASAN_SHADOW_END);
- pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+ pgdp = pgd_offset_k(KASAN_SHADOW_START);
+ pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
+ pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
do {
- set_pgd(pgd_new, *pgd);
- } while (pgd++, pgd_new++, pgd != pgd_end);
+ set_pgd(pgdp_new, READ_ONCE(*pgdp));
+ } while (pgdp++, pgdp_new++, pgdp != pgdp_end);
}
static void __init clear_pgds(unsigned long start,
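
Every helper in kasan_init.c above shares one shape: when the
next-level table is missing, populate the slot either from a
statically reserved zero table (early boot, before the allocator is
up) or from a freshly allocated page, selected by the 'early' flag. A
minimal userspace sketch of that two-phase population pattern, with
calloc() standing in for the runtime allocator:

    #include <stdlib.h>

    #define TABLE_ENTRIES 512

    static unsigned long early_table[TABLE_ENTRIES]; /* build-time storage */

    static unsigned long *alloc_table(void)
    {
            return calloc(TABLE_ENTRIES, sizeof(unsigned long));
    }

    static unsigned long *get_table(unsigned long **slot, int early)
    {
            if (!*slot)     /* nothing mapped at this level yet */
                    *slot = early ? early_table : alloc_table();
            return *slot;
    }
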
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4694cda823c9..2dbb2c9f1ec1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -108,7 +108,7 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
* The following mapping attributes may be updated in live
* kernel mappings without the need for break-before-make.
*/
- static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE;
+ static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
/* creating or taking down mappings is always safe */
if (old == 0 || new == 0)
@@ -118,52 +118,55 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
if ((old | new) & PTE_CONT)
return false;
- /* Transitioning from Global to Non-Global is safe */
- if (((old ^ new) == PTE_NG) && (new & PTE_NG))
- return true;
+ /* Transitioning from Non-Global to Global is unsafe */
+ if (old & ~new & PTE_NG)
+ return false;
return ((old ^ new) & ~mask) == 0;
}
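
Note the inverted condition here: PTE_NG has become a one-way bit that
joins the always-updatable mask above, so a live mapping may gain the
non-global bit, but 'old & ~new & PTE_NG' rejects any update that would
clear it again, which would require break-before-make. A tiny sketch of
the one-way-bit check (NG_BIT is a placeholder for the real descriptor
bit):

    #include <stdbool.h>

    #define NG_BIT (1ul << 11)      /* placeholder position */

    static bool change_is_safe(unsigned long old, unsigned long new)
    {
            /* dropping the bit on a live entry is forbidden */
            return !(old & ~new & NG_BIT);
    }
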
-static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
+static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot)
{
- pte_t *pte;
+ pte_t *ptep;
- pte = pte_set_fixmap_offset(pmd, addr);
+ ptep = pte_set_fixmap_offset(pmdp, addr);
do {
- pte_t old_pte = *pte;
+ pte_t old_pte = READ_ONCE(*ptep);
- set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));
+ set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
/*
* After the PTE entry has been populated once, we
* only allow updates to the permission attributes.
*/
- BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
+ READ_ONCE(pte_val(*ptep))));
phys += PAGE_SIZE;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
pte_clear_fixmap();
}
-static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
+static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
int flags)
{
unsigned long next;
+ pmd_t pmd = READ_ONCE(*pmdp);
- BUG_ON(pmd_sect(*pmd));
- if (pmd_none(*pmd)) {
+ BUG_ON(pmd_sect(pmd));
+ if (pmd_none(pmd)) {
phys_addr_t pte_phys;
BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc();
- __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+ __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+ pmd = READ_ONCE(*pmdp);
}
- BUG_ON(pmd_bad(*pmd));
+ BUG_ON(pmd_bad(pmd));
do {
pgprot_t __prot = prot;
@@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pte(pmd, addr, next, phys, __prot);
+ init_pte(pmdp, addr, next, phys, __prot);
phys += next - addr;
} while (addr = next, addr != end);
}
-static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void), int flags)
{
unsigned long next;
- pmd_t *pmd;
+ pmd_t *pmdp;
- pmd = pmd_set_fixmap_offset(pud, addr);
+ pmdp = pmd_set_fixmap_offset(pudp, addr);
do {
- pmd_t old_pmd = *pmd;
+ pmd_t old_pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
- pmd_set_huge(pmd, phys, prot);
+ pmd_set_huge(pmdp, phys, prot);
/*
* After the PMD entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
- pmd_val(*pmd)));
+ READ_ONCE(pmd_val(*pmdp))));
} else {
- alloc_init_cont_pte(pmd, addr, next, phys, prot,
+ alloc_init_cont_pte(pmdp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(pmd_val(old_pmd) != 0 &&
- pmd_val(old_pmd) != pmd_val(*pmd));
+ pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
}
phys += next - addr;
- } while (pmd++, addr = next, addr != end);
+ } while (pmdp++, addr = next, addr != end);
pmd_clear_fixmap();
}
-static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
+static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void), int flags)
{
unsigned long next;
+ pud_t pud = READ_ONCE(*pudp);
/*
* Check for initial section mappings in the pgd/pud.
*/
- BUG_ON(pud_sect(*pud));
- if (pud_none(*pud)) {
+ BUG_ON(pud_sect(pud));
+ if (pud_none(pud)) {
phys_addr_t pmd_phys;
BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc();
- __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+ __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+ pud = READ_ONCE(*pudp);
}
- BUG_ON(pud_bad(*pud));
+ BUG_ON(pud_bad(pud));
do {
pgprot_t __prot = prot;
@@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
+ init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
phys += next - addr;
} while (addr = next, addr != end);
@@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
return true;
}
-static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void),
- int flags)
+static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(void),
+ int flags)
{
- pud_t *pud;
unsigned long next;
+ pud_t *pudp;
+ pgd_t pgd = READ_ONCE(*pgdp);
- if (pgd_none(*pgd)) {
+ if (pgd_none(pgd)) {
phys_addr_t pud_phys;
BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc();
- __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
+ __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
+ pgd = READ_ONCE(*pgdp);
}
- BUG_ON(pgd_bad(*pgd));
+ BUG_ON(pgd_bad(pgd));
- pud = pud_set_fixmap_offset(pgd, addr);
+ pudp = pud_set_fixmap_offset(pgdp, addr);
do {
- pud_t old_pud = *pud;
+ pud_t old_pud = READ_ONCE(*pudp);
next = pud_addr_end(addr, end);
@@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
*/
if (use_1G_block(addr, next, phys) &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
- pud_set_huge(pud, phys, prot);
+ pud_set_huge(pudp, phys, prot);
/*
* After the PUD entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
- pud_val(*pud)));
+ READ_ONCE(pud_val(*pudp))));
} else {
- alloc_init_cont_pmd(pud, addr, next, phys, prot,
+ alloc_init_cont_pmd(pudp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(pud_val(old_pud) != 0 &&
- pud_val(old_pud) != pud_val(*pud));
+ pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
}
phys += next - addr;
- } while (pud++, addr = next, addr != end);
+ } while (pudp++, addr = next, addr != end);
pud_clear_fixmap();
}
@@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
int flags)
{
unsigned long addr, length, end, next;
- pgd_t *pgd = pgd_offset_raw(pgdir, virt);
+ pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
/*
* If the virtual and physical address don't have the same offset
@@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+ alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
flags);
phys += next - addr;
- } while (pgd++, addr = next, addr != end);
+ } while (pgdp++, addr = next, addr != end);
}
static phys_addr_t pgd_pgtable_alloc(void)
@@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
flush_tlb_kernel_range(virt, virt + size);
}
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
phys_addr_t end, pgprot_t prot, int flags)
{
- __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+ __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
prot, early_pgtable_alloc, flags);
}
@@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)
PAGE_KERNEL_RO);
}
-static void __init map_mem(pgd_t *pgd)
+static void __init map_mem(pgd_t *pgdp)
{
phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
@@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)
if (memblock_is_nomap(reg))
continue;
- __map_memblock(pgd, start, end, PAGE_KERNEL, flags);
+ __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
}
/*
@@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)
* Note that contiguous mappings cannot be remapped in this way,
* so we should avoid them here.
*/
- __map_memblock(pgd, kernel_start, kernel_end,
+ __map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
@@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)
* through /sys/kernel/kexec_crash_size interface.
*/
if (crashk_res.end) {
- __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+ __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
PAGE_KERNEL,
NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
memblock_clear_nomap(crashk_res.start,
@@ -499,7 +506,7 @@ void mark_rodata_ro(void)
debug_checkwx();
}
-static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma,
int flags, unsigned long vm_flags)
{
@@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(pa_start));
BUG_ON(!PAGE_ALIGNED(size));
- __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+ __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
early_pgtable_alloc, flags);
if (!(vm_flags & VM_NO_GUARD))
@@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);
/*
* Create fine-grained mappings for the kernel.
*/
-static void __init map_kernel(pgd_t *pgd)
+static void __init map_kernel(pgd_t *pgdp)
{
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
vmlinux_initdata, vmlinux_data;
@@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
*/
- map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0,
+ map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
VM_NO_GUARD);
- map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
+ map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
- map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
+ map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
&vmlinux_inittext, 0, VM_NO_GUARD);
- map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
+ map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
&vmlinux_initdata, 0, VM_NO_GUARD);
- map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
+ map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
- if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+ if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
/*
* The fixmap falls in a separate pgd to the kernel, and doesn't
* live in the carveout for the swapper_pg_dir. We can simply
* re-use the existing dir for the fixmap.
*/
- set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
- *pgd_offset_k(FIXADDR_START));
+ set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
+ READ_ONCE(*pgd_offset_k(FIXADDR_START)));
} else if (CONFIG_PGTABLE_LEVELS > 3) {
/*
* The fixmap shares its top level pgd entry with the kernel
@@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)
* entry instead.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
- pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+ pud_populate(&init_mm,
+ pud_set_fixmap_offset(pgdp, FIXADDR_START),
lm_alias(bm_pmd));
pud_clear_fixmap();
} else {
BUG();
}
- kasan_copy_shadow(pgd);
+ kasan_copy_shadow(pgdp);
}
/*
@@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)
void __init paging_init(void)
{
phys_addr_t pgd_phys = early_pgtable_alloc();
- pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+ pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
- map_kernel(pgd);
- map_mem(pgd);
+ map_kernel(pgdp);
+ map_mem(pgdp);
/*
* We want to reuse the original swapper_pg_dir so we don't have to
@@ -635,7 +643,7 @@ void __init paging_init(void)
* To do this we need to go via a temporary pgd.
*/
cpu_replace_ttbr1(__va(pgd_phys));
- memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+ memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
pgd_clear_fixmap();
@@ -655,37 +663,40 @@ void __init paging_init(void)
*/
int kern_addr_valid(unsigned long addr)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
if ((((long)addr) >> VA_BITS) != -1UL)
return 0;
- pgd = pgd_offset_k(addr);
- if (pgd_none(*pgd))
+ pgdp = pgd_offset_k(addr);
+ if (pgd_none(READ_ONCE(*pgdp)))
return 0;
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
return 0;
- if (pud_sect(*pud))
- return pfn_valid(pud_pfn(*pud));
+ if (pud_sect(pud))
+ return pfn_valid(pud_pfn(pud));
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_none(pmd))
return 0;
- if (pmd_sect(*pmd))
- return pfn_valid(pmd_pfn(*pmd));
+ if (pmd_sect(pmd))
+ return pfn_valid(pmd_pfn(pmd));
- pte = pte_offset_kernel(pmd, addr);
- if (pte_none(*pte))
+ ptep = pte_offset_kernel(pmdp, addr);
+ pte = READ_ONCE(*ptep);
+ if (pte_none(pte))
return 0;
- return pfn_valid(pte_pfn(*pte));
+ return pfn_valid(pte_pfn(pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
@@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
{
unsigned long addr = start;
unsigned long next;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
do {
next = pmd_addr_end(addr, end);
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
+ pgdp = vmemmap_pgd_populate(addr, node);
+ if (!pgdp)
return -ENOMEM;
- pud = vmemmap_pud_populate(pgd, addr, node);
- if (!pud)
+ pudp = vmemmap_pud_populate(pgdp, addr, node);
+ if (!pudp)
return -ENOMEM;
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
+ pmdp = pmd_offset(pudp, addr);
+ if (pmd_none(READ_ONCE(*pmdp))) {
void *p = NULL;
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (!p)
return -ENOMEM;
- pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+ pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
} else
- vmemmap_verify((pte_t *)pmd, node, addr, next);
+ vmemmap_verify((pte_t *)pmdp, node, addr, next);
} while (addr = next, addr != end);
return 0;
@@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,
static inline pud_t * fixmap_pud(unsigned long addr)
{
- pgd_t *pgd = pgd_offset_k(addr);
+ pgd_t *pgdp = pgd_offset_k(addr);
+ pgd_t pgd = READ_ONCE(*pgdp);
- BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+ BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
- return pud_offset_kimg(pgd, addr);
+ return pud_offset_kimg(pgdp, addr);
}
static inline pmd_t * fixmap_pmd(unsigned long addr)
{
- pud_t *pud = fixmap_pud(addr);
+ pud_t *pudp = fixmap_pud(addr);
+ pud_t pud = READ_ONCE(*pudp);
- BUG_ON(pud_none(*pud) || pud_bad(*pud));
+ BUG_ON(pud_none(pud) || pud_bad(pud));
- return pmd_offset_kimg(pud, addr);
+ return pmd_offset_kimg(pudp, addr);
}
static inline pte_t * fixmap_pte(unsigned long addr)
@@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)
*/
void __init early_fixmap_init(void)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp, pgd;
+ pud_t *pudp;
+ pmd_t *pmdp;
unsigned long addr = FIXADDR_START;
- pgd = pgd_offset_k(addr);
+ pgdp = pgd_offset_k(addr);
+ pgd = READ_ONCE(*pgdp);
if (CONFIG_PGTABLE_LEVELS > 3 &&
- !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
+ !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
/*
* We only end up here if the kernel mapping and the fixmap
* share the top level pgd entry, which should only happen on
* 16k/4 levels configurations.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
- pud = pud_offset_kimg(pgd, addr);
+ pudp = pud_offset_kimg(pgdp, addr);
} else {
- if (pgd_none(*pgd))
- __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
- pud = fixmap_pud(addr);
+ if (pgd_none(pgd))
+ __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+ pudp = fixmap_pud(addr);
}
- if (pud_none(*pud))
- __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
- pmd = fixmap_pmd(addr);
- __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
+ if (pud_none(READ_ONCE(*pudp)))
+ __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+ pmdp = fixmap_pmd(addr);
+ __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
/*
* The boot-ioremap range spans multiple pmds, for which
@@ -800,11 +814,11 @@ void __init early_fixmap_init(void)
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
- if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
- || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+ if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+ || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
- pr_warn("pmd %p != %p, %p\n",
- pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+ pr_warn("pmdp %p != %p, %p\n",
+ pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
@@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
- pte_t *pte;
+ pte_t *ptep;
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
- pte = fixmap_pte(addr);
+ ptep = fixmap_pte(addr);
if (pgprot_val(flags)) {
- set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+ set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
} else {
- pte_clear(&init_mm, addr, pte);
+ pte_clear(&init_mm, addr, ptep);
flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
}
}
@@ -915,36 +929,56 @@ int __init arch_ioremap_pmd_supported(void)
return 1;
}
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+
+ /* ioremap_page_range doesn't honour BBM */
+ if (pud_present(READ_ONCE(*pudp)))
+ return 0;
+
BUG_ON(phys & ~PUD_MASK);
- set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
+ set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
return 1;
}
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+
+ /* ioremap_page_range doesn't honour BBM */
+ if (pmd_present(READ_ONCE(*pmdp)))
+ return 0;
+
BUG_ON(phys & ~PMD_MASK);
- set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+ set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
return 1;
}
-int pud_clear_huge(pud_t *pud)
+int pud_clear_huge(pud_t *pudp)
{
- if (!pud_sect(*pud))
+ if (!pud_sect(READ_ONCE(*pudp)))
return 0;
- pud_clear(pud);
+ pud_clear(pudp);
return 1;
}
-int pmd_clear_huge(pmd_t *pmd)
+int pmd_clear_huge(pmd_t *pmdp)
{
- if (!pmd_sect(*pmd))
+ if (!pmd_sect(READ_ONCE(*pmdp)))
return 0;
- pmd_clear(pmd);
+ pmd_clear(pmdp);
return 1;
}
+
+int pud_free_pmd_page(pud_t *pud)
+{
+ return pud_none(*pud);
+}
+
+int pmd_free_pte_page(pmd_t *pmd)
+{
+ return pmd_none(*pmd);
+}
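
The new pud_set_huge()/pmd_set_huge() guards and the
pud_free_pmd_page()/pmd_free_pte_page() stubs express one contract:
because ioremap_page_range() does not perform break-before-make, a
huge mapping may only be installed into an empty slot, and tearing
down intermediate tables only succeeds when the slot is already clear.
A reduced sketch of that guard, with plain integers standing in for
the descriptor types:

    #include <stdbool.h>

    static bool slot_present(unsigned long e)
    {
            return e != 0;
    }

    static int set_huge(unsigned long *slot, unsigned long val)
    {
            if (slot_present(*slot))
                    return 0;       /* caller must unmap and flush first */
            *slot = val;
            return 1;
    }
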
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index a682a0a2a0fa..a56359373d8b 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
void *data)
{
struct page_change_data *cdata = data;
- pte_t pte = *ptep;
+ pte_t pte = READ_ONCE(*ptep);
pte = clear_pte_bit(pte, cdata->clear_mask);
pte = set_pte_bit(pte, cdata->set_mask);
@@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
*/
bool kernel_page_present(struct page *page)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
- pgd = pgd_offset_k(addr);
- if (pgd_none(*pgd))
+ pgdp = pgd_offset_k(addr);
+ if (pgd_none(READ_ONCE(*pgdp)))
return false;
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
return false;
- if (pud_sect(*pud))
+ if (pud_sect(pud))
return true;
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_none(pmd))
return false;
- if (pmd_sect(*pmd))
+ if (pmd_sect(pmd))
return true;
- pte = pte_offset_kernel(pmd, addr);
- return pte_valid(*pte);
+ ptep = pte_offset_kernel(pmdp, addr);
+ return pte_valid(READ_ONCE(*ptep));
}
#endif /* CONFIG_HIBERNATION */
#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 71baed7e592a..c0af47617299 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
dc cvac, cur_\()\type\()p // Ensure any existing dirty
dmb sy // lines are written back before
ldr \type, [cur_\()\type\()p] // loading the entry
- tbz \type, #0, next_\()\type // Skip invalid entries
+ tbz \type, #0, skip_\()\type // Skip invalid and
+ tbnz \type, #11, skip_\()\type // non-global entries
.endm
.macro __idmap_kpti_put_pgtable_ent_ng, type
@@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
do_pgd: __idmap_kpti_get_pgtable_ent pgd
tbnz pgd, #1, walk_puds
- __idmap_kpti_put_pgtable_ent_ng pgd
next_pgd:
+ __idmap_kpti_put_pgtable_ent_ng pgd
+skip_pgd:
add cur_pgdp, cur_pgdp, #8
cmp cur_pgdp, end_pgdp
b.ne do_pgd
@@ -294,8 +296,9 @@ walk_puds:
add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
do_pud: __idmap_kpti_get_pgtable_ent pud
tbnz pud, #1, walk_pmds
- __idmap_kpti_put_pgtable_ent_ng pud
next_pud:
+ __idmap_kpti_put_pgtable_ent_ng pud
+skip_pud:
add cur_pudp, cur_pudp, 8
cmp cur_pudp, end_pudp
b.ne do_pud
@@ -314,8 +317,9 @@ walk_pmds:
add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
do_pmd: __idmap_kpti_get_pgtable_ent pmd
tbnz pmd, #1, walk_ptes
- __idmap_kpti_put_pgtable_ent_ng pmd
next_pmd:
+ __idmap_kpti_put_pgtable_ent_ng pmd
+skip_pmd:
add cur_pmdp, cur_pmdp, #8
cmp cur_pmdp, end_pmdp
b.ne do_pmd
@@ -333,7 +337,7 @@ walk_ptes:
add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
do_pte: __idmap_kpti_get_pgtable_ent pte
__idmap_kpti_put_pgtable_ent_ng pte
-next_pte:
+skip_pte:
add cur_ptep, cur_ptep, #8
cmp cur_ptep, end_ptep
b.ne do_pte
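
The proc.S relabelling above changes which entries the KPTI rewriter
touches: previously the put-macro ran before the 'next_*' labels, so
even entries the get-macro wanted to skip were written back; now
invalid (bit 0 clear) and already non-global (bit 11 set) entries
branch straight to 'skip_*' and are left untouched. A C-level sketch
of the corrected loop over one table level (the real code walks all
levels in assembly):

    #define VALID_BIT (1ul << 0)
    #define NG_BIT    (1ul << 11)

    static void mark_table_ng(unsigned long *table, unsigned long n)
    {
            unsigned long i;

            for (i = 0; i < n; i++) {
                    unsigned long e = table[i];

                    if (!(e & VALID_BIT) || (e & NG_BIT))
                            continue;       /* skip: nothing to rewrite */
                    table[i] = e | NG_BIT;  /* single write-back */
            }
    }
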
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 1d4f1da7c58f..a93350451e8e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
off = offsetof(struct bpf_array, map.max_entries);
emit_a64_mov_i64(tmp, off, ctx);
emit(A64_LDR32(tmp, r2, tmp), ctx);
+ emit(A64_MOV(0, r3, r3), ctx);
emit(A64_CMP(0, r3, tmp), ctx);
- emit(A64_B_(A64_COND_GE, jmp_offset), ctx);
+ emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
@@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
*/
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
emit(A64_CMP(1, tcc, tmp), ctx);
- emit(A64_B_(A64_COND_GT, jmp_offset), ctx);
+ emit(A64_B_(A64_COND_HI, jmp_offset), ctx);
emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
/* prog = array->ptrs[index];
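
Two separate signedness bugs are fixed in the tail-call path above:
the 32-bit index register is first zero-extended (on arm64, the
self-move 'A64_MOV(0, r3, r3)' clears the upper 32 bits), and both
bounds checks switch from signed condition codes (GE, GT) to unsigned
ones (CS, HI), so a large or "negative" user-supplied index can no
longer slip past the comparison. The C analogue of the pitfall:

    #include <stdbool.h>
    #include <stdint.h>

    static bool in_bounds_buggy(int32_t index, uint32_t max_entries)
    {
            /* signed compare: index == -1 passes the check */
            return index < (int32_t)max_entries;
    }

    static bool in_bounds_fixed(uint32_t index, uint32_t max_entries)
    {
            /* unsigned compare: 0xffffffff is simply >= max_entries */
            return index < max_entries;
    }
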
diff --git a/arch/cris/include/arch-v10/arch/bug.h b/arch/cris/include/arch-v10/arch/bug.h
index 905afeacfedf..06da9d49152a 100644
--- a/arch/cris/include/arch-v10/arch/bug.h
+++ b/arch/cris/include/arch-v10/arch/bug.h
@@ -44,18 +44,25 @@ struct bug_frame {
* not be used like this with newer versions of gcc.
*/
#define BUG() \
+do { \
__asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\
"movu.w " __stringify(__LINE__) ",$r0\n\t"\
"jump 0f\n\t" \
".section .rodata\n" \
"0:\t.string \"" __FILE__ "\"\n\t" \
- ".previous")
+ ".previous"); \
+ unreachable(); \
+} while (0)
#endif
#else
/* This just causes an oops. */
-#define BUG() (*(int *)0 = 0)
+#define BUG() \
+do { \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
+} while (0)
#endif
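
This BUG() rework, repeated for ia64 and m68k further down, inserts
barrier_before_unreachable() (or unreachable()) before the trapping
statement and wraps the macro in do { } while (0) so it behaves as a
single statement. The barrier stops the compiler from merging or
hoisting code across a trap it knows cannot return. A generic sketch
of the shape, with a plain compiler barrier standing in for the kernel
helper:

    #define barrier() __asm__ __volatile__("" ::: "memory")

    #define MY_BUG()                                                \
    do {                                                            \
            barrier();         /* keep preceding code in place */   \
            __builtin_trap();  /* compiler treats as no-return */   \
    } while (0)
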
diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/asm/byteorder.h
index ecff2d1ca5a3..6eaa7ad5fc2c 100644
--- a/arch/h8300/include/asm/byteorder.h
+++ b/arch/h8300/include/asm/byteorder.h
@@ -2,7 +2,6 @@
#ifndef __H8300_BYTEORDER_H__
#define __H8300_BYTEORDER_H__
-#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
#include <linux/byteorder/big_endian.h>
#endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 762eeb0fcc1d..2524fb60fbc2 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -66,38 +66,35 @@ ATOMIC_OPS(add, +)
ATOMIC_OPS(sub, -)
#ifdef __OPTIMIZE__
-#define __ia64_atomic_const(i) __builtin_constant_p(i) ? \
+#define __ia64_atomic_const(i) \
+ static const int __ia64_atomic_p = __builtin_constant_p(i) ? \
((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 || \
- (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0
+ (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0;\
+ __ia64_atomic_p
+#else
+#define __ia64_atomic_const(i) 0
+#endif
-#define atomic_add_return(i, v) \
+#define atomic_add_return(i,v) \
({ \
- int __i = (i); \
- static const int __ia64_atomic_p = __ia64_atomic_const(i); \
- __ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) : \
- ia64_atomic_add(__i, v); \
+ int __ia64_aar_i = (i); \
+ __ia64_atomic_const(i) \
+ ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \
+ : ia64_atomic_add(__ia64_aar_i, v); \
})
-#define atomic_sub_return(i, v) \
+#define atomic_sub_return(i,v) \
({ \
- int __i = (i); \
- static const int __ia64_atomic_p = __ia64_atomic_const(i); \
- __ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) : \
- ia64_atomic_sub(__i, v); \
+ int __ia64_asr_i = (i); \
+ __ia64_atomic_const(i) \
+ ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \
+ : ia64_atomic_sub(__ia64_asr_i, v); \
})
-#else
-#define atomic_add_return(i, v) ia64_atomic_add(i, v)
-#define atomic_sub_return(i, v) ia64_atomic_sub(i, v)
-#endif
#define atomic_fetch_add(i,v) \
({ \
int __ia64_aar_i = (i); \
- (__builtin_constant_p(i) \
- && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \
- || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \
- || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \
- || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \
+ __ia64_atomic_const(i) \
? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
: ia64_atomic_fetch_add(__ia64_aar_i, v); \
})
@@ -105,11 +102,7 @@ ATOMIC_OPS(sub, -)
#define atomic_fetch_sub(i,v) \
({ \
int __ia64_asr_i = (i); \
- (__builtin_constant_p(i) \
- && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \
- || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \
- || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \
- || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \
+ __ia64_atomic_const(i) \
? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
: ia64_atomic_fetch_sub(__ia64_asr_i, v); \
})
@@ -170,11 +163,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_add_return(i,v) \
({ \
long __ia64_aar_i = (i); \
- (__builtin_constant_p(i) \
- && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \
- || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \
- || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \
- || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \
+ __ia64_atomic_const(i) \
? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \
: ia64_atomic64_add(__ia64_aar_i, v); \
})
@@ -182,11 +171,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_sub_return(i,v) \
({ \
long __ia64_asr_i = (i); \
- (__builtin_constant_p(i) \
- && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \
- || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \
- || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \
- || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \
+ __ia64_atomic_const(i) \
? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \
: ia64_atomic64_sub(__ia64_asr_i, v); \
})
@@ -194,11 +179,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_fetch_add(i,v) \
({ \
long __ia64_aar_i = (i); \
- (__builtin_constant_p(i) \
- && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \
- || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \
- || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \
- || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \
+ __ia64_atomic_const(i) \
? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
: ia64_atomic64_fetch_add(__ia64_aar_i, v); \
})
@@ -206,11 +187,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_fetch_sub(i,v) \
({ \
long __ia64_asr_i = (i); \
- (__builtin_constant_p(i) \
- && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \
- || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \
- || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \
- || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \
+ __ia64_atomic_const(i) \
? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
: ia64_atomic64_fetch_sub(__ia64_asr_i, v); \
})
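
The ia64 rework collapses eight hand-expanded copies of the same
compile-time test into one __ia64_atomic_const() macro: when the
operand is a build-time constant from the small immediate set that
fetchadd supports, the cheap instruction is selected, and without
__OPTIMIZE__ the macro degrades to 0 so the generic atomic routine is
always used. A reduced sketch of constant-dispatch with
__builtin_constant_p (GCC/Clang; the two helpers are placeholders):

    static long do_fetchadd(long *p, long i)
    {
            return __atomic_fetch_add(p, i, __ATOMIC_ACQUIRE);
    }

    static long do_generic_add(long *p, long i)
    {
            return __atomic_fetch_add(p, i, __ATOMIC_SEQ_CST);
    }

    #define supported_imm(i)                                        \
            (__builtin_constant_p(i) &&                             \
             ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||      \
              (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16))

    #define add_return(p, i)                                        \
    ({                                                              \
            long __i = (i);                                         \
            supported_imm(i) ? do_fetchadd((p), __i)                \
                             : do_generic_add((p), __i);            \
    })
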
diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h
index bd3eeb8d1cfa..66b37a532765 100644
--- a/arch/ia64/include/asm/bug.h
+++ b/arch/ia64/include/asm/bug.h
@@ -4,7 +4,11 @@
#ifdef CONFIG_BUG
#define ia64_abort() __builtin_trap()
-#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
+#define BUG() do { \
+ printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
+ ia64_abort(); \
+} while (0)
/* should this BUG be made generic? */
#define HAVE_ARCH_BUG
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 0b4c65a1af25..498f3da3f225 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)
obj-y += esi_stub.o # must be in kernel proper
endif
obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
-obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_BINFMT_ELF) += elfcore.o
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index 85bba43e7d5d..8b5b8e6bc9d9 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -117,7 +117,7 @@ store_call_start(struct device *dev, struct device_attribute *attr,
#ifdef ERR_INJ_DEBUG
printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
- printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]);
+ printk(KERN_DEBUG "capabilities=%lx,\n", capabilities[cpu]);
printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
#endif
return size;
@@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
u64 virt_addr=simple_strtoull(buf, NULL, 16);
int ret;
- ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL);
+ ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL);
if (ret<=0) {
#ifdef ERR_INJ_DEBUG
printk("Virtual address %lx is not existing.\n",virt_addr);
diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py
index 89f3a1480a63..c55276e31b6b 100644
--- a/arch/ia64/scripts/unwcheck.py
+++ b/arch/ia64/scripts/unwcheck.py
@@ -16,7 +16,7 @@ import re
import sys
if len(sys.argv) != 2:
- print "Usage: %s FILE" % sys.argv[0]
+ print("Usage: %s FILE" % sys.argv[0])
sys.exit(2)
readelf = os.getenv("READELF", "readelf")
@@ -29,7 +29,7 @@ def check_func (func, slots, rlen_sum):
global num_errors
num_errors += 1
if not func: func = "[%#x-%#x]" % (start, end)
- print "ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum)
+ print("ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum))
return
num_funcs = 0
@@ -43,23 +43,23 @@ for line in os.popen("%s -u %s" % (readelf, sys.argv[1])):
check_func(func, slots, rlen_sum)
func = m.group(1)
- start = long(m.group(2), 16)
- end = long(m.group(3), 16)
+ start = int(m.group(2), 16)
+ end = int(m.group(3), 16)
slots = 3 * (end - start) / 16
- rlen_sum = 0L
+ rlen_sum = 0
num_funcs += 1
else:
m = rlen_pattern.match(line)
if m:
- rlen_sum += long(m.group(1))
+ rlen_sum += int(m.group(1))
check_func(func, slots, rlen_sum)
if num_errors == 0:
- print "No errors detected in %u functions." % num_funcs
+ print("No errors detected in %u functions." % num_funcs)
else:
if num_errors > 1:
err="errors"
else:
err="error"
- print "%u %s detected in %u functions." % (num_errors, err, num_funcs)
+ print("%u %s detected in %u functions." % (num_errors, err, num_funcs))
sys.exit(1)
diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h
index b7e2bf1ba4a6..275dca1435bf 100644
--- a/arch/m68k/include/asm/bug.h
+++ b/arch/m68k/include/asm/bug.h
@@ -8,16 +8,19 @@
#ifndef CONFIG_SUN3
#define BUG() do { \
pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#else
#define BUG() do { \
pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
#else
#define BUG() do { \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#endif
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index d3d435248a24..c73eb8209555 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -1088,6 +1088,10 @@ int __init mac_platform_init(void)
macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
platform_device_register_simple("macsonic", -1, NULL, 0);
+ if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+ macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ platform_device_register_simple("mac89x0", -1, NULL, 0);
+
if (macintosh_config->ether_type == MAC_ETHER_MACE)
platform_device_register_simple("macmace", -1, NULL, 0);
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 4f798aa671dd..3817a3e2146c 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -24,6 +24,7 @@ config MICROBLAZE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
+ select NO_BOOTMEM
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_OPROFILE
diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform
index 6996f397c16c..f7f1739c11b9 100644
--- a/arch/microblaze/Kconfig.platform
+++ b/arch/microblaze/Kconfig.platform
@@ -8,7 +8,6 @@ menu "Platform options"
config OPT_LIB_FUNCTION
bool "Optimalized lib function"
- depends on CPU_LITTLE_ENDIAN
default y
help
Allows turn on optimalized library function (memcpy and memmove).
@@ -21,6 +20,7 @@ config OPT_LIB_FUNCTION
config OPT_LIB_ASM
bool "Optimalized lib function ASM"
depends on OPT_LIB_FUNCTION && (XILINX_MICROBLAZE0_USE_BARREL = 1)
+ depends on CPU_BIG_ENDIAN
default n
help
Allows turn on optimalized library function (memcpy and memmove).
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index be84a4d3917f..7c968c1d1729 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -44,7 +44,6 @@ void machine_shutdown(void);
void machine_halt(void);
void machine_power_off(void);
-extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
# endif /* __ASSEMBLY__ */
diff --git a/arch/microblaze/lib/fastcopy.S b/arch/microblaze/lib/fastcopy.S
index 62021d7e249e..fdc48bb065d8 100644
--- a/arch/microblaze/lib/fastcopy.S
+++ b/arch/microblaze/lib/fastcopy.S
@@ -29,10 +29,6 @@
* between mem locations with size of xfer spec'd in bytes
*/
-#ifdef __MICROBLAZEEL__
-#error Microblaze LE not support ASM optimized lib func. Disable OPT_LIB_ASM.
-#endif
-
#include <linux/linkage.h>
.text
.globl memcpy
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 434639f9a3a6..df6de7ccdc2e 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -32,9 +32,6 @@ int mem_init_done;
#ifndef CONFIG_MMU
unsigned int __page_offset;
EXPORT_SYMBOL(__page_offset);
-
-#else
-static int init_bootmem_done;
#endif /* CONFIG_MMU */
char *klimit = _end;
@@ -117,7 +114,6 @@ static void __init paging_init(void)
void __init setup_memory(void)
{
- unsigned long map_size;
struct memblock_region *reg;
#ifndef CONFIG_MMU
@@ -174,17 +170,6 @@ void __init setup_memory(void)
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
- /*
- * Find an area to use for the bootmem bitmap.
- * We look for the first area which is at least
- * 128kB in length (128kB is enough for a bitmap
- * for 4GB of memory, using 4kB pages), plus 1 page
- * (in case the address isn't page-aligned).
- */
- map_size = init_bootmem_node(NODE_DATA(0),
- PFN_UP(TOPHYS((u32)klimit)), min_low_pfn, max_low_pfn);
- memblock_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size);
-
/* Add active regions with valid PFNs */
for_each_memblock(memory, reg) {
unsigned long start_pfn, end_pfn;
@@ -196,32 +181,9 @@ void __init setup_memory(void)
&memblock.memory, 0);
}
- /* free bootmem is whole main memory */
- free_bootmem_with_active_regions(0, max_low_pfn);
-
- /* reserve allocate blocks */
- for_each_memblock(reserved, reg) {
- unsigned long top = reg->base + reg->size - 1;
-
- pr_debug("reserved - 0x%08x-0x%08x, %lx, %lx\n",
- (u32) reg->base, (u32) reg->size, top,
- memory_start + lowmem_size - 1);
-
- if (top <= (memory_start + lowmem_size - 1)) {
- reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
- } else if (reg->base < (memory_start + lowmem_size - 1)) {
- unsigned long trunc_size = memory_start + lowmem_size -
- reg->base;
- reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT);
- }
- }
-
/* XXX need to clip this if using highmem? */
sparse_memory_present_with_active_regions(0);
-#ifdef CONFIG_MMU
- init_bootmem_done = 1;
-#endif
paging_init();
}
@@ -398,30 +360,16 @@ asmlinkage void __init mmu_init(void)
/* This is only called until mem_init is done. */
void __init *early_get_page(void)
{
- void *p;
- if (init_bootmem_done) {
- p = alloc_bootmem_pages(PAGE_SIZE);
- } else {
- /*
- * Mem start + kernel_tlb -> here is limit
- * because of mem mapping from head.S
- */
- p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
- memory_start + kernel_tlb));
- }
- return p;
+ /*
+ * Mem start + kernel_tlb -> here is limit
+ * because of mem mapping from head.S
+ */
+ return __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
+ memory_start + kernel_tlb));
}
#endif /* CONFIG_MMU */
-void * __ref alloc_maybe_bootmem(size_t size, gfp_t mask)
-{
- if (mem_init_done)
- return kmalloc(size, mask);
- else
- return alloc_bootmem(size);
-}
-
void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
{
void *p;
diff --git a/arch/mips/ath25/board.c b/arch/mips/ath25/board.c
index 9ab48ff80c1c..6d11ae581ea7 100644
--- a/arch/mips/ath25/board.c
+++ b/arch/mips/ath25/board.c
@@ -135,6 +135,8 @@ int __init ath25_find_config(phys_addr_t base, unsigned long size)
}
board_data = kzalloc(BOARD_CONFIG_BUFSZ, GFP_KERNEL);
+ if (!board_data)
+ goto error;
ath25_board.config = (struct ath25_boarddata *)board_data;
memcpy_fromio(board_data, bcfg, 0x100);
if (broken_boarddata) {
diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile
index 1bd5c4f00d19..c22da16d67b8 100644
--- a/arch/mips/boot/Makefile
+++ b/arch/mips/boot/Makefile
@@ -126,6 +126,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS
quiet_cmd_cpp_its_S = ITS $@
cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \
+ -D__ASSEMBLY__ \
-DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \
-DVMLINUX_BINARY="\"$(3)\"" \
-DVMLINUX_COMPRESSION="\"$(2)\"" \
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 5b3a3f6a9ad3..d99f5242169e 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2277,6 +2277,8 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
}
host_data = kzalloc(sizeof(*host_data), GFP_KERNEL);
+ if (!host_data)
+ return -ENOMEM;
raw_spin_lock_init(&host_data->lock);
addr = of_get_address(ciu_node, 0, NULL, NULL);
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 946681db8dc3..9a0fa66b81ac 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -86,7 +86,6 @@ struct compat_flock {
compat_off_t l_len;
s32 l_sysid;
compat_pid_t l_pid;
- short __unused;
s32 pad[4];
};
diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c
index 19c88d770054..fcf9af492d60 100644
--- a/arch/mips/kernel/mips-cpc.c
+++ b/arch/mips/kernel/mips-cpc.c
@@ -10,6 +10,8 @@
#include <linux/errno.h>
#include <linux/percpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/spinlock.h>
#include <asm/mips-cps.h>
@@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
phys_addr_t __weak mips_cpc_default_phys_base(void)
{
+ struct device_node *cpc_node;
+ struct resource res;
+ int err;
+
+ cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
+ if (cpc_node) {
+ err = of_address_to_resource(cpc_node, 0, &res);
+ if (!err)
+ return res.start;
+ }
+
return 0;
}
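
mips_cpc_default_phys_base() now consults the device tree before
giving up: find the 'mti,mips-cpc' node, translate its first 'reg'
entry into a resource, and use its start address.
of_find_compatible_node() and of_address_to_resource() are the actual
kernel APIs used in the hunk; a condensed version of the lookup
(kernel context assumed):

    phys_addr_t cpc_base_from_dt(void)
    {
            struct device_node *np;
            struct resource res;

            np = of_find_compatible_node(NULL, NULL, "mti,mips-cpc");
            if (np && !of_address_to_resource(np, 0, &res))
                    return res.start;

            return 0;       /* no DT node: keep the platform default */
    }
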
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 85bc601e9a0d..5f8b0a9e30b3 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -375,6 +375,7 @@ static void __init bootmem_init(void)
unsigned long reserved_end;
unsigned long mapstart = ~0UL;
unsigned long bootmap_size;
+ phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
bool bootmap_valid = false;
int i;
@@ -395,7 +396,8 @@ static void __init bootmem_init(void)
max_low_pfn = 0;
/*
- * Find the highest page frame number we have available.
+ * Find the highest page frame number we have available
+ * and the lowest used RAM address.
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
@@ -407,6 +409,8 @@ static void __init bootmem_init(void)
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
+ ramstart = min(ramstart, boot_mem_map.map[i].addr);
+
#ifndef CONFIG_HIGHMEM
/*
* Skip highmem here so we get an accurate max_low_pfn if low
@@ -436,6 +440,13 @@ static void __init bootmem_init(void)
mapstart = max(reserved_end, start);
}
+ /*
+ * Reserve any memory between the start of RAM and PHYS_OFFSET
+ */
+ if (ramstart > PHYS_OFFSET)
+ add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
+ BOOT_MEM_RESERVED);
+
if (min_low_pfn >= max_low_pfn)
panic("Incorrect memory mapping !!!");
if (min_low_pfn > ARCH_PFN_OFFSET) {
@@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)
add_memory_region(start, size, BOOT_MEM_RAM);
- if (start && start > PHYS_OFFSET)
- add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
- BOOT_MEM_RESERVED);
return 0;
}
early_param("mem", early_parse_mem);
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 87dcac2447c8..159e83add4bb 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -168,11 +168,11 @@ static void bmips_prepare_cpus(unsigned int max_cpus)
return;
}
- if (request_irq(IPI0_IRQ, bmips_ipi_interrupt, IRQF_PERCPU,
- "smp_ipi0", NULL))
+ if (request_irq(IPI0_IRQ, bmips_ipi_interrupt,
+ IRQF_PERCPU | IRQF_NO_SUSPEND, "smp_ipi0", NULL))
panic("Can't request IPI0 interrupt");
- if (request_irq(IPI1_IRQ, bmips_ipi_interrupt, IRQF_PERCPU,
- "smp_ipi1", NULL))
+ if (request_irq(IPI1_IRQ, bmips_ipi_interrupt,
+ IRQF_PERCPU | IRQF_NO_SUSPEND, "smp_ipi1", NULL))
panic("Can't request IPI1 interrupt");
}
@@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)
*/
}
-void __init bmips_cpu_setup(void)
+void bmips_cpu_setup(void)
{
void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
u32 __maybe_unused cfg;
diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig
index bc2fdbfa8223..72af0c183969 100644
--- a/arch/mips/loongson64/Kconfig
+++ b/arch/mips/loongson64/Kconfig
@@ -7,6 +7,8 @@ choice
config LEMOTE_FULOONG2E
bool "Lemote Fuloong(2e) mini-PC"
select ARCH_SPARSEMEM_ENABLE
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
select CEVT_R4K
select CSRC_R4K
select SYS_HAS_CPU_LOONGSON2E
@@ -33,6 +35,8 @@ config LEMOTE_FULOONG2E
config LEMOTE_MACH2F
bool "Lemote Loongson 2F family machines"
select ARCH_SPARSEMEM_ENABLE
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
select BOARD_SCACHE
select BOOT_ELF32
select CEVT_R4K if ! MIPS_EXTERNAL_TIMER
@@ -62,6 +66,8 @@ config LEMOTE_MACH2F
config LOONGSON_MACH3X
bool "Generic Loongson 3 family machines"
select ARCH_SPARSEMEM_ENABLE
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
select GENERIC_ISA_DMA_SUPPORT_BROKEN
select BOOT_ELF32
select BOARD_SCACHE
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 3742508cc534..bd5ce31936f5 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -26,6 +26,7 @@ void flush_user_icache_range_asm(unsigned long, unsigned long);
void flush_kernel_icache_range_asm(unsigned long, unsigned long);
void flush_user_dcache_range_asm(unsigned long, unsigned long);
void flush_kernel_dcache_range_asm(unsigned long, unsigned long);
+void purge_kernel_dcache_range_asm(unsigned long, unsigned long);
void flush_kernel_dcache_page_asm(void *);
void flush_kernel_icache_page(void *);
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 0e6ab6e4a4e9..2dbe5580a1a4 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -316,6 +316,8 @@ extern int _parisc_requires_coherency;
#define parisc_requires_coherency() (0)
#endif
+extern int running_on_qemu;
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_PARISC_PROCESSOR_H */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 19c0c141bc3f..e3b45546d589 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -465,10 +465,10 @@ EXPORT_SYMBOL(copy_user_page);
int __flush_tlb_range(unsigned long sid, unsigned long start,
unsigned long end)
{
- unsigned long flags, size;
+ unsigned long flags;
- size = (end - start);
- if (size >= parisc_tlb_flush_threshold) {
+ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+ end - start >= parisc_tlb_flush_threshold) {
flush_tlb_all();
return 1;
}
@@ -539,13 +539,12 @@ void flush_cache_mm(struct mm_struct *mm)
struct vm_area_struct *vma;
pgd_t *pgd;
- /* Flush the TLB to avoid speculation if coherency is required. */
- if (parisc_requires_coherency())
- flush_tlb_all();
-
/* Flushing the whole cache on each cpu takes forever on
rp3440, etc. So, avoid it if the mm isn't too big. */
- if (mm_total_size(mm) >= parisc_cache_flush_threshold) {
+ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+ mm_total_size(mm) >= parisc_cache_flush_threshold) {
+ if (mm->context)
+ flush_tlb_all();
flush_cache_all();
return;
}
@@ -553,9 +552,9 @@ void flush_cache_mm(struct mm_struct *mm)
if (mm->context == mfsp(3)) {
for (vma = mm->mmap; vma; vma = vma->vm_next) {
flush_user_dcache_range_asm(vma->vm_start, vma->vm_end);
- if ((vma->vm_flags & VM_EXEC) == 0)
- continue;
- flush_user_icache_range_asm(vma->vm_start, vma->vm_end);
+ if (vma->vm_flags & VM_EXEC)
+ flush_user_icache_range_asm(vma->vm_start, vma->vm_end);
+ flush_tlb_range(vma, vma->vm_start, vma->vm_end);
}
return;
}
@@ -573,6 +572,8 @@ void flush_cache_mm(struct mm_struct *mm)
pfn = pte_pfn(*ptep);
if (!pfn_valid(pfn))
continue;
+ if (unlikely(mm->context))
+ flush_tlb_page(vma, addr);
__flush_cache_page(vma, addr, PFN_PHYS(pfn));
}
}
@@ -581,30 +582,45 @@ void flush_cache_mm(struct mm_struct *mm)
void flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- BUG_ON(!vma->vm_mm->context);
-
- /* Flush the TLB to avoid speculation if coherency is required. */
- if (parisc_requires_coherency())
- flush_tlb_range(vma, start, end);
+ pgd_t *pgd;
+ unsigned long addr;
- if ((end - start) >= parisc_cache_flush_threshold
- || vma->vm_mm->context != mfsp(3)) {
+ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+ end - start >= parisc_cache_flush_threshold) {
+ if (vma->vm_mm->context)
+ flush_tlb_range(vma, start, end);
flush_cache_all();
return;
}
- flush_user_dcache_range_asm(start, end);
- if (vma->vm_flags & VM_EXEC)
- flush_user_icache_range_asm(start, end);
+ if (vma->vm_mm->context == mfsp(3)) {
+ flush_user_dcache_range_asm(start, end);
+ if (vma->vm_flags & VM_EXEC)
+ flush_user_icache_range_asm(start, end);
+ flush_tlb_range(vma, start, end);
+ return;
+ }
+
+ pgd = vma->vm_mm->pgd;
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ unsigned long pfn;
+ pte_t *ptep = get_ptep(pgd, addr);
+ if (!ptep)
+ continue;
+ pfn = pte_pfn(*ptep);
+ if (pfn_valid(pfn)) {
+ if (unlikely(vma->vm_mm->context))
+ flush_tlb_page(vma, addr);
+ __flush_cache_page(vma, addr, PFN_PHYS(pfn));
+ }
+ }
}
void
flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
{
- BUG_ON(!vma->vm_mm->context);
-
if (pfn_valid(pfn)) {
- if (parisc_requires_coherency())
+ if (likely(vma->vm_mm->context))
flush_tlb_page(vma, vmaddr);
__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
}
@@ -613,21 +629,33 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
void flush_kernel_vmap_range(void *vaddr, int size)
{
unsigned long start = (unsigned long)vaddr;
+ unsigned long end = start + size;
- if ((unsigned long)size > parisc_cache_flush_threshold)
+ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+ (unsigned long)size >= parisc_cache_flush_threshold) {
+ flush_tlb_kernel_range(start, end);
flush_data_cache();
- else
- flush_kernel_dcache_range_asm(start, start + size);
+ return;
+ }
+
+ flush_kernel_dcache_range_asm(start, end);
+ flush_tlb_kernel_range(start, end);
}
EXPORT_SYMBOL(flush_kernel_vmap_range);
void invalidate_kernel_vmap_range(void *vaddr, int size)
{
unsigned long start = (unsigned long)vaddr;
+ unsigned long end = start + size;
- if ((unsigned long)size > parisc_cache_flush_threshold)
+ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+ (unsigned long)size >= parisc_cache_flush_threshold) {
+ flush_tlb_kernel_range(start, end);
flush_data_cache();
- else
- flush_kernel_dcache_range_asm(start, start + size);
+ return;
+ }
+
+ purge_kernel_dcache_range_asm(start, end);
+ flush_tlb_kernel_range(start, end);
}
EXPORT_SYMBOL(invalidate_kernel_vmap_range);
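Every flush helper in this file now shares the same guard: take the whole-cache/whole-TLB path only when the range is at least the flush threshold and it is safe to do so, i.e. not on SMP with interrupts disabled, since a global TLB flush may need to interrupt other CPUs. The predicate in isolation (illustrative C with invented stand-ins for the kernel state):

#include <stdbool.h>
#include <stddef.h>

static const bool config_smp = true;        /* stand-in for CONFIG_SMP */
static bool irqs_disabled_now;              /* stand-in for arch_irqs_disabled() */
static size_t flush_threshold = 0x400000;   /* stand-in for the tunable */

/* true: flush everything; false: flush only the given range */
static bool want_full_flush(size_t range)
{
        /* a global flush may IPI other CPUs, which can deadlock when
         * called on SMP with interrupts disabled, so fall back to a
         * ranged flush in that case no matter how large the range is */
        return (!config_smp || !irqs_disabled_now) &&
               range >= flush_threshold;
}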
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index bbbe360b458f..fbb4e43fda05 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -138,6 +138,16 @@ $pgt_fill_loop:
std %dp,0x18(%r10)
#endif
+#ifdef CONFIG_64BIT
+ /* Get PDCE_PROC for monarch CPU. */
+#define MEM_PDC_LO 0x388
+#define MEM_PDC_HI 0x35C
+ ldw MEM_PDC_LO(%r0),%r3
+ ldw MEM_PDC_HI(%r0),%r10
+ depd %r10, 31, 32, %r3 /* move to upper word */
+#endif
+
#ifdef CONFIG_SMP
/* Set the smp rendezvous address into page zero.
** It would be safer to do this in init_smp_config() but
@@ -196,12 +206,6 @@ common_stext:
** Someday, palo might not do this for the Monarch either.
*/
2:
-#define MEM_PDC_LO 0x388
-#define MEM_PDC_HI 0x35C
- ldw MEM_PDC_LO(%r0),%r3
- ldw MEM_PDC_HI(%r0),%r6
- depd %r6, 31, 32, %r3 /* move to upper word */
-
mfctl %cr30,%r6 /* PCX-W2 firmware bug */
ldo PDC_PSW(%r0),%arg0 /* 21 */
@@ -268,6 +272,8 @@ $install_iva:
aligned_rfi:
pcxt_ssm_bug
+ copy %r3, %arg0 /* PDCE_PROC for smp_callin() */
+
rsm PSW_SM_QUIET,%r0 /* off troublesome PSW bits */
/* Don't need NOPs, have 8 compliant insn before rfi */
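The depd at the top deposits the word loaded from MEM_PDC_HI into bits 63..32 of %r3, which already holds MEM_PDC_LO, so %r3 ends up with the full 64-bit PDCE_PROC address handed to smp_callin(). The equivalent C, as an illustrative one-liner (names invented):

#include <stdint.h>

/* what "depd %hi, 31, 32, %lo" builds: hi becomes the upper word */
static uint64_t pdce_proc_addr(uint32_t mem_pdc_lo, uint32_t mem_pdc_hi)
{
        return ((uint64_t)mem_pdc_hi << 32) | mem_pdc_lo;
}

smp_callin() in the smp.c hunk below re-derives the same value from PAGE0 to cross-check what the assembly passed in.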
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 2d40c4ff3f69..67b0f7532e83 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -1110,6 +1110,28 @@ ENTRY_CFI(flush_kernel_dcache_range_asm)
.procend
ENDPROC_CFI(flush_kernel_dcache_range_asm)
+ENTRY_CFI(purge_kernel_dcache_range_asm)
+ .proc
+ .callinfo NO_CALLS
+ .entry
+
+ ldil L%dcache_stride, %r1
+ ldw R%dcache_stride(%r1), %r23
+ ldo -1(%r23), %r21
+ ANDCM %r26, %r21, %r26
+
+1: cmpb,COND(<<),n %r26, %r25,1b
+ pdc,m %r23(%r26)
+
+ sync
+ syncdma
+ bv %r0(%r2)
+ nop
+ .exit
+
+ .procend
+ENDPROC_CFI(purge_kernel_dcache_range_asm)
+
ENTRY_CFI(flush_user_icache_range_asm)
.proc
.callinfo NO_CALLS
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 30c28ab14540..4065b5e48c9d 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -292,10 +292,15 @@ smp_cpu_init(int cpunum)
* Slaves start using C here. Indirectly called from smp_slave_stext.
* Do what start_kernel() and main() do for boot strap processor (aka monarch)
*/
-void __init smp_callin(void)
+void __init smp_callin(unsigned long pdce_proc)
{
int slave_id = cpu_now_booting;
+#ifdef CONFIG_64BIT
+ WARN_ON(((unsigned long)(PAGE0->mem_pdc_hi) << 32
+ | PAGE0->mem_pdc) != pdce_proc);
+#endif
+
smp_cpu_init(slave_id);
preempt_disable();
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 4b8fd6dc22da..f7e684560186 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -76,10 +76,10 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
next_tick = cpuinfo->it_value;
/* Calculate how many ticks have elapsed. */
+ now = mfctl(16);
do {
++ticks_elapsed;
next_tick += cpt;
- now = mfctl(16);
} while (next_tick - now > cpt);
/* Store (in CR16 cycles) up to when we are accounting right now. */
@@ -103,16 +103,17 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
* if one or the other wrapped. If "now" is "bigger" we'll end up
* with a very large unsigned number.
*/
- while (next_tick - mfctl(16) > cpt)
+ now = mfctl(16);
+ while (next_tick - now > cpt)
next_tick += cpt;
/* Program the IT when to deliver the next interrupt.
* Only bottom 32-bits of next_tick are writable in CR16!
* Timer interrupt will be delivered at least a few hundred cycles
- * after the IT fires, so if we are too close (<= 500 cycles) to the
+ * after the IT fires, so if we are too close (<= 8000 cycles) to the
* next cycle, simply skip it.
*/
- if (next_tick - mfctl(16) <= 500)
+ if (next_tick - now <= 8000)
next_tick += cpt;
mtctl(next_tick, 16);
@@ -248,7 +249,7 @@ static int __init init_cr16_clocksource(void)
* different sockets, so mark them unstable and lower rating on
* multi-socket SMP systems.
*/
- if (num_online_cpus() > 1) {
+ if (num_online_cpus() > 1 && !running_on_qemu) {
int cpu;
unsigned long cpu0_loc;
cpu0_loc = per_cpu(cpu_data, 0).cpu_loc;
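The comparisons in this handler are written as subtractions on unsigned values precisely so that a CR16 wrap between reads cannot confuse them: next_tick - now is the true modular distance. A self-contained demonstration with made-up numbers:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint32_t cpt = 250000;            /* cycles per clock tick */
        uint32_t next_tick = 0xFFFFFF00;  /* programmed just before ... */
        uint32_t now = 0x00000100;        /* ... the counter wrapped */

        /* "next_tick > now" would wrongly say the tick is still ahead;
         * the unsigned difference wraps and exposes the missed tick */
        assert(next_tick - now > cpt);    /* huge: tick already passed */
        assert(now - next_tick == 0x200); /* we are 0x200 cycles late */
        return 0;
}

Reading mfctl(16) once into now, as the hunks above do, also keeps the two comparisons consistent with each other.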
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 48f41399fc0b..cab32ee824d2 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -629,7 +629,12 @@ void __init mem_init(void)
#endif
mem_init_print_info(NULL);
-#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
+
+#if 0
+ /*
+ * Do not expose the virtual kernel memory layout to userspace.
+ * But keep code for debugging purposes.
+ */
printk("virtual kernel memory layout:\n"
" vmalloc : 0x%px - 0x%px (%4ld MB)\n"
" memory : 0x%px - 0x%px (%4ld MB)\n"
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index ef6549e57157..26d5d2a5b8e9 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \
libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c
libfdtheader := fdt.h libfdt.h libfdt_internal.h
-$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \
+$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \
+ treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \
$(addprefix $(obj)/,$(libfdtheader))
src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 30a155c0a6b0..c615abdce119 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -16,6 +16,7 @@
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 949d691094a4..67c5475311ee 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
* keeping the prototype consistent across the two formats.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
- unsigned int subpg_index, unsigned long hidx)
+ unsigned int subpg_index, unsigned long hidx,
+ int offset)
{
return (hidx << H_PAGE_F_GIX_SHIFT) &
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 338b7da468ce..3bcf269f8f55 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -45,7 +45,7 @@
* generic accessors and iterators here
*/
#define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
{
real_pte_t rpte;
unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
*/
smp_rmb();
- hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ hidxp = (unsigned long *)(ptep + offset);
rpte.hidx = *hidxp;
return rpte;
}
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
* expected to modify the PTE bits accordingly and commit the PTE to memory.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
- unsigned int subpg_index, unsigned long hidx)
+ unsigned int subpg_index,
+ unsigned long hidx, int offset)
{
- unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ unsigned long *hidxp = (unsigned long *)(ptep + offset);
rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
*hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
}
#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
(sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \
+ (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 0920eff731b3..935adcd92a81 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -23,7 +23,8 @@
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+ defined(CONFIG_PPC_64K_PAGES)
/*
* only with hash 64k we need to use the second half of pmd page table
* to store pointer to deposited pgtable_t
@@ -33,6 +34,16 @@
#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE
#endif
/*
+ * We store the slot details in the second half of the page table.
+ * Increase the pud-level table so that hugetlb PTEs can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
+#endif
+/*
* Define the address range of the kernel non-linear virtual area
*/
#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
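The doubled H_PUD_CACHE_INDEX follows the trick already used at PMD level: allocate twice as many entries, keep the real page-table entries in the first half, and park each entry's hash-slot details a fixed distance away in the second half, which is exactly the offset now threaded through __real_pte()/pte_set_hidx() in the later hunks. A toy model of that layout (plain C, invented names):

#include <stdlib.h>

#define ENTRIES 512   /* e.g. PTRS_PER_PUD in this toy model */

/* one allocation, twice the entries: [0, ENTRIES) hold the real
 * entries, [ENTRIES, 2*ENTRIES) hold per-entry slot details */
static unsigned long *table_alloc_with_hidx(void)
{
        return calloc(2 * ENTRIES, sizeof(unsigned long));
}

/* the slot word for *entry lives a fixed offset of entries away */
static unsigned long *hidx_slot(unsigned long *entry, int offset)
{
        return entry + offset;   /* offset == ENTRIES in this model */
}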
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 1fcfa425cefa..4746bc68d446 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
+ pgd_t *pgd;
+
if (radix_enabled())
return radix__pgd_alloc(mm);
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
+
+ pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+ memset(pgd, 0, PGD_TABLE_SIZE);
+
+ return pgd;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
- kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+ kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
* ahead and flush the page walk cache
*/
flush_tlb_pgtable(tlb, address);
- pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+ pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 51017726d495..a6b9f1d74600 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
extern unsigned long __pud_index_size;
extern unsigned long __pgd_index_size;
extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
#define PTE_INDEX_SIZE __pte_index_size
#define PMD_INDEX_SIZE __pmd_index_size
#define PUD_INDEX_SIZE __pud_index_size
#define PGD_INDEX_SIZE __pgd_index_size
#define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
/*
* Because of use of pte fragments and THP, size of page table
* are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
*/
#ifndef __real_pte
-#define __real_pte(e,p) ((real_pte_t){(e)})
+#define __real_pte(e, p, o) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 176dfb73d42c..471b2274fbeb 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXC_HV, SOFTEN_TEST_HV, bitmask)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 511acfd7ab0d..535add3f7791 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,7 +52,7 @@
#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000)
-#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 88e5e8f17e98..855e17d158b1 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -30,6 +30,16 @@
#define PACA_IRQ_PMI 0x40
/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE)
+#endif
+
+/*
* flags for paca->irq_soft_mask
*/
#define IRQS_ENABLED 0
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
static inline void may_hard_irq_enable(void)
{
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
- if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+ if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
__hard_irq_enable();
}
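may_hard_irq_enable() now keys off the new mask rather than PACA_IRQ_EE alone: any pending source that stays asserted until replayed forbids hard-enabling, because it would immediately re-fire. Reduced to its bitmask logic (a minimal sketch, flag values invented):

#include <stdbool.h>
#include <stdint.h>

#define IRQ_EE   0x01u  /* external interrupt: stays asserted */
#define IRQ_DEC  0x02u  /* decrementer: clears itself, safe to enable */
#define IRQ_PMI  0x40u  /* perf monitor: stays asserted on Book3S */

#define MUST_HARD_MASK (IRQ_EE | IRQ_PMI)

/* hard-enable only if nothing pending would re-fire before replay */
static bool may_hard_enable(uint32_t irq_happened)
{
        return (irq_happened & MUST_HARD_MASK) == 0;
}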
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 9dcbfa6bbb91..d8b1e8e7e035 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
return false;
}
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
#endif /* CONFIG_KEXEC_CORE */
#endif /* ! __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 504a3c36ce5c..03bbd1149530 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index abddf5830ad5..5c5f75d005ad 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -27,6 +27,7 @@
#else
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
/*
* Define the address range of the kernel non-linear virtual area
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 88187c285c70..9f421641a35c 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+ numa_cpu_lookup_table[cpu] = node;
+}
+
static inline int early_cpu_to_node(int cpu)
{
int nid;
@@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
{
return 0;
}
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int start_topology_update(void);
extern int stop_topology_update(void);
extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
#else
static inline int start_topology_update(void)
{
@@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
{
return 0;
}
+static inline int find_and_online_cpu_nid(int cpu)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index beea2182d754..0c0b66fc5bfb 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -384,7 +384,8 @@ static void *eeh_report_resume(void *data, void *userdata)
eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
- eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+ if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
+ eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
#endif
return NULL;
}
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index ee832d344a5a..9b6e653e501a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -943,6 +943,8 @@ kernel_dbg_exc:
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
* accordingly and if the interrupt is level sensitive, we hard disable
+ * it. Hard disabling (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK,
+ * so keep these in sync.
*/
.macro masked_interrupt_book3e paca_irq full_mask
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 243d072a225a..3ac87e53b3da 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* triggered and won't automatically refire.
* - If it was a HMI we return immediately since we handled it in realmode
* and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
#define MASKED_INTERRUPT(_H) \
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt: \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
-1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
- bne 2f; \
+1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
+ beq 2f; \
mfspr r10,SPRN_##_H##SRR1; \
xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index adf044daafd7..acf4b2e0530c 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -874,7 +874,6 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.mmu = 0,
.hash_ext = 0,
.radix_ext = 0,
- .byte22 = OV5_FEAT(OV5_DRC_INFO),
},
/* option vector 6: IBM PAPR hints */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 5a8bfee6e187..04d0bbd7a1dd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
- if (cpu_has_feature(CPU_FTR_ARCH_206))
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
- if (cpu_has_feature(CPU_FTR_ARCH_206))
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 0c854816e653..5cb4e4687107 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep)
kmem_cache_free(kvm_pte_cache, ptep);
}
+/* Like pmd_huge() and pmd_large(), but works regardless of config options */
+static inline int pmd_is_leaf(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
unsigned int level, unsigned long mmu_seq)
{
@@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
else
new_pmd = pmd_alloc_one(kvm->mm, gpa);
- if (level == 0 && !(pmd && pmd_present(*pmd)))
+ if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
new_ptep = kvmppc_pte_alloc();
/* Check if we might have been invalidated; let the guest retry if so */
@@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
new_pmd = NULL;
}
pmd = pmd_offset(pud, gpa);
- if (pmd_large(*pmd)) {
- /* Someone else has instantiated a large page here; retry */
- ret = -EAGAIN;
- goto out_unlock;
- }
- if (level == 1 && !pmd_none(*pmd)) {
+ if (pmd_is_leaf(*pmd)) {
+ unsigned long lgpa = gpa & PMD_MASK;
+
+ /*
+ * If we raced with another CPU which has just put
+ * a 2MB pte in after we saw a pte page, try again.
+ */
+ if (level == 0 && !new_ptep) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ /* Valid 2MB page here already, remove it */
+ old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+ ~0UL, 0, lgpa, PMD_SHIFT);
+ kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
+ if (old & _PAGE_DIRTY) {
+ unsigned long gfn = lgpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (memslot && memslot->dirty_bitmap)
+ kvmppc_update_dirty_map(memslot,
+ gfn, PMD_SIZE);
+ }
+ } else if (level == 1 && !pmd_none(*pmd)) {
/*
* There's a page table page here, but we wanted
* to install a large page. Tell the caller and let
@@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
} else {
page = pages[0];
pfn = page_to_pfn(page);
- if (PageHuge(page)) {
- page = compound_head(page);
- pte_size <<= compound_order(page);
+ if (PageCompound(page)) {
+ pte_size <<= compound_order(compound_head(page));
/* See if we can insert a 2MB large-page PTE here */
if (pte_size >= PMD_SIZE &&
- (gpa & PMD_MASK & PAGE_MASK) ==
- (hva & PMD_MASK & PAGE_MASK)) {
+ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+ (hva & (PMD_SIZE - PAGE_SIZE))) {
level = 1;
pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
}
}
/* See if we can provide write access */
if (writing) {
- /*
- * We assume gup_fast has set dirty on the host PTE.
- */
pgflags |= _PAGE_WRITE;
} else {
local_irq_save(flags);
ptep = find_current_mm_pte(current->mm->pgd,
hva, NULL, NULL);
- if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
+ if (ptep && pte_write(*ptep))
pgflags |= _PAGE_WRITE;
local_irq_restore(flags);
}
@@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pte = pfn_pte(pfn, __pgprot(pgflags));
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
}
- if (ret == 0 || ret == -EAGAIN)
- ret = RESUME_GUEST;
if (page) {
- /*
- * We drop pages[0] here, not page because page might
- * have been set to the head page of a compound, but
- * we have to drop the reference on the correct tail
- * page to match the get inside gup()
- */
- put_page(pages[0]);
+ if (!ret && (pgflags & _PAGE_WRITE))
+ set_page_dirty_lock(page);
+ put_page(page);
}
+
+ if (ret == 0 || ret == -EAGAIN)
+ ret = RESUME_GUEST;
return ret;
}
@@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm)
continue;
pmd = pmd_offset(pud, 0);
for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
- if (pmd_huge(*pmd)) {
+ if (pmd_is_leaf(*pmd)) {
pmd_clear(pmd);
continue;
}
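Two conditions gate the 2MB fast path in the fault handler above: the backing page must be part of a compound page of at least PMD_SIZE, and gpa and hva must be congruent modulo 2MB so one leaf PTE can cover both views. The congruence test in isolation (constants assumed for a 4K/2M configuration):

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 0x1000ULL     /* 4K */
#define PMD_SIZE  0x200000ULL   /* 2M */

/* true if gpa and hva select the same 4K slots within a 2M region,
 * i.e. bits 20..12 agree, so a single 2M mapping can serve both */
static bool can_use_2mb_pte(uint64_t gpa, uint64_t hva)
{
        uint64_t mask = PMD_SIZE - PAGE_SIZE;
        return (gpa & mask) == (hva & mask);
}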
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 89707354c2ef..9cb9448163c4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
trace_hardirqs_on();
- guest_enter();
+ guest_enter_irqoff();
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
@@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
- guest_exit();
-
trace_hardirqs_off();
set_irq_happened(trap);
@@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_set_host_core(pcpu);
local_irq_enable();
+ guest_exit();
/* Let secondaries go back to the offline loop */
for (i = 0; i < controlled_threads; ++i) {
@@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto up_out;
psize = vma_kernel_pagesize(vma);
- porder = __ilog2(psize);
up_read(&current->mm->mmap_sem);
/* We can handle 4k, 64k or 16M pages in the VRMA */
- err = -EINVAL;
- if (!(psize == 0x1000 || psize == 0x10000 ||
- psize == 0x1000000))
- goto out_srcu;
+ if (psize >= 0x1000000)
+ psize = 0x1000000;
+ else if (psize >= 0x10000)
+ psize = 0x10000;
+ else
+ psize = 0x1000;
+ porder = __ilog2(psize);
senc = slb_pgsize_encoding(psize);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f31f357b8c5a..d33264697a31 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -320,7 +320,6 @@ kvm_novcpu_exit:
stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
- lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
@@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
secondary_too_late:
li r12, 0
+ stw r12, STACK_SLOT_TRAP(r1)
cmpdi r4, 0
beq 11f
stw r12, VCPU_TRAP(r4)
@@ -1558,12 +1558,12 @@ mc_cont:
3: stw r5,VCPU_SLB_MAX(r9)
guest_bypass:
+ stw r12, STACK_SLOT_TRAP(r1)
mr r3, r12
/* Increment exit count, poke other threads to exit */
bl kvmhv_commence_exit
nop
ld r9, HSTATE_KVM_VCPU(r13)
- lwz r12, VCPU_TRAP(r9)
/* Stop others sending VCPU interrupts to this physical CPU */
li r0, -1
@@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
* have to coordinate the hardware threads.
+ * Here STACK_SLOT_TRAP(r1) contains the trap number.
*/
kvmhv_switch_to_host:
/* Secondary threads wait for primary to do partition switch */
@@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* If HMI, call kvmppc_realmode_hmi_handler() */
+ lwz r12, STACK_SLOT_TRAP(r1)
cmpwi r12, BOOK3S_INTERRUPT_HMI
bne 27f
bl kvmppc_realmode_hmi_handler
nop
cmpdi r3, 0
- li r12, BOOK3S_INTERRUPT_HMI
/*
* At this point kvmppc_realmode_hmi_handler may have resync-ed
* the TB, and if it has, we must not subtract the guest timebase
@@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION
lwz r8, KVM_SPLIT_DO_RESTORE(r3)
cmpwi r8, 0
beq 47f
- stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_p9_restore_lpcr
nop
- lwz r12, STACK_SLOT_TRAP(r1)
b 48f
47:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
@@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
+ lwz r12, STACK_SLOT_TRAP(r1) /* return trap # in r12 */
ld r0, SFS+PPC_LR_STKOFF(r1)
addi r1, r1, SFS
mtlr r0
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f0f5cd4d2fe7..f9818d7d3381 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
if (!qpage) {
pr_err("Failed to allocate queue %d for VCPU %d\n",
prio, xc->server_num);
- return -ENOMEM;;
+ return -ENOMEM;
}
memset(qpage, 0, 1 << xive->q_order);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 403e642c78f5..52c205373986 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int rt, int is_default_endian)
{
- enum emulation_result emulated;
+ enum emulation_result emulated = EMULATE_DONE;
while (vcpu->arch.mmio_vmx_copy_nums) {
emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
@@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu);
+#ifdef CONFIG_ALTIVEC
out:
+#endif
vcpu_put(vcpu);
return r;
}
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 1604110c4238..3f1803672c9b 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
- dr_cell->flags = cpu_to_be32(lmb->flags);
+ dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
}
static int drmem_update_dt_v2(struct device_node *memory,
@@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
}
if (prev_lmb->aa_index != lmb->aa_index ||
- prev_lmb->flags != lmb->flags)
+ drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))
lmb_sets++;
prev_lmb = lmb;
@@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
}
if (prev_lmb->aa_index != lmb->aa_index ||
- prev_lmb->flags != lmb->flags) {
+ drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {
/* end of one set, start of another */
dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
dr_cell++;
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
u32 i, n_lmbs;
n_lmbs = of_read_number(prop++, 1);
+ if (n_lmbs == 0)
+ return;
for (i = 0; i < n_lmbs; i++) {
read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
u32 i, j, lmb_sets;
lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
for (i = 0; i < lmb_sets; i++) {
read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
struct drmem_lmb *lmb;
drmem_info->n_lmbs = of_read_number(prop++, 1);
+ if (drmem_info->n_lmbs == 0)
+ return;
drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
int lmb_index;
lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
/* first pass, calculate the number of LMBs */
p = prop;
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 5a69b51d08a3..d573d7d07f25 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* need to add in 0x1 if it's a read-only user page
*/
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ repeat:
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 2253bbc6a599..e601d95c3b20 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
vpn = hpt_vpn(ea, vsid, ssize);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
/*
* None of the sub 4k pages is hashed
*/
@@ -214,7 +214,7 @@ repeat:
return -1;
}
- new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
new_pte |= H_PAGE_HASHPTE;
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
@@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -327,7 +327,7 @@ repeat:
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7d07c7e17db6..cf290d415dcd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
__pmd_index_size = H_PMD_INDEX_SIZE;
__pud_index_size = H_PUD_INDEX_SIZE;
__pgd_index_size = H_PGD_INDEX_SIZE;
+ __pud_cache_index = H_PUD_CACHE_INDEX;
__pmd_cache_index = H_PMD_CACHE_INDEX;
__pte_table_size = H_PTE_TABLE_SIZE;
__pmd_table_size = H_PMD_TABLE_SIZE;
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 12511f5a015f..b320f5097a06 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned long vpn;
unsigned long old_pte, new_pte;
unsigned long rflags, pa, sz;
- long slot;
+ long slot, offset;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
@@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ if (unlikely(mmu_psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
+ rpte = __real_pte(__pte(old_pte), ptep, offset);
sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
}
/*
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index eb8c6c8c4851..2b656e67f2ea 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -100,6 +100,6 @@ void pgtable_cache_init(void)
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
- if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
- pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+ if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+ pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 314d19ab9385..edd8d0bc9364 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
numa_cpu_lookup_table[cpu] = -1;
}
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
- numa_cpu_lookup_table[cpu] = node;
-}
-
static void map_cpu_to_node(int cpu, int node)
{
update_numa_cpu_lookup_table(cpu, node);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 573a9a2ee455..2e10a964e290 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -17,9 +17,11 @@
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/string_helpers.h>
+#include <linux/stop_machine.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
@@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
+
+ /*
+ * The init_mm context is given the first available (non-zero) PID,
+ * which is the "guard PID" and contains no page table. PIDR should
+ * never be set to zero because that duplicates the kernel address
+ * space at the 0x0... offset (quadrant 0)!
+ *
+ * An arbitrary PID that may later be allocated by the PID allocator
+ * for userspace processes must not be used either, because that
+ * would cause stale user mappings for that PID on CPUs outside of
+ * the TLB invalidation scheme (because it won't be in mm_cpumask).
+ *
+ * So permanently carve out one PID for the purpose of a guard PID.
+ */
+ init_mm.context.id = mmu_base_pid;
+ mmu_base_pid++;
}
static void __init radix_init_partition_table(void)
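The guard-PID comment above describes an allocator invariant: PID 0 is architecturally poisonous (it aliases the kernel's quadrant-0 view), and any PID the normal allocator could later hand to a process would risk stale TLB entries, so one PID is permanently carved out below the user range. As a toy version of that carve-out (invented names, illustrative only):

static unsigned int mmu_base_pid = 1;   /* lowest PID users may get */

/* permanently reserve one PID below the user range for init_mm */
static unsigned int reserve_guard_pid(void)
{
        unsigned int guard = mmu_base_pid;  /* never 0: 0 aliases the kernel */

        mmu_base_pid++;   /* user PIDs now start above the guard */
        return guard;
}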
@@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
__pmd_index_size = RADIX_PMD_INDEX_SIZE;
__pud_index_size = RADIX_PUD_INDEX_SIZE;
__pgd_index_size = RADIX_PGD_INDEX_SIZE;
+ __pud_cache_index = RADIX_PUD_INDEX_SIZE;
__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
__pte_table_size = RADIX_PTE_TABLE_SIZE;
__pmd_table_size = RADIX_PMD_TABLE_SIZE;
@@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
radix_init_iamr();
radix_init_pgtable();
-
+ /* Switch to the guard PID before turning on MMU */
+ radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
}
radix_init_iamr();
+ radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
pud_clear(pud);
}
+struct change_mapping_params {
+ pte_t *pte;
+ unsigned long start;
+ unsigned long end;
+ unsigned long aligned_start;
+ unsigned long aligned_end;
+};
+
+static int stop_machine_change_mapping(void *data)
+{
+ struct change_mapping_params *params =
+ (struct change_mapping_params *)data;
+
+ if (!data)
+ return -1;
+
+ spin_unlock(&init_mm.page_table_lock);
+ pte_clear(&init_mm, params->aligned_start, params->pte);
+ create_physical_mapping(params->aligned_start, params->start);
+ create_physical_mapping(params->end, params->aligned_end);
+ spin_lock(&init_mm.page_table_lock);
+ return 0;
+}
+
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
unsigned long end)
{
@@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
}
}
+/*
+ * Helper to clear the pte and, if needed, split the kernel mapping
+ */
+static void split_kernel_mapping(unsigned long addr, unsigned long end,
+ unsigned long size, pte_t *pte)
+{
+ unsigned long mask = ~(size - 1);
+ unsigned long aligned_start = addr & mask;
+ unsigned long aligned_end = addr + size;
+ struct change_mapping_params params;
+ bool split_region = false;
+
+ if ((end - addr) < size) {
+ /*
+ * We're about to clear the PTE and remap the pieces that
+ * survive, flushing in between. If the intermediate state
+ * were visible outside this processor, or if we were running
+ * in code close to the mapping we cleared, we would be in
+ * trouble; hence the stop_machine() below.
+ */
+ if (overlaps_kernel_text(aligned_start, addr) ||
+ overlaps_kernel_text(end, aligned_end)) {
+ /*
+ * Hack, just return, don't pte_clear
+ */
+ WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
+ "text, not splitting\n", addr, end);
+ return;
+ }
+ split_region = true;
+ }
+
+ if (split_region) {
+ params.pte = pte;
+ params.start = addr;
+ params.end = end;
+ params.aligned_start = addr & ~(size - 1);
+ params.aligned_end = min_t(unsigned long, aligned_end,
+ (unsigned long)__va(memblock_end_of_DRAM()));
+ stop_machine(stop_machine_change_mapping, &params, NULL);
+ return;
+ }
+
+ pte_clear(&init_mm, addr, pte);
+}
+
static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
@@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_huge(*pmd)) {
- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pmd);
+ split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
continue;
}
@@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
if (pud_huge(*pud)) {
- if (!IS_ALIGNED(addr, PUD_SIZE) ||
- !IS_ALIGNED(next, PUD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pud);
+ split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
continue;
}
@@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
continue;
if (pgd_huge(*pgd)) {
- if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
- !IS_ALIGNED(next, PGDIR_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pgd);
+ split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
continue;
}
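split_kernel_mapping() only resorts to stop_machine() when the range being removed is smaller than the huge mapping containing it: the huge PTE is cleared and the two surviving pieces, [aligned_start, start) and [end, aligned_end), are recreated. The alignment arithmetic, sketched with hypothetical addresses:

#include <assert.h>
#include <stdint.h>

#define PMD_SIZE 0x200000ULL   /* one 2M kernel mapping */

int main(void)
{
        uint64_t start = 0x1234000ULL, end = 0x1250000ULL; /* hole to punch */
        uint64_t aligned_start = start & ~(PMD_SIZE - 1);  /* 0x1200000 */
        uint64_t aligned_end = aligned_start + PMD_SIZE;   /* 0x1400000 */

        assert(end - start < PMD_SIZE);   /* partial removal: must split */
        /* after pte_clear() on the huge entry, remap the leftovers:
         *   create_physical_mapping(aligned_start, start);
         *   create_physical_mapping(end, aligned_end);    */
        assert(aligned_start <= start && end <= aligned_end);
        return 0;
}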
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c9a623c2d8a2..28c980eb4422 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
@@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
} else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 881ebd53ffc2..9b23f12e863c 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
unsigned int psize;
int ssize;
real_pte_t rpte;
- int i;
+ int i, offset;
i = batch->index;
@@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
psize = get_slice_psize(mm, addr);
/* Mask the address for the correct page size */
addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+ if (unlikely(psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
#else
BUG();
psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
* support 64k pages, this might be different from the
* hardware page size encoded in the slice table. */
addr &= PAGE_MASK;
+ offset = PTRS_PER_PTE;
}
@@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
}
WARN_ON(vsid == 0);
vpn = hpt_vpn(addr, vsid, ssize);
- rpte = __real_pte(__pte(pte), ptep);
+ rpte = __real_pte(__pte(pte), ptep, offset);
/*
* Check if we have an active batch on this CPU. If not, just
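hpte_need_flush() now has to know where the hidx words live before it can build the real_pte: PTRS_PER_PUD entries away for 16G hugepages, PTRS_PER_PMD for other hugepages, PTRS_PER_PTE for normal pages, after masking the address down to its page boundary. The masking idiom on its own (illustrative):

#include <stdint.h>

/* drop the in-page offset for a page of size 2^shift */
static uint64_t page_base(uint64_t addr, unsigned int shift)
{
        return addr & ~((1ULL << shift) - 1);
}

/* e.g. page_base(0x12345678, 12) == 0x12345000 for 4K pages,
 *      page_base(0x12345678, 24) == 0x12000000 for 16M pages */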
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 872d1f6dd11e..a9636d8cba15 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -327,6 +327,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
break;
+ case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
+ PPC_LWZ_OFFS(r_A, r_skb, K);
+ break;
case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0a34b0cec7b7..0ef3d9580e98 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -240,6 +240,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
* goto out;
*/
PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
+ PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
PPC_BCC(COND_GE, out);
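The new RLWINM zero-extends the tail-call index before the unsigned compare: the BPF ABI only defines the low 32 bits of that register, and with stale upper bits the 64-bit compare would send every tail call to the out path even when the interpreter, which truncates to u32, would take it. In miniature (illustrative values):

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t reg = 0xdeadbeef00000003ULL;  /* stale upper word */
        uint64_t max_entries = 16;

        assert(reg >= max_entries);     /* 64-bit compare: call skipped */

        /* what RLWINM ., ., 0, 0, 31 performs: keep only the low word */
        uint64_t index = reg & 0xffffffffULL;
        assert(index < max_entries);    /* tail call is taken, as the
                                         * interpreter would have done */
        return 0;
}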
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index dd4c9b8b8a81..f6f55ab4980e 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
const struct cpumask *l_cpumask;
get_online_cpus();
- for_each_online_node(nid) {
+ for_each_node_with_cpus(nid) {
l_cpumask = cpumask_of_node(nid);
- cpu = cpumask_first(l_cpumask);
+ cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ continue;
opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(cpu));
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 496e47696ed0..a6c92c78c9b2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return -ENODEV;;
+ return -ENODEV;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 4fb21e17504a..092715b9674b 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -80,6 +80,10 @@ static void pnv_setup_rfi_flush(void)
if (np && of_property_read_bool(np, "disabled"))
enable--;
+ np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable = 0;
+
of_node_put(np);
of_node_put(fw_features);
}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 2b3eb01ab110..b7c53a51c31b 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
rc = PTR_ERR(txwin->paste_kaddr);
goto free_window;
}
+ } else {
+ /*
+ * A user mapping must ensure that a context switch issues
+ * CP_ABORT for this thread.
+ */
+ rc = set_thread_uses_vas();
+ if (rc)
+ goto free_window;
}
- /*
- * Now that we have a send window, ensure context switch issues
- * CP_ABORT for this thread.
- */
- rc = -EINVAL;
- if (set_thread_uses_vas() < 0)
- goto free_window;
-
set_vinst_win(vinst, txwin);
return txwin;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index dceb51454d8d..652d3e96b812 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
#include "pseries.h"
#include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
+ update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)
@@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
-extern int find_and_online_cpu_nid(int cpu);
-
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 81d8614e7379..5e1ef9150182 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
/*
+ * Enable the hotplug interrupts late because processing them may touch other
+ * devices or systems (e.g. hugepages) that have not been initialized at the
+ * subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+ struct device_node *np;
+
+ /* Hotplug Events */
+ np = of_find_node_by_path("/event-sources/hot-plug-events");
+ if (np != NULL) {
+ if (dlpar_workqueue_init() == 0)
+ request_event_sources_irqs(np, ras_hotplug_interrupt,
+ "RAS_HOTPLUG");
+ of_node_put(np);
+ }
+
+ return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
+/*
* Initialize handlers for the set of interrupts caused by hardware errors
* and power system events.
*/
@@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
of_node_put(np);
}
- /* Hotplug Events */
- np = of_find_node_by_path("/event-sources/hot-plug-events");
- if (np != NULL) {
- if (dlpar_workqueue_init() == 0)
- request_event_sources_irqs(np, ras_hotplug_interrupt,
- "RAS_HOTPLUG");
- of_node_put(np);
- }
-
/* EPOW Events */
np = of_find_node_by_path("/event-sources/epow-events");
if (np != NULL) {
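[annotation] The hotplug registration moves from the subsys-level init_ras_IRQ() to a machine_late_initcall because an event arriving early could touch subsystems (hugepages, for instance) that are not yet up. A hedged sketch of the ordering guarantee relied on, with hypothetical function names: initcalls run strictly level by level, so a late_initcall only executes after every subsys_initcall has returned.

    static int __init ras_core_setup(void)       /* hypothetical */
    {
            return 0;                            /* core state only */
    }
    subsys_initcall(ras_core_setup);

    static int __init ras_irq_setup(void)        /* hypothetical */
    {
            /* all subsystems initialized by now; safe to accept events */
            return 0;
    }
    late_initcall(ras_irq_setup);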
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 372d7ada1a0c..1a527625acf7 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -482,7 +482,8 @@ static void pseries_setup_rfi_flush(void)
if (types == L1D_FLUSH_NONE)
types = L1D_FLUSH_FALLBACK;
- if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
+ (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
enable = false;
} else {
/* Default to fallback in case the hcall is not available */
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index d9c4c9366049..091f1d0d0af1 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
if (rc) {
- pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+ pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
goto fail;
}
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
/* Configure and enable the queue in HW */
rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
if (rc) {
- pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
} else {
q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
if (IS_ERR(qpage))
return PTR_ERR(qpage);
- return xive_spapr_configure_queue(cpu, q, prio, qpage,
- xive_queue_shift);
+ return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+ q, prio, qpage, xive_queue_shift);
}
static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
struct xive_q *q = &xc->queue[prio];
unsigned int alloc_order;
long rc;
+ int hw_cpu = get_hard_smp_processor_id(cpu);
- rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+ rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
if (rc)
- pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+ hw_cpu, prio);
alloc_order = xive_alloc_order(xive_queue_shift);
free_pages((unsigned long)q->qpage, alloc_order);
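[annotation] The functional change in this file is consistent use of hardware thread ids: the plpar_int_*() hcalls address CPUs by hardware id, while the callers held Linux logical CPU numbers. A sketch of the convention, reusing the functions from the patch:

    /* Translate before every hypervisor call; passing the logical number
     * works only by accident when the two id spaces happen to coincide. */
    static long drop_queue_for(unsigned int cpu, u8 prio)
    {
            int hw_cpu = get_hard_smp_processor_id(cpu);   /* logical -> hw */

            return plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
    }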
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b6722c246d9c..04807c7f64cc 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -8,7 +8,6 @@ config RISCV
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
- select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_WANT_FRAME_POINTERS
select CLONE_BACKWARDS
select COMMON_CLK
@@ -20,7 +19,6 @@ config RISCV
select GENERIC_STRNLEN_USER
select GENERIC_SMP_IDLE_THREAD
select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
- select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_DMA_API_DEBUG
@@ -34,7 +32,6 @@ config RISCV
select HAVE_ARCH_TRACEHOOK
select MODULES_USE_ELF_RELA if MODULES
select THREAD_INFO_IN_TASK
- select RISCV_IRQ_INTC
select RISCV_TIMER
config MMU
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index c0319cbf1eec..5510366d169a 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -34,9 +34,9 @@
#define wmb() RISCV_FENCE(ow,ow)
/* These barriers do not need to enforce ordering on devices, just memory. */
-#define smp_mb() RISCV_FENCE(rw,rw)
-#define smp_rmb() RISCV_FENCE(r,r)
-#define smp_wmb() RISCV_FENCE(w,w)
+#define __smp_mb() RISCV_FENCE(rw,rw)
+#define __smp_rmb() RISCV_FENCE(r,r)
+#define __smp_wmb() RISCV_FENCE(w,w)
/*
* This is a very specific barrier: it's currently only used in two places in
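[annotation] Renaming the definitions to __smp_mb() and friends hands control back to asm-generic/barrier.h, which derives the public macros from the double-underscore hooks. Roughly (simplified; the real header has more indirection):

    #ifdef CONFIG_SMP
    #define smp_mb()        __smp_mb()      /* arch-provided fence */
    #else
    #define smp_mb()        barrier()       /* UP: compiler barrier suffices */
    #endif

Defining smp_mb() directly, as the old code did, would have forced full fences even on uniprocessor builds.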
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 87fc045be51f..56fa592cfa34 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -172,6 +172,9 @@ ENTRY(handle_exception)
move a1, sp /* pt_regs */
tail do_IRQ
1:
+ /* Exceptions run with interrupts enabled */
+ csrs sstatus, SR_SIE
+
/* Handle syscalls */
li t0, EXC_SYSCALL
beq s4, t0, handle_syscall
@@ -198,8 +201,6 @@ handle_syscall:
*/
addi s2, s2, 0x4
REG_S s2, PT_SEPC(sp)
- /* System calls run with interrupts enabled */
- csrs sstatus, SR_SIE
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_TRACE
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 226eeb190f90..6e07ed37bbff 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -64,7 +64,7 @@ ENTRY(_start)
/* Start the kernel */
mv a0, s0
mv a1, s1
- call sbi_save
+ call parse_dtb
tail start_kernel
relocate:
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 09f7064e898c..c11f40c1b2a8 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -144,7 +144,7 @@ asmlinkage void __init setup_vm(void)
#endif
}
-void __init sbi_save(unsigned int hartid, void *dtb)
+void __init parse_dtb(unsigned int hartid, void *dtb)
{
early_init_dt_scan(__va(dtb));
}
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 65154eaa3714..6c8ce15cde7b 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -63,6 +63,7 @@ static inline int init_new_context(struct task_struct *tsk,
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
/* pgd_alloc() did not account this pmd */
mm_inc_nr_pmds(mm);
+ mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 13a133a6015c..a5621ea6d123 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -14,6 +14,7 @@
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/ctl_reg.h>
+#include <asm/dwarf.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
@@ -230,7 +231,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
.hidden \name
.type \name,@function
\name:
- .cfi_startproc
+ CFI_STARTPROC
#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
exrl 0,0f
#else
@@ -239,7 +240,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
#endif
j .
0: br \reg
- .cfi_endproc
+ CFI_ENDPROC
.endm
GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
@@ -426,13 +427,13 @@ ENTRY(system_call)
UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
stmg %r0,%r7,__PT_R0(%r11)
- # clear user controlled register to prevent speculative use
- xgr %r0,%r0
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
stg %r14,__PT_FLAGS(%r11)
.Lsysc_do_svc:
+ # clear user controlled register to prevent speculative use
+ xgr %r0,%r0
# load address of system call table
lg %r10,__THREAD_sysc_table(%r13,%r12)
llgh %r8,__PT_INT_CODE+2(%r11)
@@ -1439,6 +1440,7 @@ cleanup_critical:
stg %r15,__LC_SYSTEM_TIMER
0: # update accounting time stamp
mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
# set up saved register r11
lg %r15,__LC_KERNEL_STACK
la %r9,STACK_FRAME_OVERHEAD(%r15)
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 69d7fcf48158..9aff72d3abda 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -2,8 +2,8 @@
#include <linux/module.h>
#include <asm/nospec-branch.h>
-int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF);
-int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL);
+int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
+int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL);
static int __init nospectre_v2_setup_early(char *str)
{
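[annotation] IS_ENABLED() pastes its argument into a config-macro probe, so it only works with the exact CONFIG_-prefixed symbol; handed a bare name, it silently evaluates to 0, which is why both defaults were wrong before this fix. With a hypothetical option CONFIG_FOO:

    #include <linux/kconfig.h>

    static const int foo_on = IS_ENABLED(CONFIG_FOO); /* 1 when FOO=y or =m */
    static const int broken = IS_ENABLED(FOO);        /* always 0: Kconfig never
                                                         defines a bare FOO */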
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 9c7d70715862..07c6e81163bf 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -22,22 +22,6 @@
#include "trace.h"
#include "trace-s390.h"
-
-static const intercept_handler_t instruction_handlers[256] = {
- [0x01] = kvm_s390_handle_01,
- [0x82] = kvm_s390_handle_lpsw,
- [0x83] = kvm_s390_handle_diag,
- [0xaa] = kvm_s390_handle_aa,
- [0xae] = kvm_s390_handle_sigp,
- [0xb2] = kvm_s390_handle_b2,
- [0xb6] = kvm_s390_handle_stctl,
- [0xb7] = kvm_s390_handle_lctl,
- [0xb9] = kvm_s390_handle_b9,
- [0xe3] = kvm_s390_handle_e3,
- [0xe5] = kvm_s390_handle_e5,
- [0xeb] = kvm_s390_handle_eb,
-};
-
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
@@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu)
static int handle_instruction(struct kvm_vcpu *vcpu)
{
- intercept_handler_t handler;
-
vcpu->stat.exit_instruction++;
trace_kvm_s390_intercept_instruction(vcpu,
vcpu->arch.sie_block->ipa,
vcpu->arch.sie_block->ipb);
- handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
- if (handler)
- return handler(vcpu);
- return -EOPNOTSUPP;
+
+ switch (vcpu->arch.sie_block->ipa >> 8) {
+ case 0x01:
+ return kvm_s390_handle_01(vcpu);
+ case 0x82:
+ return kvm_s390_handle_lpsw(vcpu);
+ case 0x83:
+ return kvm_s390_handle_diag(vcpu);
+ case 0xaa:
+ return kvm_s390_handle_aa(vcpu);
+ case 0xae:
+ return kvm_s390_handle_sigp(vcpu);
+ case 0xb2:
+ return kvm_s390_handle_b2(vcpu);
+ case 0xb6:
+ return kvm_s390_handle_stctl(vcpu);
+ case 0xb7:
+ return kvm_s390_handle_lctl(vcpu);
+ case 0xb9:
+ return kvm_s390_handle_b9(vcpu);
+ case 0xe3:
+ return kvm_s390_handle_e3(vcpu);
+ case 0xe5:
+ return kvm_s390_handle_e5(vcpu);
+ case 0xeb:
+ return kvm_s390_handle_eb(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
}
static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
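[annotation] With CONFIG_EXPOLINE, every indirect call goes through a thunk, so the jump table of function pointers cost a thunk round-trip per intercepted instruction; a switch statement gives the compiler direct calls instead. The transformation in miniature (a standalone sketch, not the kernel's handlers):

    typedef int (*handler_t)(int);

    static int handle_a(int arg) { return arg + 1; }  /* stand-in handlers */
    static int handle_b(int arg) { return arg - 1; }

    static const handler_t handlers[2] = { handle_a, handle_b };

    /* Before: an indirect call that expoline/retpoline builds route
     * through a thunk on every dispatch. */
    static int dispatch_indirect(unsigned int op, int arg)
    {
            return handlers[op & 1](arg);
    }

    /* After: direct calls (or a bounded jump table) the compiler and
     * branch predictor can see. */
    static int dispatch_direct(unsigned int op, int arg)
    {
            switch (op & 1) {
            case 0:  return handle_a(arg);
            default: return handle_b(arg);
            }
    }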
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index aabf46f5f883..b04616b57a94 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
static int ckc_irq_pending(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
+ const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+ const u64 ckc = vcpu->arch.sie_block->ckc;
+
+ if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
+ if ((s64)ckc >= (s64)now)
+ return 0;
+ } else if (ckc >= now) {
return 0;
+ }
return ckc_interrupts_enabled(vcpu);
}
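[annotation] When the CR0 bit tested here is set, the clock comparator is compared as a signed quantity, so a single unsigned compare mishandles values with the top bit set. The corrected predicate in isolation (u64/s64 as in the kernel; the constant is the bit from the patch):

    static int ckc_expired(u64 gcr0, u64 ckc, u64 now)
    {
            if (gcr0 & 0x0020000000000000UL)        /* signed CKC semantics */
                    return (s64)ckc < (s64)now;
            return ckc < now;                       /* default unsigned TOD */
    }

__calculate_sltime() below applies the same split when computing how long the vcpu may sleep.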
@@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
return kvm_s390_get_cpu_timer(vcpu) >> 63;
}
-static inline int is_ioirq(unsigned long irq_type)
-{
- return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
- (irq_type <= IRQ_PEND_IO_ISC_0));
-}
-
static uint64_t isc_to_isc_bits(int isc)
{
return (0x80 >> isc) << 24;
@@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis
return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
}
-static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
+static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.float_int.pending_irqs |
- vcpu->arch.local_int.pending_irqs |
+ vcpu->arch.local_int.pending_irqs;
+}
+
+static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
+{
+ return pending_irqs_no_gisa(vcpu) |
kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
}
@@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
{
- if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
+ if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
return;
else if (psw_ioint_disabled(vcpu))
kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
@@ -1011,24 +1017,6 @@ out:
return rc;
}
-typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
-
-static const deliver_irq_t deliver_irq_funcs[] = {
- [IRQ_PEND_MCHK_EX] = __deliver_machine_check,
- [IRQ_PEND_MCHK_REP] = __deliver_machine_check,
- [IRQ_PEND_PROG] = __deliver_prog,
- [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal,
- [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call,
- [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
- [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer,
- [IRQ_PEND_RESTART] = __deliver_restart,
- [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix,
- [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init,
- [IRQ_PEND_EXT_SERVICE] = __deliver_service,
- [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done,
- [IRQ_PEND_VIRTIO] = __deliver_virtio,
-};
-
/* Check whether an external call is pending (deliverable or not) */
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
{
@@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
{
- u64 now, cputm, sltime = 0;
+ const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+ const u64 ckc = vcpu->arch.sie_block->ckc;
+ u64 cputm, sltime = 0;
if (ckc_interrupts_enabled(vcpu)) {
- now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
- sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
- /* already expired or overflow? */
- if (!sltime || vcpu->arch.sie_block->ckc <= now)
+ if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
+ if ((s64)now < (s64)ckc)
+ sltime = tod_to_ns((s64)ckc - (s64)now);
+ } else if (now < ckc) {
+ sltime = tod_to_ns(ckc - now);
+ }
+ /* already expired */
+ if (!sltime)
return 0;
if (cpu_timer_interrupts_enabled(vcpu)) {
cputm = kvm_s390_get_cpu_timer(vcpu);
@@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- deliver_irq_t func;
int rc = 0;
unsigned long irq_type;
unsigned long irqs;
@@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
while ((irqs = deliverable_irqs(vcpu)) && !rc) {
/* bits are in the reverse order of interrupt priority */
irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
- if (is_ioirq(irq_type)) {
+ switch (irq_type) {
+ case IRQ_PEND_IO_ISC_0:
+ case IRQ_PEND_IO_ISC_1:
+ case IRQ_PEND_IO_ISC_2:
+ case IRQ_PEND_IO_ISC_3:
+ case IRQ_PEND_IO_ISC_4:
+ case IRQ_PEND_IO_ISC_5:
+ case IRQ_PEND_IO_ISC_6:
+ case IRQ_PEND_IO_ISC_7:
rc = __deliver_io(vcpu, irq_type);
- } else {
- func = deliver_irq_funcs[irq_type];
- if (!func) {
- WARN_ON_ONCE(func == NULL);
- clear_bit(irq_type, &li->pending_irqs);
- continue;
- }
- rc = func(vcpu);
+ break;
+ case IRQ_PEND_MCHK_EX:
+ case IRQ_PEND_MCHK_REP:
+ rc = __deliver_machine_check(vcpu);
+ break;
+ case IRQ_PEND_PROG:
+ rc = __deliver_prog(vcpu);
+ break;
+ case IRQ_PEND_EXT_EMERGENCY:
+ rc = __deliver_emergency_signal(vcpu);
+ break;
+ case IRQ_PEND_EXT_EXTERNAL:
+ rc = __deliver_external_call(vcpu);
+ break;
+ case IRQ_PEND_EXT_CLOCK_COMP:
+ rc = __deliver_ckc(vcpu);
+ break;
+ case IRQ_PEND_EXT_CPU_TIMER:
+ rc = __deliver_cpu_timer(vcpu);
+ break;
+ case IRQ_PEND_RESTART:
+ rc = __deliver_restart(vcpu);
+ break;
+ case IRQ_PEND_SET_PREFIX:
+ rc = __deliver_set_prefix(vcpu);
+ break;
+ case IRQ_PEND_PFAULT_INIT:
+ rc = __deliver_pfault_init(vcpu);
+ break;
+ case IRQ_PEND_EXT_SERVICE:
+ rc = __deliver_service(vcpu);
+ break;
+ case IRQ_PEND_PFAULT_DONE:
+ rc = __deliver_pfault_done(vcpu);
+ break;
+ case IRQ_PEND_VIRTIO:
+ rc = __deliver_virtio(vcpu);
+ break;
+ default:
+ WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
+ clear_bit(irq_type, &li->pending_irqs);
}
}
@@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
+ if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa))
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ba4c7092335a..339ac0964590 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
+ { "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
{ "instruction_gs", VCPU_STAT(instruction_gs) },
@@ -179,6 +180,28 @@ int kvm_arch_hardware_enable(void)
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
+static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
+{
+ u8 delta_idx = 0;
+
+ /*
+ * The TOD jumps by delta, we have to compensate this by adding
+ * -delta to the epoch.
+ */
+ delta = -delta;
+
+ /* sign-extension - we're adding to signed values below */
+ if ((s64)delta < 0)
+ delta_idx = -1;
+
+ scb->epoch += delta;
+ if (scb->ecd & ECD_MEF) {
+ scb->epdx += delta_idx;
+ if (scb->epoch < delta)
+ scb->epdx += 1;
+ }
+}
+
/*
* This callback is executed during stop_machine(). All CPUs are therefore
* temporarily stopped. In order not to change guest behavior, we have to
@@ -194,13 +217,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
unsigned long long *delta = v;
list_for_each_entry(kvm, &vm_list, vm_list) {
- kvm->arch.epoch -= *delta;
kvm_for_each_vcpu(i, vcpu, kvm) {
- vcpu->arch.sie_block->epoch -= *delta;
+ kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
+ if (i == 0) {
+ kvm->arch.epoch = vcpu->arch.sie_block->epoch;
+ kvm->arch.epdx = vcpu->arch.sie_block->epdx;
+ }
if (vcpu->arch.cputm_enabled)
vcpu->arch.cputm_start += *delta;
if (vcpu->arch.vsie_block)
- vcpu->arch.vsie_block->epoch -= *delta;
+ kvm_clock_sync_scb(vcpu->arch.vsie_block,
+ *delta);
}
}
return NOTIFY_OK;
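[annotation] kvm_clock_sync_scb() above is effectively a widening add: the 8-bit epoch index (epdx) extends the 64-bit epoch, so a signed delta must be sign-extended into the high part and the carry out of the low part propagated by hand. The arithmetic reduced to its core:

    static void wide_add(u8 *hi, u64 *lo, u64 delta)
    {
            u8 delta_hi = ((s64)delta < 0) ? 0xff : 0x00;  /* sign-extend */

            *lo += delta;
            *hi += delta_hi;
            if (*lo < delta)        /* unsigned wrap => carry into high part */
                    *hi += 1;
    }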
@@ -902,12 +929,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
return -EFAULT;
- if (test_kvm_facility(kvm, 139))
- kvm_s390_set_tod_clock_ext(kvm, &gtod);
- else if (gtod.epoch_idx == 0)
- kvm_s390_set_tod_clock(kvm, gtod.tod);
- else
+ if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
return -EINVAL;
+ kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
gtod.epoch_idx, gtod.tod);
@@ -932,13 +956,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
- u64 gtod;
+ struct kvm_s390_vm_tod_clock gtod = { 0 };
- if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+ if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
+ sizeof(gtod.tod)))
return -EFAULT;
- kvm_s390_set_tod_clock(kvm, gtod);
- VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
+ kvm_s390_set_tod_clock(kvm, &gtod);
+ VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
return 0;
}
@@ -2122,6 +2147,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu)
/* we still need the basic sca for the ipte control */
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+ return;
}
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
@@ -2389,6 +2415,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
mutex_lock(&vcpu->kvm->lock);
preempt_disable();
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+ vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
preempt_enable();
mutex_unlock(&vcpu->kvm->lock);
if (!kvm_is_ucontrol(vcpu->kvm)) {
@@ -3021,8 +3048,8 @@ retry:
return 0;
}
-void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
- const struct kvm_s390_vm_tod_clock *gtod)
+void kvm_s390_set_tod_clock(struct kvm *kvm,
+ const struct kvm_s390_vm_tod_clock *gtod)
{
struct kvm_vcpu *vcpu;
struct kvm_s390_tod_clock_ext htod;
@@ -3034,10 +3061,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
get_tod_clock_ext((char *)&htod);
kvm->arch.epoch = gtod->tod - htod.tod;
- kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
-
- if (kvm->arch.epoch > gtod->tod)
- kvm->arch.epdx -= 1;
+ kvm->arch.epdx = 0;
+ if (test_kvm_facility(kvm, 139)) {
+ kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
+ if (kvm->arch.epoch > gtod->tod)
+ kvm->arch.epdx -= 1;
+ }
kvm_s390_vcpu_block_all(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3050,22 +3079,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
mutex_unlock(&kvm->lock);
}
-void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
-{
- struct kvm_vcpu *vcpu;
- int i;
-
- mutex_lock(&kvm->lock);
- preempt_disable();
- kvm->arch.epoch = tod - get_tod_clock();
- kvm_s390_vcpu_block_all(kvm);
- kvm_for_each_vcpu(i, vcpu, kvm)
- vcpu->arch.sie_block->epoch = kvm->arch.epoch;
- kvm_s390_vcpu_unblock_all(kvm);
- preempt_enable();
- mutex_unlock(&kvm->lock);
-}
-
/**
* kvm_arch_fault_in_page - fault-in guest page if necessary
* @vcpu: The corresponding virtual cpu
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index bd31b37b0e6f..f55ac0ef99ea 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,8 +19,6 @@
#include <asm/processor.h>
#include <asm/sclp.h>
-typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
-
/* Transactional Memory Execution related macros */
#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
#define TDB_FORMAT1 1
@@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
- const struct kvm_s390_vm_tod_clock *gtod);
-void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
+void kvm_s390_set_tod_clock(struct kvm *kvm,
+ const struct kvm_s390_vm_tod_clock *gtod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c4c4e157c036..f0b4185158af 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
/* Handle SCK (SET CLOCK) interception */
static int handle_set_clock(struct kvm_vcpu *vcpu)
{
+ struct kvm_s390_vm_tod_clock gtod = { 0 };
int rc;
u8 ar;
- u64 op2, val;
+ u64 op2;
vcpu->stat.instruction_sck++;
@@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
if (op2 & 7) /* Operand must be on a doubleword boundary */
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
+ rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
- kvm_s390_set_tod_clock(vcpu->kvm, val);
+ VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
+ kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
kvm_s390_set_psw_cc(vcpu, 0);
return 0;
@@ -795,55 +796,60 @@ out:
return rc;
}
-static const intercept_handler_t b2_handlers[256] = {
- [0x02] = handle_stidp,
- [0x04] = handle_set_clock,
- [0x10] = handle_set_prefix,
- [0x11] = handle_store_prefix,
- [0x12] = handle_store_cpu_address,
- [0x14] = kvm_s390_handle_vsie,
- [0x21] = handle_ipte_interlock,
- [0x29] = handle_iske,
- [0x2a] = handle_rrbe,
- [0x2b] = handle_sske,
- [0x2c] = handle_test_block,
- [0x30] = handle_io_inst,
- [0x31] = handle_io_inst,
- [0x32] = handle_io_inst,
- [0x33] = handle_io_inst,
- [0x34] = handle_io_inst,
- [0x35] = handle_io_inst,
- [0x36] = handle_io_inst,
- [0x37] = handle_io_inst,
- [0x38] = handle_io_inst,
- [0x39] = handle_io_inst,
- [0x3a] = handle_io_inst,
- [0x3b] = handle_io_inst,
- [0x3c] = handle_io_inst,
- [0x50] = handle_ipte_interlock,
- [0x56] = handle_sthyi,
- [0x5f] = handle_io_inst,
- [0x74] = handle_io_inst,
- [0x76] = handle_io_inst,
- [0x7d] = handle_stsi,
- [0xb1] = handle_stfl,
- [0xb2] = handle_lpswe,
-};
-
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
{
- intercept_handler_t handler;
-
- /*
- * A lot of B2 instructions are priviledged. Here we check for
- * the privileged ones, that we can handle in the kernel.
- * Anything else goes to userspace.
- */
- handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler)
- return handler(vcpu);
-
- return -EOPNOTSUPP;
+ switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+ case 0x02:
+ return handle_stidp(vcpu);
+ case 0x04:
+ return handle_set_clock(vcpu);
+ case 0x10:
+ return handle_set_prefix(vcpu);
+ case 0x11:
+ return handle_store_prefix(vcpu);
+ case 0x12:
+ return handle_store_cpu_address(vcpu);
+ case 0x14:
+ return kvm_s390_handle_vsie(vcpu);
+ case 0x21:
+ case 0x50:
+ return handle_ipte_interlock(vcpu);
+ case 0x29:
+ return handle_iske(vcpu);
+ case 0x2a:
+ return handle_rrbe(vcpu);
+ case 0x2b:
+ return handle_sske(vcpu);
+ case 0x2c:
+ return handle_test_block(vcpu);
+ case 0x30:
+ case 0x31:
+ case 0x32:
+ case 0x33:
+ case 0x34:
+ case 0x35:
+ case 0x36:
+ case 0x37:
+ case 0x38:
+ case 0x39:
+ case 0x3a:
+ case 0x3b:
+ case 0x3c:
+ case 0x5f:
+ case 0x74:
+ case 0x76:
+ return handle_io_inst(vcpu);
+ case 0x56:
+ return handle_sthyi(vcpu);
+ case 0x7d:
+ return handle_stsi(vcpu);
+ case 0xb1:
+ return handle_stfl(vcpu);
+ case 0xb2:
+ return handle_lpswe(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
}
static int handle_epsw(struct kvm_vcpu *vcpu)
@@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)
return 0;
}
-static const intercept_handler_t b9_handlers[256] = {
- [0x8a] = handle_ipte_interlock,
- [0x8d] = handle_epsw,
- [0x8e] = handle_ipte_interlock,
- [0x8f] = handle_ipte_interlock,
- [0xab] = handle_essa,
- [0xaf] = handle_pfmf,
-};
-
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
{
- intercept_handler_t handler;
-
- /* This is handled just as for the B2 instructions. */
- handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler)
- return handler(vcpu);
-
- return -EOPNOTSUPP;
+ switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+ case 0x8a:
+ case 0x8e:
+ case 0x8f:
+ return handle_ipte_interlock(vcpu);
+ case 0x8d:
+ return handle_epsw(vcpu);
+ case 0xab:
+ return handle_essa(vcpu);
+ case 0xaf:
+ return handle_pfmf(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
}
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
@@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
}
-static const intercept_handler_t eb_handlers[256] = {
- [0x2f] = handle_lctlg,
- [0x25] = handle_stctg,
- [0x60] = handle_ri,
- [0x61] = handle_ri,
- [0x62] = handle_ri,
-};
-
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
{
- intercept_handler_t handler;
-
- handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
- if (handler)
- return handler(vcpu);
- return -EOPNOTSUPP;
+ switch (vcpu->arch.sie_block->ipb & 0x000000ff) {
+ case 0x25:
+ return handle_stctg(vcpu);
+ case 0x2f:
+ return handle_lctlg(vcpu);
+ case 0x60:
+ case 0x61:
+ case 0x62:
+ return handle_ri(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
}
static int handle_tprot(struct kvm_vcpu *vcpu)
@@ -1346,10 +1347,12 @@ out_unlock:
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
{
- /* For e5xx... instructions we only handle TPROT */
- if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01)
+ switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+ case 0x01:
return handle_tprot(vcpu);
- return -EOPNOTSUPP;
+ default:
+ return -EOPNOTSUPP;
+ }
}
static int handle_sckpf(struct kvm_vcpu *vcpu)
@@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu)
return 0;
}
-static const intercept_handler_t x01_handlers[256] = {
- [0x04] = handle_ptff,
- [0x07] = handle_sckpf,
-};
-
int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
{
- intercept_handler_t handler;
-
- handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler)
- return handler(vcpu);
- return -EOPNOTSUPP;
+ switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+ case 0x04:
+ return handle_ptff(vcpu);
+ case 0x07:
+ return handle_sckpf(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
}
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index ec772700ff96..8961e3970901 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ int guest_bp_isolation;
int rc;
handle_last_fault(vcpu, vsie_page);
@@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
s390_handle_mcck();
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+
+ /* save current guest state of bp isolation override */
+ guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+ /*
+ * The guest is running with BPBC, so we have to force it on for our
+ * nested guest. This is done by enabling BPBC globally, so the BPBC
+ * control in the SCB (which the nested guest can modify) is simply
+ * ignored.
+ */
+ if (test_kvm_facility(vcpu->kvm, 82) &&
+ vcpu->arch.sie_block->fpf & FPF_BPBC)
+ set_thread_flag(TIF_ISOLATE_BP_GUEST);
+
local_irq_disable();
guest_enter_irqoff();
local_irq_enable();
@@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
local_irq_disable();
guest_exit_irqoff();
local_irq_enable();
+
+ /* restore guest state for bp isolation override */
+ if (!guest_bp_isolation)
+ clear_thread_flag(TIF_ISOLATE_BP_GUEST);
+
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
if (rc == -EINTR) {
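[annotation] The TIF_ISOLATE_BP_GUEST handling is a save/force/restore bracket: remember whether the flag was already set, force it on for the nested run when facility 82 and FPF_BPBC demand it, and clear it afterwards only if this thread did not own it on entry. Schematically (the predicate name is hypothetical):

    int was_set = test_thread_flag(TIF_ISOLATE_BP_GUEST);

    if (needs_bp_isolation)                 /* hypothetical predicate */
            set_thread_flag(TIF_ISOLATE_BP_GUEST);

    /* ... run the nested guest ... */

    if (!was_set)
            clear_thread_flag(TIF_ISOLATE_BP_GUEST);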
diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile
index 715def00a436..01d0f7fb14cc 100644
--- a/arch/sh/boot/dts/Makefile
+++ b/arch/sh/boot/dts/Makefile
@@ -1 +1,3 @@
-obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
+ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"")
+obj-y += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
+endif
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 6bf594ace663..8767e45f1b2b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -430,6 +430,8 @@ config SPARC_LEON
depends on SPARC32
select USB_EHCI_BIG_ENDIAN_MMIO
select USB_EHCI_BIG_ENDIAN_DESC
+ select USB_UHCI_BIG_ENDIAN_MMIO
+ select USB_UHCI_BIG_ENDIAN_DESC
---help---
Say Y here if you are running on a SPARC-LEON processor.
The LEON processor is a synthesizable VHDL model of the
diff --git a/arch/sparc/include/asm/bug.h b/arch/sparc/include/asm/bug.h
index 6f17528356b2..ea53e418f6c0 100644
--- a/arch/sparc/include/asm/bug.h
+++ b/arch/sparc/include/asm/bug.h
@@ -9,10 +9,14 @@
void do_BUG(const char *file, int line);
#define BUG() do { \
do_BUG(__FILE__, __LINE__); \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#else
-#define BUG() __builtin_trap()
+#define BUG() do { \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
+} while (0)
#endif
#define HAVE_ARCH_BUG
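[annotation] barrier_before_unreachable() pins down code that precedes a trap: without it, some compilers treat everything leading into __builtin_trap() as dead and may merge or drop it, losing the do_BUG() report. The shape of the construct, with a hypothetical reporting function:

    #include <linux/compiler.h>

    extern void do_report(const char *why);   /* hypothetical */

    static void report_then_die(const char *why)
    {
            do_report(why);                   /* must survive optimization */
            barrier_before_unreachable();     /* nothing moves past here */
            __builtin_trap();                 /* never returns */
    }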
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 847ddffbf38a..b5cfab711651 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -163,13 +163,10 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
pte_unmap(pte);
}
-void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
-{
- pmd_t orig = *pmdp;
-
- *pmdp = pmd;
+static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr,
+ pmd_t orig, pmd_t pmd)
+{
if (mm == &init_mm)
return;
@@ -219,6 +216,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
}
}
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ pmd_t orig = *pmdp;
+
+ *pmdp = pmd;
+ __set_pmd_acct(mm, addr, orig, pmd);
+}
+
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
@@ -227,6 +233,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
do {
old = *pmdp;
} while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
+ __set_pmd_acct(vma->vm_mm, address, old, pmd);
return old;
}
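[annotation] pmdp_establish() swaps in the new pmd with a cmpxchg loop so it learns the exact prior value even under concurrent updates, and the patch routes that prior value through the same __set_pmd_acct() hook as the plain store path. The loop as a portable C11 sketch (the kernel uses cmpxchg64 rather than <stdatomic.h>):

    #include <stdatomic.h>

    static unsigned long establish(_Atomic unsigned long *slot,
                                   unsigned long newval)
    {
            unsigned long old = atomic_load(slot);

            /* on failure, 'old' is refreshed with the current value */
            while (!atomic_compare_exchange_weak(slot, &old, newval))
                    ;
            /* account(old, newval);  -- shared hook, as __set_pmd_acct() */
            return old;
    }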
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index aff152c87cf4..5a82bac5e0bc 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,6 +1,7 @@
boot/compressed/vmlinux
tools/test_get_len
tools/insn_sanity
+tools/insn_decoder_test
purgatory/kexec-purgatory.c
purgatory/purgatory.ro
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 63bf349b2b24..0fa71a78ec99 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -423,12 +423,6 @@ config X86_MPPARSE
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-config X86_BIGSMP
- bool "Support for big SMP systems with more than 8 CPUs"
- depends on X86_32 && SMP
- ---help---
- This option is needed for the systems that have more than 8 CPUs
-
config GOLDFISH
def_bool y
depends on X86_GOLDFISH
@@ -436,6 +430,7 @@ config GOLDFISH
config RETPOLINE
bool "Avoid speculative indirect branches in kernel"
default y
+ select STACK_VALIDATION if HAVE_STACK_VALIDATION
help
Compile kernel with the retpoline compiler options to guard against
kernel-to-user data leaks by avoiding speculative indirect
@@ -460,6 +455,12 @@ config INTEL_RDT
Say N if unsure.
if X86_32
+config X86_BIGSMP
+ bool "Support for big SMP systems with more than 8 CPUs"
+ depends on SMP
+ ---help---
+ This option is needed for the systems that have more than 8 CPUs
+
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
@@ -949,25 +950,66 @@ config MAXSMP
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+# interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+ int
+ default NR_CPUS_RANGE_END if MAXSMP
+ default 1 if !SMP
+ default 2
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_32
+ default 64 if SMP && X86_BIGSMP
+ default 8 if SMP && !X86_BIGSMP
+ default 1 if !SMP
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_64
+ default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK)
+ default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_32
+ default 32 if X86_BIGSMP
+ default 8 if SMP
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_64
+ default 8192 if MAXSMP
+ default 64 if SMP
+ default 1 if !SMP
+
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
- range 2 8 if SMP && X86_32 && !X86_BIGSMP
- range 2 64 if SMP && X86_32 && X86_BIGSMP
- range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
- range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
- default "1" if !SMP
- default "8192" if MAXSMP
- default "32" if SMP && X86_BIGSMP
- default "8" if SMP && X86_32
- default "64" if SMP
+ range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+ default NR_CPUS_DEFAULT
---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
supported value is 8192, otherwise the maximum value is 512. The
minimum value which makes sense is 2.
- This is purely to save memory - each supported CPU adds
- approximately eight kilobytes to the kernel image.
+ This is purely to save memory: each supported CPU adds about 8KB
+ to the kernel image.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
@@ -1363,7 +1405,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
- depends on !M486
+ depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6
select X86_PAE
---help---
Select this if you have a 32-bit processor and more than 4
@@ -2265,7 +2307,7 @@ choice
it can be used to assist security vulnerability exploitation.
This setting can be changed at boot time via the kernel command
- line parameter vsyscall=[native|emulate|none].
+ line parameter vsyscall=[emulate|none].
On a system with recent enough glibc (2.14 or newer) and no
static binaries, you can say None without a performance penalty
@@ -2273,15 +2315,6 @@ choice
If unsure, select "Emulate".
- config LEGACY_VSYSCALL_NATIVE
- bool "Native"
- help
- Actual executable code is located in the fixed vsyscall
- address mapping, implementing time() efficiently. Since
- this makes the mapping executable, it can be used during
- security vulnerability exploitation (traditionally as
- ROP gadgets). This configuration is not recommended.
-
config LEGACY_VSYSCALL_EMULATE
bool "Emulate"
help
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 65a9a4716e34..8b8d2297d486 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -374,7 +374,7 @@ config X86_TSC
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
+ depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
# this should be set for all -march=.. options where the compiler
# generates cmov.
@@ -385,7 +385,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && X86_P6_NOP
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad55160dcb9..498c1b812300 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
- RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
- ifneq ($(RETPOLINE_CFLAGS),)
- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
- endif
+ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+endif
endif
archscripts: scripts_basic
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 353e20c3f114..886a9115af62 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
struct efi_uga_draw_protocol *uga = NULL, *first_uga;
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
- u32 *handles = (u32 *)uga_handle;;
+ u32 *handles = (u32 *)uga_handle;
efi_status_t status = EFI_INVALID_PARAMETER;
int i;
@@ -484,7 +484,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
struct efi_uga_draw_protocol *uga = NULL, *first_uga;
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
- u64 *handles = (u64 *)uga_handle;;
+ u64 *handles = (u64 *)uga_handle;
efi_status_t status = EFI_INVALID_PARAMETER;
int i;
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
index 36870b26067a..d08805032f01 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
{
unsigned int j;
- state->lens[0] = 0;
- state->lens[1] = 1;
- state->lens[2] = 2;
- state->lens[3] = 3;
+ /* initially all lanes are unused */
+ state->lens[0] = 0xFFFFFFFF00000000;
+ state->lens[1] = 0xFFFFFFFF00000001;
+ state->lens[2] = 0xFFFFFFFF00000002;
+ state->lens[3] = 0xFFFFFFFF00000003;
+
state->unused_lanes = 0xFF03020100;
for (j = 0; j < 4; j++)
state->ldata[j].job_in_lane = NULL;
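[annotation] The lens[] fix relies on the manager's assumed encoding, length in the upper 32 bits and lane index in the lower 32, plus the scheduler's habit of picking the lane with the smallest lens value: initializing unused lanes to the plain integers 0..3 made them look like lanes holding zero-length jobs, while an all-ones length can never win the comparison. A sketch of that selection under the assumed encoding:

    #include <stdint.h>

    #define LANE_UNUSED(idx)  ((0xFFFFFFFFull << 32) | (uint32_t)(idx))

    static unsigned int pick_min_lane(const uint64_t lens[4])
    {
            unsigned int i, min = 0;

            for (i = 1; i < 4; i++)
                    if (lens[i] < lens[min])    /* length dominates the compare */
                            min = i;
            return min;
    }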
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3f48f695d5e6..be63330c5511 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,80 +97,78 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
- .macro ALLOC_PT_GPREGS_ON_STACK
- addq $-(15*8), %rsp
- .endm
-
- .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
- .if \r11
- movq %r11, 6*8+\offset(%rsp)
- .endif
- .if \r8910
- movq %r10, 7*8+\offset(%rsp)
- movq %r9, 8*8+\offset(%rsp)
- movq %r8, 9*8+\offset(%rsp)
- .endif
- .if \rax
- movq %rax, 10*8+\offset(%rsp)
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
+ /*
+ * Push registers and sanitize registers of values that a
+ * speculation attack might otherwise want to exploit. The
+ * lower registers are likely clobbered well before they
+ * could be put to use in a speculative execution gadget.
+ * Interleave XOR with PUSH for better uop scheduling:
+ */
+ .if \save_ret
+ pushq %rsi /* pt_regs->si */
+ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
+ movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
+ .else
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
.endif
- .if \rcx
- movq %rcx, 11*8+\offset(%rsp)
+ pushq \rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq \rax /* pt_regs->ax */
+ pushq %r8 /* pt_regs->r8 */
+ xorl %r8d, %r8d /* nospec r8 */
+ pushq %r9 /* pt_regs->r9 */
+ xorl %r9d, %r9d /* nospec r9 */
+ pushq %r10 /* pt_regs->r10 */
+ xorl %r10d, %r10d /* nospec r10 */
+ pushq %r11 /* pt_regs->r11 */
+ xorl %r11d, %r11d /* nospec r11*/
+ pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx*/
+ pushq %rbp /* pt_regs->rbp */
+ xorl %ebp, %ebp /* nospec rbp*/
+ pushq %r12 /* pt_regs->r12 */
+ xorl %r12d, %r12d /* nospec r12*/
+ pushq %r13 /* pt_regs->r13 */
+ xorl %r13d, %r13d /* nospec r13*/
+ pushq %r14 /* pt_regs->r14 */
+ xorl %r14d, %r14d /* nospec r14*/
+ pushq %r15 /* pt_regs->r15 */
+ xorl %r15d, %r15d /* nospec r15*/
+ UNWIND_HINT_REGS
+ .if \save_ret
+ pushq %rsi /* return address on top of stack */
.endif
- movq %rdx, 12*8+\offset(%rsp)
- movq %rsi, 13*8+\offset(%rsp)
- movq %rdi, 14*8+\offset(%rsp)
- UNWIND_HINT_REGS offset=\offset extra=0
- .endm
- .macro SAVE_C_REGS offset=0
- SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
- .endm
- .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
- SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
- .endm
- .macro SAVE_C_REGS_EXCEPT_R891011
- SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
- .endm
- .macro SAVE_C_REGS_EXCEPT_RCX_R891011
- SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
- .endm
- .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
- SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
- .endm
-
- .macro SAVE_EXTRA_REGS offset=0
- movq %r15, 0*8+\offset(%rsp)
- movq %r14, 1*8+\offset(%rsp)
- movq %r13, 2*8+\offset(%rsp)
- movq %r12, 3*8+\offset(%rsp)
- movq %rbp, 4*8+\offset(%rsp)
- movq %rbx, 5*8+\offset(%rsp)
- UNWIND_HINT_REGS offset=\offset
- .endm
-
- .macro POP_EXTRA_REGS
+.endm
+
+.macro POP_REGS pop_rdi=1 skip_r11rcx=0
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
- .endm
-
- .macro POP_C_REGS
+ .if \skip_r11rcx
+ popq %rsi
+ .else
popq %r11
+ .endif
popq %r10
popq %r9
popq %r8
popq %rax
+ .if \skip_r11rcx
+ popq %rsi
+ .else
popq %rcx
+ .endif
popq %rdx
popq %rsi
+ .if \pop_rdi
popq %rdi
- .endm
-
- .macro icebp
- .byte 0xf1
- .endm
+ .endif
+.endm
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
@@ -178,17 +176,12 @@ For 32-bit we have the following conventions - kernel is built with
* is just setting the LSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
- * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
* the original rbp.
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
- .if \ptregs_offset
- leaq \ptregs_offset(%rsp), %rbp
- .else
- mov %rsp, %rbp
- .endif
- orq $0x1, %rbp
+ leaq 1+\ptregs_offset(%rsp), %rbp
#endif
.endm
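[annotation] The rewritten ENCODE_FRAME_POINTER folds the old two-step (load address, OR in 1) into one leaq: adding 1 to an 8-byte-aligned pt_regs address is the same as setting its low bit. The tag scheme in C terms (illustrative encode/decode only):

    #include <stdint.h>

    static inline uintptr_t encode_frame(void *regs)   /* regs 8-byte aligned */
    {
            return (uintptr_t)regs + 1;     /* == |1 for aligned pointers */
    }

    static inline void *decode_frame(uintptr_t bp, int *is_pt_regs)
    {
            *is_pt_regs = bp & 1;                      /* unwinder's signal */
            return (void *)(bp & ~(uintptr_t)1);
    }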
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c022540d..6ad064c8cf35 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %ebx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 30c8c5344c4a..805f52703ee3 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -55,7 +55,7 @@ END(native_usergs_sysret64)
.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, \flags /* interrupts off? */
+ btl $9, \flags /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
@@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64)
swapgs
/*
- * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+ * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
* is not required to switch CR3.
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
@@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64)
pushq %rcx /* pt_regs->ip */
GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- pushq %r9 /* pt_regs->r9 */
- pushq %r10 /* pt_regs->r10 */
- pushq %r11 /* pt_regs->r11 */
- pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
- pushq %r12 /* pt_regs->r12 */
- pushq %r13 /* pt_regs->r13 */
- pushq %r14 /* pt_regs->r14 */
- pushq %r15 /* pt_regs->r15 */
- UNWIND_HINT_REGS
+
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
TRACE_IRQS_OFF
@@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
- POP_EXTRA_REGS
- popq %rsi /* skip r11 */
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rsi /* skip rcx */
- popq %rdx
- popq %rsi
+ POP_REGS pop_rdi=0 skip_r11rcx=1
/*
* Now all regs are restored except RSP and RDI.
@@ -386,8 +364,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %rbx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
@@ -471,9 +448,19 @@ END(irq_entries_start)
*
* The invariant is that, if irq_count != -1, then the IRQ stack is in use.
*/
-.macro ENTER_IRQ_STACK regs=1 old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
DEBUG_ENTRY_ASSERT_IRQS_OFF
+
+ .if \save_ret
+ /*
+ * If save_ret is set, the original stack contains one additional
+ * entry -- the return address. Therefore, move the address one
+ * entry below %rsp to \old_rsp.
+ */
+ leaq 8(%rsp), \old_rsp
+ .else
movq %rsp, \old_rsp
+ .endif
.if \regs
UNWIND_HINT_REGS base=\old_rsp
@@ -519,6 +506,15 @@ END(irq_entries_start)
.if \regs
UNWIND_HINT_REGS indirect=1
.endif
+
+ .if \save_ret
+ /*
+ * Push the return address to the stack. This return address can
+ * be found at the "real" original RSP, which was offset by 8 at
+ * the beginning of this macro.
+ */
+ pushq -8(\old_rsp)
+ .endif
.endm
/*
@@ -542,29 +538,65 @@ END(irq_entries_start)
.endm
/*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
+ * Interrupt entry helper function.
*
- * Entry runs with interrupts off.
+ * Entry runs with interrupts off. Stack layout at entry:
+ * +----------------------------------------------------+
+ * | regs->ss |
+ * | regs->rsp |
+ * | regs->eflags |
+ * | regs->cs |
+ * | regs->ip |
+ * +----------------------------------------------------+
+ * | regs->orig_ax = ~(interrupt number) |
+ * +----------------------------------------------------+
+ * | return address |
+ * +----------------------------------------------------+
*/
-
-/* 0(%rsp): ~(interrupt number) */
- .macro interrupt func
+ENTRY(interrupt_entry)
+ UNWIND_HINT_FUNC
+ ASM_CLAC
cld
- testb $3, CS-ORIG_RAX(%rsp)
+ testb $3, CS-ORIG_RAX+8(%rsp)
jz 1f
SWAPGS
- call switch_to_thread_stack
+
+ /*
+ * Switch to the thread stack. The IRET frame and orig_ax are
+ * on the stack, as well as the return address. RDI..R12 are
+ * not (yet) on the stack and space has not (yet) been
+ * allocated for them.
+ */
+ pushq %rdi
+
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ /*
+ * We have RDI, return address, and orig_ax on the stack on
+ * top of the IRET frame. That means offset=24
+ */
+ UNWIND_HINT_IRET_REGS base=%rdi offset=24
+
+ pushq 7*8(%rdi) /* regs->ss */
+ pushq 6*8(%rdi) /* regs->rsp */
+ pushq 5*8(%rdi) /* regs->eflags */
+ pushq 4*8(%rdi) /* regs->cs */
+ pushq 3*8(%rdi) /* regs->ip */
+ pushq 2*8(%rdi) /* regs->orig_ax */
+ pushq 8(%rdi) /* return address */
+ UNWIND_HINT_FUNC
+
+ movq (%rdi), %rdi
1:
- ALLOC_PT_GPREGS_ON_STACK
- SAVE_C_REGS
- SAVE_EXTRA_REGS
- ENCODE_FRAME_POINTER
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
- testb $3, CS(%rsp)
+ testb $3, CS+8(%rsp)
jz 1f
/*
@@ -572,7 +604,7 @@ END(irq_entries_start)
*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks). Since TRACE_IRQS_OFF idempotent,
+ * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
* the simplest way to handle it is to just call it twice if
* we enter from user mode. There's no reason to optimize this since
* TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -582,12 +614,15 @@ END(irq_entries_start)
CALL_enter_from_user_mode
1:
- ENTER_IRQ_STACK old_rsp=%rdi
+ ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
- call \func /* rdi points to pt_regs */
- .endm
+ ret
+END(interrupt_entry)
+
+
+/* Interrupt entry/exit. */
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
@@ -595,9 +630,10 @@ END(irq_entries_start)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
- ASM_CLAC
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
- interrupt do_IRQ
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call do_IRQ /* rdi points to pt_regs */
/* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -622,15 +658,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
ud2
1:
#endif
- POP_EXTRA_REGS
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rcx
- popq %rdx
- popq %rsi
+ POP_REGS pop_rdi=0
/*
* The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
@@ -688,8 +716,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
ud2
1:
#endif
- POP_EXTRA_REGS
- POP_C_REGS
+ POP_REGS
addq $8, %rsp /* skip regs->orig_ax */
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -799,10 +826,11 @@ END(common_interrupt)
.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
UNWIND_HINT_IRET_REGS
- ASM_CLAC
pushq $~(\num)
.Lcommon_\sym:
- interrupt \do_sym
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call \do_sym /* rdi points to pt_regs */
jmp ret_from_intr
END(\sym)
.endm
@@ -865,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
-/*
- * Switch to the thread stack. This is called with the IRET frame and
- * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
- * space has not been allocated for them.)
- */
-ENTRY(switch_to_thread_stack)
- UNWIND_HINT_FUNC
-
- pushq %rdi
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
- movq %rsp, %rdi
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-
- pushq 7*8(%rdi) /* regs->ss */
- pushq 6*8(%rdi) /* regs->rsp */
- pushq 5*8(%rdi) /* regs->eflags */
- pushq 4*8(%rdi) /* regs->cs */
- pushq 3*8(%rdi) /* regs->ip */
- pushq 2*8(%rdi) /* regs->orig_ax */
- pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
-
- movq (%rdi), %rdi
- ret
-END(switch_to_thread_stack)
-
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -908,10 +908,8 @@ ENTRY(\sym)
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
- ALLOC_PT_GPREGS_ON_STACK
-
.if \paranoid < 2
- testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
+ testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
jnz .Lfrom_usermode_switch_stack_\@
.endif
@@ -1121,9 +1119,7 @@ ENTRY(xen_failsafe_callback)
addq $0x30, %rsp
UNWIND_HINT_IRET_REGS
pushq $-1 /* orig_ax = -1 => not a system call */
- ALLOC_PT_GPREGS_ON_STACK
- SAVE_C_REGS
- SAVE_EXTRA_REGS
+ PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
@@ -1170,8 +1166,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
ENTRY(paranoid_entry)
UNWIND_HINT_FUNC
cld
- SAVE_C_REGS 8
- SAVE_EXTRA_REGS 8
+ PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
@@ -1211,21 +1206,20 @@ ENTRY(paranoid_exit)
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
/*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Save all registers in pt_regs, and switch GS if needed.
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
UNWIND_HINT_FUNC
cld
- SAVE_C_REGS 8
- SAVE_EXTRA_REGS 8
+ PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
- xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1406,22 +1400,7 @@ ENTRY(nmi)
pushq 1*8(%rdx) /* pt_regs->rip */
UNWIND_HINT_IRET_REGS
pushq $-1 /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq (%rdx) /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq %rax /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- pushq %r9 /* pt_regs->r9 */
- pushq %r10 /* pt_regs->r10 */
- pushq %r11 /* pt_regs->r11 */
- pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
- pushq %r12 /* pt_regs->r12 */
- pushq %r13 /* pt_regs->r13 */
- pushq %r14 /* pt_regs->r14 */
- pushq %r15 /* pt_regs->r15 */
- UNWIND_HINT_REGS
+ PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
/*
@@ -1631,7 +1610,6 @@ end_repeat_nmi:
* frame to point back to repeat_nmi.
*/
pushq $-1 /* ORIG_RAX: no syscall to restart */
- ALLOC_PT_GPREGS_ON_STACK
/*
* Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
@@ -1655,8 +1633,7 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- POP_EXTRA_REGS
- POP_C_REGS
+ POP_REGS
/*
* Skip orig_ax and the "outermost" frame to point RSP at the "iret"
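The offset shift between the deleted switch_to_thread_stack helper above (regs->ss at 7*8(%rdi)) and the open-coded copy in entry_INT80_compat further down (regs->ss at 6*8(%rdi)) is exactly one word: the helper was reached via CALL, so a return address sat on top of the saved frame. A sketch of both layouts as addressed through %rdi, reconstructed from the hunks; the table is illustrative, not part of the patch:

	/*
	 *   deleted helper              inlined replacement
	 *   7*8(%rdi)  regs->ss         6*8(%rdi)  regs->ss
	 *   6*8(%rdi)  regs->rsp        5*8(%rdi)  regs->rsp
	 *   5*8(%rdi)  regs->eflags     4*8(%rdi)  regs->eflags
	 *   4*8(%rdi)  regs->cs         3*8(%rdi)  regs->cs
	 *   3*8(%rdi)  regs->ip         2*8(%rdi)  regs->ip
	 *   2*8(%rdi)  regs->orig_ax    1*8(%rdi)  regs->orig_ax
	 *   1*8(%rdi)  return address   0*8(%rdi)  saved %rdi
	 *   0*8(%rdi)  saved %rdi
	 */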
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 98d5358e4041..08425c42f8b7 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
+ xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
+ xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
+ xorl %r15d, %r15d /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -278,9 +298,9 @@ sysret32_from_system_call:
*/
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r10
+ xorl %r8d, %r8d
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
swapgs
sysretl
END(entry_SYSCALL_compat)
@@ -327,26 +347,47 @@ ENTRY(entry_INT80_compat)
*/
movl %eax, %eax
+ /* switch to thread stack expects orig_ax and rdi to be pushed */
pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
- /* switch to thread stack expects orig_ax to be pushed */
- call switch_to_thread_stack
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- pushq %rdi /* pt_regs->di */
+ pushq 6*8(%rdi) /* regs->ss */
+ pushq 5*8(%rdi) /* regs->rsp */
+ pushq 4*8(%rdi) /* regs->eflags */
+ pushq 3*8(%rdi) /* regs->cs */
+ pushq 2*8(%rdi) /* regs->ip */
+ pushq 1*8(%rdi) /* regs->orig_ax */
+
+ pushq (%rdi) /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
+ xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -363,15 +404,3 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_ON
jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)
-
-ENTRY(stub32_clone)
- /*
- * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
- * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
- *
- * The native 64-bit kernel's sys_clone() implements the latter,
- * so we need to swap arguments here before calling it:
- */
- xchg %r8, %rcx
- jmp sys_clone
-ENDPROC(stub32_clone)
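The deleted stub reordered the last two clone() arguments in assembly before tail-calling sys_clone(). Its replacement, compat_sys_x86_clone(), appears in the sys_ia32.c hunk below; a minimal C sketch of the same reordering (sketch only, not patch code):

	static long clone32_shim(unsigned long flags, unsigned long newsp,
				 int __user *parent_tid, unsigned long tls,
				 int __user *child_tid)
	{
		/* 32-bit argument order in, 64-bit sys_clone() order out */
		return sys_clone(flags, newsp, parent_tid, child_tid, tls);
	}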
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 448ac2161112..2a5e99cff859 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -8,12 +8,12 @@
#
0 i386 restart_syscall sys_restart_syscall
1 i386 exit sys_exit
-2 i386 fork sys_fork sys_fork
+2 i386 fork sys_fork
3 i386 read sys_read
4 i386 write sys_write
5 i386 open sys_open compat_sys_open
6 i386 close sys_close
-7 i386 waitpid sys_waitpid sys32_waitpid
+7 i386 waitpid sys_waitpid compat_sys_x86_waitpid
8 i386 creat sys_creat
9 i386 link sys_link
10 i386 unlink sys_unlink
@@ -78,7 +78,7 @@
69 i386 ssetmask sys_ssetmask
70 i386 setreuid sys_setreuid16
71 i386 setregid sys_setregid16
-72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
+72 i386 sigsuspend sys_sigsuspend
73 i386 sigpending sys_sigpending compat_sys_sigpending
74 i386 sethostname sys_sethostname
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
@@ -96,7 +96,7 @@
87 i386 swapon sys_swapon
88 i386 reboot sys_reboot
89 i386 readdir sys_old_readdir compat_sys_old_readdir
-90 i386 mmap sys_old_mmap sys32_mmap
+90 i386 mmap sys_old_mmap compat_sys_x86_mmap
91 i386 munmap sys_munmap
92 i386 truncate sys_truncate compat_sys_truncate
93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
@@ -126,7 +126,7 @@
117 i386 ipc sys_ipc compat_sys_ipc
118 i386 fsync sys_fsync
119 i386 sigreturn sys_sigreturn sys32_sigreturn
-120 i386 clone sys_clone stub32_clone
+120 i386 clone sys_clone compat_sys_x86_clone
121 i386 setdomainname sys_setdomainname
122 i386 uname sys_newuname
123 i386 modify_ldt sys_modify_ldt
@@ -186,8 +186,8 @@
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend
-180 i386 pread64 sys_pread64 sys32_pread
-181 i386 pwrite64 sys_pwrite64 sys32_pwrite
+180 i386 pread64 sys_pread64 compat_sys_x86_pread
+181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite
182 i386 chown sys_chown16
183 i386 getcwd sys_getcwd
184 i386 capget sys_capget
@@ -196,14 +196,14 @@
187 i386 sendfile sys_sendfile compat_sys_sendfile
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork sys_vfork sys_vfork
+190 i386 vfork sys_vfork
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
192 i386 mmap2 sys_mmap_pgoff
-193 i386 truncate64 sys_truncate64 sys32_truncate64
-194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
-195 i386 stat64 sys_stat64 sys32_stat64
-196 i386 lstat64 sys_lstat64 sys32_lstat64
-197 i386 fstat64 sys_fstat64 sys32_fstat64
+193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64
+194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64
+195 i386 stat64 sys_stat64 compat_sys_x86_stat64
+196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64
+197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64
198 i386 lchown32 sys_lchown
199 i386 getuid32 sys_getuid
200 i386 getgid32 sys_getgid
@@ -231,7 +231,7 @@
# 222 is unused
# 223 is unused
224 i386 gettid sys_gettid
-225 i386 readahead sys_readahead sys32_readahead
+225 i386 readahead sys_readahead compat_sys_x86_readahead
226 i386 setxattr sys_setxattr
227 i386 lsetxattr sys_lsetxattr
228 i386 fsetxattr sys_fsetxattr
@@ -256,7 +256,7 @@
247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
248 i386 io_submit sys_io_submit compat_sys_io_submit
249 i386 io_cancel sys_io_cancel
-250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
+250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
252 i386 exit_group sys_exit_group
253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
@@ -278,7 +278,7 @@
269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
270 i386 tgkill sys_tgkill
271 i386 utimes sys_utimes compat_sys_utimes
-272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
+272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64
273 i386 vserver
274 i386 mbind sys_mbind
275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
@@ -306,7 +306,7 @@
297 i386 mknodat sys_mknodat
298 i386 fchownat sys_fchownat
299 i386 futimesat sys_futimesat compat_sys_futimesat
-300 i386 fstatat64 sys_fstatat64 sys32_fstatat
+300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat
301 i386 unlinkat sys_unlinkat
302 i386 renameat sys_renameat
303 i386 linkat sys_linkat
@@ -320,7 +320,7 @@
311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
313 i386 splice sys_splice
-314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
+314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range
315 i386 tee sys_tee
316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
317 i386 move_pages sys_move_pages compat_sys_move_pages
@@ -330,7 +330,7 @@
321 i386 signalfd sys_signalfd compat_sys_signalfd
322 i386 timerfd_create sys_timerfd_create
323 i386 eventfd sys_eventfd
-324 i386 fallocate sys_fallocate sys32_fallocate
+324 i386 fallocate sys_fallocate compat_sys_x86_fallocate
325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 577fa8adb785..8560ef68a9d6 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,10 +42,8 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
-#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
- NATIVE;
-#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+static enum { EMULATE, NONE } vsyscall_mode =
+#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE;
#else
EMULATE;
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
if (str) {
if (!strcmp("emulate", str))
vsyscall_mode = EMULATE;
- else if (!strcmp("native", str))
- vsyscall_mode = NATIVE;
else if (!strcmp("none", str))
vsyscall_mode = NONE;
else
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
WARN_ON_ONCE(address != regs->ip);
- /* This should be unreachable in NATIVE mode. */
- if (WARN_ON(vsyscall_mode == NATIVE))
- return false;
-
if (vsyscall_mode == NONE) {
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall attempted with vsyscall=none");
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
if (vsyscall_mode != NONE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
- vsyscall_mode == NATIVE
- ? PAGE_KERNEL_VSYSCALL
- : PAGE_KERNEL_VVAR);
+ PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 731153a4681e..56457cb73448 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu)
break;
case INTEL_FAM6_SANDYBRIDGE_X:
- switch (cpu_data(cpu).x86_mask) {
+ switch (cpu_data(cpu).x86_stepping) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index ae64d0b69729..cf372b90557e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void)
* on PMU interrupt
*/
if (boot_cpu_data.x86_model == 28
- && boot_cpu_data.x86_mask < 10) {
+ && boot_cpu_data.x86_stepping < 10) {
pr_cont("LBR disabled due to erratum");
return;
}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
index a5604c352930..408879b0c0d4 100644
--- a/arch/x86/events/intel/p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = {
static __init void p6_pmu_rdpmc_quirk(void)
{
- if (boot_cpu_data.x86_mask < 9) {
+ if (boot_cpu_data.x86_stepping < 9) {
/*
* PPro erratum 26; fixed in stepping 9 and above.
*/
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 6d8044ab1060..22ec65bc033a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3606,7 +3606,7 @@ static struct intel_uncore_type skx_uncore_imc = {
};
static struct attribute *skx_upi_uncore_formats_attr[] = {
- &format_attr_event_ext.attr,
+ &format_attr_event.attr,
&format_attr_umask_ext.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 96cd33bbfc85..6512498bbef6 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -51,15 +51,14 @@
#define AA(__x) ((unsigned long)(__x))
-asmlinkage long sys32_truncate64(const char __user *filename,
- unsigned long offset_low,
- unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename,
+ unsigned long, offset_low, unsigned long, offset_high)
{
return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
}
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
- unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd,
+ unsigned long, offset_low, unsigned long, offset_high)
{
return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
}
@@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
return 0;
}
-asmlinkage long sys32_stat64(const char __user *filename,
- struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_stat(filename, &stat);
@@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename,
return ret;
}
-asmlinkage long sys32_lstat64(const char __user *filename,
- struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_lstat(filename, &stat);
@@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,
return ret;
}
-asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_fstat(fd, &stat);
@@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
return ret;
}
-asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,
- struct stat64 __user *statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd,
+ const char __user *, filename,
+ struct stat64 __user *, statbuf, int, flag)
{
struct kstat stat;
int error;
@@ -153,7 +154,7 @@ struct mmap_arg_struct32 {
unsigned int offset;
};
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
+COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
{
struct mmap_arg_struct32 a;
@@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
a.offset>>PAGE_SHIFT);
}
-asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
- int options)
+COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *,
+ stat_addr, int, options)
{
return compat_sys_wait4(pid, stat_addr, options, NULL);
}
/* warning: next two assume little endian */
-asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
- u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf,
+ u32, count, u32, poslo, u32, poshi)
{
return sys_pread64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
}
-asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
- u32 count, u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf,
+ u32, count, u32, poslo, u32, poshi)
{
return sys_pwrite64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
@@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
* Some system calls that need sign extended arguments. This could be
* done by a generic wrapper.
*/
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
- __u32 len_low, __u32 len_high, int advice)
+COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low,
+ __u32, offset_high, __u32, len_low, __u32, len_high,
+ int, advice)
{
return sys_fadvise64_64(fd,
(((u64)offset_high)<<32) | offset_low,
@@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
advice);
}
-asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
- size_t count)
+COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo,
+ unsigned int, off_hi, size_t, count)
{
return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
}
-asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
- unsigned n_low, unsigned n_hi, int flags)
+COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low,
+ unsigned int, off_hi, unsigned int, n_low,
+ unsigned int, n_hi, int, flags)
{
return sys_sync_file_range(fd,
((u64)off_hi << 32) | off_low,
((u64)n_hi << 32) | n_low, flags);
}
-asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
- size_t len, int advice)
+COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo,
+ unsigned int, offset_hi, size_t, len, int, advice)
{
return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
len, advice);
}
-asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
- unsigned offset_hi, unsigned len_lo,
- unsigned len_hi)
+COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode,
+ unsigned int, offset_lo, unsigned int, offset_hi,
+ unsigned int, len_lo, unsigned int, len_hi)
{
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
+
+/*
+ * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS: tls_val is passed
+ * before child_tidptr, so swap the last two arguments for sys_clone().
+ */
+COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
+ unsigned long, newsp, int __user *, parent_tidptr,
+ unsigned long, tls_val, int __user *, child_tidptr)
+{
+ return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr,
+ tls_val);
+}
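Nearly every wrapper above follows one pattern: a 64-bit quantity arrives as two 32-bit halves and is recombined before calling the native syscall. A self-contained demonstration of the recombination, using stdint types in place of the kernel's u32/loff_t:

	#include <stdint.h>
	#include <stdio.h>

	/* ((loff_t)hi << 32) | lo, exactly as in the wrappers above */
	static int64_t combine64(uint32_t lo, uint32_t hi)
	{
		return ((int64_t)hi << 32) | lo;
	}

	int main(void)
	{
		/* hi = 1, lo = 0x200: 4294967296 + 512 = 4294967808 */
		printf("%lld\n", (long long)combine64(0x200, 1));
		return 0;
	}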
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 44f5d79d5105..11881726ed37 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
if (boot_cpu_data.x86 == 0x0F &&
boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86_model <= 0x05 &&
- boot_cpu_data.x86_mask < 0x0A)
+ boot_cpu_data.x86_stepping < 0x0A)
return 1;
else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
return 1;
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015ddcf26..c356098b6fb9 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
+#include <asm/nospec-branch.h>
+
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
}
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d111616524b..1908214b9125 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK(si)
INDIRECT_THUNK(di)
INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 30d406146016..e1259f043ae9 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
asm ("cmp %1,%2; sbb %0,%0;"
:"=r" (mask)
- :"r"(size),"r" (index)
+ :"g"(size),"r" (index)
:"cc");
return mask;
}
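For context on the barrier.h hunk: the cmp/sbb pair produces an all-ones mask when index < size and zero otherwise, with no conditional branch the CPU could mispredict (the constraint change merely lets size come from memory or an immediate). A C approximation, sketch only, since the kernel keeps this in asm precisely because a compiler may turn the comparison back into a branch:

	#include <stdint.h>

	/* ~0 when index < size, else 0 */
	static uint64_t index_mask_nospec(uint64_t index, uint64_t size)
	{
		return 0ULL - (uint64_t)(index < size);
	}

	/*
	 * usage sketch: clamp before the dependent load
	 *	index &= index_mask_nospec(index, size);
	 *	value  = array[index];
	 */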
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 3fa039855b8f..9f645ba57dbb 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)CONST_MASK(nr))
: "memory");
} else {
- asm volatile(LOCK_PREFIX "bts %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
}
}
@@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
}
/**
@@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)~CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btr %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -182,7 +182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
*/
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
}
/**
@@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btc %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
{
bool oldbit;
- asm("bts %2,%1"
+ asm(__ASM_SIZE(bts) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
*/
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
{
bool oldbit;
- asm volatile("btr %2,%1"
+ asm volatile(__ASM_SIZE(btr) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
{
bool oldbit;
- asm volatile("btc %2,%1"
+ asm volatile(__ASM_SIZE(btc) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr) : "memory");
@@ -316,7 +318,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
*/
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
+ *addr, "Ir", nr, "%0", c);
}
static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
@@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
{
bool oldbit;
- asm volatile("bt %2,%1"
+ asm volatile(__ASM_SIZE(bt) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 34d99af43994..6804d6642767 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -5,23 +5,20 @@
#include <linux/stringify.h>
/*
- * Since some emulators terminate on UD2, we cannot use it for WARN.
- * Since various instruction decoders disagree on the length of UD1,
- * we cannot use it either. So use UD0 for WARN.
+ * Even though some emulators terminate on UD2, we use it for WARN().
*
- * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
- * our kernel decoder thinks it takes a ModRM byte, which seems consistent
- * with various things like the Intel SDM instruction encoding rules)
+ * Various instruction decoders/specs disagree on the encoding of
+ * UD0/UD1, so we avoid them.
*/
-#define ASM_UD0 ".byte 0x0f, 0xff"
+#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
#define ASM_UD2 ".byte 0x0f, 0x0b"
#define INSN_UD0 0xff0f
#define INSN_UD2 0x0b0f
-#define LEN_UD0 2
+#define LEN_UD2 2
#ifdef CONFIG_GENERIC_BUG
@@ -77,7 +74,11 @@ do { \
unreachable(); \
} while (0)
-#define __WARN_FLAGS(flags) _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
+#define __WARN_FLAGS(flags) \
+do { \
+ _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
+ annotate_reachable(); \
+} while (0)
#include <asm-generic/bug.h>
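__WARN_FLAGS() now emits UD2 followed by annotate_reachable(), telling objtool that execution resumes after the trap (the #UD handler skips LEN_UD2 bytes). The shape of a warning site after this change, as a sketch that omits the __bug_table bookkeeping _BUG_FLAGS() also emits:

	do {
		asm volatile(ASM_UD2);	/* 0x0f 0x0b, raises #UD */
		annotate_reachable();	/* objtool: falls through */
	} while (0);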
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 70eddb3922ff..736771c9822e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P1\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
- [bitnum] "i" (1 << (bit & 7)),
- [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
- : : t_yes, t_no);
- t_yes:
- return true;
- t_no:
- return false;
+ asm_volatile_goto("1: jmp 6f\n"
+ "2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 4f - .\n" /* repl offset */
+ " .word %P[always]\n" /* always replace */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P[feature]\n" /* feature bit */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : [feature] "i" (bit),
+ [always] "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+t_yes:
+ return true;
+t_no:
+ return false;
}
#define static_cpu_has(bit) \
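The _static_cpu_has() rewrite above is re-indentation plus a switch from positional %P0/%P1 operands to the named [feature]/[always] operands; behavior is unchanged. For orientation, a typical call site looks like the sketch below, where both helpers are hypothetical: after alternatives patching, the feature test costs a nop or one direct jmp rather than a runtime memory test.

	static void copy_fast(void *dst, const void *src, size_t len)
	{
		if (static_cpu_has(X86_FEATURE_ERMS))
			copy_rep_movsb(dst, src, len);	/* hypothetical */
		else
			copy_unrolled(dst, src, len);	/* hypothetical */
	}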
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3f74e2..d554c11e01ff 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -315,6 +316,7 @@
#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
@@ -327,6 +329,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb80b91..a399c1ebf6f0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,7 @@
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/tlb.h>
+#include <asm/nospec-branch.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +37,18 @@
extern asmlinkage unsigned long efi_call_phys(void *, ...);
-#define arch_efi_call_virt_setup() kernel_fpu_begin()
-#define arch_efi_call_virt_teardown() kernel_fpu_end()
+#define arch_efi_call_virt_setup() \
+({ \
+ kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
+})
+
+#define arch_efi_call_virt_teardown() \
+({ \
+ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+})
+
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
\
if (efi_scratch.use_pgd) { \
efi_scratch.prev_cr3 = __read_cr3(); \
@@ -91,6 +103,7 @@ struct efi_scratch {
__flush_tlb_all(); \
} \
\
+ firmware_restrict_branch_speculation_end(); \
__kernel_fpu_end(); \
preempt_enable(); \
})
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd6f57a54a26..b605a5b6a30c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -507,6 +507,7 @@ struct kvm_vcpu_arch {
u64 smi_count;
bool tpr_access_reporting;
u64 ia32_xss;
+ u64 microcode_version;
/*
* Paging state of the vcpu
@@ -1095,6 +1096,8 @@ struct kvm_x86_ops {
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+
+ int (*get_msr_feature)(struct kvm_msr_entry *entry);
};
struct kvm_arch_async_pf {
@@ -1464,7 +1467,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
-
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 55520cec8b27..6cf0e4cb7b97 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -37,7 +37,13 @@ struct cpu_signature {
struct device;
-enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+enum ucode_state {
+ UCODE_OK = 0,
+ UCODE_NEW,
+ UCODE_UPDATED,
+ UCODE_NFOUND,
+ UCODE_ERROR,
+};
struct microcode_ops {
enum ucode_state (*request_microcode_user) (int cpu,
@@ -54,7 +60,7 @@ struct microcode_ops {
* are being called.
* See also the "Synchronization" section in microcode_core.c.
*/
- int (*apply_microcode) (int cpu);
+ enum ucode_state (*apply_microcode) (int cpu);
int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
};
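With apply_microcode() returning enum ucode_state instead of an int, callers can tell a successful update apart from "nothing to do" and from failure. A hedged sketch of such a caller follows; the ops pointer and message are illustrative, while microcode_check() is the helper declared in the processor.h hunk further down:

	static void apply_on_cpu(int cpu)
	{
		switch (microcode_ops->apply_microcode(cpu)) {
		case UCODE_UPDATED:
			microcode_check();	/* re-read CPU capabilities */
			break;
		case UCODE_ERROR:
			pr_err("microcode: apply failed on CPU%d\n", cpu);
			break;
		default:			/* UCODE_OK, UCODE_NFOUND, ... */
			break;
		}
	}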
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c931b88982a0..1de72ce514cd 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
#else
BUG();
+ return (void *)fix_to_virt(FIX_HOLE);
#endif
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 4d57894635f2..f928ad9b143f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -6,6 +6,51 @@
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
#ifdef __ASSEMBLY__
@@ -23,6 +68,18 @@
.endm
/*
+ * This should be used immediately before an indirect jump/call. It tells
+ * objtool the subsequent indirect jump/call is vouched safe for retpoline
+ * builds.
+ */
+.macro ANNOTATE_RETPOLINE_SAFE
+ .Lannotate_\@:
+ .pushsection .discard.retpoline_safe
+ _ASM_PTR .Lannotate_\@
+ .popsection
+.endm
+
+/*
* These are the bare retpoline primitives for indirect jmp and call.
* Do not use these directly; they only exist to make the ALTERNATIVE
* invocation below less ugly.
@@ -58,9 +115,9 @@
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(jmp *\reg), \
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
#else
jmp *\reg
#endif
@@ -69,18 +126,25 @@
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(call *\reg), \
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
#else
call *\reg
#endif
.endm
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "", "call __clear_rsb", \ftr
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
#endif
.endm
@@ -92,6 +156,12 @@
".long 999b - .\n\t" \
".popsection\n\t"
+#define ANNOTATE_RETPOLINE_SAFE \
+ "999:\n\t" \
+ ".pushsection .discard.retpoline_safe\n\t" \
+ _ASM_PTR " 999b\n\t" \
+ ".popsection\n\t"
+
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
/*
@@ -101,6 +171,7 @@
# define CALL_NOSPEC \
ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE( \
+ ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
"call __x86_indirect_thunk_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE)
@@ -112,7 +183,10 @@
* otherwise we'll run out of registers. We don't care about CET
* here, anyway.
*/
-# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+# define CALL_NOSPEC \
+ ALTERNATIVE( \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
" jmp 904f;\n" \
" .align 16\n" \
"901: call 903f;\n" \
@@ -155,20 +229,90 @@ extern char __indirect_thunk_end[];
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- alternative_input("",
- "call __fill_rsb",
- X86_FEATURE_RETPOLINE,
- ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+ unsigned long loops;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
+#define alternative_msr_write(_msr, _val, _feature) \
+ asm volatile(ALTERNATIVE("", \
+ "movl %[msr], %%ecx\n\t" \
+ "movl %[val], %%eax\n\t" \
+ "movl $0, %%edx\n\t" \
+ "wrmsr", \
+ _feature) \
+ : : [msr] "i" (_msr), [val] "i" (_val) \
+ : "eax", "ecx", "edx", "memory")
+
static inline void indirect_branch_prediction_barrier(void)
{
- alternative_input("",
- "call __ibp_barrier",
- X86_FEATURE_USE_IBPB,
- ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+ X86_FEATURE_USE_IBPB);
}
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ *
+ * (Implemented as CPP macros due to header hell.)
+ */
+#define firmware_restrict_branch_speculation_start() \
+do { \
+ preempt_disable(); \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
+ X86_FEATURE_USE_IBRS_FW); \
+} while (0)
+
+#define firmware_restrict_branch_speculation_end() \
+do { \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
+ X86_FEATURE_USE_IBRS_FW); \
+ preempt_enable(); \
+} while (0)
+
#endif /* __ASSEMBLY__ */
+
+/*
+ * Below is used in the eBPF JIT compiler and emits the byte sequence
+ * for the following assembly:
+ *
+ * With retpolines configured:
+ *
+ * callq do_rop
+ * spec_trap:
+ * pause
+ * lfence
+ * jmp spec_trap
+ * do_rop:
+ * mov %rax,(%rsp)
+ * retq
+ *
+ * Without retpolines configured:
+ *
+ * jmp *%rax
+ */
+#ifdef CONFIG_RETPOLINE
+# define RETPOLINE_RAX_BPF_JIT_SIZE 17
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT1_off32(0xE8, 7); /* callq do_rop */ \
+ /* spec_trap: */ \
+ EMIT2(0xF3, 0x90); /* pause */ \
+ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+ /* do_rop: */ \
+ EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
+ EMIT1(0xC3); /* retq */
+#else
+# define RETPOLINE_RAX_BPF_JIT_SIZE 2
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT2(0xFF, 0xE0); /* jmp *%rax */
+#endif
+
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
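alternative_msr_write() patches a three-instruction WRMSR sequence directly into the call site on CPUs with the feature bit set, so no function call survives (this is what replaces the deleted __ibp_barrier()/__fill_rsb()/__clear_rsb() out-of-line helpers). The patched-in sequence is equivalent to the sketch below, and callers bracket firmware calls with the start/end pair exactly as the apm.h and efi.h hunks earlier in this diff show:

	/* sketch: write a constant value to a constant MSR, high half zero */
	static inline void wrmsr_lo(unsigned int msr, unsigned int val)
	{
		asm volatile("wrmsr"
			     : /* no outputs */
			     : "c" (msr), "a" (val), "d" (0)
			     : "memory");
	}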
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bceb232..d652a3808065 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 892df375b615..c83a2f418cea 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,6 +7,7 @@
#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>
+#include <asm/nospec-branch.h>
#include <asm/paravirt_types.h>
@@ -297,9 +298,9 @@ static inline void __flush_tlb_global(void)
{
PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
-static inline void __flush_tlb_single(unsigned long addr)
+static inline void __flush_tlb_one_user(unsigned long addr)
{
- PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
+ PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);
}
static inline void flush_tlb_others(const struct cpumask *cpumask,
@@ -879,23 +880,27 @@ extern void default_banner(void);
#define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+ ANNOTATE_RETPOLINE_SAFE; \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
#else /* !CONFIG_X86_32 */
@@ -917,21 +922,25 @@ extern void default_banner(void);
*/
#define SWAPGS \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
)
#define GET_CR2_INTO_RAX \
- call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+ ANNOTATE_RETPOLINE_SAFE; \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 6ec54d01972d..180bc0bff0fb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -43,6 +43,7 @@
#include <asm/desc_defs.h>
#include <asm/kmap_types.h>
#include <asm/pgtable_types.h>
+#include <asm/nospec-branch.h>
struct page;
struct thread_struct;
@@ -217,7 +218,7 @@ struct pv_mmu_ops {
/* TLB operations */
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
- void (*flush_tlb_single)(unsigned long addr);
+ void (*flush_tlb_one_user)(unsigned long addr);
void (*flush_tlb_others)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
* offset into the paravirt_patch_template structure, and can therefore be
* freely converted back into a structure offset.
*/
-#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
+#define PARAVIRT_CALL \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%c[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ba3c523aaf16..a06b07399d17 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
{
bool oldbit;
- asm volatile("bt "__percpu_arg(2)",%1"
+ asm volatile("btl "__percpu_arg(2)",%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 63c2552b6b65..b444d83cfc95 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
{
pmdval_t v = native_pmd_val(pmd);
- return __pmd(v | set);
+ return native_make_pmd(v | set);
}
static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
{
pmdval_t v = native_pmd_val(pmd);
- return __pmd(v & ~clear);
+ return native_make_pmd(v & ~clear);
}
static inline pmd_t pmd_mkold(pmd_t pmd)
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
{
pudval_t v = native_pud_val(pud);
- return __pud(v | set);
+ return native_make_pud(v | set);
}
static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
{
pudval_t v = native_pud_val(pud);
- return __pud(v & ~clear);
+ return native_make_pud(v & ~clear);
}
static inline pud_t pud_mkold(pud_t pud)
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index e67c0620aec2..b3ec519e3982 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
void paging_init(void);
+void sync_initial_page_table(void);
/*
* Define this if things work differently on an i386 and an i486:
@@ -61,7 +62,7 @@ void paging_init(void);
#define kpte_clear_flush(ptep, vaddr) \
do { \
pte_clear(&init_mm, (vaddr), (ptep)); \
- __flush_tlb_one((vaddr)); \
+ __flush_tlb_one_kernel((vaddr)); \
} while (0)
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 81462e9a34f6..1149d2112b2e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
extern void paging_init(void);
+static inline void sync_initial_page_table(void) { }
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %p(%016lx)\n", \
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3696398a9475..acfe755562a6 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -174,7 +174,6 @@ enum page_cache_mode {
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -206,7 +205,6 @@ enum page_cache_mode {
#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
@@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud)
#else
#include <asm-generic/pgtable-nopud.h>
+static inline pud_t native_make_pud(pudval_t val)
+{
+ return (pud_t) { .p4d.pgd = native_make_pgd(val) };
+}
+
static inline pudval_t native_pud_val(pud_t pud)
{
return native_pgd_val(pud.p4d.pgd);
@@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
#else
#include <asm-generic/pgtable-nopmd.h>
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+ return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
+}
+
static inline pmdval_t native_pmd_val(pmd_t pmd)
{
return native_pgd_val(pmd.pud.p4d.pgd);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 793bae7e7ce3..b0ccd4847a58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -91,7 +91,7 @@ struct cpuinfo_x86 {
__u8 x86; /* CPU family */
__u8 x86_vendor; /* CPU vendor */
__u8 x86_model;
- __u8 x86_mask;
+ __u8 x86_stepping;
#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
@@ -109,7 +109,7 @@ struct cpuinfo_x86 {
char x86_vendor_id[16];
char x86_model_id[64];
/* in KB - valid for CPUS which support this call: */
- int x86_cache_size;
+ unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
/* Cache QoS architectural values: */
int x86_cache_max_rmid; /* max index */
@@ -977,7 +977,5 @@ bool xen_set_default_idle(void);
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
-
-void __ibp_barrier(void);
-
+void microcode_check(void);
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4e44250e7d0d..4cf11d88d3b3 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -17,7 +17,7 @@
#define _REFCOUNT_EXCEPTION \
".pushsection .text..refcount\n" \
"111:\tlea %[counter], %%" _ASM_CX "\n" \
- "112:\t" ASM_UD0 "\n" \
+ "112:\t" ASM_UD2 "\n" \
ASM_UNREACHABLE \
".popsection\n" \
"113:\n" \
@@ -67,13 +67,13 @@ static __always_inline __must_check
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, "er", i, "%0", e);
+ r->refs.counter, "er", i, "%0", e, "cx");
}
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, "%0", e);
+ r->refs.counter, "%0", e, "cx");
}
static __always_inline __must_check
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index f91c365e57c3..4914a3e7c803 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,8 +2,7 @@
#ifndef _ASM_X86_RMWcc
#define _ASM_X86_RMWcc
-#define __CLOBBERS_MEM "memory"
-#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx"
+#define __CLOBBERS_MEM(clb...) "memory", ## clb
#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
@@ -40,18 +39,19 @@ do { \
#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
- __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
- __CLOBBERS_MEM_CC_CX)
+ __CLOBBERS_MEM(clobbers))
#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
- __CLOBBERS_MEM, vcon (val))
+ __CLOBBERS_MEM(), vcon (val))
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \
+ clobbers...) \
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
- __CLOBBERS_MEM_CC_CX, vcon (val))
+ __CLOBBERS_MEM(clobbers), vcon (val))
#endif /* _ASM_X86_RMWcc */
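The rmwcc.h rework leans on the GNU ", ## args" preprocessor extension: the comma vanishes when the variadic argument is empty, so __CLOBBERS_MEM() yields just "memory" while __CLOBBERS_MEM("cc", "cx") appends both extra clobbers. A self-contained demonstration (compiles with GCC or Clang):

	#include <stdio.h>

	#define CLOBBERS(clb...) "memory", ## clb

	int main(void)
	{
		const char *none[] = { CLOBBERS() };		/* { "memory" } */
		const char *both[] = { CLOBBERS("cc", "cx") };	/* 3 entries */

		printf("%zu %zu\n", sizeof(none) / sizeof(*none),
		       sizeof(both) / sizeof(*both));		/* prints "1 3" */
		return 0;
	}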
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index d6baf23782bc..5c019d23d06b 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];
#if defined(CONFIG_X86_64)
extern char __end_rodata_hpage_align[];
+extern char __entry_trampoline_start[], __entry_trampoline_end[];
#endif
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 461f53d27708..a4189762b266 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
+void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 82c34ee25a65..906794aa034e 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -20,31 +20,43 @@
#include <asm/ia32.h>
/* ia32/sys_ia32.c */
-asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long);
-asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long,
+ unsigned long);
+asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long,
+ unsigned long);
-asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
-asmlinkage long sys32_fstatat(unsigned int, const char __user *,
+asmlinkage long compat_sys_x86_stat64(const char __user *,
+ struct stat64 __user *);
+asmlinkage long compat_sys_x86_lstat64(const char __user *,
+ struct stat64 __user *);
+asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *,
struct stat64 __user *, int);
struct mmap_arg_struct32;
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
+asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *);
-asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
+asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *,
+ int);
-asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
-asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
+asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32,
+ u32);
+asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32,
+ u32, u32);
-long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
-long sys32_vm86_warning(void);
+asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32,
+ int);
-asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
-asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
- unsigned, unsigned, int);
-asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
-asmlinkage long sys32_fallocate(int, int, unsigned,
- unsigned, unsigned, unsigned);
+asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int,
+ size_t);
+asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int,
+ unsigned int, unsigned int,
+ int);
+asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int,
+ size_t, int);
+asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int,
+ unsigned int, unsigned int);
+asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *,
+ unsigned long, int __user *);
/* ia32/ia32_signal.c */
asmlinkage long sys32_sigreturn(void);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 2b8f18ca5874..84137c22fdfa 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
#else
#define __flush_tlb() __native_flush_tlb()
#define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
#endif
static inline bool tlb_defer_switch_to_init_mm(void)
@@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void)
/*
* flush one page in the user mapping
*/
-static inline void __native_flush_tlb_single(unsigned long addr)
+static inline void __native_flush_tlb_one_user(unsigned long addr)
{
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
@@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void)
/*
* flush one page in the kernel mapping
*/
-static inline void __flush_tlb_one(unsigned long addr)
+static inline void __flush_tlb_one_kernel(unsigned long addr)
{
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
- __flush_tlb_single(addr);
+
+ /*
+ * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
+ * paravirt equivalent. Even with PCID, this is sufficient: we only
+ * use PCID if we also use global PTEs for the kernel mapping, and
+ * INVLPG flushes global translations across all address spaces.
+ *
+ * If PTI is on, then the kernel is mapped with non-global PTEs, and
+ * __flush_tlb_one_user() will flush the given address for the current
+ * kernel address space and for its usermode counterpart, but it does
+ * not flush it for other address spaces.
+ */
+ __flush_tlb_one_user(addr);
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/*
- * __flush_tlb_single() will have cleared the TLB entry for this ASID,
- * but since kernel space is replicated across all, we must also
- * invalidate all others.
+ * See above. We need to propagate the flush to all other address
+ * spaces. In principle, we only need to propagate it to kernelmode
+ * address spaces, but the extra bookkeeping we would need is not
+ * worth it.
*/
invalidate_other_asid();
}
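
A hedged sketch of the decision the new comment describes: the user-space model below replays the flush-one-kernel-page logic with stubs (pti_enabled, flush_one_user() and invalidate_other_asids() are illustrative stand-ins, not the kernel's real interfaces).

#include <stdbool.h>
#include <stdio.h>

static bool pti_enabled = true;	/* stands in for X86_FEATURE_PTI */

static void flush_one_user(unsigned long long addr)
{
	printf("INVLPG %#llx in the current address space\n", addr);
}

static void invalidate_other_asids(void)
{
	printf("mark every other ASID stale\n");
}

static void flush_one_kernel(unsigned long long addr)
{
	/*
	 * Without PTI the kernel PTE is global, so this single flush is
	 * visible in every address space.
	 */
	flush_one_user(addr);

	/*
	 * With PTI the kernel PTE is non-global: the flush above only
	 * hit the current address space, so propagate it to the rest.
	 */
	if (pti_enabled)
		invalidate_other_asids();
}

int main(void)
{
	flush_one_kernel(0xffffffff81000000ULL);
	return 0;
}

Compiled stand-alone, flipping pti_enabled shows the extra invalidation step appearing exactly when the kernel mapping stops being global.
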
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 8b6780751132..5db8b0b10766 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
+#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 197c2e6c7376..099414345865 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -241,24 +241,24 @@
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
struct hv_reenlightenment_control {
- u64 vector:8;
- u64 reserved1:8;
- u64 enabled:1;
- u64 reserved2:15;
- u64 target_vp:32;
+ __u64 vector:8;
+ __u64 reserved1:8;
+ __u64 enabled:1;
+ __u64 reserved2:15;
+ __u64 target_vp:32;
};
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
struct hv_tsc_emulation_control {
- u64 enabled:1;
- u64 reserved:63;
+ __u64 enabled:1;
+ __u64 reserved:63;
};
struct hv_tsc_emulation_status {
- u64 inprogress:1;
- u64 reserved:63;
+ __u64 inprogress:1;
+ __u64 reserved:63;
};
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 7a2ade4aa235..6cfa9c8cb7d6 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,7 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_PV_TLB_FLUSH 9
+#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 91723461dc1f..435db58a7bad 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -30,6 +30,7 @@ struct mce {
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
+ __u32 microcode;/* Microcode revision */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 6db28f17ff28..c88e0b127810 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -235,7 +235,7 @@ int amd_cache_northbridges(void)
if (boot_cpu_data.x86 == 0x10 &&
boot_cpu_data.x86_model >= 0x8 &&
(boot_cpu_data.x86_model > 0x9 ||
- boot_cpu_data.x86_mask >= 0x1))
+ boot_cpu_data.x86_stepping >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
if (boot_cpu_data.x86 == 0x15)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 25ddf02598d2..b203af0855b5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
static u32 hsx_deadline_rev(void)
{
- switch (boot_cpu_data.x86_mask) {
+ switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x3a; /* EP */
case 0x04: return 0x0f; /* EX */
}
@@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void)
static u32 bdx_deadline_rev(void)
{
- switch (boot_cpu_data.x86_mask) {
+ switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x00000011;
case 0x03: return 0x0700000e;
case 0x04: return 0x0f00000c;
@@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void)
static u32 skx_deadline_rev(void)
{
- switch (boot_cpu_data.x86_mask) {
+ switch (boot_cpu_data.x86_stepping) {
case 0x03: return 0x01000136;
case 0x04: return 0x02000014;
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8ad2e410974f..7c5538769f7e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
do {
rep_nop();
now = rdtsc();
- } while ((now - start) < 40000000000UL / HZ &&
+ } while ((now - start) < 40000000000ULL / HZ &&
time_before_eq(jiffies, end));
}
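
A hedged sketch of why the UL -> ULL suffix matters here: on an ILP32 build, unsigned long is 32 bits and cannot hold 40000000000, so the TSC delay budget would be silently truncated. The program below (HZ hard-coded purely for illustration) shows the truncated value.

#include <stdint.h>
#include <stdio.h>

#define HZ 250	/* illustrative; the kernel value is a config option */

int main(void)
{
	/* 40000000000 mod 2^32 == 1345294336: the 32-bit result is ~30x
	 * too small, so the delay loop would give up far too early. */
	uint32_t truncated = (uint32_t)40000000000ULL;
	uint64_t correct = 40000000000ULL / HZ;

	printf("truncated constant : %u\n", truncated);
	printf("correct cycle budget: %llu\n", (unsigned long long)correct);
	return 0;
}
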
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3cc471beb50b..bb6f7a2148d7 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
struct irq_desc *desc = irq_data_to_desc(irqd);
+ bool managed = irqd_affinity_is_managed(irqd);
lockdep_assert_held(&vector_lock);
trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
apicd->cpu);
- /* Setup the vector move, if required */
- if (apicd->vector && cpu_online(apicd->cpu)) {
+ /*
+ * If there is no associated vector, or if the associated vector is
+ * the shutdown vector - which is assigned so that PCI/MSI shutdown
+ * mode works - then there is nothing to release. Clear out
+ * prev_vector for this and the offlined target case.
+ */
+ apicd->prev_vector = 0;
+ if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
+ goto setnew;
+ /*
+ * If the target CPU of the previous vector is online, then mark
+ * the vector as move in progress and store it for cleanup when the
+ * first interrupt on the new vector arrives. If the target CPU is
+ * offline then the regular release mechanism via the cleanup
+ * vector is not possible and the vector can be immediately freed
+ * in the underlying matrix allocator.
+ */
+ if (cpu_online(apicd->cpu)) {
apicd->move_in_progress = true;
apicd->prev_vector = apicd->vector;
apicd->prev_cpu = apicd->cpu;
} else {
- apicd->prev_vector = 0;
+ irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
+ managed);
}
+setnew:
apicd->vector = newvec;
apicd->cpu = newcpu;
BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
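
A hedged sketch of the three-way release decision the new comment describes, with the shutdown vector value and all names chosen purely for illustration:

#include <stdbool.h>
#include <stdio.h>

#define SHUTDOWN_VECTOR 0xff	/* illustrative placeholder value */

enum action { NOTHING, DEFER_CLEANUP, FREE_NOW };

static enum action release_action(unsigned int vec, bool prev_cpu_online)
{
	if (!vec || vec == SHUTDOWN_VECTOR)
		return NOTHING;		/* nothing was really allocated */
	if (prev_cpu_online)
		return DEFER_CLEANUP;	/* old CPU can still run cleanup */
	return FREE_NOW;		/* offline CPU: free immediately */
}

int main(void)
{
	printf("%d %d %d\n",
	       release_action(0, true),		/* 0: NOTHING       */
	       release_action(34, true),	/* 1: DEFER_CLEANUP */
	       release_action(34, false));	/* 2: FREE_NOW      */
	return 0;
}
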
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 46b675aaf20b..f11910b44638 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
uv_gre_table = gre;
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ unsigned long size = ((unsigned long)(gre->limit - lgre)
+ << UV_GAM_RANGE_SHFT);
+ int order = 0;
+ char suffix[] = " KMGTPE";
+
+ while (size > 9999 && order < sizeof(suffix) - 2) {
+ size /= 1024;
+ order++;
+ }
+
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
- pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
+ pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
- ((unsigned long)(gre->limit - lgre)) >>
- (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+ size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
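
A hedged sketch of the size-scaling loop above as a stand-alone helper: divide by 1024 until the value fits in four digits and pick the matching suffix from " KMGTPE". The bound is written so the loop can never step past the last real suffix; everything else is illustrative.

#include <stdio.h>

static void print_scaled(unsigned long long size)
{
	static const char suffix[] = " KMGTPE";
	unsigned int order = 0;

	/* sizeof(suffix) - 2 stops at 'E', the last real suffix */
	while (size > 9999 && order < sizeof(suffix) - 2) {
		size /= 1024;
		order++;
	}
	printf("%5llu%c\n", size, suffix[order]);
}

int main(void)
{
	print_scaled(512);		/* "  512 " - plain bytes */
	print_scaled(64ULL << 20);	/* "   64M"               */
	print_scaled(3ULL << 40);	/* " 3072G"               */
	return 0;
}
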
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index fa1261eefa16..f91ba53e06c8 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -18,7 +18,7 @@ void foo(void)
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
- OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+ OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5bddbdcbc4a3..f0e6456ca7d3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
- if (c->x86_model == 6 && c->x86_mask == 1) {
+ if (c->x86_model == 6 && c->x86_stepping == 1) {
const int K6_BUG_LOOP = 1000000;
int n;
void (*f_vide)(void);
@@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
/* K6 with old style WHCR */
if (c->x86_model < 8 ||
- (c->x86_model == 8 && c->x86_mask < 8)) {
+ (c->x86_model == 8 && c->x86_stepping < 8)) {
/* We can only write allocate on the low 508Mb */
if (mbytes > 508)
mbytes = 508;
@@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
- if ((c->x86_model == 8 && c->x86_mask > 7) ||
+ if ((c->x86_model == 8 && c->x86_stepping > 7) ||
c->x86_model == 9 || c->x86_model == 13) {
/* The more serious chips .. */
@@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
* As per AMD technical note 27212 0.2
*/
- if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+ if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
@@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* but they are not certified as MP capable.
*/
/* Athlon 660/661 is valid. */
- if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
- (c->x86_mask == 1)))
+ if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
+ (c->x86_stepping == 1)))
return;
/* Duron 670 is valid */
- if ((c->x86_model == 7) && (c->x86_mask == 0))
+ if ((c->x86_model == 7) && (c->x86_stepping == 0))
return;
/*
@@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
* more.
*/
- if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
- ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+ if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
+ ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
(c->x86_model > 7))
if (cpu_has(c, X86_FEATURE_MP))
return;
@@ -628,7 +628,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
/* Set MTRR capability flag if appropriate */
if (c->x86 == 5)
if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
+ (c->x86_model == 8 && c->x86_stepping >= 8))
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
@@ -795,7 +795,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
* Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
* all up to and including B1.
*/
- if (c->x86_model <= 1 && c->x86_mask <= 1)
+ if (c->x86_model <= 1 && c->x86_stepping <= 1)
set_cpu_cap(c, X86_FEATURE_CPB);
}
@@ -906,11 +906,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
/* Duron Rev A0 */
- if (c->x86_model == 3 && c->x86_mask == 0)
+ if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
/* Tbird rev A1/A2 */
if (c->x86_model == 4 &&
- (c->x86_mask == 0 || c->x86_mask == 1))
+ (c->x86_stepping == 0 || c->x86_stepping == 1))
size = 256;
}
return size;
@@ -1047,7 +1047,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
while ((range = *erratum++))
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
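
A hedged sketch of the packed model/stepping comparison used above: ms = (model << 4) | stepping lets a whole model/stepping window be matched with two integer comparisons. The range structure below is illustrative, not the kernel's AMD_MODEL_RANGE() encoding.

#include <stdbool.h>
#include <stdio.h>

struct fms_range {
	unsigned int family;
	unsigned int start;	/* (model << 4) | stepping */
	unsigned int end;	/* inclusive */
};

static bool in_range(unsigned int family, unsigned int model,
		     unsigned int stepping, const struct fms_range *r)
{
	unsigned int ms = (model << 4) | stepping;

	return family == r->family && ms >= r->start && ms <= r->end;
}

int main(void)
{
	/* e.g. family 0x10, models 2..9, any stepping */
	struct fms_range r = { 0x10, (2 << 4) | 0x0, (9 << 4) | 0xf };

	printf("%d\n", in_range(0x10, 4, 2, &r));	/* 1 */
	printf("%d\n", in_range(0x10, 10, 0, &r));	/* 0 */
	return 0;
}
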
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 71949bf2de5a..bfca937bdcc3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
return SPECTRE_V2_CMD_NONE;
else {
- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
- sizeof(arg));
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
@@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
}
if (i >= ARRAY_SIZE(mitigation_options)) {
- pr_err("unknown option (%s). Switching to AUTO select\n",
- mitigation_options[i].option);
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_CMD_AUTO;
}
}
@@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
!IS_ENABLED(CONFIG_RETPOLINE)) {
- pr_err("%s selected but not compiled in. Switching to AUTO select\n",
- mitigation_options[i].option);
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
@@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void)
goto retpoline_auto;
break;
}
- pr_err("kernel not compiled with retpoline; no mitigation available!");
+ pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
return;
retpoline_auto:
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
retpoline_amd:
if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
- pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+ pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
goto retpoline_generic;
}
mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
@@ -281,7 +278,7 @@ retpoline_auto:
pr_info("%s\n", spectre_v2_strings[mode]);
/*
- * If neither SMEP or KPTI are available, there is a risk of
+ * If neither SMEP nor PTI are available, there is a risk of
* hitting userspace addresses in the RSB after a context switch
* from a shallow call stack to a deeper one. To prevent this fill
* the entire RSB, even when using IBRS.
@@ -295,21 +292,29 @@ retpoline_auto:
if ((!boot_cpu_has(X86_FEATURE_PTI) &&
!boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
- pr_info("Filling RSB on context switch\n");
+ pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
}
/* Initialize Indirect Branch Prediction Barrier if supported */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
- pr_info("Enabling Indirect Branch Prediction Barrier\n");
+ pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
+ }
+
+ /*
+ * Retpoline means the kernel is safe because it has no indirect
+ * branches. But firmware isn't, so use IBRS to protect that.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
}
}
#undef pr_fmt
#ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
return sprintf(buf, "Not affected\n");
@@ -318,28 +323,21 @@ ssize_t cpu_show_meltdown(struct device *dev,
return sprintf(buf, "Vulnerable\n");
}
-ssize_t cpu_show_spectre_v1(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
}
-ssize_t cpu_show_spectre_v2(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
spectre_v2_module_string());
}
#endif
-
-void __ibp_barrier(void)
-{
- __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
-}
-EXPORT_SYMBOL_GPL(__ibp_barrier);
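
For reference, the strings composed by the show handlers above surface as ordinary sysfs files; a minimal reader, assuming only that the kernel exposes the usual vulnerabilities directory:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2",
			"r");

	if (!f) {
		perror("spectre_v2");
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "Mitigation: ..., IBRS_FW" */
	fclose(f);
	return 0;
}
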
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index c578cd29c2d2..e5ec0f11c0de 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -140,7 +140,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_TSC);
break;
case 8:
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
default:
name = "2";
break;
@@ -215,7 +215,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
* - Note, it seems this may only be in engineering samples.
*/
if ((c->x86 == 6) && (c->x86_model == 9) &&
- (c->x86_mask == 1) && (size == 65))
+ (c->x86_stepping == 1) && (size == 65))
size -= 1;
return size;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d63f4b5706e4..348cf4821240 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c)
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
c->x86 = x86_family(tfms);
c->x86_model = x86_model(tfms);
- c->x86_mask = x86_stepping(tfms);
+ c->x86_stepping = x86_stepping(tfms);
if (cap0 & (1<<19)) {
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
@@ -1184,9 +1184,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
int i;
c->loops_per_jiffy = loops_per_jiffy;
- c->x86_cache_size = -1;
+ c->x86_cache_size = 0;
c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
@@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
- if (c->x86_mask || c->cpuid_level >= 0)
- pr_cont(", stepping: 0x%x)\n", c->x86_mask);
+ if (c->x86_stepping || c->cpuid_level >= 0)
+ pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
else
pr_cont(")\n");
}
@@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
return 0;
}
core_initcall(init_cpu_syscore);
+
+/*
+ * The microcode loader calls this to recheck CPU features after a late
+ * microcode load has actually updated the microcode. The caller holds
+ * microcode_mutex and the CPU hotplug lock.
+ */
+void microcode_check(void)
+{
+ struct cpuinfo_x86 info;
+
+ perf_check_microcode();
+
+ /* Reload CPUID max function as it might've changed. */
+ info.cpuid_level = cpuid_eax(0);
+
+ /*
+ * Copy all capability leafs to pick up the synthetic ones so that
+ * memcmp() below doesn't fail on that. The ones coming from CPUID will
+ * get overwritten in get_cpu_cap().
+ */
+ memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
+
+ get_cpu_cap(&info);
+
+ if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
+ return;
+
+ pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
+ pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+}
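
A hedged sketch of the snapshot-and-compare pattern microcode_check() uses: copy the boot-time feature words first so synthetic bits survive, re-read the hardware ones, and warn on any difference. read_caps() stands in for get_cpu_cap(); the word count is illustrative.

#include <stdio.h>
#include <string.h>

#define NCAPINTS 20	/* illustrative number of feature words */

static unsigned int boot_caps[NCAPINTS];	/* snapshot taken at boot */

static void fake_read_caps(unsigned int caps[NCAPINTS])
{
	caps[3] |= 1u << 5;	/* pretend the update exposed a new bit */
}

static void check_features(void (*read_caps)(unsigned int caps[NCAPINTS]))
{
	unsigned int now[NCAPINTS];

	/* Start from the boot copy so synthetic (software-set) bits do
	 * not show up as spurious differences. */
	memcpy(now, boot_caps, sizeof(now));
	read_caps(now);

	if (!memcmp(now, boot_caps, sizeof(now)))
		return;

	puts("CPU features changed after microcode load; consider early loading.");
}

int main(void)
{
	check_features(fake_read_caps);
	return 0;
}
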
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6b4bb335641f..8949b7ae6d92 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
/* common case step number/rev -- exceptions handled below */
c->x86_model = (dir1 >> 4) + 1;
- c->x86_mask = dir1 & 0xf;
+ c->x86_stepping = dir1 & 0xf;
/* Now cook; the original recipe is by Channing Corn, from Cyrix.
* We do the same thing for each generation: we work out
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 319bf989fad1..c3af167d0a70 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
/*
* Early microcode releases for the Spectre v2 mitigation were broken.
* Information taken from;
- * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
* - https://kb.vmware.com/s/article/52345
* - Microcode revisions observed in the wild
* - Release note from 20180108 microcode release
@@ -116,15 +116,13 @@ struct sku_microcode {
u32 microcode;
};
static const struct sku_microcode spectre_bad_microcodes[] = {
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
- { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
- { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
{ INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
{ INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
- { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
- { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
{ INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
{ INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
{ INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
@@ -136,8 +134,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
{ INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
{ INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
{ INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
- /* Updated in the 20180108 release; blacklist until we know otherwise */
- { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
/* Observed in the wild */
{ INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
{ INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
@@ -147,9 +143,16 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
{
int i;
+ /*
+ * We know that hypervisors lie to us about the microcode version, so
+ * we may as well hope that they are running the correct version.
+ */
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return false;
+
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
- c->x86_mask == spectre_bad_microcodes[i].stepping)
+ c->x86_stepping == spectre_bad_microcodes[i].stepping)
return (c->microcode <= spectre_bad_microcodes[i].microcode);
}
return false;
@@ -196,7 +199,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* need the microcode to have already been loaded... so if it is
* not, recommend a BIOS update and disable large pages.
*/
- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
c->microcode < 0x20e) {
pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
@@ -212,7 +215,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
/* CPUID workaround for 0F33/0F34 CPU */
if (c->x86 == 0xF && c->x86_model == 0x3
- && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
+ && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
c->x86_phys_bits = 36;
/*
@@ -310,7 +313,7 @@ int ppro_with_ram_bug(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask < 8) {
+ boot_cpu_data.x86_stepping < 8) {
pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
}
@@ -327,7 +330,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
* Mask B, Pentium, but not Pentium MMX
*/
if (c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
c->x86_model <= 3) {
/*
* Remember we have B step Pentia with bugs
@@ -370,7 +373,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
* model 3 mask 3
*/
- if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+ if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
clear_cpu_cap(c, X86_FEATURE_SEP);
/*
@@ -388,7 +391,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* P4 Xeon erratum 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
- if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
if (msr_set_bit(MSR_IA32_MISC_ENABLE,
MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
pr_info("CPU: C0 stepping P4 Xeon detected.\n");
@@ -403,7 +406,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* Specification Update").
*/
if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
- (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+ (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@@ -650,7 +653,7 @@ static void init_intel(struct cpuinfo_x86 *c)
case 6:
if (l2 == 128)
p = "Celeron (Mendocino)";
- else if (c->x86_mask == 0 || c->x86_mask == 5)
+ else if (c->x86_stepping == 0 || c->x86_stepping == 5)
p = "Celeron-A";
break;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 410629f10ad3..589b948e6e01 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -819,7 +819,7 @@ static __init void rdt_quirks(void)
cache_alloc_hsw_probe();
break;
case INTEL_FAM6_SKYLAKE_X:
- if (boot_cpu_data.x86_mask <= 4)
+ if (boot_cpu_data.x86_stepping <= 4)
set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
}
}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index bdab7d2f51af..fca759d272a1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
goto out_common_fail;
}
closid = ret;
+ ret = 0;
rdtgrp->closid = closid;
list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..e956eb267061 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
extern struct mca_config mca_cfg;
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ * are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
#endif /* __X86_MCE_INTERNAL_H__ */
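
A hedged sketch of the override idiom this hunk relies on: define a stub and then define the name to itself, so generic code can use #ifndef to provide the real implementation only when no stub exists. All names below are illustrative.

#include <stdio.h>

#define CONFIG_32BIT 1	/* flip to 0 to compile the real version */

#if CONFIG_32BIT
static inline void unmap_poison_pfn(unsigned long pfn) { (void)pfn; }
#define unmap_poison_pfn unmap_poison_pfn
#endif

#ifndef unmap_poison_pfn
static void unmap_poison_pfn(unsigned long pfn)
{
	printf("unmapping pfn %#lx from the 1:1 map\n", pfn);
}
#endif

int main(void)
{
	unmap_poison_pfn(0x1234);	/* a no-op when the stub is active */
	return 0;
}
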
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3a8e88a611eb..466f47301334 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -56,6 +56,9 @@
static DEFINE_MUTEX(mce_log_mutex);
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -105,6 +108,10 @@ static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -126,6 +133,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
+
+ m->microcode = boot_cpu_data.microcode;
}
DEFINE_PER_CPU(struct mce, injectm);
@@ -234,7 +243,7 @@ static void __print_mce(struct mce *m)
m->cs, m->ip);
if (m->cs == __KERNEL_CS)
- pr_cont("{%pS}", (void *)m->ip);
+ pr_cont("{%pS}", (void *)(unsigned long)m->ip);
pr_cont("\n");
}
@@ -258,7 +267,7 @@ static void __print_mce(struct mce *m)
*/
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
- cpu_data(m->extcpu).microcode);
+ m->microcode);
}
static void print_mce(struct mce *m)
@@ -590,7 +599,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
- memory_failure(pfn, 0);
+ if (!memory_failure(pfn, 0))
+ mce_unmap_kpfn(pfn);
}
return NOTIFY_OK;
@@ -1057,12 +1067,13 @@ static int do_memory_failure(struct mce *m)
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
if (ret)
pr_err("Memory error not recovered");
+ else
+ mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
return ret;
}
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
{
unsigned long decoy_addr;
@@ -1073,7 +1084,7 @@ void arch_unmap_kpfn(unsigned long pfn)
* We would like to just call:
* set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
* but doing that would radically increase the odds of a
- * speculative access to the posion page because we'd have
+ * speculative access to the poison page because we'd have
* the virtual address of the kernel 1:1 mapping sitting
* around in registers.
* Instead we get tricky. We create a non-canonical address
@@ -1098,7 +1109,6 @@ void arch_unmap_kpfn(unsigned long pfn)
if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
}
#endif
@@ -2081,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
@@ -2093,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s,
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2105,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
@@ -2116,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s,
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2123,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
- ssize_t ret = device_store_int(s, attr, buf, size);
+ unsigned long old_check_interval = check_interval;
+ ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+
+ if (check_interval < 1)
+ check_interval = 1;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+
return ret;
}
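
A hedged sketch of the decoy-address trick the comment above describes: flip bit 63 of the direct-map virtual address so the value sitting in registers is non-canonical and cannot be the target of a speculative load, then flip it back only where the real address is needed. PAGE_OFFSET here is an example constant, not the kernel's.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_OFFSET	0xffff888000000000ULL	/* example direct-map base */
#define DECOY_BIT	(1ULL << 63)

int main(void)
{
	uint64_t pfn   = 0x12345;
	uint64_t vaddr = PAGE_OFFSET + (pfn << PAGE_SHIFT);
	uint64_t decoy = vaddr ^ DECOY_BIT;	/* non-canonical on x86-64 */

	printf("real : %#llx\n", (unsigned long long)vaddr);
	printf("decoy: %#llx\n", (unsigned long long)decoy);
	printf("back : %#llx\n", (unsigned long long)(decoy ^ DECOY_BIT));
	return 0;
}
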
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 330b8462d426..48179928ff38 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
return -EINVAL;
ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
- if (ret != UCODE_OK)
+ if (ret > UCODE_UPDATED)
return -EINVAL;
return 0;
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
return patch_size;
}
-static int apply_microcode_amd(int cpu)
+static enum ucode_state apply_microcode_amd(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
p = find_patch(cpu);
if (!p)
- return 0;
+ return UCODE_NFOUND;
mc_amd = p->data;
uci->mc = p->data;
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
uci->cpu_sig.rev = rev;
- return 0;
+ return UCODE_OK;
}
if (__apply_microcode_amd(mc_amd)) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
- return -1;
+ return UCODE_ERROR;
}
pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
mc_amd->hdr.patch_id);
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
uci->cpu_sig.rev = mc_amd->hdr.patch_id;
c->microcode = mc_amd->hdr.patch_id;
- return 0;
+ return UCODE_UPDATED;
}
static int install_equiv_cpu_table(const u8 *buf)
@@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
static enum ucode_state
load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
{
+ struct ucode_patch *p;
enum ucode_state ret;
/* free old equiv table */
free_equiv_cpu_table();
ret = __load_microcode_amd(family, data, size);
-
- if (ret != UCODE_OK)
+ if (ret != UCODE_OK) {
cleanup();
+ return ret;
+ }
-#ifdef CONFIG_X86_32
- /* save BSP's matching patch for early load */
- if (save) {
- struct ucode_patch *p = find_patch(0);
- if (p) {
- memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
- memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
- PATCH_MAX_SIZE));
- }
+ p = find_patch(0);
+ if (!p) {
+ return ret;
+ } else {
+ if (boot_cpu_data.microcode == p->patch_id)
+ return ret;
+
+ ret = UCODE_NEW;
}
-#endif
+
+ /* save BSP's matching patch for early load */
+ if (!save)
+ return ret;
+
+ memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+ memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
+
return ret;
}
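
A hedged sketch of the new return-value logic: UCODE_NEW is reported only when a patch is cached for the boot CPU and differs from the revision it already runs, which is what later gates the late-load path. The enum ordering follows the kernel; the rest is illustrative.

#include <stdio.h>

enum ucode_state { UCODE_OK, UCODE_NEW, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR };

static enum ucode_state classify(unsigned int boot_rev,
				 const unsigned int *cached_patch_rev)
{
	if (!cached_patch_rev)
		return UCODE_OK;	/* nothing cached for the BSP */
	if (boot_rev == *cached_patch_rev)
		return UCODE_OK;	/* already running this patch */
	return UCODE_NEW;
}

int main(void)
{
	unsigned int patch = 0x08001250;

	printf("%d\n", classify(0x08001250, &patch));	/* 0: UCODE_OK  */
	printf("%d\n", classify(0x0800124e, &patch));	/* 1: UCODE_NEW */
	printf("%d\n", classify(0x0800124e, NULL));	/* 0: UCODE_OK  */
	return 0;
}
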
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 319dd65f98a2..10c4fc2c91f8 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -22,13 +22,16 @@
#define pr_fmt(fmt) "microcode: " fmt
#include <linux/platform_device.h>
+#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
+#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/mm.h>
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
*/
static DEFINE_MUTEX(microcode_mutex);
+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
struct cpu_info_ctx {
@@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)
return ret;
}
-struct apply_microcode_ctx {
- int err;
-};
-
static void apply_microcode_local(void *arg)
{
- struct apply_microcode_ctx *ctx = arg;
+ enum ucode_state *err = arg;
- ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+ *err = microcode_ops->apply_microcode(smp_processor_id());
}
static int apply_microcode_on_target(int cpu)
{
- struct apply_microcode_ctx ctx = { .err = 0 };
+ enum ucode_state err;
int ret;
- ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
- if (!ret)
- ret = ctx.err;
-
+ ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
+ if (!ret) {
+ if (err == UCODE_ERROR)
+ ret = 1;
+ }
return ret;
}
@@ -489,31 +494,124 @@ static void __exit microcode_dev_exit(void)
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;
-static int reload_for_cpu(int cpu)
+/*
+ * Late loading dance. Why the heavy-handed stop_machine() effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ * is loading microcode in order to avoid any negative interactions caused by
+ * the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ * requirement can be relaxed in the future. Right now, this is conservative
+ * and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
+static int check_online_cpus(void)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- enum ucode_state ustate;
- int err = 0;
+ if (num_online_cpus() == num_present_cpus())
+ return 0;
- if (!uci->valid)
- return err;
+ pr_err("Not all CPUs online, aborting microcode update.\n");
- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
- if (ustate == UCODE_OK)
- apply_microcode_on_target(cpu);
- else
- if (ustate == UCODE_ERROR)
- err = -EINVAL;
- return err;
+ return -EINVAL;
+}
+
+static atomic_t late_cpus_in;
+static atomic_t late_cpus_out;
+
+static int __wait_for_cpus(atomic_t *t, long long timeout)
+{
+ int all_cpus = num_online_cpus();
+
+ atomic_inc(t);
+
+ while (atomic_read(t) < all_cpus) {
+ if (timeout < SPINUNIT) {
+ pr_err("Timeout while waiting for CPU rendezvous, remaining: %d\n",
+ all_cpus - atomic_read(t));
+ return 1;
+ }
+
+ ndelay(SPINUNIT);
+ timeout -= SPINUNIT;
+
+ touch_nmi_watchdog();
+ }
+ return 0;
+}
+
+/*
+ * Returns:
+ * < 0 - on error
+ * 0 - no update done
+ * 1 - microcode was updated
+ */
+static int __reload_late(void *info)
+{
+ int cpu = smp_processor_id();
+ enum ucode_state err;
+ int ret = 0;
+
+ /*
+ * Wait for all CPUs to arrive. A load will not be attempted unless all
+ * CPUs show up.
+ */
+ if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
+ return -1;
+
+ spin_lock(&update_lock);
+ apply_microcode_local(&err);
+ spin_unlock(&update_lock);
+
+ if (err > UCODE_NFOUND) {
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+ return -1;
+ /* siblings return UCODE_OK because their microcode engine was already updated */
+ } else if (err == UCODE_UPDATED || err == UCODE_OK) {
+ ret = 1;
+ } else {
+ return ret;
+ }
+
+ /*
+ * Increase the wait timeout to a safe value here since we're
+ * serializing the microcode update and that could take a while on a
+ * large number of CPUs. That is fine, as the *actual* wait will be cut
+ * short by the last CPU to finish updating.
+ */
+ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
+ panic("Timeout during microcode update!\n");
+
+ return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait up to a second for them
+ * all to gather.
+ */
+static int microcode_reload_late(void)
+{
+ int ret;
+
+ atomic_set(&late_cpus_in, 0);
+ atomic_set(&late_cpus_out, 0);
+
+ ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+ if (ret > 0)
+ microcode_check();
+
+ return ret;
}
static ssize_t reload_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
+ enum ucode_state tmp_ret = UCODE_OK;
+ int bsp = boot_cpu_data.cpu_index;
unsigned long val;
- int cpu;
- ssize_t ret = 0, tmp_ret;
+ ssize_t ret = 0;
ret = kstrtoul(buf, 0, &val);
if (ret)
@@ -522,23 +620,24 @@ static ssize_t reload_store(struct device *dev,
if (val != 1)
return size;
+ tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
+ if (tmp_ret != UCODE_NEW)
+ return size;
+
get_online_cpus();
- mutex_lock(&microcode_mutex);
- for_each_online_cpu(cpu) {
- tmp_ret = reload_for_cpu(cpu);
- if (tmp_ret != 0)
- pr_warn("Error reloading microcode on CPU %d\n", cpu);
- /* save retval of the first encountered reload error */
- if (!ret)
- ret = tmp_ret;
- }
- if (!ret)
- perf_check_microcode();
+ ret = check_online_cpus();
+ if (ret)
+ goto put;
+
+ mutex_lock(&microcode_mutex);
+ ret = microcode_reload_late();
mutex_unlock(&microcode_mutex);
+
+put:
put_online_cpus();
- if (!ret)
+ if (ret >= 0)
ret = size;
return ret;
@@ -606,10 +705,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
if (system_state != SYSTEM_RUNNING)
return UCODE_NFOUND;
- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
- refresh_fw);
-
- if (ustate == UCODE_OK) {
+ ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, refresh_fw);
+ if (ustate == UCODE_NEW) {
pr_debug("CPU%d updated upon init\n", cpu);
apply_microcode_on_target(cpu);
}
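
A hedged sketch of the two-phase rendezvous used by __reload_late(), written with C11 threads and atomics (assumes a <threads.h> implementation, e.g. glibc 2.28+): every thread checks in, the serialized critical section runs, and nobody leaves until all have finished. Timeouts and NMI-watchdog feeding are deliberately omitted.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <threads.h>

#define NCPUS 4

static atomic_int cpus_in, cpus_out;
static mtx_t update_lock;

static void wait_for_all(atomic_int *t)
{
	atomic_fetch_add(t, 1);
	while (atomic_load(t) < NCPUS)
		thrd_yield();		/* the kernel bounds this wait */
}

static int reload_late(void *arg)
{
	int cpu = (int)(intptr_t)arg;

	wait_for_all(&cpus_in);		/* no one starts until all arrive */

	mtx_lock(&update_lock);		/* the update itself is serialized */
	printf("cpu %d: applying update\n", cpu);
	mtx_unlock(&update_lock);

	wait_for_all(&cpus_out);	/* no one leaves until all finish */
	return 0;
}

int main(void)
{
	thrd_t tid[NCPUS];

	mtx_init(&update_lock, mtx_plain);
	for (int i = 0; i < NCPUS; i++)
		thrd_create(&tid[i], reload_late, (void *)(intptr_t)i);
	for (int i = 0; i < NCPUS; i++)
		thrd_join(tid[i], NULL);
	mtx_destroy(&update_lock);
	return 0;
}
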
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index f7c55b0e753a..32b8e5724f96 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (!mc)
return 0;
+ /*
+ * Save us the MSR write below - which is a particularly expensive
+ * operation - when the other hyperthread has already updated the
+ * microcode.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ return UCODE_OK;
+ }
+
+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
@@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
return 0;
}
-static int apply_microcode_intel(int cpu)
+static enum ucode_state apply_microcode_intel(int cpu)
{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_intel *mc;
- struct ucode_cpu_info *uci;
- struct cpuinfo_x86 *c;
static int prev_rev;
u32 rev;
/* We should bind the task to the CPU */
if (WARN_ON(raw_smp_processor_id() != cpu))
- return -1;
+ return UCODE_ERROR;
- uci = ucode_cpu_info + cpu;
- mc = uci->mc;
+ /* Look for a newer patch in our cache: */
+ mc = find_patch(uci);
if (!mc) {
- /* Look for a newer patch in our cache: */
- mc = find_patch(uci);
+ mc = uci->mc;
if (!mc)
- return 0;
+ return UCODE_NFOUND;
}
+ /*
+ * Save us the MSR write below - which is a particularly expensive
+ * operation - when the other hyperthread has already updated the
+ * microcode.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+ return UCODE_OK;
+ }
+
+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
@@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu)
if (rev != mc->hdr.rev) {
pr_err("CPU%d update to revision 0x%x failed\n",
cpu, mc->hdr.rev);
- return -1;
+ return UCODE_ERROR;
}
if (rev != prev_rev) {
@@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu)
prev_rev = rev;
}
- c = &cpu_data(cpu);
-
uci->cpu_sig.rev = rev;
c->microcode = rev;
- return 0;
+ return UCODE_UPDATED;
}
static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
@@ -830,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
unsigned int leftover = size;
unsigned int curr_mc_size = 0, new_mc_size = 0;
unsigned int csig, cpf;
+ enum ucode_state ret = UCODE_OK;
while (leftover) {
struct microcode_header_intel mc_header;
@@ -871,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
new_mc = mc;
new_mc_size = mc_size;
mc = NULL; /* trigger new vmalloc */
+ ret = UCODE_NEW;
}
ucode_ptr += mc_size;
@@ -900,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
- return UCODE_OK;
+ return ret;
}
static int get_ucode_fw(void *to, const void *from, size_t n)
@@ -921,7 +955,7 @@ static bool is_blacklisted(unsigned int cpu)
*/
if (c->x86 == 6 &&
c->x86_model == INTEL_FAM6_BROADWELL_X &&
- c->x86_mask == 0x01 &&
+ c->x86_stepping == 0x01 &&
llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
@@ -944,7 +978,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
return UCODE_NFOUND;
sprintf(name, "intel-ucode/%02x-%02x-%02x",
- c->x86, c->x86_model, c->x86_mask);
+ c->x86, c->x86_model, c->x86_stepping);
if (request_firmware_direct(&firmware, name, device)) {
pr_debug("data file %s load failed\n", name);
@@ -982,7 +1016,7 @@ static struct microcode_ops microcode_intel_ops = {
static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
{
- u64 llc_size = c->x86_cache_size * 1024;
+ u64 llc_size = c->x86_cache_size * 1024ULL;
do_div(llc_size, c->x86_max_cores);
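
A hedged sketch of the early-revision check added to apply_microcode_intel() above: read the running revision first and skip the expensive wbinvd-plus-MSR sequence when the HT sibling already applied an equal or newer patch. read_rev() and apply() are illustrative stand-ins for the MSR accessors.

#include <stdio.h>

enum ucode_state { UCODE_OK, UCODE_UPDATED, UCODE_ERROR };

static unsigned int core_rev = 0xc6;	/* shared between HT siblings */

static unsigned int read_rev(void) { return core_rev; }
static void apply(unsigned int rev) { core_rev = rev; /* wbinvd + wrmsr */ }

static enum ucode_state maybe_apply(unsigned int patch_rev)
{
	if (read_rev() >= patch_rev)
		return UCODE_OK;	/* sibling got here first: skip */

	apply(patch_rev);
	return read_rev() == patch_rev ? UCODE_UPDATED : UCODE_ERROR;
}

int main(void)
{
	printf("first sibling : %d\n", maybe_apply(0xc8)); /* 1: UPDATED */
	printf("second sibling: %d\n", maybe_apply(0xc8)); /* 0: OK      */
	return 0;
}
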
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fdc55215d44d..e12ee86906c6 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
*/
if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask <= 7) {
+ boot_cpu_data.x86_stepping <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 40d5a8a75212..7468de429087 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -711,8 +711,8 @@ void __init mtrr_bp_init(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 0xF &&
boot_cpu_data.x86_model == 0x3 &&
- (boot_cpu_data.x86_mask == 0x3 ||
- boot_cpu_data.x86_mask == 0x4))
+ (boot_cpu_data.x86_stepping == 0x3 ||
+ boot_cpu_data.x86_stepping == 0x4))
phys_addr = 36;
size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index e7ecedafa1c8..2c8522a39ed5 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -72,8 +72,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
c->x86_model,
c->x86_model_id[0] ? c->x86_model_id : "unknown");
- if (c->x86_mask || c->cpuid_level >= 0)
- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+ if (c->x86_stepping || c->cpuid_level >= 0)
+ seq_printf(m, "stepping\t: %d\n", c->x86_stepping);
else
seq_puts(m, "stepping\t: unknown\n");
if (c->microcode)
@@ -91,8 +91,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
}
/* Cache size */
- if (c->x86_cache_size >= 0)
- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+ if (c->x86_cache_size)
+ seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size);
show_cpuinfo_core(m, c, cpu);
show_cpuinfo_misc(m, c);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c29020907886..b59e4fb40fd9 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -37,7 +37,7 @@
#define X86 new_cpu_data+CPUINFO_x86
#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL new_cpu_data+CPUINFO_x86_model
-#define X86_MASK new_cpu_data+CPUINFO_x86_mask
+#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping
#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math
#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
@@ -332,7 +332,7 @@ ENTRY(startup_32_smp)
shrb $4,%al
movb %al,X86_MODEL
andb $0x0f,%cl # mask mask revision
- movb %cl,X86_MASK
+ movb %cl,X86_STEPPING
movl %edx,X86_CAPABILITY
.Lis486:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 04a625f0fcda..0f545b3cf926 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -23,6 +23,7 @@
#include <asm/nops.h>
#include "../entry/calling.h"
#include <asm/export.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -134,6 +135,7 @@ ENTRY(secondary_startup_64)
/* Ensure I am executing from virtual addresses */
movq $1f, %rax
+ ANNOTATE_RETPOLINE_SAFE
jmp *%rax
1:
UNWIND_HINT_EMPTY
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 2f723301eb58..38deafebb21b 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -23,7 +23,7 @@
/*
* this changes the io permissions bitmap in the current task.
*/
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
{
struct thread_struct *t = &current->thread;
struct tss_struct *tss;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index bd36f3c33cd0..0715f827607c 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);
bool arch_within_kprobe_blacklist(unsigned long addr)
{
+ bool is_in_entry_trampoline_section = false;
+
+#ifdef CONFIG_X86_64
+ is_in_entry_trampoline_section =
+ (addr >= (unsigned long)__entry_trampoline_start &&
+ addr < (unsigned long)__entry_trampoline_end);
+#endif
return (addr >= (unsigned long)__kprobes_text_start &&
addr < (unsigned long)__kprobes_text_end) ||
(addr >= (unsigned long)__entry_text_start &&
- addr < (unsigned long)__entry_text_end);
+ addr < (unsigned long)__entry_text_end) ||
+ is_in_entry_trampoline_section;
}
int __init arch_init_kprobes(void)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4e37d1a851a6..bc1a27280c4b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -49,7 +49,7 @@
static int kvmapf = 1;
-static int parse_no_kvmapf(char *arg)
+static int __init parse_no_kvmapf(char *arg)
{
kvmapf = 0;
return 0;
@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
early_param("no-kvmapf", parse_no_kvmapf);
static int steal_acc = 1;
-static int parse_no_stealacc(char *arg)
+static int __init parse_no_stealacc(char *arg)
{
steal_acc = 0;
return 0;
@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
static int kvmclock_vsyscall = 1;
-static int parse_no_kvmclock_vsyscall(char *arg)
+static int __init parse_no_kvmclock_vsyscall(char *arg)
{
kvmclock_vsyscall = 0;
return 0;
@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
#endif
pa |= KVM_ASYNC_PF_ENABLED;
- /* Async page fault support for L1 hypervisor is optional */
- if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
- (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
- wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
+ pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
+ wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
@@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
{
int cpu;
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
GFP_KERNEL, cpu_to_node(cpu));
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 1f790cf9d38f..3b7427aa7d85 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -542,6 +542,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
value -= (u64)address;
*(u32 *)location = value;
break;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index da0c160e5589..f58336af095c 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
if (*(u32 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 27d0a1712663..f1c5eb99d445 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -410,7 +410,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
processor.cpuflag = CPU_ENABLED;
processor.cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping;
processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
processor.reserved[0] = 0;
processor.reserved[1] = 0;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 041096bdef86..99dc79e76bdc 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -200,9 +200,9 @@ static void native_flush_tlb_global(void)
__native_flush_tlb_global();
}
-static void native_flush_tlb_single(unsigned long addr)
+static void native_flush_tlb_one_user(unsigned long addr)
{
- __native_flush_tlb_single(addr);
+ __native_flush_tlb_one_user(addr);
}
struct static_key paravirt_steal_enabled;
@@ -401,7 +401,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.flush_tlb_user = native_flush_tlb,
.flush_tlb_kernel = native_flush_tlb_global,
- .flush_tlb_single = native_flush_tlb_single,
+ .flush_tlb_one_user = native_flush_tlb_one_user,
.flush_tlb_others = native_flush_tlb_others,
.pgd_alloc = __paravirt_pgd_alloc,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1ae67e982af7..4c616be28506 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1204,20 +1204,13 @@ void __init setup_arch(char **cmdline_p)
kasan_init();
-#ifdef CONFIG_X86_32
- /* sync back kernel address range */
- clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
-
/*
- * sync back low identity map too. It is used for example
- * in the 32-bit EFI stub.
+ * Sync back kernel address range.
+ *
+ * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+ * this call?
*/
- clone_pgd_range(initial_page_table,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
+ sync_initial_page_table();
tboot_probe();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 497aa766fab3..ea554f812ee1 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void)
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
-#ifdef CONFIG_X86_32
/*
* Sync back kernel address range again. We already did this in
* setup_arch(), but percpu data also needs to be available in
* the smpboot asm. We can't reliably pick up percpu mappings
* using vmalloc_fault(), because exception dispatch needs
* percpu data.
+ *
+ * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+ * this call?
*/
- clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
-
- /*
- * sync back low identity map too. It is used for example
- * in the 32-bit EFI stub.
- */
- clone_pgd_range(initial_page_table,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
+ sync_initial_page_table();
}
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index ac057f9b0763..0d930d8987cc 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int));
#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name))
+ BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8);
+
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code) != 8);
/*
* Ensure that the size of each si_field never changes.
* If it does, it is a sign that the
@@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void)
CHECK_CSI_SIZE (_kill, 2*sizeof(int));
CHECK_SI_SIZE (_kill, 2*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+
CHECK_CSI_OFFSET(_timer);
CHECK_CSI_SIZE (_timer, 3*sizeof(int));
CHECK_SI_SIZE (_timer, 6*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
CHECK_CSI_OFFSET(_rt);
CHECK_CSI_SIZE (_rt, 3*sizeof(int));
CHECK_SI_SIZE (_rt, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
CHECK_CSI_OFFSET(_sigchld);
CHECK_CSI_SIZE (_sigchld, 5*sizeof(int));
CHECK_SI_SIZE (_sigchld, 8*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x28);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime) != 0x1C);
+
#ifdef CONFIG_X86_X32_ABI
CHECK_CSI_OFFSET(_sigchld_x32);
CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int));
/* no _sigchld_x32 in the generic siginfo_t */
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime) != 0x20);
#endif
CHECK_CSI_OFFSET(_sigfault);
CHECK_CSI_SIZE (_sigfault, 4*sizeof(int));
CHECK_SI_SIZE (_sigfault, 8*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
+
CHECK_CSI_OFFSET(_sigpoll);
CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int));
CHECK_SI_SIZE (_sigpoll, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd) != 0x10);
+
CHECK_CSI_OFFSET(_sigsys);
CHECK_CSI_SIZE (_sigsys, 3*sizeof(int));
CHECK_SI_SIZE (_sigsys, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x1C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch) != 0x14);
+
/* any new si_fields should be added here */
}
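[Editor's note: the BUILD_BUG_ON() checks added above pin siginfo field offsets down at compile time. The same idea in portable C11, as a hedged sketch assuming a 4-byte int, which the offsets above also assume.]

#include <stddef.h>

struct demo_siginfo {
	int si_signo;
	int si_errno;
	int si_code;
};

/* Fails the build, not the runtime, if the layout drifts. */
_Static_assert(offsetof(struct demo_siginfo, si_signo) == 0, "si_signo moved");
_Static_assert(offsetof(struct demo_siginfo, si_errno) == 4, "si_errno moved");
_Static_assert(offsetof(struct demo_siginfo, si_code)  == 8, "si_code moved");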
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f27facbaa9b..ff99e2b6fc54 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1281,11 +1281,10 @@ void __init native_smp_prepare_boot_cpu(void)
cpu_set_state_online(me);
}
-void __init native_smp_cpus_done(unsigned int max_cpus)
+void __init calculate_max_logical_packages(void)
{
int ncpus;
- pr_debug("Boot done\n");
/*
* Today neither Intel nor AMD support heterogeneous systems so

* extrapolate the boot cpu's data to all packages.
@@ -1293,6 +1292,13 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
+}
+
+void __init native_smp_cpus_done(unsigned int max_cpus)
+{
+ pr_debug("Boot done\n");
+
+ calculate_max_logical_packages();
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
@@ -1430,8 +1436,8 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
- c->phys_proc_id = 0;
c->cpu_core_id = 0;
+ c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
recompute_smt_state();
}
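[Editor's note: calculate_max_logical_packages() above divides the possible-CPU count by CPUs-per-package, rounding up. The helper it relies on is plain integer arithmetic; a quick worked sketch.]

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* e.g. nr_cpu_ids = 16, 4 booted cores * 2 SMT threads = 8 per package:
 * DIV_ROUND_UP(16, 8) == 2 logical packages; 9 CPUs also rounds up to 2. */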
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 446c9ef8cfc3..3d9b2308e7fa 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -181,7 +181,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr)
break;
case BUG_TRAP_TYPE_WARN:
- regs->ip += LEN_UD0;
+ regs->ip += LEN_UD2;
return 1;
}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 1f9188f5357c..feb28fee6cea 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -5,7 +5,6 @@
#include <asm/unwind.h>
#include <asm/orc_types.h>
#include <asm/orc_lookup.h>
-#include <asm/sections.h>
#define orc_warn(fmt, ...) \
printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
}
/* vmlinux .init slow lookup: */
- if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
+ if (init_kernel_text(ip))
return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
__stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5edb27f1a2c4..9d0b5af7db91 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
return;
check_vip:
- if (VEFLAGS & X86_EFLAGS_VIP) {
+ if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) ==
+ (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) {
save_v86_state(regs, VM86_STI);
return;
}
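[Editor's note: the vm86 fix above requires both VIP and VIF before emulating STI, using the mask-and-compare idiom. A self-contained sketch; the flag values follow the x86 EFLAGS layout, bits 19 and 20.]

#define X86_EFLAGS_VIF 0x00080000UL	/* bit 19 */
#define X86_EFLAGS_VIP 0x00100000UL	/* bit 20 */

static int vip_and_vif_set(unsigned long veflags)
{
	unsigned long m = X86_EFLAGS_VIP | X86_EFLAGS_VIF;

	return (veflags & m) == m;	/* the old code tested VIP alone */
}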
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9b138a06c1a4..b854ebf5851b 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -118,9 +118,11 @@ SECTIONS
#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
+ VMLINUX_SYMBOL(__entry_trampoline_start) = .;
_entry_trampoline = .;
*(.entry_trampoline)
. = ALIGN(PAGE_SIZE);
+ VMLINUX_SYMBOL(__entry_trampoline_end) = .;
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a0c5a69bc7c4..b671fc2d0422 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) |
- (1 << KVM_FEATURE_PV_TLB_FLUSH);
+ (1 << KVM_FEATURE_PV_TLB_FLUSH) |
+ (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 924ac8ce9d50..391dda8d43b7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
- struct kvm_lapic *apic;
+ struct kvm_lapic *apic = vcpu->arch.apic;
int i;
- apic_debug("%s\n", __func__);
+ if (!apic)
+ return;
- ASSERT(vcpu);
- apic = vcpu->arch.apic;
- ASSERT(apic != NULL);
+ apic_debug("%s\n", __func__);
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2165,7 +2164,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
- kvm_lapic_reset(vcpu, false);
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
return 0;
@@ -2569,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
pe = xchg(&apic->pending_events, 0);
if (test_bit(KVM_APIC_INIT, &pe)) {
- kvm_lapic_reset(vcpu, true);
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8eca1d04aeb8..763bb3bade63 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2770,8 +2770,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
else
pte_access &= ~ACC_WRITE_MASK;
+ if (!kvm_is_mmio_pfn(pfn))
+ spte |= shadow_me_mask;
+
spte |= (u64)pfn << PAGE_SHIFT;
- spte |= shadow_me_mask;
if (pte_access & ACC_WRITE_MASK) {
@@ -3029,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
return RET_PF_RETRY;
}
- return -EFAULT;
+ return RET_PF_EMULATE;
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
@@ -5080,7 +5082,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
/* The caller should hold mmu-lock before calling this function. */
-static bool
+static __always_inline bool
slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, int start_level, int end_level,
gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
@@ -5110,7 +5112,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
return flush;
}
-static bool
+static __always_inline bool
slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, int start_level, int end_level,
bool lock_flush_tlb)
@@ -5121,7 +5123,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
@@ -5129,7 +5131,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
@@ -5137,7 +5139,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..be9c839e2c89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/microcode.h>
#include <asm/nospec-branch.h>
#include <asm/virtext.h>
@@ -178,6 +179,8 @@ struct vcpu_svm {
uint64_t sysenter_eip;
uint64_t tsc_aux;
+ u64 msr_decfg;
+
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
@@ -300,6 +303,8 @@ module_param(vgif, int, 0444);
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);
+static u8 rsm_ins_bytes[] = "\x0f\xaa";
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -1383,6 +1388,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
set_intercept(svm, INTERCEPT_XSETBV);
+ set_intercept(svm, INTERCEPT_RSM);
if (!kvm_mwait_in_guest()) {
set_intercept(svm, INTERCEPT_MONITOR);
@@ -1902,6 +1908,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
+ vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
if (!init_event) {
@@ -3699,6 +3706,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
+static int rsm_interception(struct vcpu_svm *svm)
+{
+ return x86_emulate_instruction(&svm->vcpu, 0, 0,
+ rsm_ins_bytes, 2) == EMULATE_DONE;
+}
+
static int rdpmc_interception(struct vcpu_svm *svm)
{
int err;
@@ -3860,6 +3873,22 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
+static int svm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ msr->data = 0;
+
+ switch (msr->index) {
+ case MSR_F10H_DECFG:
+ if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
+ msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3935,9 +3964,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->spec_ctrl;
break;
- case MSR_IA32_UCODE_REV:
- msr_info->data = 0x01000065;
- break;
case MSR_F15H_IC_CFG: {
int family, model;
@@ -3955,6 +3981,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x1E;
}
break;
+ case MSR_F10H_DECFG:
+ msr_info->data = svm->msr_decfg;
+ break;
default:
return kvm_get_msr_common(vcpu, msr_info);
}
@@ -4133,6 +4162,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_F10H_DECFG: {
+ struct kvm_msr_entry msr_entry;
+
+ msr_entry.index = msr->index;
+ if (svm_get_msr_feature(&msr_entry))
+ return 1;
+
+ /* Check the supported bits */
+ if (data & ~msr_entry.data)
+ return 1;
+
+ /* Don't allow the guest to change a bit, #GP */
+ if (!msr->host_initiated && (data ^ msr_entry.data))
+ return 1;
+
+ svm->msr_decfg = data;
+ break;
+ }
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
@@ -4541,7 +4588,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = npf_interception,
- [SVM_EXIT_RSM] = emulate_on_interception,
+ [SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
};
@@ -5355,7 +5402,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if (svm->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5464,11 +5511,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
- rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
if (svm->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -6236,16 +6283,18 @@ e_free:
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
+ void __user *measure = (void __user *)(uintptr_t)argp->data;
struct kvm_sev_info *sev = &kvm->arch.sev_info;
struct sev_data_launch_measure *data;
struct kvm_sev_launch_measure params;
+ void __user *p = NULL;
void *blob = NULL;
int ret;
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ if (copy_from_user(&params, measure, sizeof(params)))
return -EFAULT;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -6256,17 +6305,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!params.len)
goto cmd;
- if (params.uaddr) {
+ p = (void __user *)(uintptr_t)params.uaddr;
+ if (p) {
if (params.len > SEV_FW_BLOB_MAX_SIZE) {
ret = -EINVAL;
goto e_free;
}
- if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
- ret = -EFAULT;
- goto e_free;
- }
-
ret = -ENOMEM;
blob = kmalloc(params.len, GFP_KERNEL);
if (!blob)
@@ -6290,13 +6335,13 @@ cmd:
goto e_free_blob;
if (blob) {
- if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+ if (copy_to_user(p, blob, params.len))
ret = -EFAULT;
}
done:
params.len = data->len;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ if (copy_to_user(measure, &params, sizeof(params)))
ret = -EFAULT;
e_free_blob:
kfree(blob);
@@ -6597,7 +6642,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
struct page **pages;
void *blob, *hdr;
unsigned long n;
- int ret;
+ int ret, offset;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6623,6 +6668,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!data)
goto e_unpin_memory;
+ offset = params.guest_uaddr & (PAGE_SIZE - 1);
+ data->guest_address = __sme_page_pa(pages[0]) + offset;
+ data->guest_len = params.guest_len;
+
blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
if (IS_ERR(blob)) {
ret = PTR_ERR(blob);
@@ -6637,8 +6686,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = PTR_ERR(hdr);
goto e_free_blob;
}
- data->trans_address = __psp_pa(blob);
- data->trans_len = params.trans_len;
+ data->hdr_address = __psp_pa(hdr);
+ data->hdr_len = params.hdr_len;
data->handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
@@ -6821,6 +6870,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_unblocking = svm_vcpu_unblocking,
.update_bp_intercept = update_bp_intercept,
+ .get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
.get_segment_base = svm_get_segment_base,
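[Editor's note: the MSR_F10H_DECFG write path above follows a common validation pattern: compute the supported mask, reject writes that set unsupported bits, and refuse guest-initiated changes. A condensed sketch of just that check; names are illustrative.]

#include <stdint.h>

static int decfg_write_ok(uint64_t data, uint64_t supported,
			  int host_initiated)
{
	if (data & ~supported)
		return 0;	/* unsupported bit set: inject #GP */
	if (!host_initiated && (data ^ supported))
		return 0;	/* the guest may not change any bit */
	return 1;
}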
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f427723dc7db..2d87603f9179 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
+#include <asm/microcode.h>
#include <asm/nospec-branch.h>
#include "trace.h"
@@ -1044,6 +1045,13 @@ static inline bool is_machine_check(u32 intr_info)
(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
}
+/* Undocumented: icebp/int1 */
+static inline bool is_icebp(u32 intr_info)
+{
+ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+ == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
+}
+
static inline bool cpu_has_vmx_msr_bitmap(void)
{
return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
@@ -3226,6 +3234,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
return !(val & ~valid_bits);
}
+static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ return 1;
+}
+
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@@ -4485,7 +4498,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
hw_cr4 &= ~X86_CR4_UMIP;
- } else
+ } else if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
@@ -5765,6 +5779,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->rmode.vm86_active = 0;
vmx->spec_ctrl = 0;
+ vcpu->arch.microcode_version = 0x100000000ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -6171,7 +6186,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
- if (!(dr6 & ~DR6_RESERVED)) /* icebp */
+ if (is_icebp(intr_info))
skip_emulated_instruction(vcpu);
kvm_queue_exception(vcpu, DB_VECTOR);
@@ -9452,7 +9467,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if (vmx->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
@@ -9587,11 +9602,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
- rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
if (vmx->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -10136,7 +10151,10 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
(unsigned long)(vmcs12->posted_intr_desc_addr &
(PAGE_SIZE - 1)));
}
- if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
+ if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
+ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
+ CPU_BASED_USE_MSR_BITMAPS);
+ else
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_USE_MSR_BITMAPS);
}
@@ -10224,8 +10242,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
* updated to reflect this when L1 (or its L2s) actually write to
* the MSR.
*/
- bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
- bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+ bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+ bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
/* Nothing to do if the MSR bitmap is not in use. */
if (!cpu_has_vmx_msr_bitmap() ||
@@ -11196,7 +11214,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (ret)
return ret;
- if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+ /*
+ * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
+ * by event injection, halt vcpu.
+ */
+ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
+ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
return kvm_vcpu_halt(vcpu);
vmx->nested.nested_run_pending = 1;
@@ -12287,6 +12310,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.vcpu_put = vmx_vcpu_put,
.update_bp_intercept = update_exception_bitmap,
+ .get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
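[Editor's note: is_icebp() above decodes the VM-exit interruption info by masking the type and valid fields. A standalone sketch of the decode; the constants are stand-ins for the VMX definitions (type field in bits 10:8, valid in bit 31) and should be treated as assumptions.]

#include <stdint.h>

#define INTR_INFO_VALID_MASK		0x80000000u	/* bit 31 */
#define INTR_INFO_INTR_TYPE_MASK	0x00000700u	/* bits 10:8 */
#define INTR_TYPE_PRIV_SW_EXCEPTION	0x00000500u	/* type 5: icebp */

static int is_icebp_like(uint32_t intr_info)
{
	uint32_t mask = INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK;

	return (intr_info & mask) ==
	       (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
}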
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8a0b545ac20..18b5ca7a3197 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1049,6 +1049,45 @@ static u32 emulated_msrs[] = {
static unsigned num_emulated_msrs;
+/*
+ * List of msr numbers which are used to expose MSR-based features that
+ * can be used by a hypervisor to validate requested CPU features.
+ */
+static u32 msr_based_features[] = {
+ MSR_F10H_DECFG,
+ MSR_IA32_UCODE_REV,
+};
+
+static unsigned int num_msr_based_features;
+
+static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ switch (msr->index) {
+ case MSR_IA32_UCODE_REV:
+ rdmsrl(msr->index, msr->data);
+ break;
+ default:
+ if (kvm_x86_ops->get_msr_feature(msr))
+ return 1;
+ }
+ return 0;
+}
+
+static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+ struct kvm_msr_entry msr;
+ int r;
+
+ msr.index = index;
+ r = kvm_get_msr_feature(&msr);
+ if (r)
+ return r;
+
+ *data = msr.data;
+
+ return 0;
+}
+
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits)
@@ -2222,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_AMD64_NB_CFG:
- case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
@@ -2230,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_DC_CFG:
break;
+ case MSR_IA32_UCODE_REV:
+ if (msr_info->host_initiated)
+ vcpu->arch.microcode_version = data;
+ break;
case MSR_EFER:
return set_efer(vcpu, data);
case MSR_K7_HWCR:
@@ -2525,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0;
break;
case MSR_IA32_UCODE_REV:
- msr_info->data = 0x100000000ULL;
+ msr_info->data = vcpu->arch.microcode_version;
break;
case MSR_MTRRcap:
case 0x200 ... 0x2ff:
@@ -2680,13 +2722,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
- int i, idx;
+ int i;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
return i;
}
@@ -2785,6 +2825,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_GET_MSR_FEATURES:
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
@@ -2899,6 +2940,31 @@ long kvm_arch_dev_ioctl(struct file *filp,
goto out;
r = 0;
break;
+ case KVM_GET_MSR_FEATURE_INDEX_LIST: {
+ struct kvm_msr_list __user *user_msr_list = argp;
+ struct kvm_msr_list msr_list;
+ unsigned int n;
+
+ r = -EFAULT;
+ if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
+ goto out;
+ n = msr_list.nmsrs;
+ msr_list.nmsrs = num_msr_based_features;
+ if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
+ goto out;
+ r = -E2BIG;
+ if (n < msr_list.nmsrs)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user(user_msr_list->indices, &msr_based_features,
+ num_msr_based_features * sizeof(u32)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_GET_MSRS:
+ r = msr_io(NULL, argp, do_get_msr_feature, 1);
+ break;
}
default:
r = -EINVAL;
@@ -3636,12 +3702,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_GET_MSRS:
+ case KVM_GET_MSRS: {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = msr_io(vcpu, argp, do_get_msr, 1);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
- case KVM_SET_MSRS:
+ }
+ case KVM_SET_MSRS: {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = msr_io(vcpu, argp, do_set_msr, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
+ }
case KVM_TPR_ACCESS_REPORTING: {
struct kvm_tpr_access_ctl tac;
@@ -4464,6 +4536,19 @@ static void kvm_init_msr_list(void)
j++;
}
num_emulated_msrs = j;
+
+ for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+ struct kvm_msr_entry msr;
+
+ msr.index = msr_based_features[i];
+ if (kvm_get_msr_feature(&msr))
+ continue;
+
+ if (j < i)
+ msr_based_features[j] = msr_based_features[i];
+ j++;
+ }
+ num_msr_based_features = j;
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -8017,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
+ kvm_lapic_reset(vcpu, init_event);
+
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
@@ -8460,10 +8547,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
return r;
}
- if (!size) {
- r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
- WARN_ON(r < 0);
- }
+ if (!size)
+ vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
return 0;
}
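[Editor's note: KVM_GET_MSR_FEATURE_INDEX_LIST, added above, uses the usual two-call ioctl protocol: the first call reports the count (failing with E2BIG when the buffer is too small), the second fills the indices. A hedged userspace sketch, assuming headers that already carry the new ioctl.]

#include <linux/kvm.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

static struct kvm_msr_list *get_feature_msr_list(int kvm_fd)
{
	struct kvm_msr_list probe, *list;

	memset(&probe, 0, sizeof(probe));	/* nmsrs = 0 */
	/* Expected to fail with E2BIG, but nmsrs is written back. */
	ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &probe);

	list = calloc(1, sizeof(*list) + probe.nmsrs * sizeof(__u32));
	if (!list)
		return NULL;
	list->nmsrs = probe.nmsrs;
	if (ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list) < 0) {
		free(list);
		return NULL;
	}
	return list;
}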
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700cc6dc..25a972c61b0a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c
index d6f848d1211d..2dd1fe13a37b 100644
--- a/arch/x86/lib/cpu.c
+++ b/arch/x86/lib/cpu.c
@@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig)
{
unsigned int fam, model;
- fam = x86_family(sig);
+ fam = x86_family(sig);
model = (sig >> 4) & 0xf;
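[Editor's note: the hunk above only fixes whitespace, but for context: the CPUID signature packs stepping, model, and family into the low bits, with extended fields above them. A sketch of the conventional decode; the extended-field rules are the usual convention and an assumption here.]

static unsigned int sig_family(unsigned int sig)
{
	unsigned int fam = (sig >> 8) & 0xf;

	if (fam == 0xf)			/* extended family kicks in */
		fam += (sig >> 20) & 0xff;
	return fam;
}

static unsigned int sig_model(unsigned int sig)
{
	unsigned int fam = sig_family(sig);
	unsigned int model = (sig >> 4) & 0xf;

	if (fam >= 0x6)			/* extended model for family 6+ */
		model += ((sig >> 16) & 0xf) << 4;
	return model;
}

/* stepping is simply sig & 0xf (cf. the x86_stepping rename earlier) */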
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 7b881d03d0dd..3cdf06128d13 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);
asm(
".type just_return_func, @function\n"
+ ".globl just_return_func\n"
"just_return_func:\n"
" ret\n"
".size just_return_func, .-just_return_func\n"
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3a5e03..c909961e678a 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
GENERATE_THUNK(r14)
GENERATE_THUNK(r15)
#endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
- mov $(\nr / 2), %_ASM_BX
- .align 16
-771:
- call 772f
-773: /* speculation trap */
- pause
- lfence
- jmp 773b
- .align 16
-772:
- call 774f
-775: /* speculation trap */
- pause
- lfence
- jmp 775b
- .align 16
-774:
- dec %_ASM_BX
- jnz 771b
- add $((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
-
-ENTRY(__fill_rsb)
- STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
- ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
- STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
- ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index b9283cc27622..476d810639a8 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void)
for_each_possible_cpu(cpu)
setup_cpu_entry_area(cpu);
+
+ /*
+	 * This is the last essential update to swapper_pg_dir which needs
+ * to be synchronized to initial_page_table on 32bit.
+ */
+ sync_initial_page_table();
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 800de815519c..25a30b5d6582 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (!pmd_k)
return -1;
- if (pmd_huge(*pmd_k))
+ if (pmd_large(*pmd_k))
return 0;
pte_k = pte_offset_kernel(pmd_k, address);
@@ -475,7 +475,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
BUG();
- if (pud_huge(*pud))
+ if (pud_large(*pud))
return 0;
pmd = pmd_offset(pud, address);
@@ -486,7 +486,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
BUG();
- if (pmd_huge(*pmd))
+ if (pmd_large(*pmd))
return 0;
pte_ref = pte_offset_kernel(pmd_ref, address);
@@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
tsk = current;
mm = tsk->mm;
- /*
- * Detect and handle instructions that would cause a page fault for
- * both a tracked kernel page and a userspace page.
- */
prefetchw(&mm->mmap_sem);
if (unlikely(kmmio_fault(regs, address)))
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 79cb066f40c0..396e1f0151ac 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
}
#endif /* CONFIG_HIGHMEM */
+void __init sync_initial_page_table(void)
+{
+ clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+
+ /*
+ * sync back low identity map too. It is used for example
+ * in the 32-bit EFI stub.
+ */
+ clone_pgd_range(initial_page_table,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+}
+
void __init native_pagetable_init(void)
{
unsigned long pfn, va;
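[Editor's note: sync_initial_page_table() above is built on clone_pgd_range(), which just copies top-level page-directory slots so both page tables share the same lower levels. A toy sketch of that contract; the kernel's version goes through paravirt-aware setters, which this ignores.]

typedef unsigned long pgd_t;	/* toy stand-in */

static void clone_pgd_range_sketch(pgd_t *dst, const pgd_t *src, int count)
{
	while (count--)
		*dst++ = *src++;	/* share, not duplicate, the slots */
}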
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ab42c852069..af11a2890235 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -256,7 +256,7 @@ static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
* It's enough to flush this one mapping.
* (PGE mappings get flushed as well)
*/
- __flush_tlb_one(vaddr);
+ __flush_tlb_one_kernel(vaddr);
}
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
@@ -800,17 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
#define PAGE_INUSE 0xFD
-static void __meminit free_pagetable(struct page *page, int order,
- struct vmem_altmap *altmap)
+static void __meminit free_pagetable(struct page *page, int order)
{
unsigned long magic;
unsigned int nr_pages = 1 << order;
- if (altmap) {
- vmem_altmap_free(altmap, nr_pages);
- return;
- }
-
/* bootmem page has reserved flag */
if (PageReserved(page)) {
__ClearPageReserved(page);
@@ -826,9 +820,17 @@ static void __meminit free_pagetable(struct page *page, int order,
free_pages((unsigned long)page_address(page), order);
}
-static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
+static void __meminit free_hugepage_table(struct page *page,
struct vmem_altmap *altmap)
{
+ if (altmap)
+ vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE);
+ else
+ free_pagetable(page, get_order(PMD_SIZE));
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
pte_t *pte;
int i;
@@ -839,14 +841,13 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
}
	/* free a pte table */
- free_pagetable(pmd_page(*pmd), 0, altmap);
+ free_pagetable(pmd_page(*pmd), 0);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
- struct vmem_altmap *altmap)
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
pmd_t *pmd;
int i;
@@ -858,14 +859,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
}
	/* free a pmd table */
- free_pagetable(pud_page(*pud), 0, altmap);
+ free_pagetable(pud_page(*pud), 0);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
- struct vmem_altmap *altmap)
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
pud_t *pud;
int i;
@@ -877,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
}
	/* free a pud table */
- free_pagetable(p4d_page(*p4d), 0, altmap);
+ free_pagetable(p4d_page(*p4d), 0);
spin_lock(&init_mm.page_table_lock);
p4d_clear(p4d);
spin_unlock(&init_mm.page_table_lock);
@@ -885,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
- struct vmem_altmap *altmap, bool direct)
+ bool direct)
{
unsigned long next, pages = 0;
pte_t *pte;
@@ -916,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
	 * freed when offlining, or simply not in use.
*/
if (!direct)
- free_pagetable(pte_page(*pte), 0, altmap);
+ free_pagetable(pte_page(*pte), 0);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -939,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
page_addr = page_address(pte_page(*pte));
if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
- free_pagetable(pte_page(*pte), 0, altmap);
+ free_pagetable(pte_page(*pte), 0);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -974,9 +974,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE)) {
if (!direct)
- free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE),
- altmap);
+ free_hugepage_table(pmd_page(*pmd),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -989,9 +988,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
page_addr = page_address(pmd_page(*pmd));
if (!memchr_inv(page_addr, PAGE_INUSE,
PMD_SIZE)) {
- free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE),
- altmap);
+ free_hugepage_table(pmd_page(*pmd),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -1003,8 +1001,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
}
pte_base = (pte_t *)pmd_page_vaddr(*pmd);
- remove_pte_table(pte_base, addr, next, altmap, direct);
- free_pte_table(pte_base, pmd, altmap);
+ remove_pte_table(pte_base, addr, next, direct);
+ free_pte_table(pte_base, pmd);
}
/* Call free_pmd_table() in remove_pud_table(). */
@@ -1033,8 +1031,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PUD_SIZE)) {
if (!direct)
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE),
- altmap);
+ get_order(PUD_SIZE));
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1048,8 +1045,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE,
PUD_SIZE)) {
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE),
- altmap);
+ get_order(PUD_SIZE));
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1062,7 +1058,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
pmd_base = pmd_offset(pud, 0);
remove_pmd_table(pmd_base, addr, next, direct, altmap);
- free_pmd_table(pmd_base, pud, altmap);
+ free_pmd_table(pmd_base, pud);
}
if (direct)
@@ -1094,7 +1090,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
if (CONFIG_PGTABLE_LEVELS == 5)
- free_pud_table(pud_base, p4d, altmap);
+ free_pud_table(pud_base, p4d);
}
if (direct)
@@ -1193,8 +1189,8 @@ void __init mem_init(void)
register_page_bootmem_info();
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
- PAGE_SIZE, KCORE_OTHER);
+ if (get_gate_vma(&init_mm))
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
mem_init_print_info(NULL);
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c45b6ec5357b..e2db83bebc3b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -820,5 +820,5 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else
pte_clear(&init_mm, addr, pte);
- __flush_tlb_one(addr);
+ __flush_tlb_one_kernel(addr);
}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 58477ec3d66d..7c8686709636 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -168,7 +168,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
return -1;
}
- __flush_tlb_one(f->addr);
+ __flush_tlb_one_kernel(f->addr);
return 0;
}
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 01f682cf77a8..40a6085063d6 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -15,6 +15,7 @@
#include <asm/page.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
+#include <asm/nospec-branch.h>
.text
.code64
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
+ ANNOTATE_RETPOLINE_SAFE
call *%rax /* Call the encryption routine */
pop %r12
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 004abf9ebf12..34cda7e0551b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd)
return 0;
}
+
+/**
+ * pud_free_pmd_page - Clear pud entry and free pmd page.
+ * @pud: Pointer to a PUD.
+ *
+ * Context: The pud range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pud_free_pmd_page(pud_t *pud)
+{
+ pmd_t *pmd;
+ int i;
+
+ if (pud_none(*pud))
+ return 1;
+
+ pmd = (pmd_t *)pud_page_vaddr(*pud);
+
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ if (!pmd_free_pte_page(&pmd[i]))
+ return 0;
+
+ pud_clear(pud);
+ free_page((unsigned long)pmd);
+
+ return 1;
+}
+
+/**
+ * pmd_free_pte_page - Clear pmd entry and free pte page.
+ * @pmd: Pointer to a PMD.
+ *
+ * Context: The pmd range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+ pte_t *pte;
+
+ if (pmd_none(*pmd))
+ return 1;
+
+ pte = (pte_t *)pmd_page_vaddr(*pmd);
+ pmd_clear(pmd);
+ free_page((unsigned long)pte);
+
+ return 1;
+}
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index c3c5274410a9..9bb7f0ab9fe6 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -63,7 +63,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
* It's enough to flush this one mapping.
* (PGE mappings get flushed as well)
*/
- __flush_tlb_one(vaddr);
+ __flush_tlb_one_kernel(vaddr);
}
unsigned long __FIXADDR_TOP = 0xfffff000;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index ce38f165489b..631507f0c198 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)
}
/*
- * Clone the ESPFIX P4D into the user space visinble page table
+ * Clone the ESPFIX P4D into the user space visible page table
*/
static void __init pti_setup_espfix64(void)
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8dcc0607f805..7f1a51399674 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -498,7 +498,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
* flush that changes context.tlb_gen from 2 to 3. If they get
* processed on this CPU in reverse order, we'll see
* local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
- * If we were to use __flush_tlb_single() and set local_tlb_gen to
+ * If we were to use __flush_tlb_one_user() and set local_tlb_gen to
 * 3, we'd break the invariant: we'd update local_tlb_gen above
* 1 without the full flush that's needed for tlb_gen 2.
*
@@ -519,7 +519,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
addr = f->start;
while (addr < f->end) {
- __flush_tlb_single(addr);
+ __flush_tlb_one_user(addr);
addr += PAGE_SIZE;
}
if (local)
@@ -666,7 +666,7 @@ static void do_kernel_range_flush(void *info)
	/* flush the range one page at a time with 'invlpg' */
for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
- __flush_tlb_one(addr);
+ __flush_tlb_one_kernel(addr);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
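[Editor's note: the reworded comment above spells out the tlb_gen invariant: a partial flush is only safe when it advances local_tlb_gen by exactly one step to the current mm generation. Reduced to a predicate, as a hedged sketch of the reasoning, not the kernel's code.]

#include <stdint.h>

static int partial_flush_ok(uint64_t local_gen, uint64_t mm_gen,
			    uint64_t flush_gen, int full_range)
{
	/* e.g. local_gen == 1, mm_gen == 3, flush_gen == 3: must go full */
	return !full_range &&
	       flush_gen == local_gen + 1 &&
	       flush_gen == mm_gen;
}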
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4923d92f918d..b725154182cc 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -11,10 +11,11 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
-#include <asm/cacheflush.h>
-#include <asm/set_memory.h>
#include <linux/bpf.h>
+#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
+
/*
* assembly code in arch/x86/net/bpf_jit.S
*/
@@ -60,7 +61,12 @@ static bool is_imm8(int value)
static bool is_simm32(s64 value)
{
- return value == (s64) (s32) value;
+ return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+ return value == (u64)(u32)value;
}
/* mov dst, src */
@@ -97,16 +103,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define X86_JLE 0x7E
#define X86_JG 0x7F
-static void bpf_flush_icache(void *start, void *end)
-{
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
- set_fs(old_fs);
-}
-
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
@@ -211,7 +207,7 @@ struct jit_context {
/* emit x64 prologue code for BPF program and check its size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -246,18 +242,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
/* mov qword ptr [rbp+24],r15 */
EMIT4(0x4C, 0x89, 0x7D, 24);
- /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
- * we need to reset the counter to 0. It's done in two instructions,
- * resetting rax register to 0 (xor on eax gets 0 extended), and
- * moving it to the counter location.
- */
+ if (!ebpf_from_cbpf) {
+ /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+ * calls we need to reset the counter to 0. It's done in two
+ * instructions, resetting rax register to 0, and moving it
+ * to the counter location.
+ */
- /* xor eax, eax */
- EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp+32], rax */
- EMIT4(0x48, 0x89, 0x45, 32);
+ /* xor eax, eax */
+ EMIT2(0x31, 0xc0);
+ /* mov qword ptr [rbp+32], rax */
+ EMIT4(0x48, 0x89, 0x45, 32);
+
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ }
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
}
@@ -290,7 +289,7 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 43 /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
@@ -299,7 +298,7 @@ static void emit_bpf_tail_call(u8 **pprog)
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 32
+#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
@@ -313,7 +312,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* goto out;
*/
EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
-#define OFFSET3 10
+#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
@@ -326,7 +325,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* rdi == ctx (1st arg)
* rax == prog->bpf_func + prologue_size
*/
- EMIT2(0xFF, 0xE0); /* jmp rax */
+ RETPOLINE_RAX_BPF_JIT();
/* out: */
BUILD_BUG_ON(cnt - label1 != OFFSET1);
@@ -355,6 +354,86 @@ static void emit_load_skb_data_hlen(u8 **pprog)
*pprog = prog;
}
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+ u32 dst_reg, const u32 imm32)
+{
+ u8 *prog = *pprog;
+ u8 b1, b2, b3;
+ int cnt = 0;
+
+ /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+ * (which zero-extends imm32) to save 2 bytes.
+ */
+ if (sign_propagate && (s32)imm32 < 0) {
+ /* 'mov %rax, imm32' sign extends imm32 */
+ b1 = add_1mod(0x48, dst_reg);
+ b2 = 0xC7;
+ b3 = 0xC0;
+ EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+ goto done;
+ }
+
+ /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+ * to save 3 bytes.
+ */
+ if (imm32 == 0) {
+ if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ b2 = 0x31; /* xor */
+ b3 = 0xC0;
+ EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+ goto done;
+ }
+
+ /* mov %eax, imm32 */
+ if (is_ereg(dst_reg))
+ EMIT1(add_1mod(0x40, dst_reg));
+ EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+ *pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+ const u32 imm32_hi, const u32 imm32_lo)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+ /* For emitting plain u32, where sign bit must not be
+	 * propagated, LLVM tends to load imm64 over mov32
+ * directly, so save couple of bytes by just doing
+ * 'mov %eax, imm32' instead.
+ */
+ emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+ } else {
+ /* movabsq %rax, imm64 */
+ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+ EMIT(imm32_lo, 4);
+ EMIT(imm32_hi, 4);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is64) {
+ /* mov dst, src */
+ EMIT_mov(dst_reg, src_reg);
+ } else {
+ /* mov32 dst, src */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, dst_reg, src_reg));
+ EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ }
+
+ *pprog = prog;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -368,7 +447,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int proglen = 0;
u8 *prog = temp;
- emit_prologue(&prog, bpf_prog->aux->stack_depth);
+ emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ bpf_prog_was_classic(bpf_prog));
if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
@@ -377,7 +457,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
- u8 b1 = 0, b2 = 0, b3 = 0;
+ u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
bool reload_skb_data;
@@ -413,16 +493,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
- /* mov dst, src */
case BPF_ALU64 | BPF_MOV | BPF_X:
- EMIT_mov(dst_reg, src_reg);
- break;
-
- /* mov32 dst, src */
case BPF_ALU | BPF_MOV | BPF_X:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT1(add_2mod(0x40, dst_reg, src_reg));
- EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ emit_mov_reg(&prog,
+ BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, src_reg);
break;
/* neg dst */
@@ -485,58 +560,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
case BPF_ALU64 | BPF_MOV | BPF_K:
- /* optimization: if imm32 is positive,
- * use 'mov eax, imm32' (which zero-extends imm32)
- * to save 2 bytes
- */
- if (imm32 < 0) {
- /* 'mov rax, imm32' sign extends imm32 */
- b1 = add_1mod(0x48, dst_reg);
- b2 = 0xC7;
- b3 = 0xC0;
- EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
- break;
- }
-
case BPF_ALU | BPF_MOV | BPF_K:
- /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
- * to save 3 bytes.
- */
- if (imm32 == 0) {
- if (is_ereg(dst_reg))
- EMIT1(add_2mod(0x40, dst_reg, dst_reg));
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
- break;
- }
-
- /* mov %eax, imm32 */
- if (is_ereg(dst_reg))
- EMIT1(add_1mod(0x40, dst_reg));
- EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+ emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, imm32);
break;
case BPF_LD | BPF_IMM | BPF_DW:
- /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
- * to save 7 bytes.
- */
- if (insn[0].imm == 0 && insn[1].imm == 0) {
- b1 = add_2mod(0x48, dst_reg, dst_reg);
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
- insn++;
- i++;
- break;
- }
-
- /* movabsq %rax, imm64 */
- EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
- EMIT(insn[0].imm, 4);
- EMIT(insn[1].imm, 4);
-
+ emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
insn++;
i++;
break;
@@ -593,36 +623,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_X:
- EMIT1(0x50); /* push rax */
- EMIT1(0x52); /* push rdx */
+ {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+ if (dst_reg != BPF_REG_0)
+ EMIT1(0x50); /* push rax */
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x52); /* push rdx */
/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
if (BPF_SRC(insn->code) == BPF_X)
- /* mov rax, src_reg */
- EMIT_mov(BPF_REG_0, src_reg);
+ emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
else
- /* mov rax, imm32 */
- EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+ emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
- if (BPF_CLASS(insn->code) == BPF_ALU64)
+ if (is64)
EMIT1(add_1mod(0x48, AUX_REG));
else if (is_ereg(AUX_REG))
EMIT1(add_1mod(0x40, AUX_REG));
/* mul(q) r11 */
EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
- /* mov r11, rax */
- EMIT_mov(AUX_REG, BPF_REG_0);
-
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
-
- /* mov dst_reg, r11 */
- EMIT_mov(dst_reg, AUX_REG);
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x5A); /* pop rdx */
+ if (dst_reg != BPF_REG_0) {
+ /* mov dst_reg, rax */
+ EMIT_mov(dst_reg, BPF_REG_0);
+ EMIT1(0x58); /* pop rax */
+ }
break;
-
+ }
/* shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
@@ -640,7 +672,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_RSH: b3 = 0xE8; break;
case BPF_ARSH: b3 = 0xF8; break;
}
- EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+ if (imm32 == 1)
+ EMIT2(0xD1, add_1reg(b3, dst_reg));
+ else
+ EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
break;
case BPF_ALU | BPF_LSH | BPF_X:
@@ -1187,7 +1223,7 @@ skip_init_addrs:
* may converge on the last pass. In such case do one more
* pass to emit the final image
*/
- for (pass = 0; pass < 10 || image; pass++) {
+ for (pass = 0; pass < 20 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
if (proglen <= 0) {
image = NULL;
@@ -1214,13 +1250,13 @@ skip_init_addrs:
}
}
oldproglen = proglen;
+ cond_resched();
}
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, proglen, pass + 1, image);
if (image) {
- bpf_flush_icache(header, image + proglen);
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(header);
} else {
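The two mov hunks above fold the open-coded encodings into emit_mov_imm32()/emit_mov_imm64() helpers that keep the old size optimizations: xor for zero, a zero-extending 32-bit mov for non-negative immediates, a sign-extending mov otherwise. A minimal user-space sketch of just that decision logic, not the kernel's emitter:

/*
 * Sketch of the immediate-mov size heuristic the helpers preserve;
 * the enum and function names here are stand-ins, not kernel symbols.
 */
#include <stdint.h>
#include <stdio.h>

enum mov_kind { MOV_XOR, MOV_IMM32, MOV_SIGN_EXT64 };

static enum mov_kind pick_mov(int is64, int32_t imm32)
{
	if (imm32 == 0)
		return MOV_XOR;		/* xor dst,dst: shortest encoding */
	if (!is64 || imm32 > 0)
		return MOV_IMM32;	/* mov eax,imm32 zero-extends */
	return MOV_SIGN_EXT64;		/* mov rax,imm32 sign-extends */
}

int main(void)
{
	printf("%d %d %d\n", pick_mov(1, 0), pick_mov(1, 5), pick_mov(1, -1));
	return 0;
}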
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 174c59774cc9..a7a7677265b6 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -460,7 +460,7 @@ static int nmi_setup(void)
goto fail;
for_each_possible_cpu(cpu) {
- if (!cpu)
+ if (!IS_ENABLED(CONFIG_SMP) || !cpu)
continue;
memcpy(per_cpu(cpu_msrs, cpu).counters,
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 2c67bae6bb53..fb1df9488e98 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -79,7 +79,7 @@ static void intel_mid_power_off(void)
static void intel_mid_reboot(void)
{
- intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+ intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
}
static unsigned long __init intel_mid_calibrate_tsc(void)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index c2e9285d1bf1..db77e087adaf 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
local_flush_tlb();
stat->d_alltlb++;
} else {
- __flush_tlb_single(msg->address);
+ __flush_tlb_one_user(msg->address);
stat->d_onetlb++;
}
stat->d_requestee++;
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index de53bd15df5a..24bb7598774e 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -102,7 +102,7 @@ ENTRY(startup_32)
* don't we'll eventually crash trying to execute encrypted
* instructions.
*/
- bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+ btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
jnc .Ldone
movl $MSR_K8_SYSCFG, %ecx
rdmsr
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5d73c443e778..220e97841e49 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
/*
* PC relative relocations don't need to be adjusted unless
* referencing a percpu symbol.
+ *
+ * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
*/
if (is_percpu_sym(sym, symname))
add_reloc(&relocs32neg, offset);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c047f42552e1..3c2c2530737e 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
if (!xen_initial_domain()) {
add_preferred_console("xenboot", 0, NULL);
- add_preferred_console("tty", 0, NULL);
- add_preferred_console("hvc", 0, NULL);
if (pci_xen)
x86_init.pci.arch_init = pci_xen_init;
} else {
@@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_boot_params_init_edd();
}
+
+ add_preferred_console("tty", 0, NULL);
+ add_preferred_console("hvc", 0, NULL);
+
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
pci_probe &= ~PCI_PROBE_BIOS;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index d85076223a69..aae88fec9941 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1300,12 +1300,12 @@ static void xen_flush_tlb(void)
preempt_enable();
}
-static void xen_flush_tlb_single(unsigned long addr)
+static void xen_flush_tlb_one_user(unsigned long addr)
{
struct mmuext_op *op;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb_single(addr);
+ trace_xen_mmu_flush_tlb_one_user(addr);
preempt_disable();
@@ -2370,7 +2370,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.flush_tlb_user = xen_flush_tlb,
.flush_tlb_kernel = xen_flush_tlb,
- .flush_tlb_single = xen_flush_tlb_single,
+ .flush_tlb_one_user = xen_flush_tlb_one_user,
.flush_tlb_others = xen_flush_tlb_others,
.pgd_alloc = xen_pgd_alloc,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 77c959cf81e7..7a43b2ae19f1 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -122,6 +122,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
if (xen_hvm_domain())
native_smp_cpus_done(max_cpus);
+ else
+ calculate_max_logical_packages();
if (xen_have_vcpu_info_placement)
return;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d9f96cc5d743..1d83152c761b 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,12 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/tick.h>
+#include <linux/percpu-defs.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
#include <xen/events.h>
+#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
#include <asm/fixmap.h>
@@ -15,6 +18,8 @@
#include "mmu.h"
#include "pmu.h"
+static DEFINE_PER_CPU(u64, spec_ctrl);
+
void xen_arch_pre_suspend(void)
{
xen_save_time_memory_area();
@@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
static void xen_vcpu_notify_restore(void *data)
{
+ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+ wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
+
/* Boot processor notified via generic timekeeping_resume() */
if (smp_processor_id() == 0)
return;
@@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
static void xen_vcpu_notify_suspend(void *data)
{
+ u64 tmp;
+
tick_suspend_local();
+
+ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+ rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
+ this_cpu_write(spec_ctrl, tmp);
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ }
}
void xen_arch_resume(void)
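The suspend hooks above park IA32_SPEC_CTRL at zero while a PV vCPU is suspended and restore the per-CPU saved value on resume. A user-space model of that save/zero/restore shape, with a plain variable standing in for the MSR:

/*
 * Model of the MSR parking pattern: save the current value, force a
 * safe value while suspended, restore the saved value on resume.
 * msr_spec_ctrl stands in for the real rdmsrl/wrmsrl accesses.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t msr_spec_ctrl = 0x1;	/* pretend IBRS is enabled */
static uint64_t saved_spec_ctrl;

static void notify_suspend(void)
{
	saved_spec_ctrl = msr_spec_ctrl;	/* rdmsrl + this_cpu_write */
	msr_spec_ctrl = 0;			/* wrmsrl(..., 0) */
}

static void notify_restore(void)
{
	msr_spec_ctrl = saved_spec_ctrl;	/* wrmsrl(saved value) */
}

int main(void)
{
	notify_suspend();
	assert(msr_spec_ctrl == 0);
	notify_restore();
	assert(msr_spec_ctrl == 0x1);
	return 0;
}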
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 623720a11143..732631ce250f 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -16,6 +16,7 @@
*/
#include <linux/dma-contiguous.h>
+#include <linux/dma-direct.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/mm.h>
@@ -123,7 +124,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
unsigned long attrs)
{
unsigned long ret;
- unsigned long uncached = 0;
+ unsigned long uncached;
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page *page = NULL;
@@ -144,15 +145,27 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
if (!page)
return NULL;
- ret = (unsigned long)page_address(page);
+ *handle = phys_to_dma(dev, page_to_phys(page));
- /* We currently don't support coherent memory outside KSEG */
+#ifdef CONFIG_MMU
+ if (PageHighMem(page)) {
+ void *p;
+ p = dma_common_contiguous_remap(page, size, VM_MAP,
+ pgprot_noncached(PAGE_KERNEL),
+ __builtin_return_address(0));
+ if (!p) {
+ if (!dma_release_from_contiguous(dev, page, count))
+ __free_pages(page, get_order(size));
+ }
+ return p;
+ }
+#endif
+ ret = (unsigned long)page_address(page);
BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
- *handle = virt_to_bus((void *)ret);
__invalidate_dcache_range(ret, size);
return (void *)uncached;
@@ -161,13 +174,20 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- unsigned long addr = (unsigned long)vaddr +
- XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
- struct page *page = virt_to_page(addr);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR ||
- addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
+ unsigned long addr = (unsigned long)vaddr;
+ struct page *page;
+
+ if (addr >= XCHAL_KSEG_BYPASS_VADDR &&
+ addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) {
+ addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
+ page = virt_to_page(addr);
+ } else {
+#ifdef CONFIG_MMU
+ dma_common_free_remap(vaddr, size, VM_MAP);
+#endif
+ page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle)));
+ }
if (!dma_release_from_contiguous(dev, page, count))
__free_pages(page, get_order(size));
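For low-memory buffers the allocator still derives the uncached alias arithmetically from the cached KSEG address, and the new free path recognizes bypass addresses by range before inverting the offset. A small model of that aliasing, using made-up window constants (the real XCHAL_* values are per-configuration):

/*
 * Model of the KSEG cached<->bypass aliasing: uncached = cached +
 * (BYPASS - CACHED); the free path classifies by range and inverts.
 * The constants below are illustrative, not any particular core's.
 */
#include <assert.h>

#define KSEG_CACHED_VADDR 0xd0000000UL
#define KSEG_BYPASS_VADDR 0xd8000000UL
#define KSEG_SIZE         0x08000000UL

int main(void)
{
	unsigned long cached = KSEG_CACHED_VADDR + 0x1000;
	unsigned long uncached = cached + KSEG_BYPASS_VADDR - KSEG_CACHED_VADDR;

	assert(uncached >= KSEG_BYPASS_VADDR &&
	       uncached - KSEG_BYPASS_VADDR < KSEG_SIZE);
	assert(uncached - (KSEG_BYPASS_VADDR - KSEG_CACHED_VADDR) == cached);
	return 0;
}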
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index d776ec0d7b22..34aead7dcb48 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -79,19 +79,75 @@ void __init zones_init(void)
free_area_init_node(0, zones_size, ARCH_PFN_OFFSET, NULL);
}
+#ifdef CONFIG_HIGHMEM
+static void __init free_area_high(unsigned long pfn, unsigned long end)
+{
+ for (; pfn < end; pfn++)
+ free_highmem_page(pfn_to_page(pfn));
+}
+
+static void __init free_highpages(void)
+{
+ unsigned long max_low = max_low_pfn;
+ struct memblock_region *mem, *res;
+
+ reset_all_zones_managed_pages();
+ /* set highmem page free */
+ for_each_memblock(memory, mem) {
+ unsigned long start = memblock_region_memory_base_pfn(mem);
+ unsigned long end = memblock_region_memory_end_pfn(mem);
+
+ /* Ignore complete lowmem entries */
+ if (end <= max_low)
+ continue;
+
+ if (memblock_is_nomap(mem))
+ continue;
+
+ /* Truncate partial highmem entries */
+ if (start < max_low)
+ start = max_low;
+
+ /* Find and exclude any reserved regions */
+ for_each_memblock(reserved, res) {
+ unsigned long res_start, res_end;
+
+ res_start = memblock_region_reserved_base_pfn(res);
+ res_end = memblock_region_reserved_end_pfn(res);
+
+ if (res_end < start)
+ continue;
+ if (res_start < start)
+ res_start = start;
+ if (res_start > end)
+ res_start = end;
+ if (res_end > end)
+ res_end = end;
+ if (res_start != start)
+ free_area_high(start, res_start);
+ start = res_end;
+ if (start == end)
+ break;
+ }
+
+ /* And now free anything which remains */
+ if (start < end)
+ free_area_high(start, end);
+ }
+}
+#else
+static void __init free_highpages(void)
+{
+}
+#endif
+
/*
* Initialize memory pages.
*/
void __init mem_init(void)
{
-#ifdef CONFIG_HIGHMEM
- unsigned long tmp;
-
- reset_all_zones_managed_pages();
- for (tmp = max_low_pfn; tmp < max_pfn; tmp++)
- free_highmem_page(pfn_to_page(tmp));
-#endif
+ free_highpages();
max_mapnr = max_pfn - ARCH_PFN_OFFSET;
high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4117524ca45b..c2033a232a44 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -812,7 +812,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
struct gendisk *disk;
struct request_queue *q;
struct blkcg_gq *blkg;
- struct module *owner;
unsigned int major, minor;
int key_len, part, ret;
char *body;
@@ -904,9 +903,7 @@ fail_unlock:
spin_unlock_irq(q->queue_lock);
rcu_read_unlock();
fail:
- owner = disk->fops->owner;
- put_disk(disk);
- module_put(owner);
+ put_disk_and_module(disk);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
@@ -931,13 +928,9 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
__releases(ctx->disk->queue->queue_lock) __releases(rcu)
{
- struct module *owner;
-
spin_unlock_irq(ctx->disk->queue->queue_lock);
rcu_read_unlock();
- owner = ctx->disk->fops->owner;
- put_disk(ctx->disk);
- module_put(owner);
+ put_disk_and_module(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
diff --git a/block/blk-core.c b/block/blk-core.c
index 2d1a7bbe0634..6d82c4f7fadd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2434,7 +2434,7 @@ blk_qc_t submit_bio(struct bio *bio)
unsigned int count;
if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
- count = queue_logical_block_size(bio->bi_disk->queue);
+ count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
else
count = bio_sectors(bio);
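queue_logical_block_size() returns bytes while submit_bio's counter is in 512-byte sectors, hence the added >> 9. A quick check for a 4096-byte logical block:

/* 4096-byte logical block accounted as sectors: 4096 >> 9 == 8. */
#include <assert.h>

int main(void)
{
	unsigned int lbs = 4096;	/* queue_logical_block_size() */
	assert((lbs >> 9) == 8);	/* 8 x 512-byte sectors */
	return 0;
}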
diff --git a/block/blk-mq.c b/block/blk-mq.c
index df93102e2149..16e83e6df404 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -712,7 +712,6 @@ static void __blk_mq_requeue_request(struct request *rq)
trace_block_rq_requeue(q, rq);
wbt_requeue(q->rq_wb, &rq->issue_stat);
- blk_mq_sched_requeue_request(rq);
if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
@@ -725,6 +724,9 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
{
__blk_mq_requeue_request(rq);
+ /* this request will be re-inserted into the I/O scheduler queue */
+ blk_mq_sched_requeue_request(rq);
+
BUG_ON(blk_queued_rq(rq));
blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
}
@@ -3164,6 +3166,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
cpu_relax();
}
+ __set_current_state(TASK_RUNNING);
return false;
}
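Moving blk_mq_sched_requeue_request() out of the internal helper means only the path that actually re-inserts the request into the elevator notifies the scheduler. A sketch of that ordering with stand-in functions:

/*
 * Sketch of the requeue split: the internal helper only rolls state
 * back; the scheduler hook runs on the re-insertion path alone.
 */
#include <stdio.h>

static void __requeue_internal(void)	{ puts("state -> IDLE"); }
static void sched_requeue_hook(void)	{ puts("notify elevator"); }

static void blk_requeue(int kick)
{
	__requeue_internal();
	sched_requeue_hook();	/* only here, not in the internal helper */
	if (kick)
		puts("kick requeue list");
}

int main(void) { blk_requeue(1); return 0; }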
diff --git a/block/genhd.c b/block/genhd.c
index 88a53c188cb7..9656f9e9f99e 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -547,7 +547,7 @@ static int exact_lock(dev_t devt, void *data)
{
struct gendisk *p = data;
- if (!get_disk(p))
+ if (!get_disk_and_module(p))
return -1;
return 0;
}
@@ -717,6 +717,11 @@ void del_gendisk(struct gendisk *disk)
blk_integrity_del(disk);
disk_del_events(disk);
+ /*
+ * Block lookups of the disk until all bdevs are unhashed and the
+ * disk is marked as dead (GENHD_FL_UP cleared).
+ */
+ down_write(&disk->lookup_sem);
/* invalidate stuff */
disk_part_iter_init(&piter, disk,
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
@@ -731,6 +736,7 @@ void del_gendisk(struct gendisk *disk)
bdev_unhash_inode(disk_devt(disk));
set_capacity(disk, 0);
disk->flags &= ~GENHD_FL_UP;
+ up_write(&disk->lookup_sem);
if (!(disk->flags & GENHD_FL_HIDDEN))
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -809,16 +815,28 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
spin_lock_bh(&ext_devt_lock);
part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
- if (part && get_disk(part_to_disk(part))) {
+ if (part && get_disk_and_module(part_to_disk(part))) {
*partno = part->partno;
disk = part_to_disk(part);
}
spin_unlock_bh(&ext_devt_lock);
}
- if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) {
- put_disk(disk);
+ if (!disk)
+ return NULL;
+
+ /*
+ * Synchronize with del_gendisk() to not return a disk that is being
+ * destroyed.
+ */
+ down_read(&disk->lookup_sem);
+ if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
+ !(disk->flags & GENHD_FL_UP))) {
+ up_read(&disk->lookup_sem);
+ put_disk_and_module(disk);
disk = NULL;
+ } else {
+ up_read(&disk->lookup_sem);
}
return disk;
}
@@ -1418,6 +1436,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
kfree(disk);
return NULL;
}
+ init_rwsem(&disk->lookup_sem);
disk->node_id = node_id;
if (disk_expand_part_tbl(disk, 0)) {
free_part_stats(&disk->part0);
@@ -1453,7 +1472,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
}
EXPORT_SYMBOL(__alloc_disk_node);
-struct kobject *get_disk(struct gendisk *disk)
+struct kobject *get_disk_and_module(struct gendisk *disk)
{
struct module *owner;
struct kobject *kobj;
@@ -1471,17 +1490,30 @@ struct kobject *get_disk(struct gendisk *disk)
return kobj;
}
-
-EXPORT_SYMBOL(get_disk);
+EXPORT_SYMBOL(get_disk_and_module);
void put_disk(struct gendisk *disk)
{
if (disk)
kobject_put(&disk_to_dev(disk)->kobj);
}
-
EXPORT_SYMBOL(put_disk);
+/*
+ * This is a counterpart of get_disk_and_module() and thus also of
+ * get_gendisk().
+ */
+void put_disk_and_module(struct gendisk *disk)
+{
+ if (disk) {
+ struct module *owner = disk->fops->owner;
+
+ put_disk(disk);
+ module_put(owner);
+ }
+}
+EXPORT_SYMBOL(put_disk_and_module);
+
static void set_disk_ro_uevent(struct gendisk *gd, int ro)
{
char event[] = "DISK_RO=1";
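The rename makes the pairing explicit: lookups via get_gendisk()/get_disk_and_module() take both a kobject reference and a module reference, and put_disk_and_module() is the matching release. A user-space model of the paired refcounts:

/*
 * Model of the paired references: a disk lookup takes an object
 * reference and a module reference, and the single release helper
 * drops both. Counters stand in for kobject and module refcounts.
 */
#include <assert.h>

static int kobj_refs, module_refs;

static void get_disk_and_module_model(void)
{
	kobj_refs++;		/* kobject_get() */
	module_refs++;		/* try_module_get() */
}

static void put_disk_and_module_model(void)
{
	kobj_refs--;		/* put_disk() -> kobject_put() */
	module_refs--;		/* module_put() */
}

int main(void)
{
	get_disk_and_module_model();
	put_disk_and_module_model();
	assert(kobj_refs == 0 && module_refs == 0);
	return 0;
}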
diff --git a/block/ioctl.c b/block/ioctl.c
index 1668506d8ed8..3884d810efd2 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -225,7 +225,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
if (start + len > i_size_read(bdev->bd_inode))
return -EINVAL;
- truncate_inode_pages_range(mapping, start, start + len);
+ truncate_inode_pages_range(mapping, start, start + len - 1);
return blkdev_issue_discard(bdev, start >> 9, len >> 9,
GFP_KERNEL, flags);
}
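truncate_inode_pages_range() takes an inclusive end offset, so discarding len bytes from start must pass start + len - 1; the old code truncated one byte past the range. A quick check:

/*
 * Inclusive-end arithmetic: discarding 512 bytes at offset 0 must
 * truncate offsets 0..511, i.e. end == start + len - 1 == 511.
 */
#include <assert.h>

int main(void)
{
	unsigned long start = 0, len = 512;
	assert(start + len - 1 == 511);
	return 0;
}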
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index f95c60774ce8..0d6d25e32e1f 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -833,6 +833,7 @@ static struct elevator_type kyber_sched = {
.limit_depth = kyber_limit_depth,
.prepare_request = kyber_prepare_request,
.finish_request = kyber_finish_request,
+ .requeue_request = kyber_finish_request,
.completed_request = kyber_completed_request,
.dispatch_request = kyber_dispatch_request,
.has_work = kyber_has_work,
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index c56f211c8440..8ec0ba9f5386 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -536,12 +536,21 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
}
/*
+ * Nothing to do here. This is defined only to ensure that the .finish_request
+ * method is called upon request completion.
+ */
+static void dd_prepare_request(struct request *rq, struct bio *bio)
+{
+}
+
+/*
* For zoned block devices, write unlock the target zone of
* completed write requests. Do this while holding the zone lock
* spinlock so that the zone is never unlocked while deadline_fifo_request()
- * while deadline_next_request() are executing.
+ * or deadline_next_request() are executing. This function is called for
+ * all requests, whether or not these requests complete successfully.
*/
-static void dd_completed_request(struct request *rq)
+static void dd_finish_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -756,7 +765,8 @@ static struct elevator_type mq_deadline = {
.ops.mq = {
.insert_requests = dd_insert_requests,
.dispatch_request = dd_dispatch_request,
- .completed_request = dd_completed_request,
+ .prepare_request = dd_prepare_request,
+ .finish_request = dd_finish_request,
.next_request = elv_rb_latter_request,
.former_request = elv_rb_former_request,
.bio_merge = dd_bio_merge,
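Both scheduler hunks lean on the same contract: every request whose .prepare_request ran gets exactly one .finish_request, whether it completes or is requeued, which is why kyber wires .requeue_request to its finish handler and mq-deadline adds an empty dd_prepare_request(). A sketch of that pairing with stand-in types:

/*
 * Sketch of the elevator pairing contract: prepare and finish are
 * symmetric, and finish also runs on the requeue path. The struct
 * and handlers here are stand-ins, not the kernel's elevator_type.
 */
#include <assert.h>

struct sched_ops {
	void (*prepare)(int *zone_locked);
	void (*finish)(int *zone_locked);
};

static void dd_prepare(int *zone_locked) { *zone_locked = 1; }
static void dd_finish(int *zone_locked)  { *zone_locked = 0; }

static const struct sched_ops ops = {
	.prepare = dd_prepare,
	.finish  = dd_finish,	/* also used on the requeue path */
};

int main(void)
{
	int zone_locked = 0;

	ops.prepare(&zone_locked);
	ops.finish(&zone_locked);	/* completion *or* requeue */
	assert(zone_locked == 0);	/* zone never left locked */
	return 0;
}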
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 91622db9aedf..08dabcd8b6ae 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -51,6 +51,12 @@ const char *bdevname(struct block_device *bdev, char *buf)
EXPORT_SYMBOL(bdevname);
+const char *bio_devname(struct bio *bio, char *buf)
+{
+ return disk_name(bio->bi_disk, bio->bi_partno, buf);
+}
+EXPORT_SYMBOL(bio_devname);
+
/*
* There's very little reason to use this, you should really
* have a struct block_device just about everywhere and use
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 9ed51d0c6b1d..e4929eec547f 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -490,7 +490,7 @@ static int opal_discovery0_end(struct opal_dev *dev)
if (!found_com_id) {
pr_debug("Could not find OPAL comid for device. Returning early\n");
- return -EOPNOTSUPP;;
+ return -EOPNOTSUPP;
}
dev->comid = comid;
diff --git a/certs/blacklist_nohashes.c b/certs/blacklist_nohashes.c
index 73fd99098ad7..753b703ef0ef 100644
--- a/certs/blacklist_nohashes.c
+++ b/certs/blacklist_nohashes.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "blacklist.h"
-const char __initdata *const blacklist_hashes[] = {
+const char __initconst *const blacklist_hashes[] = {
NULL
};
diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c
index 1f4e25f10049..598906b1e28d 100644
--- a/crypto/asymmetric_keys/pkcs7_trust.c
+++ b/crypto/asymmetric_keys/pkcs7_trust.c
@@ -106,6 +106,7 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7,
pr_devel("sinfo %u: Direct signer is key %x\n",
sinfo->index, key_serial(key));
x509 = NULL;
+ sig = sinfo->sig;
goto matched;
}
if (PTR_ERR(key) != -ENOKEY)
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 39e6de0c2761..97c77f66b20d 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -270,7 +270,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
sinfo->index);
return 0;
}
- ret = public_key_verify_signature(p->pub, p->sig);
+ ret = public_key_verify_signature(p->pub, x509->sig);
if (ret < 0)
return ret;
x509->signer = p;
@@ -366,8 +366,7 @@ static int pkcs7_verify_one(struct pkcs7_message *pkcs7,
*
* (*) -EBADMSG if some part of the message was invalid, or:
*
- * (*) 0 if no signature chains were found to be blacklisted or to contain
- * unsupported crypto, or:
+ * (*) 0 if a signature chain passed verification, or:
*
* (*) -EKEYREJECTED if a blacklisted key was encountered, or:
*
@@ -423,8 +422,11 @@ int pkcs7_verify(struct pkcs7_message *pkcs7,
for (sinfo = pkcs7->signed_infos; sinfo; sinfo = sinfo->next) {
ret = pkcs7_verify_one(pkcs7, sinfo);
- if (sinfo->blacklisted && actual_ret == -ENOPKG)
- actual_ret = -EKEYREJECTED;
+ if (sinfo->blacklisted) {
+ if (actual_ret == -ENOPKG)
+ actual_ret = -EKEYREJECTED;
+ continue;
+ }
if (ret < 0) {
if (ret == -ENOPKG) {
sinfo->unsupported_crypto = true;
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index de996586762a..e929fe1e4106 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -79,9 +79,11 @@ int public_key_verify_signature(const struct public_key *pkey,
BUG_ON(!pkey);
BUG_ON(!sig);
- BUG_ON(!sig->digest);
BUG_ON(!sig->s);
+ if (!sig->digest)
+ return -ENOPKG;
+
alg_name = sig->pkey_algo;
if (strcmp(sig->pkey_algo, "rsa") == 0) {
/* The data wangled by the RSA algorithm is typically padded
diff --git a/crypto/asymmetric_keys/restrict.c b/crypto/asymmetric_keys/restrict.c
index 86fb68508952..7c93c7728454 100644
--- a/crypto/asymmetric_keys/restrict.c
+++ b/crypto/asymmetric_keys/restrict.c
@@ -67,8 +67,9 @@ __setup("ca_keys=", ca_keys_setup);
*
* Returns 0 if the new certificate was accepted, -ENOKEY if we couldn't find a
* matching parent certificate in the trusted list, -EKEYREJECTED if the
- * signature check fails or the key is blacklisted and some other error if
- * there is a matching certificate but the signature check cannot be performed.
+ * signature check fails or the key is blacklisted, -ENOPKG if the signature
+ * uses unsupported crypto, or some other error if there is a matching
+ * certificate but the signature check cannot be performed.
*/
int restrict_link_by_signature(struct key *dest_keyring,
const struct key_type *type,
@@ -88,6 +89,8 @@ int restrict_link_by_signature(struct key *dest_keyring,
return -EOPNOTSUPP;
sig = payload->data[asym_auth];
+ if (!sig)
+ return -ENOPKG;
if (!sig->auth_ids[0] && !sig->auth_ids[1])
return -ENOKEY;
@@ -139,6 +142,8 @@ static int key_or_keyring_common(struct key *dest_keyring,
return -EOPNOTSUPP;
sig = payload->data[asym_auth];
+ if (!sig)
+ return -ENOPKG;
if (!sig->auth_ids[0] && !sig->auth_ids[1])
return -ENOKEY;
@@ -222,9 +227,9 @@ static int key_or_keyring_common(struct key *dest_keyring,
*
* Returns 0 if the new certificate was accepted, -ENOKEY if we
* couldn't find a matching parent certificate in the trusted list,
- * -EKEYREJECTED if the signature check fails, and some other error if
- * there is a matching certificate but the signature check cannot be
- * performed.
+ * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses
+ * unsupported crypto, or some other error if there is a matching certificate
+ * but the signature check cannot be performed.
*/
int restrict_link_by_key_or_keyring(struct key *dest_keyring,
const struct key_type *type,
@@ -249,9 +254,9 @@ int restrict_link_by_key_or_keyring(struct key *dest_keyring,
*
* Returns 0 if the new certificate was accepted, -ENOKEY if we
* couldn't find a matching parent certificate in the trusted list,
- * -EKEYREJECTED if the signature check fails, and some other error if
- * there is a matching certificate but the signature check cannot be
- * performed.
+ * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses
+ * unsupported crypto, or some other error if there is a matching certificate
+ * but the signature check cannot be performed.
*/
int restrict_link_by_key_or_keyring_chain(struct key *dest_keyring,
const struct key_type *type,
diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index a965b9d80559..ded148783303 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -20,6 +20,20 @@
#include <crypto/sha3.h>
#include <asm/unaligned.h>
+/*
+ * On some 32-bit architectures (mn10300 and h8300), GCC ends up using
+ * over 1 KB of stack if we inline the round calculation into the loop
+ * in keccakf(). On the other hand, on 64-bit architectures with plenty
+ * of [64-bit wide] general purpose registers, not inlining it severely
+ * hurts performance. So let's use 64-bitness as a heuristic to decide
+ * whether to inline or not.
+ */
+#ifdef CONFIG_64BIT
+#define SHA3_INLINE inline
+#else
+#define SHA3_INLINE noinline
+#endif
+
#define KECCAK_ROUNDS 24
static const u64 keccakf_rndc[24] = {
@@ -35,111 +49,115 @@ static const u64 keccakf_rndc[24] = {
/* update the state with given number of rounds */
-static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25])
+static SHA3_INLINE void keccakf_round(u64 st[25])
{
u64 t[5], tt, bc[5];
- int round;
- for (round = 0; round < KECCAK_ROUNDS; round++) {
+ /* Theta */
+ bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
+ bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
+ bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
+ bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
+ bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
+
+ t[0] = bc[4] ^ rol64(bc[1], 1);
+ t[1] = bc[0] ^ rol64(bc[2], 1);
+ t[2] = bc[1] ^ rol64(bc[3], 1);
+ t[3] = bc[2] ^ rol64(bc[4], 1);
+ t[4] = bc[3] ^ rol64(bc[0], 1);
+
+ st[0] ^= t[0];
+
+ /* Rho Pi */
+ tt = st[1];
+ st[ 1] = rol64(st[ 6] ^ t[1], 44);
+ st[ 6] = rol64(st[ 9] ^ t[4], 20);
+ st[ 9] = rol64(st[22] ^ t[2], 61);
+ st[22] = rol64(st[14] ^ t[4], 39);
+ st[14] = rol64(st[20] ^ t[0], 18);
+ st[20] = rol64(st[ 2] ^ t[2], 62);
+ st[ 2] = rol64(st[12] ^ t[2], 43);
+ st[12] = rol64(st[13] ^ t[3], 25);
+ st[13] = rol64(st[19] ^ t[4], 8);
+ st[19] = rol64(st[23] ^ t[3], 56);
+ st[23] = rol64(st[15] ^ t[0], 41);
+ st[15] = rol64(st[ 4] ^ t[4], 27);
+ st[ 4] = rol64(st[24] ^ t[4], 14);
+ st[24] = rol64(st[21] ^ t[1], 2);
+ st[21] = rol64(st[ 8] ^ t[3], 55);
+ st[ 8] = rol64(st[16] ^ t[1], 45);
+ st[16] = rol64(st[ 5] ^ t[0], 36);
+ st[ 5] = rol64(st[ 3] ^ t[3], 28);
+ st[ 3] = rol64(st[18] ^ t[3], 21);
+ st[18] = rol64(st[17] ^ t[2], 15);
+ st[17] = rol64(st[11] ^ t[1], 10);
+ st[11] = rol64(st[ 7] ^ t[2], 6);
+ st[ 7] = rol64(st[10] ^ t[0], 3);
+ st[10] = rol64( tt ^ t[1], 1);
+
+ /* Chi */
+ bc[ 0] = ~st[ 1] & st[ 2];
+ bc[ 1] = ~st[ 2] & st[ 3];
+ bc[ 2] = ~st[ 3] & st[ 4];
+ bc[ 3] = ~st[ 4] & st[ 0];
+ bc[ 4] = ~st[ 0] & st[ 1];
+ st[ 0] ^= bc[ 0];
+ st[ 1] ^= bc[ 1];
+ st[ 2] ^= bc[ 2];
+ st[ 3] ^= bc[ 3];
+ st[ 4] ^= bc[ 4];
+
+ bc[ 0] = ~st[ 6] & st[ 7];
+ bc[ 1] = ~st[ 7] & st[ 8];
+ bc[ 2] = ~st[ 8] & st[ 9];
+ bc[ 3] = ~st[ 9] & st[ 5];
+ bc[ 4] = ~st[ 5] & st[ 6];
+ st[ 5] ^= bc[ 0];
+ st[ 6] ^= bc[ 1];
+ st[ 7] ^= bc[ 2];
+ st[ 8] ^= bc[ 3];
+ st[ 9] ^= bc[ 4];
+
+ bc[ 0] = ~st[11] & st[12];
+ bc[ 1] = ~st[12] & st[13];
+ bc[ 2] = ~st[13] & st[14];
+ bc[ 3] = ~st[14] & st[10];
+ bc[ 4] = ~st[10] & st[11];
+ st[10] ^= bc[ 0];
+ st[11] ^= bc[ 1];
+ st[12] ^= bc[ 2];
+ st[13] ^= bc[ 3];
+ st[14] ^= bc[ 4];
+
+ bc[ 0] = ~st[16] & st[17];
+ bc[ 1] = ~st[17] & st[18];
+ bc[ 2] = ~st[18] & st[19];
+ bc[ 3] = ~st[19] & st[15];
+ bc[ 4] = ~st[15] & st[16];
+ st[15] ^= bc[ 0];
+ st[16] ^= bc[ 1];
+ st[17] ^= bc[ 2];
+ st[18] ^= bc[ 3];
+ st[19] ^= bc[ 4];
+
+ bc[ 0] = ~st[21] & st[22];
+ bc[ 1] = ~st[22] & st[23];
+ bc[ 2] = ~st[23] & st[24];
+ bc[ 3] = ~st[24] & st[20];
+ bc[ 4] = ~st[20] & st[21];
+ st[20] ^= bc[ 0];
+ st[21] ^= bc[ 1];
+ st[22] ^= bc[ 2];
+ st[23] ^= bc[ 3];
+ st[24] ^= bc[ 4];
+}
- /* Theta */
- bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
- bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
- bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
- bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
- bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
-
- t[0] = bc[4] ^ rol64(bc[1], 1);
- t[1] = bc[0] ^ rol64(bc[2], 1);
- t[2] = bc[1] ^ rol64(bc[3], 1);
- t[3] = bc[2] ^ rol64(bc[4], 1);
- t[4] = bc[3] ^ rol64(bc[0], 1);
-
- st[0] ^= t[0];
-
- /* Rho Pi */
- tt = st[1];
- st[ 1] = rol64(st[ 6] ^ t[1], 44);
- st[ 6] = rol64(st[ 9] ^ t[4], 20);
- st[ 9] = rol64(st[22] ^ t[2], 61);
- st[22] = rol64(st[14] ^ t[4], 39);
- st[14] = rol64(st[20] ^ t[0], 18);
- st[20] = rol64(st[ 2] ^ t[2], 62);
- st[ 2] = rol64(st[12] ^ t[2], 43);
- st[12] = rol64(st[13] ^ t[3], 25);
- st[13] = rol64(st[19] ^ t[4], 8);
- st[19] = rol64(st[23] ^ t[3], 56);
- st[23] = rol64(st[15] ^ t[0], 41);
- st[15] = rol64(st[ 4] ^ t[4], 27);
- st[ 4] = rol64(st[24] ^ t[4], 14);
- st[24] = rol64(st[21] ^ t[1], 2);
- st[21] = rol64(st[ 8] ^ t[3], 55);
- st[ 8] = rol64(st[16] ^ t[1], 45);
- st[16] = rol64(st[ 5] ^ t[0], 36);
- st[ 5] = rol64(st[ 3] ^ t[3], 28);
- st[ 3] = rol64(st[18] ^ t[3], 21);
- st[18] = rol64(st[17] ^ t[2], 15);
- st[17] = rol64(st[11] ^ t[1], 10);
- st[11] = rol64(st[ 7] ^ t[2], 6);
- st[ 7] = rol64(st[10] ^ t[0], 3);
- st[10] = rol64( tt ^ t[1], 1);
-
- /* Chi */
- bc[ 0] = ~st[ 1] & st[ 2];
- bc[ 1] = ~st[ 2] & st[ 3];
- bc[ 2] = ~st[ 3] & st[ 4];
- bc[ 3] = ~st[ 4] & st[ 0];
- bc[ 4] = ~st[ 0] & st[ 1];
- st[ 0] ^= bc[ 0];
- st[ 1] ^= bc[ 1];
- st[ 2] ^= bc[ 2];
- st[ 3] ^= bc[ 3];
- st[ 4] ^= bc[ 4];
-
- bc[ 0] = ~st[ 6] & st[ 7];
- bc[ 1] = ~st[ 7] & st[ 8];
- bc[ 2] = ~st[ 8] & st[ 9];
- bc[ 3] = ~st[ 9] & st[ 5];
- bc[ 4] = ~st[ 5] & st[ 6];
- st[ 5] ^= bc[ 0];
- st[ 6] ^= bc[ 1];
- st[ 7] ^= bc[ 2];
- st[ 8] ^= bc[ 3];
- st[ 9] ^= bc[ 4];
-
- bc[ 0] = ~st[11] & st[12];
- bc[ 1] = ~st[12] & st[13];
- bc[ 2] = ~st[13] & st[14];
- bc[ 3] = ~st[14] & st[10];
- bc[ 4] = ~st[10] & st[11];
- st[10] ^= bc[ 0];
- st[11] ^= bc[ 1];
- st[12] ^= bc[ 2];
- st[13] ^= bc[ 3];
- st[14] ^= bc[ 4];
-
- bc[ 0] = ~st[16] & st[17];
- bc[ 1] = ~st[17] & st[18];
- bc[ 2] = ~st[18] & st[19];
- bc[ 3] = ~st[19] & st[15];
- bc[ 4] = ~st[15] & st[16];
- st[15] ^= bc[ 0];
- st[16] ^= bc[ 1];
- st[17] ^= bc[ 2];
- st[18] ^= bc[ 3];
- st[19] ^= bc[ 4];
-
- bc[ 0] = ~st[21] & st[22];
- bc[ 1] = ~st[22] & st[23];
- bc[ 2] = ~st[23] & st[24];
- bc[ 3] = ~st[24] & st[20];
- bc[ 4] = ~st[20] & st[21];
- st[20] ^= bc[ 0];
- st[21] ^= bc[ 1];
- st[22] ^= bc[ 2];
- st[23] ^= bc[ 3];
- st[24] ^= bc[ 4];
+static void __optimize("O3") keccakf(u64 st[25])
+{
+ int round;
+ for (round = 0; round < KECCAK_ROUNDS; round++) {
+ keccakf_round(st);
/* Iota */
st[0] ^= keccakf_rndc[round];
}
diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
index 11b113f8e367..ebb626ffb5fa 100644
--- a/drivers/acpi/acpi_watchdog.c
+++ b/drivers/acpi/acpi_watchdog.c
@@ -74,10 +74,10 @@ void __init acpi_watchdog_init(void)
res.start = gas->address;
if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
res.flags = IORESOURCE_MEM;
- res.end = res.start + ALIGN(gas->access_width, 4);
+ res.end = res.start + ALIGN(gas->access_width, 4) - 1;
} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
res.flags = IORESOURCE_IO;
- res.end = res.start + gas->access_width;
+ res.end = res.start + gas->access_width - 1;
} else {
pr_warn("Unsupported address space: %u\n",
gas->space_id);
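struct resource bounds are inclusive, so an N-byte window starting at res.start ends at res.start + N - 1; the old code overshot by one byte in both branches. A quick check for a 4-byte register window:

/*
 * Inclusive resource bounds: a 4-byte window at 0x1000 spans
 * 0x1000..0x1003, so end = start + width - 1, and the size can be
 * recovered as end - start + 1.
 */
#include <assert.h>

int main(void)
{
	unsigned long start = 0x1000, width = 4;
	unsigned long end = start + width - 1;

	assert(end == 0x1003);
	assert(end - start + 1 == width);
	return 0;
}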
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 7128488a3a72..f2eb6c37ea0a 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -70,7 +70,6 @@ static async_cookie_t async_cookie;
static bool battery_driver_registered;
static int battery_bix_broken_package;
static int battery_notification_delay_ms;
-static int battery_full_discharging;
static unsigned int cache_time = 1000;
module_param(cache_time, uint, 0644);
MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
@@ -215,12 +214,9 @@ static int acpi_battery_get_property(struct power_supply *psy,
return -ENODEV;
switch (psp) {
case POWER_SUPPLY_PROP_STATUS:
- if (battery->state & ACPI_BATTERY_STATE_DISCHARGING) {
- if (battery_full_discharging && battery->rate_now == 0)
- val->intval = POWER_SUPPLY_STATUS_FULL;
- else
- val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
- } else if (battery->state & ACPI_BATTERY_STATE_CHARGING)
+ if (battery->state & ACPI_BATTERY_STATE_DISCHARGING)
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+ else if (battery->state & ACPI_BATTERY_STATE_CHARGING)
val->intval = POWER_SUPPLY_STATUS_CHARGING;
else if (acpi_battery_is_charged(battery))
val->intval = POWER_SUPPLY_STATUS_FULL;
@@ -1170,12 +1166,6 @@ battery_notification_delay_quirk(const struct dmi_system_id *d)
return 0;
}
-static int __init battery_full_discharging_quirk(const struct dmi_system_id *d)
-{
- battery_full_discharging = 1;
- return 0;
-}
-
static const struct dmi_system_id bat_dmi_table[] __initconst = {
{
.callback = battery_bix_broken_package_quirk,
@@ -1193,38 +1183,6 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "Aspire V5-573G"),
},
},
- {
- .callback = battery_full_discharging_quirk,
- .ident = "ASUS GL502VSK",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "GL502VSK"),
- },
- },
- {
- .callback = battery_full_discharging_quirk,
- .ident = "ASUS UX305LA",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "UX305LA"),
- },
- },
- {
- .callback = battery_full_discharging_quirk,
- .ident = "ASUS UX360UA",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "UX360UA"),
- },
- },
- {
- .callback = battery_full_discharging_quirk,
- .ident = "ASUS UX410UAK",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "UX410UAK"),
- },
- },
{},
};
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 676c9788e1c8..0dad0bd9327b 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -660,13 +660,15 @@ struct acpi_device *acpi_companion_match(const struct device *dev)
* acpi_of_match_device - Match device object using the "compatible" property.
* @adev: ACPI device object to match.
* @of_match_table: List of device IDs to match against.
+ * @of_id: OF ID if matched
*
* If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of
* identifiers and a _DSD object with the "compatible" property, use that
* property to match against the given list of identifiers.
*/
static bool acpi_of_match_device(struct acpi_device *adev,
- const struct of_device_id *of_match_table)
+ const struct of_device_id *of_match_table,
+ const struct of_device_id **of_id)
{
const union acpi_object *of_compatible, *obj;
int i, nval;
@@ -690,8 +692,11 @@ static bool acpi_of_match_device(struct acpi_device *adev,
const struct of_device_id *id;
for (id = of_match_table; id->compatible[0]; id++)
- if (!strcasecmp(obj->string.pointer, id->compatible))
+ if (!strcasecmp(obj->string.pointer, id->compatible)) {
+ if (of_id)
+ *of_id = id;
return true;
+ }
}
return false;
@@ -762,10 +767,11 @@ static bool __acpi_match_device_cls(const struct acpi_device_id *id,
return true;
}
-static const struct acpi_device_id *__acpi_match_device(
- struct acpi_device *device,
- const struct acpi_device_id *ids,
- const struct of_device_id *of_ids)
+static bool __acpi_match_device(struct acpi_device *device,
+ const struct acpi_device_id *acpi_ids,
+ const struct of_device_id *of_ids,
+ const struct acpi_device_id **acpi_id,
+ const struct of_device_id **of_id)
{
const struct acpi_device_id *id;
struct acpi_hardware_id *hwid;
@@ -775,30 +781,32 @@ static const struct acpi_device_id *__acpi_match_device(
* driver for it.
*/
if (!device || !device->status.present)
- return NULL;
+ return false;
list_for_each_entry(hwid, &device->pnp.ids, list) {
/* First, check the ACPI/PNP IDs provided by the caller. */
- for (id = ids; id->id[0] || id->cls; id++) {
- if (id->id[0] && !strcmp((char *) id->id, hwid->id))
- return id;
- else if (id->cls && __acpi_match_device_cls(id, hwid))
- return id;
+ if (acpi_ids) {
+ for (id = acpi_ids; id->id[0] || id->cls; id++) {
+ if (id->id[0] && !strcmp((char *)id->id, hwid->id))
+ goto out_acpi_match;
+ if (id->cls && __acpi_match_device_cls(id, hwid))
+ goto out_acpi_match;
+ }
}
/*
* Next, check ACPI_DT_NAMESPACE_HID and try to match the
* "compatible" property if found.
- *
- * The id returned by the below is not valid, but the only
- * caller passing non-NULL of_ids here is only interested in
- * whether or not the return value is NULL.
*/
- if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id)
- && acpi_of_match_device(device, of_ids))
- return id;
+ if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id))
+ return acpi_of_match_device(device, of_ids, of_id);
}
- return NULL;
+ return false;
+
+out_acpi_match:
+ if (acpi_id)
+ *acpi_id = id;
+ return true;
}
/**
@@ -815,32 +823,29 @@ static const struct acpi_device_id *__acpi_match_device(
const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
const struct device *dev)
{
- return __acpi_match_device(acpi_companion_match(dev), ids, NULL);
+ const struct acpi_device_id *id = NULL;
+
+ __acpi_match_device(acpi_companion_match(dev), ids, NULL, &id, NULL);
+ return id;
}
EXPORT_SYMBOL_GPL(acpi_match_device);
-void *acpi_get_match_data(const struct device *dev)
+const void *acpi_device_get_match_data(const struct device *dev)
{
const struct acpi_device_id *match;
- if (!dev->driver)
- return NULL;
-
- if (!dev->driver->acpi_match_table)
- return NULL;
-
match = acpi_match_device(dev->driver->acpi_match_table, dev);
if (!match)
return NULL;
- return (void *)match->driver_data;
+ return (const void *)match->driver_data;
}
-EXPORT_SYMBOL_GPL(acpi_get_match_data);
+EXPORT_SYMBOL_GPL(acpi_device_get_match_data);
int acpi_match_device_ids(struct acpi_device *device,
const struct acpi_device_id *ids)
{
- return __acpi_match_device(device, ids, NULL) ? 0 : -ENOENT;
+ return __acpi_match_device(device, ids, NULL, NULL, NULL) ? 0 : -ENOENT;
}
EXPORT_SYMBOL(acpi_match_device_ids);
@@ -849,10 +854,12 @@ bool acpi_driver_match_device(struct device *dev,
{
if (!drv->acpi_match_table)
return acpi_of_match_device(ACPI_COMPANION(dev),
- drv->of_match_table);
+ drv->of_match_table,
+ NULL);
- return !!__acpi_match_device(acpi_companion_match(dev),
- drv->acpi_match_table, drv->of_match_table);
+ return __acpi_match_device(acpi_companion_match(dev),
+ drv->acpi_match_table, drv->of_match_table,
+ NULL, NULL);
}
EXPORT_SYMBOL_GPL(acpi_driver_match_device);
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index d9f38c645e4a..30a572956557 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1927,6 +1927,9 @@ static int acpi_ec_suspend_noirq(struct device *dev)
ec->reference_count >= 1)
acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_DISABLE);
+ if (acpi_sleep_no_ec_events())
+ acpi_ec_enter_noirq(ec);
+
return 0;
}
@@ -1934,6 +1937,9 @@ static int acpi_ec_resume_noirq(struct device *dev)
{
struct acpi_ec *ec = acpi_driver_data(to_acpi_device(dev));
+ if (acpi_sleep_no_ec_events())
+ acpi_ec_leave_noirq(ec);
+
if (ec_no_wakeup && test_bit(EC_FLAGS_STARTED, &ec->flags) &&
ec->reference_count >= 1)
acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_ENABLE);
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index bbe48ad20886..eb09ef55c38a 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2675,10 +2675,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
else
ndr_desc->numa_node = NUMA_NO_NODE;
- if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH)
+ /*
+ * Persistence domain bits are hierarchical: if
+ * ACPI_NFIT_CAPABILITY_CACHE_FLUSH is set, then
+ * ACPI_NFIT_CAPABILITY_MEM_FLUSH is implied.
+ */
+ if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH)
set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags);
-
- if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH)
+ else if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH)
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags);
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 8ccaae3550d2..85167603b9c9 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -103,25 +103,27 @@ int acpi_map_pxm_to_node(int pxm)
*/
int acpi_map_pxm_to_online_node(int pxm)
{
- int node, n, dist, min_dist;
+ int node, min_node;
node = acpi_map_pxm_to_node(pxm);
if (node == NUMA_NO_NODE)
node = 0;
+ min_node = node;
if (!node_online(node)) {
- min_dist = INT_MAX;
+ int min_dist = INT_MAX, dist, n;
+
for_each_online_node(n) {
dist = node_distance(node, n);
if (dist < min_dist) {
min_dist = dist;
- node = n;
+ min_node = n;
}
}
}
- return node;
+ return min_node;
}
EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
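The fix keeps the original node for every node_distance() call and records the best candidate in min_node; the old code overwrote node inside the loop, so later distances were measured from the wrong origin. A user-space model with a toy distance table:

/*
 * Model of the nearest-online-node search: distances are measured
 * from the *original* node, and the winner is kept in min_node.
 * The distance table and online map are toy data.
 */
#include <assert.h>
#include <limits.h>

static int dist[3][3] = {		/* toy node_distance() table */
	{ 10, 20, 30 },
	{ 20, 10, 20 },
	{ 30, 20, 10 },
};
static int online[3] = { 0, 1, 1 };	/* node 0 is offline */

int main(void)
{
	int node = 0, min_node = node, min_dist = INT_MAX, n;

	if (!online[node]) {
		for (n = 0; n < 3; n++) {
			if (!online[n])
				continue;
			if (dist[node][n] < min_dist) {
				min_dist = dist[node][n];
				min_node = n;	/* not node = n */
			}
		}
	}
	assert(min_node == 1);	/* nearest online node to node 0 */
	return 0;
}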
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 466d1503aba0..5815356ea6ad 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1271,11 +1271,11 @@ static int acpi_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
return 0;
}
-static void *
+static const void *
acpi_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
const struct device *dev)
{
- return acpi_get_match_data(dev);
+ return acpi_device_get_match_data(dev);
}
#define DECLARE_ACPI_FWNODE_OPS(ops) \
diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 89e97d21a89c..9d52743080a4 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -115,6 +115,7 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console)
table->serial_port.access_width))) {
default:
pr_err("Unexpected SPCR Access Width. Defaulting to byte size\n");
+ /* fall through */
case 8:
iotype = "mmio";
break;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 15e3d3c2260d..764b63a5aade 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1991,8 +1991,14 @@ static void binder_send_failed_reply(struct binder_transaction *t,
&target_thread->reply_error.work);
wake_up_interruptible(&target_thread->wait);
} else {
- WARN(1, "Unexpected reply error: %u\n",
- target_thread->reply_error.cmd);
+ /*
+ * Cannot get here for normal operation, but
+ * we can if multiple synchronous transactions
+ * are sent without blocking for responses.
+ * Just ignore the 2nd error in this case.
+ */
+ pr_warn("Unexpected reply error: %u\n",
+ target_thread->reply_error.cmd);
}
binder_inner_proc_unlock(target_thread->proc);
binder_thread_dec_tmpref(target_thread);
@@ -2193,7 +2199,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
int debug_id = buffer->debug_id;
binder_debug(BINDER_DEBUG_TRANSACTION,
- "%d buffer release %d, size %zd-%zd, failed at %p\n",
+ "%d buffer release %d, size %zd-%zd, failed at %pK\n",
proc->pid, buffer->debug_id,
buffer->data_size, buffer->offsets_size, failed_at);
@@ -3705,7 +3711,7 @@ static int binder_thread_write(struct binder_proc *proc,
}
}
binder_debug(BINDER_DEBUG_DEAD_BINDER,
- "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n",
+ "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n",
proc->pid, thread->pid, (u64)cookie,
death);
if (death == NULL) {
@@ -4376,6 +4382,15 @@ static int binder_thread_release(struct binder_proc *proc,
binder_inner_proc_unlock(thread->proc);
+ /*
+ * This is needed to avoid races between wake_up_poll() above and
+ * ep_remove_waitqueue() called for other reasons (e.g. the epoll file
+ * descriptor being closed); ep_remove_waitqueue() holds an RCU read
+ * lock, so we can be sure it's done after calling synchronize_rcu().
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL)
+ synchronize_rcu();
+
if (send_reply)
binder_send_failed_reply(send_reply, BR_DEAD_REPLY);
binder_release_work(proc, &thread->todo);
@@ -4391,6 +4406,8 @@ static __poll_t binder_poll(struct file *filp,
bool wait_for_proc_work;
thread = binder_get_thread(proc);
+ if (!thread)
+ return POLLERR;
binder_inner_proc_lock(thread->proc);
thread->looper |= BINDER_LOOPER_STATE_POLL;
@@ -5034,7 +5051,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
spin_lock(&t->lock);
to_proc = t->to_proc;
seq_printf(m,
- "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d",
+ "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d",
prefix, t->debug_id, t,
t->from ? t->from->proc->pid : 0,
t->from ? t->from->pid : 0,
@@ -5058,7 +5075,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
}
if (buffer->target_node)
seq_printf(m, " node %d", buffer->target_node->debug_id);
- seq_printf(m, " size %zd:%zd data %p\n",
+ seq_printf(m, " size %zd:%zd data %pK\n",
buffer->data_size, buffer->offsets_size,
buffer->data);
}
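The release path only pays for the RCU grace period when the thread actually entered poll, since ep_remove_waitqueue() runs under rcu_read_lock(). A sketch of that conditional barrier with stand-in names:

/*
 * Shape of the teardown barrier: if the thread ever entered poll,
 * wait out all RCU readers before freeing it. synchronize_rcu_stub()
 * models synchronize_rcu(); the flag value is a stand-in.
 */
#include <stdio.h>

#define LOOPER_STATE_POLL 0x01

static void synchronize_rcu_stub(void)
{
	puts("wait for RCU readers");
}

static void thread_release(unsigned looper)
{
	if (looper & LOOPER_STATE_POLL)
		synchronize_rcu_stub();
	puts("free thread");	/* safe: no reader can still see it */
}

int main(void) { thread_release(LOOPER_STATE_POLL); return 0; }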
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 355a95a83a34..1ff17799769d 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -550,7 +550,9 @@ static const struct pci_device_id ahci_pci_tbl[] = {
.driver_data = board_ahci_yes_fbs },
{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9230),
.driver_data = board_ahci_yes_fbs },
- { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642),
+ { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642), /* highpoint rocketraid 642L */
+ .driver_data = board_ahci_yes_fbs },
+ { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0645), /* highpoint rocketraid 644L */
.driver_data = board_ahci_yes_fbs },
/* Promise */
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index a0de7a38430c..7adcf3caabd0 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -665,6 +665,16 @@ int ahci_stop_engine(struct ata_port *ap)
if ((tmp & (PORT_CMD_START | PORT_CMD_LIST_ON)) == 0)
return 0;
+ /*
+ * Don't try to issue commands but return with ENODEV if the
+ * AHCI controller is not available anymore (e.g. due to PCIe hot
+ * unplugging). Otherwise a 500ms delay for each port is added.
+ */
+ if (tmp == 0xffffffff) {
+ dev_err(ap->host->dev, "AHCI controller unavailable!\n");
+ return -ENODEV;
+ }
+
/* setting HBA to idle */
tmp &= ~PORT_CMD_START;
writel(tmp, port_mmio + PORT_CMD);
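MMIO reads from a surprise-removed PCIe device return all ones, so a PORT_CMD value of 0xffffffff is taken to mean the controller is gone and the stop sequence bails with -ENODEV instead of eating a 500 ms timeout per port. A check of the sentinel:

/*
 * All-ones MMIO read as a hot-unplug sentinel: a live PORT_CMD value
 * never has every bit set, so 0xffffffff means the device is gone.
 */
#include <assert.h>
#include <stdint.h>

static int port_gone(uint32_t port_cmd)
{
	return port_cmd == 0xffffffff;	/* the -ENODEV path above */
}

int main(void)
{
	assert(port_gone(0xffffffff));
	assert(!port_gone(0x00000017));	/* plausible live value */
	return 0;
}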
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 341d0ef82cbd..30cc8f1a31e1 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -340,7 +340,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
* 2) regulator for controlling the targets power (optional)
* 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node,
* or for non devicetree enabled platforms a single clock
- * 4) phys (optional)
+ * 4) phys (optional)
*
* RETURNS:
* The allocated ahci_host_priv on success, otherwise an ERR_PTR value
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 3c09122bf038..7431ccd03316 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4530,6 +4530,25 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER },
{ "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER },
+ /* Crucial BX100 SSD 500GB has broken LPM support */
+ { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM },
+
+ /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */
+ { "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM |
+ ATA_HORKAGE_NOLPM, },
+ /* 512GB MX100 with newer firmware has only LPM issues */
+ { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM |
+ ATA_HORKAGE_NOLPM, },
+
+ /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */
+ { "Crucial_CT480M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM |
+ ATA_HORKAGE_NOLPM, },
+ { "Crucial_CT960M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM |
+ ATA_HORKAGE_NOLPM, },
+
/* devices that don't properly handle queued TRIM commands */
{ "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM, },
@@ -4541,7 +4560,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
ATA_HORKAGE_ZERO_AFTER_TRIM, },
{ "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM, },
- { "Samsung SSD 8*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+ { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM, },
{ "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM, },
@@ -5401,8 +5422,7 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
* We guarantee to LLDs that they will have at least one
* non-zero sg if the command is a data command.
*/
- if (WARN_ON_ONCE(ata_is_data(prot) &&
- (!qc->sg || !qc->n_elem || !qc->nbytes)))
+ if (ata_is_data(prot) && (!qc->sg || !qc->n_elem || !qc->nbytes))
goto sys_err;
if (ata_is_dma(prot) || (ata_is_pio(prot) &&
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 11c3137d7b0a..c016829a38fd 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -815,7 +815,8 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
if (ap->pflags & ATA_PFLAG_LOADING)
ap->pflags &= ~ATA_PFLAG_LOADING;
- else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
+ else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
+ !(ap->flags & ATA_FLAG_SAS_HOST))
schedule_delayed_work(&ap->hotplug_task, 0);
if (ap->pflags & ATA_PFLAG_RECOVERED)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 66be961c93a4..89a9d4a2efc8 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3316,6 +3316,12 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
goto invalid_fld;
}
+ /* We may not issue NCQ commands to devices not supporting NCQ */
+ if (ata_is_ncq(tf->protocol) && !ata_ncq_enabled(dev)) {
+ fp = 1;
+ goto invalid_fld;
+ }
+
/* sanity check for pio multi commands */
if ((cdb[1] & 0xe0) && !is_multi_taskfile(tf)) {
fp = 1;
@@ -4282,7 +4288,7 @@ static inline void ata_scsi_dump_cdb(struct ata_port *ap,
#ifdef ATA_DEBUG
struct scsi_device *scsidev = cmd->device;
- DPRINTK("CDB (%u:%d,%d,%d) %9ph\n",
+ DPRINTK("CDB (%u:%d,%d,%lld) %9ph\n",
ap->print_id,
scsidev->channel, scsidev->id, scsidev->lun,
cmd->cmnd);
@@ -4309,7 +4315,9 @@ static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd,
if (likely((scsi_op != ATA_16) || !atapi_passthru16)) {
/* relay SCSI command to ATAPI device */
int len = COMMAND_SIZE(scsi_op);
- if (unlikely(len > scmd->cmd_len || len > dev->cdb_len))
+ if (unlikely(len > scmd->cmd_len ||
+ len > dev->cdb_len ||
+ scmd->cmd_len > ATAPI_CDB_LEN))
goto bad_cdb_len;
xlat_func = atapi_xlat;
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 80ee2f2a50d0..6456e07db72a 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -146,6 +146,7 @@
enum sata_rcar_type {
RCAR_GEN1_SATA,
RCAR_GEN2_SATA,
+ RCAR_GEN3_SATA,
RCAR_R8A7790_ES1_SATA,
};
@@ -784,26 +785,11 @@ static void sata_rcar_setup_port(struct ata_host *host)
ioaddr->command_addr = ioaddr->cmd_addr + (ATA_REG_CMD << 2);
}
-static void sata_rcar_init_controller(struct ata_host *host)
+static void sata_rcar_init_module(struct sata_rcar_priv *priv)
{
- struct sata_rcar_priv *priv = host->private_data;
void __iomem *base = priv->base;
u32 val;
- /* reset and setup phy */
- switch (priv->type) {
- case RCAR_GEN1_SATA:
- sata_rcar_gen1_phy_init(priv);
- break;
- case RCAR_GEN2_SATA:
- case RCAR_R8A7790_ES1_SATA:
- sata_rcar_gen2_phy_init(priv);
- break;
- default:
- dev_warn(host->dev, "SATA phy is not initialized\n");
- break;
- }
-
/* SATA-IP reset state */
val = ioread32(base + ATAPI_CONTROL1_REG);
val |= ATAPI_CONTROL1_RESET;
@@ -824,10 +810,33 @@ static void sata_rcar_init_controller(struct ata_host *host)
/* ack and mask */
iowrite32(0, base + SATAINTSTAT_REG);
iowrite32(0x7ff, base + SATAINTMASK_REG);
+
/* enable interrupts */
iowrite32(ATAPI_INT_ENABLE_SATAINT, base + ATAPI_INT_ENABLE_REG);
}
+static void sata_rcar_init_controller(struct ata_host *host)
+{
+ struct sata_rcar_priv *priv = host->private_data;
+
+ /* reset and setup phy */
+ switch (priv->type) {
+ case RCAR_GEN1_SATA:
+ sata_rcar_gen1_phy_init(priv);
+ break;
+ case RCAR_GEN2_SATA:
+ case RCAR_GEN3_SATA:
+ case RCAR_R8A7790_ES1_SATA:
+ sata_rcar_gen2_phy_init(priv);
+ break;
+ default:
+ dev_warn(host->dev, "SATA phy is not initialized\n");
+ break;
+ }
+
+ sata_rcar_init_module(priv);
+}
+
static const struct of_device_id sata_rcar_match[] = {
{
/* Deprecated by "renesas,sata-r8a7779" */
@@ -856,7 +865,7 @@ static const struct of_device_id sata_rcar_match[] = {
},
{
.compatible = "renesas,sata-r8a7795",
- .data = (void *)RCAR_GEN2_SATA
+ .data = (void *)RCAR_GEN3_SATA
},
{
.compatible = "renesas,rcar-gen2-sata",
@@ -864,7 +873,7 @@ static const struct of_device_id sata_rcar_match[] = {
},
{
.compatible = "renesas,rcar-gen3-sata",
- .data = (void *)RCAR_GEN2_SATA
+ .data = (void *)RCAR_GEN3_SATA
},
{ },
};
@@ -982,11 +991,18 @@ static int sata_rcar_resume(struct device *dev)
if (ret)
return ret;
- /* ack and mask */
- iowrite32(0, base + SATAINTSTAT_REG);
- iowrite32(0x7ff, base + SATAINTMASK_REG);
- /* enable interrupts */
- iowrite32(ATAPI_INT_ENABLE_SATAINT, base + ATAPI_INT_ENABLE_REG);
+ if (priv->type == RCAR_GEN3_SATA) {
+ sata_rcar_gen2_phy_init(priv);
+ sata_rcar_init_module(priv);
+ } else {
+ /* ack and mask */
+ iowrite32(0, base + SATAINTSTAT_REG);
+ iowrite32(0x7ff, base + SATAINTMASK_REG);
+
+ /* enable interrupts */
+ iowrite32(ATAPI_INT_ENABLE_SATAINT,
+ base + ATAPI_INT_ENABLE_REG);
+ }
ata_host_resume(host);
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 0277f36be85b..6e737142ceaa 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -3173,14 +3173,10 @@ static void init_sram(struct idt77252_dev *card)
(u32) 0xffffffff);
}
- writel((SAR_FBQ0_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
- writel((SAR_FBQ1_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
- writel((SAR_FBQ2_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
- writel((SAR_FBQ3_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
+ writel((SAR_FBQ0_LOW << 28) | (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
+ writel((SAR_FBQ1_LOW << 28) | (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
+ writel((SAR_FBQ2_LOW << 28) | (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
+ writel((SAR_FBQ3_LOW << 28) | (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
/* Initialize rate table */
for (i = 0; i < 256; i++) {
diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
index 9180b9bd5821..834509506ef6 100644
--- a/drivers/auxdisplay/img-ascii-lcd.c
+++ b/drivers/auxdisplay/img-ascii-lcd.c
@@ -97,7 +97,7 @@ static struct img_ascii_lcd_config boston_config = {
static void malta_update(struct img_ascii_lcd_ctx *ctx)
{
unsigned int i;
- int err;
+ int err = 0;
for (i = 0; i < ctx->cfg->num_chars; i++) {
err = regmap_write(ctx->regmap,
@@ -180,7 +180,7 @@ static int sead3_wait_lcd_idle(struct img_ascii_lcd_ctx *ctx)
static void sead3_update(struct img_ascii_lcd_ctx *ctx)
{
unsigned int i;
- int err;
+ int err = 0;
for (i = 0; i < ctx->cfg->num_chars; i++) {
err = sead3_wait_lcd_idle(ctx);
@@ -224,7 +224,7 @@ MODULE_DEVICE_TABLE(of, img_ascii_lcd_matches);
/**
* img_ascii_lcd_scroll() - scroll the display by a character
- * @arg: really a pointer to the private data structure
+ * @t: really a pointer to the private data structure
*
* Scroll the current message along the LCD by one character, rearming the
* timer if required.
diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c
index ea7869c0d7f9..ec5e8800f8ad 100644
--- a/drivers/auxdisplay/panel.c
+++ b/drivers/auxdisplay/panel.c
@@ -1372,7 +1372,7 @@ static void panel_process_inputs(void)
break;
input->rise_timer = 0;
input->state = INPUT_ST_RISING;
- /* no break here, fall through */
+ /* fall through */
case INPUT_ST_RISING:
if ((phys_curr & input->mask) != input->value) {
input->state = INPUT_ST_LOW;
@@ -1385,11 +1385,11 @@ static void panel_process_inputs(void)
}
input->high_timer = 0;
input->state = INPUT_ST_HIGH;
- /* no break here, fall through */
+ /* fall through */
case INPUT_ST_HIGH:
if (input_state_high(input))
break;
- /* no break here, fall through */
+ /* fall through */
case INPUT_ST_FALLING:
input_state_falling(input);
}
diff --git a/drivers/base/core.c b/drivers/base/core.c
index b2261f92f2f1..5847364f25d9 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -310,6 +310,9 @@ static void __device_link_del(struct device_link *link)
dev_info(link->consumer, "Dropping the link to %s\n",
dev_name(link->supplier));
+ if (link->flags & DL_FLAG_PM_RUNTIME)
+ pm_runtime_drop_link(link->consumer);
+
list_del(&link->s_node);
list_del(&link->c_node);
device_link_free(link);
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index a8ac86e4d79e..6637fc319269 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -321,7 +321,8 @@ void dev_pm_arm_wake_irq(struct wake_irq *wirq)
return;
if (device_may_wakeup(wirq->dev)) {
- if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)
+ if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+ !pm_runtime_status_suspended(wirq->dev))
enable_irq(wirq->irq);
enable_irq_wake(wirq->irq);
@@ -343,7 +344,8 @@ void dev_pm_disarm_wake_irq(struct wake_irq *wirq)
if (device_may_wakeup(wirq->dev)) {
disable_irq_wake(wirq->irq);
- if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)
+ if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+ !pm_runtime_status_suspended(wirq->dev))
disable_irq_nosync(wirq->irq);
}
}
diff --git a/drivers/base/property.c b/drivers/base/property.c
index 302236281d83..8f205f6461ed 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1410,9 +1410,8 @@ int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
}
EXPORT_SYMBOL(fwnode_graph_parse_endpoint);
-void *device_get_match_data(struct device *dev)
+const void *device_get_match_data(struct device *dev)
{
- return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data,
- dev);
+ return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data, dev);
}
EXPORT_SYMBOL_GPL(device_get_match_data);
diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index ba8acca036df..cb0f1aad20b7 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -55,7 +55,7 @@ config BCMA_DRIVER_PCI
config BCMA_DRIVER_PCI_HOSTMODE
bool "Driver for PCI core working in hostmode"
- depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY
+ depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY && BCMA = y
help
PCI core hostmode operation (external PCI bus).
diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index f1eb4d3e1d57..f4161064365c 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -203,7 +203,7 @@ static void bcma_pmu_resources_init(struct bcma_drv_cc *cc)
* Add some delay; allow resources to come up and settle.
* Delay is required for SoC (early init).
*/
- mdelay(2);
+ usleep_range(2000, 2500);
}
/* Disable to allow reading SPROM. Don't know the advantages of enabling it. */
diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index 925842996986..63410ecfe640 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -297,6 +297,7 @@ static const struct pci_device_id bcma_pci_bridge_tbl[] = {
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0016) },
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0018) },
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_FOXCONN, 0xe092) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_HP, 0x804a) },
{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a0) },
{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a9) },
{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43aa) },
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index e5aa62fcf5a8..3aaf6af3ec23 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1758,7 +1758,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
if (unit[drive].type->code == FD_NODRIVE)
return NULL;
*part = 0;
- return get_disk(unit[drive].gendisk);
+ return get_disk_and_module(unit[drive].gendisk);
}
static int __init amiga_floppy_probe(struct platform_device *pdev)
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 8bc3b9fd8dd2..dfb2c2622e5a 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1917,7 +1917,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS)
return NULL;
*part = 0;
- return get_disk(unit[drive].disk);
+ return get_disk_and_module(unit[drive].disk);
}
static int __init atari_floppy_init (void)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 8028a3a7e7fd..deea78e485da 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -456,7 +456,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
mutex_lock(&brd_devices_mutex);
brd = brd_init_one(MINOR(dev) / max_part, &new);
- kobj = brd ? get_disk(brd->brd_disk) : NULL;
+ kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL;
mutex_unlock(&brd_devices_mutex);
if (new)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index eae484acfbbc..8ec7235fc93b 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4505,7 +4505,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type))
return NULL;
*part = 0;
- return get_disk(disks[drive]);
+ return get_disk_and_module(disks[drive]);
}
static int __init do_floppy_init(void)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d5fe720cf149..ee62d2d517bf 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -266,7 +266,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
struct iov_iter i;
ssize_t bw;
- iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len);
+ iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
file_start_write(file);
bw = vfs_iter_write(file, &i, ppos, 0);
@@ -1922,7 +1922,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
if (err < 0)
kobj = NULL;
else
- kobj = get_disk(lo->lo_disk);
+ kobj = get_disk_and_module(lo->lo_disk);
mutex_unlock(&loop_index_mutex);
*part = 0;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5f2a4240a204..86258b00a1d4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1591,7 +1591,7 @@ again:
if (new_index < 0) {
mutex_unlock(&nbd_index_mutex);
printk(KERN_ERR "nbd: failed to add new device\n");
- return ret;
+ return new_index;
}
nbd = idr_find(&nbd_index_idr, new_index);
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 531a0915066b..c61d20c9f3f8 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1122,7 +1122,7 @@ static int pkt_start_recovery(struct packet_data *pkt)
pkt->sector = new_sector;
bio_reset(pkt->bio);
- bio_set_set(pkt->bio, pd->bdev);
+ bio_set_dev(pkt->bio, pd->bdev);
bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0);
pkt->bio->bi_iter.bi_sector = new_sector;
pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 84434d3ea19b..64e066eba72e 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -799,7 +799,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
return NULL;
*part = 0;
- return get_disk(swd->unit[drive].disk);
+ return get_disk_and_module(swd->unit[drive].disk);
}
static int swim_add_floppy(struct swim_priv *swd, enum drive_location location)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index e126e4cac2ca..92ec1bbece51 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -262,6 +262,7 @@ static DEFINE_SPINLOCK(minor_lock);
static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
static void blkfront_gather_backend_features(struct blkfront_info *info);
+static int negotiate_mq(struct blkfront_info *info);
static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
{
@@ -1774,11 +1775,18 @@ static int talk_to_blkback(struct xenbus_device *dev,
unsigned int i, max_page_order;
unsigned int ring_page_order;
+ if (!info)
+ return -ENODEV;
+
max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
"max-ring-page-order", 0);
ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
info->nr_ring_pages = 1 << ring_page_order;
+ err = negotiate_mq(info);
+ if (err)
+ goto destroy_blkring;
+
for (i = 0; i < info->nr_rings; i++) {
struct blkfront_ring_info *rinfo = &info->rinfo[i];
@@ -1978,11 +1986,6 @@ static int blkfront_probe(struct xenbus_device *dev,
}
info->xbdev = dev;
- err = negotiate_mq(info);
- if (err) {
- kfree(info);
- return err;
- }
mutex_init(&info->mutex);
info->vdevice = vdevice;
@@ -2099,10 +2102,6 @@ static int blkfront_resume(struct xenbus_device *dev)
blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
- err = negotiate_mq(info);
- if (err)
- return err;
-
err = talk_to_blkback(dev, info);
if (!err)
blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 41c95c9b2ab4..8f9130ab5887 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -332,7 +332,7 @@ static const struct block_device_operations z2_fops =
static struct kobject *z2_find(dev_t dev, int *part, void *data)
{
*part = 0;
- return get_disk(z2ram_gendisk);
+ return get_disk_and_module(z2ram_gendisk);
}
static struct request_queue *z2_queue;
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 07e55cd8f8c8..d8bbd661dbdb 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -392,4 +392,16 @@ config BT_QCOMSMD
Say Y here to compile support for HCI over Qualcomm SMD into the
kernel or say M to compile as a module.
+config BT_HCIRSI
+ tristate "Redpine HCI support"
+ default n
+ select RSI_COEX
+ help
+ Redpine BT driver.
+ This driver handles BT traffic from the upper layers and passes
+ it to the RSI_91x coex module for further scheduling to the device.
+
+ Say Y here to compile support for HCI over Redpine into the
+ kernel or say M to compile as a module.
+
endmenu
diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile
index 4e4e44d09796..03cfc1b20c4a 100644
--- a/drivers/bluetooth/Makefile
+++ b/drivers/bluetooth/Makefile
@@ -28,6 +28,8 @@ obj-$(CONFIG_BT_QCA) += btqca.o
obj-$(CONFIG_BT_HCIUART_NOKIA) += hci_nokia.o
+obj-$(CONFIG_BT_HCIRSI) += btrsi.o
+
btmrvl-y := btmrvl_main.o
btmrvl-$(CONFIG_DEBUG_FS) += btmrvl_debugfs.o
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 204afe66de92..3d7a5c149af3 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -203,6 +203,12 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
{ } /* Terminating entry */
};
+static inline void ath3k_log_failed_loading(int err, int len, int size)
+{
+ BT_ERR("Error in firmware loading err = %d, len = %d, size = %d",
+ err, len, size);
+}
+
#define USB_REQ_DFU_DNLOAD 1
#define BULK_SIZE 4096
#define FW_HDR_SIZE 20
@@ -227,15 +233,16 @@ static int ath3k_load_firmware(struct usb_device *udev,
return -ENOMEM;
}
- memcpy(send_buf, firmware->data, 20);
+ memcpy(send_buf, firmware->data, FW_HDR_SIZE);
err = usb_control_msg(udev, pipe, USB_REQ_DFU_DNLOAD, USB_TYPE_VENDOR,
- 0, 0, send_buf, 20, USB_CTRL_SET_TIMEOUT);
+ 0, 0, send_buf, FW_HDR_SIZE,
+ USB_CTRL_SET_TIMEOUT);
if (err < 0) {
BT_ERR("Can't change to loading configuration err");
goto error;
}
- sent += 20;
- count -= 20;
+ sent += FW_HDR_SIZE;
+ count -= FW_HDR_SIZE;
pipe = usb_sndbulkpipe(udev, 0x02);
@@ -250,8 +257,7 @@ static int ath3k_load_firmware(struct usb_device *udev,
&len, 3000);
if (err || (len != size)) {
- BT_ERR("Error in firmware loading err = %d,"
- "len = %d, size = %d", err, len, size);
+ ath3k_log_failed_loading(err, len, size);
goto error;
}
@@ -350,8 +356,7 @@ static int ath3k_load_fwfile(struct usb_device *udev,
err = usb_bulk_msg(udev, pipe, send_buf, size,
&len, 3000);
if (err || (len != size)) {
- BT_ERR("Error in firmware loading err = %d,"
- "len = %d, size = %d", err, len, size);
+ ath3k_log_failed_loading(err, len, size);
kfree(send_buf);
return err;
}
@@ -398,7 +403,7 @@ static int ath3k_set_normal_mode(struct usb_device *udev)
static int ath3k_load_patch(struct usb_device *udev)
{
unsigned char fw_state;
- char filename[ATH3K_NAME_LEN] = {0};
+ char filename[ATH3K_NAME_LEN];
const struct firmware *firmware;
struct ath3k_version fw_version;
__u32 pt_rom_version, pt_build_version;
@@ -451,7 +456,7 @@ static int ath3k_load_patch(struct usb_device *udev)
static int ath3k_load_syscfg(struct usb_device *udev)
{
unsigned char fw_state;
- char filename[ATH3K_NAME_LEN] = {0};
+ char filename[ATH3K_NAME_LEN];
const struct firmware *firmware;
struct ath3k_version fw_version;
int clk_value, ret;
@@ -522,7 +527,6 @@ static int ath3k_probe(struct usb_interface *intf,
/* load patch and sysconfig files for AR3012 */
if (id->driver_info & BTUSB_ATH3012) {
-
/* New firmware with patch and sysconfig files already loaded */
if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x0001)
return -ENODEV;
@@ -565,7 +569,7 @@ static int ath3k_probe(struct usb_interface *intf,
static void ath3k_disconnect(struct usb_interface *intf)
{
- BT_DBG("ath3k_disconnect intf %p", intf);
+ BT_DBG("%s intf %p", __func__, intf);
}
static struct usb_driver ath3k_driver = {
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index b280d466f05b..f6c694a1b9b0 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -183,7 +183,7 @@ static int btmrvl_send_sync_cmd(struct btmrvl_private *priv, u16 opcode,
return -EFAULT;
}
- skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_ATOMIC);
+ skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_KERNEL);
if (!skb) {
BT_ERR("No free skb");
return -ENOMEM;
diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c
new file mode 100644
index 000000000000..5034325e417c
--- /dev/null
+++ b/drivers/bluetooth/btrsi.c
@@ -0,0 +1,188 @@
+/**
+ * Copyright (c) 2017 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <asm/unaligned.h>
+#include <net/rsi_91x.h>
+#include <net/genetlink.h>
+
+#define RSI_HEADROOM_FOR_BT_HAL 16
+#define RSI_FRAME_DESC_SIZE 16
+
+struct rsi_hci_adapter {
+ void *priv;
+ struct rsi_proto_ops *proto_ops;
+ struct hci_dev *hdev;
+};
+
+static int rsi_hci_open(struct hci_dev *hdev)
+{
+ return 0;
+}
+
+static int rsi_hci_close(struct hci_dev *hdev)
+{
+ return 0;
+}
+
+static int rsi_hci_flush(struct hci_dev *hdev)
+{
+ return 0;
+}
+
+static int rsi_hci_send_pkt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct rsi_hci_adapter *h_adapter = hci_get_drvdata(hdev);
+ struct sk_buff *new_skb = NULL;
+
+ switch (hci_skb_pkt_type(skb)) {
+ case HCI_COMMAND_PKT:
+ hdev->stat.cmd_tx++;
+ break;
+ case HCI_ACLDATA_PKT:
+ hdev->stat.acl_tx++;
+ break;
+ case HCI_SCODATA_PKT:
+ hdev->stat.sco_tx++;
+ break;
+ }
+
+ if (skb_headroom(skb) < RSI_HEADROOM_FOR_BT_HAL) {
+ /* Insufficient skb headroom - allocate a new skb */
+ new_skb = skb_realloc_headroom(skb, RSI_HEADROOM_FOR_BT_HAL);
+ if (unlikely(!new_skb))
+ return -ENOMEM;
+ bt_cb(new_skb)->pkt_type = hci_skb_pkt_type(skb);
+ kfree_skb(skb);
+ skb = new_skb;
+ }
+
+ return h_adapter->proto_ops->coex_send_pkt(h_adapter->priv, skb,
+ RSI_BT_Q);
+}
+
+static int rsi_hci_recv_pkt(void *priv, const u8 *pkt)
+{
+ struct rsi_hci_adapter *h_adapter = priv;
+ struct hci_dev *hdev = h_adapter->hdev;
+ struct sk_buff *skb;
+ int pkt_len = get_unaligned_le16(pkt) & 0x0fff;
+
+ skb = dev_alloc_skb(pkt_len);
+ if (!skb)
+ return -ENOMEM;
+
+ memcpy(skb->data, pkt + RSI_FRAME_DESC_SIZE, pkt_len);
+ skb_put(skb, pkt_len);
+ h_adapter->hdev->stat.byte_rx += skb->len;
+
+ hci_skb_pkt_type(skb) = pkt[14];
+
+ return hci_recv_frame(hdev, skb);
+}
+
+static int rsi_hci_attach(void *priv, struct rsi_proto_ops *ops)
+{
+ struct rsi_hci_adapter *h_adapter = NULL;
+ struct hci_dev *hdev;
+ int err = 0;
+
+ h_adapter = kzalloc(sizeof(*h_adapter), GFP_KERNEL);
+ if (!h_adapter)
+ return -ENOMEM;
+
+ h_adapter->priv = priv;
+ ops->set_bt_context(priv, h_adapter);
+ h_adapter->proto_ops = ops;
+
+ hdev = hci_alloc_dev();
+ if (!hdev) {
+ BT_ERR("Failed to alloc HCI device");
+ goto err;
+ }
+
+ h_adapter->hdev = hdev;
+
+ if (ops->get_host_intf(priv) == RSI_HOST_INTF_SDIO)
+ hdev->bus = HCI_SDIO;
+ else
+ hdev->bus = HCI_USB;
+
+ hci_set_drvdata(hdev, h_adapter);
+ hdev->dev_type = HCI_PRIMARY;
+ hdev->open = rsi_hci_open;
+ hdev->close = rsi_hci_close;
+ hdev->flush = rsi_hci_flush;
+ hdev->send = rsi_hci_send_pkt;
+
+ err = hci_register_dev(hdev);
+ if (err < 0) {
+ BT_ERR("HCI registration failed with errcode %d", err);
+ hci_free_dev(hdev);
+ goto err;
+ }
+
+ return 0;
+err:
+ h_adapter->hdev = NULL;
+ kfree(h_adapter);
+ return -EINVAL;
+}
+
+static void rsi_hci_detach(void *priv)
+{
+ struct rsi_hci_adapter *h_adapter = priv;
+ struct hci_dev *hdev;
+
+ if (!h_adapter)
+ return;
+
+ hdev = h_adapter->hdev;
+ if (hdev) {
+ hci_unregister_dev(hdev);
+ hci_free_dev(hdev);
+ h_adapter->hdev = NULL;
+ }
+
+ kfree(h_adapter);
+}
+
+const struct rsi_mod_ops rsi_bt_ops = {
+ .attach = rsi_hci_attach,
+ .detach = rsi_hci_detach,
+ .recv_pkt = rsi_hci_recv_pkt,
+};
+EXPORT_SYMBOL(rsi_bt_ops);
+
+static int rsi_91x_bt_module_init(void)
+{
+ return 0;
+}
+
+static void rsi_91x_bt_module_exit(void)
+{
+ return;
+}
+
+module_init(rsi_91x_bt_module_init);
+module_exit(rsi_91x_bt_module_exit);
+MODULE_AUTHOR("Redpine Signals Inc");
+MODULE_DESCRIPTION("RSI BT driver");
+MODULE_SUPPORTED_DEVICE("RSI-BT");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 6e2ad748abba..437f080deaab 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -35,6 +35,60 @@
#define RTL_ROM_LMP_8761A 0x8761
#define RTL_ROM_LMP_8822B 0x8822
+#define IC_MATCH_FL_LMPSUBV (1 << 0)
+#define IC_MATCH_FL_HCIREV (1 << 1)
+#define IC_INFO(lmps, hcir) \
+ .match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV, \
+ .lmp_subver = (lmps), \
+ .hci_rev = (hcir)
+
+struct id_table {
+ __u16 match_flags;
+ __u16 lmp_subver;
+ __u16 hci_rev;
+ bool config_needed;
+ char *fw_name;
+ char *cfg_name;
+};
+
+static const struct id_table ic_id_table[] = {
+ /* 8723B */
+ { IC_INFO(RTL_ROM_LMP_8723B, 0xb),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8723b_fw.bin",
+ .cfg_name = "rtl_bt/rtl8723b_config.bin" },
+
+ /* 8723D */
+ { IC_INFO(RTL_ROM_LMP_8723B, 0xd),
+ .config_needed = true,
+ .fw_name = "rtl_bt/rtl8723d_fw.bin",
+ .cfg_name = "rtl_bt/rtl8723d_config.bin" },
+
+ /* 8821A */
+ { IC_INFO(RTL_ROM_LMP_8821A, 0xa),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8821a_fw.bin",
+ .cfg_name = "rtl_bt/rtl8821a_config.bin" },
+
+ /* 8821C */
+ { IC_INFO(RTL_ROM_LMP_8821A, 0xc),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8821c_fw.bin",
+ .cfg_name = "rtl_bt/rtl8821c_config.bin" },
+
+ /* 8761A */
+ { IC_MATCH_FL_LMPSUBV, RTL_ROM_LMP_8761A, 0x0,
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8761a_fw.bin",
+ .cfg_name = "rtl_bt/rtl8761a_config.bin" },
+
+ /* 8822B */
+ { IC_INFO(RTL_ROM_LMP_8822B, 0xb),
+ .config_needed = true,
+ .fw_name = "rtl_bt/rtl8822b_fw.bin",
+ .cfg_name = "rtl_bt/rtl8822b_config.bin" },
+ };
+
static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
{
struct rtl_rom_version_evt *rom_version;
@@ -64,9 +118,9 @@ static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
return 0;
}
-static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
- const struct firmware *fw,
- unsigned char **_buf)
+static int rtlbt_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
+ const struct firmware *fw,
+ unsigned char **_buf)
{
const u8 extension_sig[] = { 0x51, 0x04, 0xfd, 0x77 };
struct rtl_epatch_header *epatch_info;
@@ -88,6 +142,8 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
{ RTL_ROM_LMP_8821A, 2 },
{ RTL_ROM_LMP_8761A, 3 },
{ RTL_ROM_LMP_8822B, 8 },
+ { RTL_ROM_LMP_8723B, 9 }, /* 8723D */
+ { RTL_ROM_LMP_8821A, 10 }, /* 8821C */
};
ret = rtl_read_rom_version(hdev, &rom_version);
@@ -320,8 +376,8 @@ out:
return ret;
}
-static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
- const char *fw_name)
+static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 hci_rev,
+ u16 lmp_subver)
{
unsigned char *fw_data = NULL;
const struct firmware *fw;
@@ -330,39 +386,40 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
u8 *cfg_buff = NULL;
u8 *tbuff;
char *cfg_name = NULL;
- bool config_needed = false;
+ char *fw_name = NULL;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ic_id_table); i++) {
+ if ((ic_id_table[i].match_flags & IC_MATCH_FL_LMPSUBV) &&
+ (ic_id_table[i].lmp_subver != lmp_subver))
+ continue;
+ if ((ic_id_table[i].match_flags & IC_MATCH_FL_HCIREV) &&
+ (ic_id_table[i].hci_rev != hci_rev))
+ continue;
- switch (lmp_subver) {
- case RTL_ROM_LMP_8723B:
- cfg_name = "rtl_bt/rtl8723b_config.bin";
- break;
- case RTL_ROM_LMP_8821A:
- cfg_name = "rtl_bt/rtl8821a_config.bin";
- break;
- case RTL_ROM_LMP_8761A:
- cfg_name = "rtl_bt/rtl8761a_config.bin";
- break;
- case RTL_ROM_LMP_8822B:
- cfg_name = "rtl_bt/rtl8822b_config.bin";
- config_needed = true;
- break;
- default:
- BT_ERR("%s: rtl: no config according to lmp_subver %04x",
- hdev->name, lmp_subver);
break;
}
+ if (i >= ARRAY_SIZE(ic_id_table)) {
+ BT_ERR("%s: unknown IC info, lmp subver %04x, hci rev %04x",
+ hdev->name, lmp_subver, hci_rev);
+ return -EINVAL;
+ }
+
+ cfg_name = ic_id_table[i].cfg_name;
+
if (cfg_name) {
cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff);
if (cfg_sz < 0) {
cfg_sz = 0;
- if (config_needed)
+ if (ic_id_table[i].config_needed)
BT_ERR("Necessary config file %s not found\n",
cfg_name);
}
} else
cfg_sz = 0;
+ fw_name = ic_id_table[i].fw_name;
bt_dev_info(hdev, "rtl: loading %s", fw_name);
ret = request_firmware(&fw, fw_name, &hdev->dev);
if (ret < 0) {
@@ -370,7 +427,7 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
goto err_req_fw;
}
- ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data);
+ ret = rtlbt_parse_firmware(hdev, lmp_subver, fw, &fw_data);
if (ret < 0)
goto out;
@@ -429,7 +486,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
{
struct sk_buff *skb;
struct hci_rp_read_local_version *resp;
- u16 lmp_subver;
+ u16 hci_rev, lmp_subver;
skb = btrtl_read_local_version(hdev);
if (IS_ERR(skb))
@@ -441,6 +498,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
resp->hci_ver, resp->hci_rev,
resp->lmp_ver, resp->lmp_subver);
+ hci_rev = le16_to_cpu(resp->hci_rev);
lmp_subver = le16_to_cpu(resp->lmp_subver);
kfree_skb(skb);
@@ -455,17 +513,10 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
case RTL_ROM_LMP_3499:
return btrtl_setup_rtl8723a(hdev);
case RTL_ROM_LMP_8723B:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8723b_fw.bin");
case RTL_ROM_LMP_8821A:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8821a_fw.bin");
case RTL_ROM_LMP_8761A:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8761a_fw.bin");
case RTL_ROM_LMP_8822B:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8822b_fw.bin");
+ return btrtl_setup_rtl8723b(hdev, hci_rev, lmp_subver);
default:
bt_dev_info(hdev, "rtl: assuming no firmware upload needed");
return 0;
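The table-driven rework above replaces a per-chip switch with one lookup: each entry opts fields into the comparison via match_flags, so a single loop can match on lmp_subver alone or on the (lmp_subver, hci_rev) pair. A self-contained sketch of the same pattern (table contents and names are illustrative):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define MATCH_LMPSUBV (1 << 0)
#define MATCH_HCIREV  (1 << 1)

struct id_entry {
	uint16_t match_flags;
	uint16_t lmp_subver;
	uint16_t hci_rev;
	const char *fw_name;
};

static const struct id_entry table[] = {
	/* match on both fields */
	{ MATCH_LMPSUBV | MATCH_HCIREV, 0x8723, 0xb, "rtl8723b_fw.bin" },
	{ MATCH_LMPSUBV | MATCH_HCIREV, 0x8723, 0xd, "rtl8723d_fw.bin" },
	/* match on LMP subversion only; any HCI revision */
	{ MATCH_LMPSUBV,                0x8761, 0x0, "rtl8761a_fw.bin" },
};

static const struct id_entry *lookup(uint16_t lmp_subver, uint16_t hci_rev)
{
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		/* a set flag makes the field mandatory; clear == wildcard */
		if ((table[i].match_flags & MATCH_LMPSUBV) &&
		    table[i].lmp_subver != lmp_subver)
			continue;
		if ((table[i].match_flags & MATCH_HCIREV) &&
		    table[i].hci_rev != hci_rev)
			continue;
		return &table[i];
	}
	return NULL;	/* unknown IC: caller bails out with -EINVAL */
}

int main(void)
{
	const struct id_entry *e = lookup(0x8761, 0x7); /* hci_rev ignored */

	printf("%s\n", e ? e->fw_name : "no match");
	return 0;
}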
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 2a55380ad730..5cd868ea28ed 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -21,6 +21,7 @@
*
*/
+#include <linux/dmi.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/quirks.h>
@@ -230,7 +231,6 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 },
- { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 },
@@ -263,6 +263,7 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 },
/* QCA ROME chipset */
+ { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME },
@@ -339,6 +340,7 @@ static const struct usb_device_id blacklist_table[] = {
/* Intel Bluetooth devices */
{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW },
+ { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW },
{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
@@ -373,12 +375,30 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
{ USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
+ /* Additional Realtek 8822BE Bluetooth devices */
+ { USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },
+
/* Silicon Wave based devices */
{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
{ } /* Terminating entry */
};
+/* The Bluetooth USB module built into some devices needs to be reset on
+ * resume; this is a problem with the platform (likely shutting off all
+ * power), not with the module itself. So we use a DMI list to match known
+ * broken platforms.
+ */
+static const struct dmi_system_id btusb_needs_reset_resume_table[] = {
+ {
+ /* Dell OptiPlex 3060 (QCA ROME device 0cf3:e007) */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"),
+ },
+ },
+ {}
+};
+
#define BTUSB_MAX_ISOC_FRAMES 10
#define BTUSB_INTR_RUNNING 0
@@ -2057,6 +2077,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
case 0x0c: /* WsP */
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
break;
default:
BT_ERR("%s: Unsupported Intel hardware variant (%u)",
@@ -2149,6 +2171,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
le16_to_cpu(ver.hw_variant),
le16_to_cpu(ver.hw_revision),
@@ -2180,6 +2204,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
le16_to_cpu(ver.hw_variant),
le16_to_cpu(ver.hw_revision),
@@ -2945,6 +2971,9 @@ static int btusb_probe(struct usb_interface *intf,
hdev->send = btusb_send_frame;
hdev->notify = btusb_notify;
+ if (dmi_check_system(btusb_needs_reset_resume_table))
+ interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
+
#ifdef CONFIG_PM
err = btusb_config_oob_wake(hdev);
if (err)
@@ -3031,12 +3060,6 @@ static int btusb_probe(struct usb_interface *intf,
if (id->driver_info & BTUSB_QCA_ROME) {
data->setup_on_usb = btusb_setup_qca;
hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
-
- /* QCA Rome devices lose their updated firmware over suspend,
- * but the USB hub doesn't notice any status change.
- * explicitly request a device reset on resume.
- */
- interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
}
#ifdef CONFIG_BT_HCIBTUSB_RTL
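Replacing the blanket QCA Rome RESET_RESUME quirk with a DMI table scopes the workaround to platforms known to cut Bluetooth power in suspend. A user-space sketch of the same match-list idea, with hypothetical vendor/product strings (the kernel's dmi_check_system() performs the equivalent walk over firmware-provided DMI data):

#include <stdio.h>
#include <string.h>

struct dmi_quirk {
	const char *sys_vendor;   /* NULL acts as a wildcard */
	const char *product_name;
};

/* Hypothetical deny-list of platforms that power off USB in suspend. */
static const struct dmi_quirk needs_reset_resume[] = {
	{ "Dell Inc.", "OptiPlex 3060" },
	{ NULL, NULL }	/* terminator */
};

static int dmi_matches(const struct dmi_quirk *q,
		       const char *vendor, const char *product)
{
	return (!q->sys_vendor || !strcmp(q->sys_vendor, vendor)) &&
	       (!q->product_name || !strcmp(q->product_name, product));
}

static int platform_needs_reset_resume(const char *vendor,
				       const char *product)
{
	const struct dmi_quirk *q;

	for (q = needs_reset_resume; q->sys_vendor || q->product_name; q++)
		if (dmi_matches(q, vendor, product))
			return 1;
	return 0;
}

int main(void)
{
	/* prints 1: this platform gets the RESET_RESUME quirk */
	printf("%d\n", platform_needs_reset_resume("Dell Inc.",
						   "OptiPlex 3060"));
	return 0;
}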
diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c
index 14ae7ee88acb..d568fbd94d6c 100644
--- a/drivers/bluetooth/hci_ath.c
+++ b/drivers/bluetooth/hci_ath.c
@@ -71,12 +71,12 @@ static int ath_wakeup_ar3k(struct tty_struct *tty)
/* Clear RTS first */
tty->driver->ops->tiocmget(tty);
tty->driver->ops->tiocmset(tty, 0x00, TIOCM_RTS);
- mdelay(20);
+ msleep(20);
/* Set RTS, wake up board */
tty->driver->ops->tiocmget(tty);
tty->driver->ops->tiocmset(tty, TIOCM_RTS, 0x00);
- mdelay(20);
+ msleep(20);
status = tty->driver->ops->tiocmget(tty);
return status;
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 0438a64b8185..40b9fb247010 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -244,7 +244,9 @@ static irqreturn_t bcm_host_wake(int irq, void *data)
bt_dev_dbg(bdev, "Host wake IRQ");
- pm_request_resume(bdev->dev);
+ pm_runtime_get(bdev->dev);
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
return IRQ_HANDLED;
}
@@ -301,7 +303,7 @@ static const struct bcm_set_sleep_mode default_sleep_params = {
.usb_auto_sleep = 0,
.usb_resume_timeout = 0,
.break_to_host = 0,
- .pulsed_host_wake = 0,
+ .pulsed_host_wake = 1,
};
static int bcm_setup_sleep(struct hci_uart *hu)
@@ -586,8 +588,11 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count)
} else if (!bcm->rx_skb) {
/* Delay auto-suspend when receiving completed packet */
mutex_lock(&bcm_device_lock);
- if (bcm->dev && bcm_device_exists(bcm->dev))
- pm_request_resume(bcm->dev->dev);
+ if (bcm->dev && bcm_device_exists(bcm->dev)) {
+ pm_runtime_get(bcm->dev->dev);
+ pm_runtime_mark_last_busy(bcm->dev->dev);
+ pm_runtime_put_autosuspend(bcm->dev->dev);
+ }
mutex_unlock(&bcm_device_lock);
}
@@ -922,12 +927,13 @@ static int bcm_get_resources(struct bcm_device *dev)
dev->clk = devm_clk_get(dev->dev, NULL);
- dev->device_wakeup = devm_gpiod_get(dev->dev, "device-wakeup",
- GPIOD_OUT_LOW);
+ dev->device_wakeup = devm_gpiod_get_optional(dev->dev, "device-wakeup",
+ GPIOD_OUT_LOW);
if (IS_ERR(dev->device_wakeup))
return PTR_ERR(dev->device_wakeup);
- dev->shutdown = devm_gpiod_get(dev->dev, "shutdown", GPIOD_OUT_LOW);
+ dev->shutdown = devm_gpiod_get_optional(dev->dev, "shutdown",
+ GPIOD_OUT_LOW);
if (IS_ERR(dev->shutdown))
return PTR_ERR(dev->shutdown);
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 1b4417a623a4..2f30dcad96bd 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -650,7 +650,7 @@ static int download_firmware(struct ll_device *lldev)
break;
case ACTION_DELAY: /* sleep */
bt_dev_info(lldev->hu.hdev, "sleep command in scr");
- mdelay(((struct bts_action_delay *)action_ptr)->msec);
+ msleep(((struct bts_action_delay *)action_ptr)->msec);
break;
}
len -= (sizeof(struct bts_action) +
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 4d46003c46cf..cdaeeea7999c 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -630,7 +630,7 @@ static int sysc_init_dts_quirks(struct sysc *ddata)
for (i = 0; i < ARRAY_SIZE(sysc_dts_quirks); i++) {
prop = of_get_property(np, sysc_dts_quirks[i].name, &len);
if (!prop)
- break;
+ continue;
ddata->cfg.quirks |= sysc_dts_quirks[i].mask;
}
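The one-word ti-sysc fix above matters: with break, the first quirk property missing from the device node ended the scan and silently dropped every quirk listed after it; continue keeps scanning. A compilable sketch of the corrected loop (the property lookup is stubbed; names are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct dts_quirk {
	const char *name;
	uint32_t mask;
};

static const struct dts_quirk quirks[] = {
	{ "ti,no-idle-on-init",  1 << 0 },
	{ "ti,no-reset-on-init", 1 << 1 },
};

/* Stand-in for of_get_property(): pretend only the second property
 * exists in the device node.
 */
static int node_has_property(const char *name)
{
	return strcmp(name, "ti,no-reset-on-init") == 0;
}

static uint32_t collect_quirks(void)
{
	uint32_t flags = 0;
	size_t i;

	for (i = 0; i < sizeof(quirks) / sizeof(quirks[0]); i++) {
		if (!node_has_property(quirks[i].name))
			continue;  /* break here would skip later quirks */
		flags |= quirks[i].mask;
	}
	return flags;
}

int main(void)
{
	printf("quirks = 0x%x\n", collect_quirks()); /* 0x2, not 0x0 */
	return 0;
}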
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index d1f5bb534e0e..6e9df558325b 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -162,7 +162,7 @@ static int via_rng_init(struct hwrng *rng)
/* Enable secondary noise source on CPUs where it is present. */
/* Nehemiah stepping 8 and higher */
- if ((c->x86_model == 9) && (c->x86_mask > 7))
+ if ((c->x86_model == 9) && (c->x86_stepping > 7))
lo |= VIA_NOISESRC2;
/* Esther */
diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c
index 4d1dc8b46877..f95b9c75175b 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -457,7 +457,7 @@ static int st33zp24_recv(struct tpm_chip *chip, unsigned char *buf,
size_t count)
{
int size = 0;
- int expected;
+ u32 expected;
if (!chip)
return -EBUSY;
@@ -474,7 +474,7 @@ static int st33zp24_recv(struct tpm_chip *chip, unsigned char *buf,
}
expected = be32_to_cpu(*(__be32 *)(buf + 2));
- if (expected > count) {
+ if (expected > count || expected < TPM_HEADER_SIZE) {
size = -EIO;
goto out;
}
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 76df4fbcf089..9e80a953d693 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -1190,6 +1190,10 @@ int tpm_get_random(struct tpm_chip *chip, u8 *out, size_t max)
break;
recd = be32_to_cpu(tpm_cmd.params.getrandom_out.rng_data_len);
+ if (recd > num_bytes) {
+ total = -EFAULT;
+ break;
+ }
rlength = be32_to_cpu(tpm_cmd.header.out.length);
if (rlength < offsetof(struct tpm_getrandom_out, rng_data) +
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index c17e75348a99..a700f8f9ead7 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -683,6 +683,10 @@ static int tpm2_unseal_cmd(struct tpm_chip *chip,
if (!rc) {
data_len = be16_to_cpup(
(__be16 *) &buf.data[TPM_HEADER_SIZE + 4]);
+ if (data_len < MIN_KEY_SIZE || data_len > MAX_KEY_SIZE + 1) {
+ rc = -EFAULT;
+ goto out;
+ }
rlength = be32_to_cpu(((struct tpm2_cmd *)&buf)
->header.out.length);
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index c1dd39eaaeeb..6116cd05e228 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -473,7 +473,8 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count)
static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count)
{
int size = 0;
- int expected, status;
+ int status;
+ u32 expected;
if (count < TPM_HEADER_SIZE) {
size = -EIO;
@@ -488,7 +489,7 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count)
}
expected = be32_to_cpu(*(__be32 *)(buf + 2));
- if ((size_t) expected > count) {
+ if (((size_t) expected > count) || (expected < TPM_HEADER_SIZE)) {
size = -EIO;
goto out;
}
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
index c6428771841f..caa86b19c76d 100644
--- a/drivers/char/tpm/tpm_i2c_nuvoton.c
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -281,7 +281,11 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
struct device *dev = chip->dev.parent;
struct i2c_client *client = to_i2c_client(dev);
s32 rc;
- int expected, status, burst_count, retries, size = 0;
+ int status;
+ int burst_count;
+ int retries;
+ int size = 0;
+ u32 expected;
if (count < TPM_HEADER_SIZE) {
i2c_nuvoton_ready(chip); /* return to idle */
@@ -323,7 +327,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
* to machine native
*/
expected = be32_to_cpu(*(__be32 *) (buf + 2));
- if (expected > count) {
+ if (expected > count || expected < size) {
dev_err(dev, "%s() expected > count\n", __func__);
size = -EIO;
continue;
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 183a5f54d875..da074e3db19b 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -270,7 +270,8 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
{
struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
int size = 0;
- int expected, status;
+ int status;
+ u32 expected;
if (count < TPM_HEADER_SIZE) {
size = -EIO;
@@ -285,7 +286,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
}
expected = be32_to_cpu(*(__be32 *) (buf + 2));
- if (expected > count) {
+ if (expected > count || expected < TPM_HEADER_SIZE) {
size = -EIO;
goto out;
}
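The TPM hunks in this series all apply the same hardening: the length word read from the response header becomes unsigned and is rejected both when it exceeds the caller's buffer and when it is smaller than the 10-byte header, closing the underflow a signed compare allowed. A standalone sketch of that bounded read (names are illustrative):

#include <stdint.h>
#include <stddef.h>

#define TPM_HEADER_SIZE 10

/* Read the big-endian u32 at an arbitrary offset, as the TPM response
 * header lays it out, without trusting alignment.
 */
static uint32_t be32_at(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

/* Returns the validated response length, or -1 on a malformed header.
 * Keeping 'expected' unsigned and checking both bounds is the point:
 * a value below TPM_HEADER_SIZE (or a huge one that a signed compare
 * would have waved through) is rejected before anything is copied.
 */
static long tpm_validate_len(const uint8_t *buf, size_t count)
{
	uint32_t expected;

	if (count < TPM_HEADER_SIZE)
		return -1;

	expected = be32_at(buf + 2);
	if (expected > count || expected < TPM_HEADER_SIZE)
		return -1;

	return (long)expected;
}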
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 44301a3d9963..a07f6451694a 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -449,17 +449,17 @@ struct bcm2835_pll_ana_bits {
static const struct bcm2835_pll_ana_bits bcm2835_ana_default = {
.mask0 = 0,
.set0 = 0,
- .mask1 = (u32)~(A2W_PLL_KI_MASK | A2W_PLL_KP_MASK),
+ .mask1 = A2W_PLL_KI_MASK | A2W_PLL_KP_MASK,
.set1 = (2 << A2W_PLL_KI_SHIFT) | (8 << A2W_PLL_KP_SHIFT),
- .mask3 = (u32)~A2W_PLL_KA_MASK,
+ .mask3 = A2W_PLL_KA_MASK,
.set3 = (2 << A2W_PLL_KA_SHIFT),
.fb_prediv_mask = BIT(14),
};
static const struct bcm2835_pll_ana_bits bcm2835_ana_pllh = {
- .mask0 = (u32)~(A2W_PLLH_KA_MASK | A2W_PLLH_KI_LOW_MASK),
+ .mask0 = A2W_PLLH_KA_MASK | A2W_PLLH_KI_LOW_MASK,
.set0 = (2 << A2W_PLLH_KA_SHIFT) | (2 << A2W_PLLH_KI_LOW_SHIFT),
- .mask1 = (u32)~(A2W_PLLH_KI_HIGH_MASK | A2W_PLLH_KP_MASK),
+ .mask1 = A2W_PLLH_KI_HIGH_MASK | A2W_PLLH_KP_MASK,
.set1 = (6 << A2W_PLLH_KP_SHIFT),
.mask3 = 0,
.set3 = 0,
@@ -623,8 +623,10 @@ static int bcm2835_pll_on(struct clk_hw *hw)
~A2W_PLL_CTRL_PWRDN);
/* Take the PLL out of reset. */
+ spin_lock(&cprman->regs_lock);
cprman_write(cprman, data->cm_ctrl_reg,
cprman_read(cprman, data->cm_ctrl_reg) & ~CM_PLL_ANARST);
+ spin_unlock(&cprman->regs_lock);
/* Wait for the PLL to lock. */
timeout = ktime_add_ns(ktime_get(), LOCK_TIMEOUT_NS);
@@ -701,9 +703,11 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw,
}
/* Unmask the reference clock from the oscillator. */
+ spin_lock(&cprman->regs_lock);
cprman_write(cprman, A2W_XOSC_CTRL,
cprman_read(cprman, A2W_XOSC_CTRL) |
data->reference_enable_mask);
+ spin_unlock(&cprman->regs_lock);
if (do_ana_setup_first)
bcm2835_pll_write_ana(cprman, data->ana_reg_base, ana);
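Two fixes meet in the bcm2835 hunk: the ANA mask fields are now stored un-inverted (presumably because the write helper applies them as reg = (reg & ~mask) | set), and read-modify-write cycles on registers shared with other clocks are serialized under regs_lock. A standalone sketch of the mask/set convention (the register is simulated and the field placements are illustrative, not the real A2W layout):

#include <stdint.h>
#include <stdio.h>

/* Simulated 32-bit hardware register. */
static uint32_t ana_reg = 0xffffffffu;

/* Update convention matching the fixed driver: 'mask' names the bits
 * to clear, 'set' the bits to set afterwards. Storing the mask
 * un-inverted and inverting it at the single point of use avoids the
 * double-negation bug the hunk above removes.
 */
static void ana_update(uint32_t mask, uint32_t set)
{
	ana_reg = (ana_reg & ~mask) | set;
}

int main(void)
{
	uint32_t ki_mask = 0x7u << 19;	/* illustrative field placement */
	uint32_t kp_mask = 0xfu << 15;

	ana_update(ki_mask | kp_mask, (2u << 19) | (8u << 15));
	printf("ana = 0x%08x\n", ana_reg);
	return 0;
}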
diff --git a/drivers/clk/clk-aspeed.c b/drivers/clk/clk-aspeed.c
index 9f7f931d6b2f..5eb50c31e455 100644
--- a/drivers/clk/clk-aspeed.c
+++ b/drivers/clk/clk-aspeed.c
@@ -205,6 +205,18 @@ static const struct aspeed_clk_soc_data ast2400_data = {
.calc_pll = aspeed_ast2400_calc_pll,
};
+static int aspeed_clk_is_enabled(struct clk_hw *hw)
+{
+ struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
+ u32 clk = BIT(gate->clock_idx);
+ u32 enval = (gate->flags & CLK_GATE_SET_TO_DISABLE) ? 0 : clk;
+ u32 reg;
+
+ regmap_read(gate->map, ASPEED_CLK_STOP_CTRL, &reg);
+
+ return ((reg & clk) == enval) ? 1 : 0;
+}
+
static int aspeed_clk_enable(struct clk_hw *hw)
{
struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
@@ -215,6 +227,11 @@ static int aspeed_clk_enable(struct clk_hw *hw)
spin_lock_irqsave(gate->lock, flags);
+ if (aspeed_clk_is_enabled(hw)) {
+ spin_unlock_irqrestore(gate->lock, flags);
+ return 0;
+ }
+
if (gate->reset_idx >= 0) {
/* Put IP in reset */
regmap_update_bits(gate->map, ASPEED_RESET_CTRL, rst, rst);
@@ -255,17 +272,6 @@ static void aspeed_clk_disable(struct clk_hw *hw)
spin_unlock_irqrestore(gate->lock, flags);
}
-static int aspeed_clk_is_enabled(struct clk_hw *hw)
-{
- struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
- u32 clk = BIT(gate->clock_idx);
- u32 reg;
-
- regmap_read(gate->map, ASPEED_CLK_STOP_CTRL, &reg);
-
- return (reg & clk) ? 0 : 1;
-}
-
static const struct clk_ops aspeed_clk_gate_ops = {
.enable = aspeed_clk_enable,
.disable = aspeed_clk_disable,
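The aspeed rework does two things: is_enabled() now accounts for gate polarity (a CLK_GATE_SET_TO_DISABLE gate reads back inverted), and enable() returns early for a clock that is already running so it is not pulsed through reset. A compilable sketch of the polarity-aware check (the register read is simulated; the "set bit means stopped" polarity is an assumption taken from the flag name):

#include <stdint.h>
#include <stdio.h>

#define CLK_GATE_SET_TO_DISABLE (1u << 0)

struct gate {
	unsigned int clock_idx;
	unsigned int flags;
};

/* Simulated clock-stop control register. */
static uint32_t clk_stop_ctrl = 0x0;

static int gate_is_enabled(const struct gate *g)
{
	uint32_t clk = 1u << g->clock_idx;
	/* value the register holds when the clock is running */
	uint32_t enval = (g->flags & CLK_GATE_SET_TO_DISABLE) ? 0 : clk;

	return (clk_stop_ctrl & clk) == enval;
}

int main(void)
{
	struct gate g = { .clock_idx = 3,
			  .flags = CLK_GATE_SET_TO_DISABLE };

	printf("%d\n", gate_is_enabled(&g)); /* 1: bit clear == running */
	return 0;
}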
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 0f686a9dac3e..076d4244d672 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1125,8 +1125,10 @@ static int clk_core_round_rate_nolock(struct clk_core *core,
{
lockdep_assert_held(&prepare_lock);
- if (!core)
+ if (!core) {
+ req->rate = 0;
return 0;
+ }
clk_core_init_rate_req(core, req);
@@ -2309,8 +2311,11 @@ static int clk_core_set_phase_nolock(struct clk_core *core, int degrees)
trace_clk_set_phase(core, degrees);
- if (core->ops->set_phase)
+ if (core->ops->set_phase) {
ret = core->ops->set_phase(core->hw, degrees);
+ if (!ret)
+ core->phase = degrees;
+ }
trace_clk_set_phase_complete(core, degrees);
@@ -2968,22 +2973,37 @@ static int __clk_core_init(struct clk_core *core)
core->rate = core->req_rate = rate;
/*
+ * Enable CLK_IS_CRITICAL clocks so newly added critical clocks
+ * don't get accidentally disabled when walking the orphan tree and
+ * reparenting clocks
+ */
+ if (core->flags & CLK_IS_CRITICAL) {
+ unsigned long flags;
+
+ clk_core_prepare(core);
+
+ flags = clk_enable_lock();
+ clk_core_enable(core);
+ clk_enable_unlock(flags);
+ }
+
+ /*
* walk the list of orphan clocks and reparent any that newly finds a
* parent.
*/
hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
struct clk_core *parent = __clk_init_parent(orphan);
- unsigned long flags;
/*
- * we could call __clk_set_parent, but that would result in a
- * redundant call to the .set_rate op, if it exists
+ * We need to use __clk_set_parent_before() and _after() to
+ * properly migrate any prepare/enable count of the orphan
+ * clock. This is important for CLK_IS_CRITICAL clocks, which
+ * are enabled during init but might not have a parent yet.
*/
if (parent) {
/* update the clk tree topology */
- flags = clk_enable_lock();
- clk_reparent(orphan, parent);
- clk_enable_unlock(flags);
+ __clk_set_parent_before(orphan, parent);
+ __clk_set_parent_after(orphan, parent, NULL);
__clk_recalc_accuracies(orphan);
__clk_recalc_rates(orphan, 0);
}
@@ -3000,16 +3020,6 @@ static int __clk_core_init(struct clk_core *core)
if (core->ops->init)
core->ops->init(core->hw);
- if (core->flags & CLK_IS_CRITICAL) {
- unsigned long flags;
-
- clk_core_prepare(core);
-
- flags = clk_enable_lock();
- clk_core_enable(core);
- clk_enable_unlock(flags);
- }
-
kref_init(&core->ref);
out:
clk_pm_runtime_put(core);
diff --git a/drivers/clk/hisilicon/clk-hi3660-stub.c b/drivers/clk/hisilicon/clk-hi3660-stub.c
index 9b6c72bbddf9..e8b2c43b1bb8 100644
--- a/drivers/clk/hisilicon/clk-hi3660-stub.c
+++ b/drivers/clk/hisilicon/clk-hi3660-stub.c
@@ -149,6 +149,8 @@ static int hi3660_stub_clk_probe(struct platform_device *pdev)
return PTR_ERR(stub_clk_chan.mbox);
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -EINVAL;
freq_reg = devm_ioremap(dev, res->start, resource_size(res));
if (!freq_reg)
return -ENOMEM;
diff --git a/drivers/clk/imx/clk-imx51-imx53.c b/drivers/clk/imx/clk-imx51-imx53.c
index c864992e6983..caa8bd40692c 100644
--- a/drivers/clk/imx/clk-imx51-imx53.c
+++ b/drivers/clk/imx/clk-imx51-imx53.c
@@ -131,7 +131,17 @@ static const char *ieee1588_sels[] = { "pll3_sw", "pll4_sw", "dummy" /* usbphy2_
static struct clk *clk[IMX5_CLK_END];
static struct clk_onecell_data clk_data;
-static struct clk ** const uart_clks[] __initconst = {
+static struct clk ** const uart_clks_mx51[] __initconst = {
+ &clk[IMX5_CLK_UART1_IPG_GATE],
+ &clk[IMX5_CLK_UART1_PER_GATE],
+ &clk[IMX5_CLK_UART2_IPG_GATE],
+ &clk[IMX5_CLK_UART2_PER_GATE],
+ &clk[IMX5_CLK_UART3_IPG_GATE],
+ &clk[IMX5_CLK_UART3_PER_GATE],
+ NULL
+};
+
+static struct clk ** const uart_clks_mx50_mx53[] __initconst = {
&clk[IMX5_CLK_UART1_IPG_GATE],
&clk[IMX5_CLK_UART1_PER_GATE],
&clk[IMX5_CLK_UART2_IPG_GATE],
@@ -321,8 +331,6 @@ static void __init mx5_clocks_common_init(void __iomem *ccm_base)
clk_prepare_enable(clk[IMX5_CLK_TMAX1]);
clk_prepare_enable(clk[IMX5_CLK_TMAX2]); /* esdhc2, fec */
clk_prepare_enable(clk[IMX5_CLK_TMAX3]); /* esdhc1, esdhc4 */
-
- imx_register_uart_clocks(uart_clks);
}
static void __init mx50_clocks_init(struct device_node *np)
@@ -388,6 +396,8 @@ static void __init mx50_clocks_init(struct device_node *np)
r = clk_round_rate(clk[IMX5_CLK_USBOH3_PER_GATE], 54000000);
clk_set_rate(clk[IMX5_CLK_USBOH3_PER_GATE], r);
+
+ imx_register_uart_clocks(uart_clks_mx50_mx53);
}
CLK_OF_DECLARE(imx50_ccm, "fsl,imx50-ccm", mx50_clocks_init);
@@ -477,6 +487,8 @@ static void __init mx51_clocks_init(struct device_node *np)
val = readl(MXC_CCM_CLPCR);
val |= 1 << 23;
writel(val, MXC_CCM_CLPCR);
+
+ imx_register_uart_clocks(uart_clks_mx51);
}
CLK_OF_DECLARE(imx51_ccm, "fsl,imx51-ccm", mx51_clocks_init);
@@ -606,5 +618,7 @@ static void __init mx53_clocks_init(struct device_node *np)
r = clk_round_rate(clk[IMX5_CLK_USBOH3_PER_GATE], 54000000);
clk_set_rate(clk[IMX5_CLK_USBOH3_PER_GATE], r);
+
+ imx_register_uart_clocks(uart_clks_mx50_mx53);
}
CLK_OF_DECLARE(imx53_ccm, "fsl,imx53-ccm", mx53_clocks_init);
diff --git a/drivers/clk/qcom/apcs-msm8916.c b/drivers/clk/qcom/apcs-msm8916.c
index 246957f1a413..b1cc8dbcd327 100644
--- a/drivers/clk/qcom/apcs-msm8916.c
+++ b/drivers/clk/qcom/apcs-msm8916.c
@@ -49,11 +49,10 @@ static int qcom_apcs_msm8916_clk_probe(struct platform_device *pdev)
struct clk_regmap_mux_div *a53cc;
struct regmap *regmap;
struct clk_init_data init = { };
- int ret;
+ int ret = -ENODEV;
regmap = dev_get_regmap(parent, NULL);
- if (IS_ERR(regmap)) {
- ret = PTR_ERR(regmap);
+ if (!regmap) {
dev_err(dev, "failed to get regmap: %d\n", ret);
return ret;
}
diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
index 72b16ed1012b..3b97f60540ad 100644
--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
@@ -762,7 +762,7 @@ static struct ccu_mp out_a_clk = {
.features = CCU_FEATURE_FIXED_PREDIV,
.hw.init = CLK_HW_INIT_PARENTS("out-a",
clk_out_parents,
- &ccu_div_ops,
+ &ccu_mp_ops,
0),
},
};
@@ -783,7 +783,7 @@ static struct ccu_mp out_b_clk = {
.features = CCU_FEATURE_FIXED_PREDIV,
.hw.init = CLK_HW_INIT_PARENTS("out-b",
clk_out_parents,
- &ccu_div_ops,
+ &ccu_mp_ops,
0),
},
};
@@ -804,7 +804,7 @@ static struct ccu_mp out_c_clk = {
.features = CCU_FEATURE_FIXED_PREDIV,
.hw.init = CLK_HW_INIT_PARENTS("out-c",
clk_out_parents,
- &ccu_div_ops,
+ &ccu_mp_ops,
0),
},
};
diff --git a/drivers/clk/ti/clk-33xx.c b/drivers/clk/ti/clk-33xx.c
index 612491a26070..12e0a2d19911 100644
--- a/drivers/clk/ti/clk-33xx.c
+++ b/drivers/clk/ti/clk-33xx.c
@@ -45,7 +45,7 @@ static const struct omap_clkctrl_bit_data am3_gpio4_bit_data[] __initconst = {
static const struct omap_clkctrl_reg_data am3_l4_per_clkctrl_regs[] __initconst = {
{ AM3_CPGMAC0_CLKCTRL, NULL, CLKF_SW_SUP, "cpsw_125mhz_gclk", "cpsw_125mhz_clkdm" },
- { AM3_LCDC_CLKCTRL, NULL, CLKF_SW_SUP, "lcd_gclk", "lcdc_clkdm" },
+ { AM3_LCDC_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_SET_RATE_PARENT, "lcd_gclk", "lcdc_clkdm" },
{ AM3_USB_OTG_HS_CLKCTRL, NULL, CLKF_SW_SUP, "usbotg_fck", "l3s_clkdm" },
{ AM3_TPTC0_CLKCTRL, NULL, CLKF_SW_SUP, "l3_gclk", "l3_clkdm" },
{ AM3_EMIF_CLKCTRL, NULL, CLKF_SW_SUP, "dpll_ddr_m2_div2_ck", "l3_clkdm" },
diff --git a/drivers/clk/ti/clk-43xx.c b/drivers/clk/ti/clk-43xx.c
index 2b7c2e017665..63c5ddb50187 100644
--- a/drivers/clk/ti/clk-43xx.c
+++ b/drivers/clk/ti/clk-43xx.c
@@ -187,7 +187,7 @@ static const struct omap_clkctrl_reg_data am4_l4_per_clkctrl_regs[] __initconst
{ AM4_OCP2SCP0_CLKCTRL, NULL, CLKF_SW_SUP, "l4ls_gclk" },
{ AM4_OCP2SCP1_CLKCTRL, NULL, CLKF_SW_SUP, "l4ls_gclk" },
{ AM4_EMIF_CLKCTRL, NULL, CLKF_SW_SUP, "dpll_ddr_m2_ck", "emif_clkdm" },
- { AM4_DSS_CORE_CLKCTRL, NULL, CLKF_SW_SUP, "disp_clk", "dss_clkdm" },
+ { AM4_DSS_CORE_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_SET_RATE_PARENT, "disp_clk", "dss_clkdm" },
{ AM4_CPGMAC0_CLKCTRL, NULL, CLKF_SW_SUP, "cpsw_125mhz_gclk", "cpsw_125mhz_clkdm" },
{ 0 },
};
diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c
index afa0d6bfc5c1..421b05392220 100644
--- a/drivers/clk/ti/clkctrl.c
+++ b/drivers/clk/ti/clkctrl.c
@@ -537,6 +537,8 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node)
init.parent_names = &reg_data->parent;
init.num_parents = 1;
init.flags = 0;
+ if (reg_data->flags & CLKF_SET_RATE_PARENT)
+ init.flags |= CLK_SET_RATE_PARENT;
init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d",
node->parent->name, node->name,
reg_data->offset, 0);
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index b3b4ed9b6874..d2e5382821a4 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -386,6 +386,7 @@ config ATMEL_PIT
config ATMEL_ST
bool "Atmel ST timer support" if COMPILE_TEST
+ depends on HAS_IOMEM
select TIMER_OF
select MFD_SYSCON
help
diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c
index 4927355f9cbe..471b428d8034 100644
--- a/drivers/clocksource/arc_timer.c
+++ b/drivers/clocksource/arc_timer.c
@@ -251,9 +251,14 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id)
int irq_reenable = clockevent_state_periodic(evt);
/*
- * Any write to CTRL reg ACks the interrupt, we rewrite the
- * Count when [N]ot [H]alted bit.
- * And re-arm it if perioid by [I]nterrupt [E]nable bit
+ * 1. ACK the interrupt
+ * - For ARC700, any write to CTRL reg ACKs it, so just rewrite
+ * Count when [N]ot [H]alted bit.
+ * - For HS3x, it is a bit subtle. On taken count-down interrupt,
+ * IP bit [3] is set, which needs to be cleared for ACK'ing.
+ * The write below can only update the other two bits, hence
+ * explicitly clears IP bit
+ * 2. Re-arm interrupt if periodic by writing to IE bit [0]
*/
write_aux_reg(ARC_REG_TIMER0_CTRL, irq_reenable | TIMER_CTRL_NH);
diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c
index 3ee7e6fea621..846d18daf893 100644
--- a/drivers/clocksource/fsl_ftm_timer.c
+++ b/drivers/clocksource/fsl_ftm_timer.c
@@ -281,7 +281,7 @@ static int __init __ftm_clk_init(struct device_node *np, char *cnt_name,
static unsigned long __init ftm_clk_init(struct device_node *np)
{
- unsigned long freq;
+ long freq;
freq = __ftm_clk_init(np, "ftm-evt-counter-en", "ftm-evt");
if (freq <= 0)
diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c
index a04808a21d4e..986b6796b631 100644
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -166,7 +166,7 @@ static int __init __gic_clocksource_init(void)
/* Set clocksource mask. */
count_width = read_gic_config() & GIC_CONFIG_COUNTBITS;
- count_width >>= __fls(GIC_CONFIG_COUNTBITS);
+ count_width >>= __ffs(GIC_CONFIG_COUNTBITS);
count_width *= 4;
count_width += 32;
gic_clocksource.mask = CLOCKSOURCE_MASK(count_width);
@@ -205,12 +205,12 @@ static int __init gic_clocksource_of_init(struct device_node *node)
} else if (of_property_read_u32(node, "clock-frequency",
&gic_frequency)) {
pr_err("GIC frequency not specified.\n");
- return -EINVAL;;
+ return -EINVAL;
}
gic_timer_irq = irq_of_parse_and_map(node, 0);
if (!gic_timer_irq) {
pr_err("GIC timer IRQ not specified.\n");
- return -EINVAL;;
+ return -EINVAL;
}
ret = __gic_clocksource_init();
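The __fls-to-__ffs swap is a classic field-extraction bug: to shift a masked field down to bit 0 you divide out the mask's lowest set bit, not its highest. A standalone illustration using the GCC/Clang builtin equivalent of __ffs() (the field placement of GIC_CONFIG_COUNTBITS is assumed for the example):

#include <stdint.h>
#include <stdio.h>

/* Assumed placement for illustration: a 4-bit field at bits 24..27. */
#define GIC_CONFIG_COUNTBITS (0xfu << 24)

static unsigned int field_get(uint32_t reg, uint32_t mask)
{
	/* __builtin_ctz(mask) is the index of the lowest set bit,
	 * i.e. the kernel's __ffs(); shifting by the highest set bit
	 * (__fls) over-shifts and keeps at most the field's top bit.
	 */
	return (reg & mask) >> __builtin_ctz(mask);
}

int main(void)
{
	uint32_t config = 0x04000000;	/* field value 4 */
	unsigned int width = field_get(config, GIC_CONFIG_COUNTBITS) * 4 + 32;

	printf("counter width = %u bits\n", width); /* 48 */
	return 0;
}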
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 2a3fe83ec337..3b56ea3f52af 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -334,7 +334,7 @@ static int __init sun5i_timer_init(struct device_node *node)
timer_base = of_io_request_and_map(node, 0, of_node_full_name(node));
if (IS_ERR(timer_base)) {
pr_err("Can't map registers\n");
- return PTR_ERR(timer_base);;
+ return PTR_ERR(timer_base);
}
irq = irq_of_parse_and_map(node, 0);
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 3a88e33b0cfe..fb586e09682d 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -44,10 +44,10 @@ config ARM_DT_BL_CPUFREQ
config ARM_SCPI_CPUFREQ
tristate "SCPI based CPUfreq driver"
- depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
+ depends on ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
help
- This adds the CPUfreq driver support for ARM big.LITTLE platforms
- using SCPI protocol for CPU power management.
+ This adds CPUfreq driver support for ARM platforms that use the
+ SCPI protocol for CPU power management.
This driver uses SCPI Message Protocol driver to interact with the
firmware providing the CPU DVFS functionality.
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 3a2ca0f79daf..d0c34df0529c 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -629,7 +629,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
if (c->x86_vendor == X86_VENDOR_INTEL) {
if ((c->x86 == 15) &&
(c->x86_model == 6) &&
- (c->x86_mask == 8)) {
+ (c->x86_stepping == 8)) {
pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
return -ENODEV;
}
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 942632a27b50..f730b6528c18 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -775,7 +775,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
break;
case 7:
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
case 0:
longhaul_version = TYPE_LONGHAUL_V1;
cpu_model = CPU_SAMUEL2;
@@ -787,7 +787,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
break;
case 1 ... 15:
longhaul_version = TYPE_LONGHAUL_V2;
- if (c->x86_mask < 8) {
+ if (c->x86_stepping < 8) {
cpu_model = CPU_SAMUEL2;
cpuname = "C3 'Samuel 2' [C5B]";
} else {
@@ -814,7 +814,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
numscales = 32;
memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
case 0 ... 1:
cpu_model = CPU_NEHEMIAH;
cpuname = "C3 'Nehemiah A' [C5XLOE]";
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
index fd77812313f3..a25741b1281b 100644
--- a/drivers/cpufreq/p4-clockmod.c
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -168,7 +168,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
#endif
/* Errata workaround */
- cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
+ cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping;
switch (cpuid) {
case 0x0f07:
case 0x0f0a:
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index 80ac313e6c59..302e9ce793a0 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -131,7 +131,7 @@ static int check_powernow(void)
return 0;
}
- if ((c->x86_model == 6) && (c->x86_mask == 0)) {
+ if ((c->x86_model == 6) && (c->x86_stepping == 0)) {
pr_info("K7 660[A0] core detected, enabling errata workarounds\n");
have_a0 = 1;
}
diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c
index 7b596fa38ad2..6bebc1f9f55a 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq.c
@@ -351,7 +351,13 @@ struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name)
static int s3c_cpufreq_init(struct cpufreq_policy *policy)
{
policy->clk = clk_arm;
- return cpufreq_generic_init(policy, ftab, cpu_cur.info->latency);
+
+ policy->cpuinfo.transition_latency = cpu_cur.info->latency;
+
+ if (ftab)
+ return cpufreq_table_validate_and_show(policy, ftab);
+
+ return 0;
}
static int __init s3c_cpufreq_initclks(void)
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index c32a833e1b00..d300a163945f 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -51,15 +51,23 @@ static unsigned int scpi_cpufreq_get_rate(unsigned int cpu)
static int
scpi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index)
{
+ unsigned long freq = policy->freq_table[index].frequency;
struct scpi_data *priv = policy->driver_data;
- u64 rate = policy->freq_table[index].frequency * 1000;
+ u64 rate = freq * 1000;
int ret;
ret = clk_set_rate(priv->clk, rate);
- if (!ret && (clk_get_rate(priv->clk) != rate))
- ret = -EIO;
- return ret;
+ if (ret)
+ return ret;
+
+ if (clk_get_rate(priv->clk) != rate)
+ return -EIO;
+
+ arch_set_freq_scale(policy->related_cpus, freq,
+ policy->cpuinfo.max_freq);
+
+ return 0;
}
static int
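The rewritten scpi set_target unrolls its error handling into early returns and, only after a verified rate change, reports the new frequency via arch_set_freq_scale(). A sketch of that set-then-verify shape with the clock calls stubbed out (names and values are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Stubs standing in for the clock framework. */
static uint64_t cur_rate;
static int clk_set_rate_stub(uint64_t rate) { cur_rate = rate; return 0; }
static uint64_t clk_get_rate_stub(void) { return cur_rate; }

/* Set a CPU clock and verify the hardware actually took the rate:
 * clk_set_rate() succeeding does not guarantee the PLL settled on the
 * exact frequency, so the read-back turns a silent mismatch into an
 * explicit -EIO-style failure before the scheduler is told anything.
 */
static int set_target(unsigned int freq_khz)
{
	uint64_t rate = (uint64_t)freq_khz * 1000;
	int ret = clk_set_rate_stub(rate);

	if (ret)
		return ret;
	if (clk_get_rate_stub() != rate)
		return -5;	/* -EIO */

	/* success: this is the point where the driver would call
	 * arch_set_freq_scale()
	 */
	return 0;
}

int main(void)
{
	printf("%d\n", set_target(1200000)); /* 0 on success */
	return 0;
}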
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 41bc5397f4bb..4fa5adf16c70 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -37,7 +37,7 @@ struct cpu_id
{
__u8 x86; /* CPU family */
__u8 x86_model; /* model */
- __u8 x86_mask; /* stepping */
+ __u8 x86_stepping; /* stepping */
};
enum {
@@ -277,7 +277,7 @@ static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
{
if ((c->x86 == x->x86) &&
(c->x86_model == x->x86_model) &&
- (c->x86_mask == x->x86_mask))
+ (c->x86_stepping == x->x86_stepping))
return 1;
return 0;
}
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index 8085ec9000d1..e3a9962ee410 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -272,9 +272,9 @@ unsigned int speedstep_detect_processor(void)
ebx = cpuid_ebx(0x00000001);
ebx &= 0x000000FF;
- pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+ pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping);
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
case 4:
/*
* B-stepping [M-P4-M]
@@ -361,7 +361,7 @@ unsigned int speedstep_detect_processor(void)
msr_lo, msr_hi);
if ((msr_hi & (1<<18)) &&
(relaxed_check ? 1 : (msr_hi & (3<<24)))) {
- if (c->x86_mask == 0x01) {
+ if (c->x86_stepping == 0x01) {
pr_debug("early PIII version\n");
return SPEEDSTEP_CPU_PIII_C_EARLY;
} else
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 75d280cb2dc0..e843cf410373 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
* without any error (HW optimizations for later
* CAAM eras), then try again.
*/
+ if (ret)
+ break;
+
rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
- !(rdsta_val & (1 << sh_idx)))
+ !(rdsta_val & (1 << sh_idx))) {
ret = -EAGAIN;
- if (ret)
break;
+ }
+
dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx);
/* Clear the contents before recreating the descriptor */
memset(desc, 0x00, CAAM_CMD_SZ * 7);
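
The reordered loop above checks the job result before inspecting the RNG state, so a hard failure stops the sequence at once and -EAGAIN is raised only for a successful run whose state handle did not come up. A runnable skeleton of that decision, with hypothetical stand-ins for the CAAM calls:

#include <errno.h>
#include <stdio.h>

static int run_descriptor(void)      { return 0; } /* hypothetical job run */
static int handle_instantiated(void) { return 1; } /* hypothetical status */

int main(void)
{
	int ret = run_descriptor();

	/* Bail out on a failed job *before* looking at the RNG state;
	 * only a successful run may be downgraded to -EAGAIN.
	 */
	if (!ret && !handle_instantiated())
		ret = -EAGAIN;

	printf("ret = %d\n", ret);
	return 0;
}
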
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index fcfa5b1eae61..b3afb6cc9d72 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -211,7 +211,7 @@ static int __sev_platform_shutdown_locked(int *error)
{
int ret;
- ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error);
+ ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
if (ret)
return ret;
@@ -271,7 +271,7 @@ static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
return rc;
}
- return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error);
+ return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
}
static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
@@ -299,7 +299,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
return rc;
}
- return __sev_do_cmd_locked(cmd, 0, &argp->error);
+ return __sev_do_cmd_locked(cmd, NULL, &argp->error);
}
static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
@@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sev_guest_decommission);
int sev_guest_df_flush(int *error)
{
- return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error);
+ return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
}
EXPORT_SYMBOL_GPL(sev_guest_df_flush);
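
These hunks pass NULL rather than the integer 0 for the unused data pointer of the SEV command helpers; behaviour is unchanged, but the pointer intent is explicit and sparse no longer warns. A trivial illustration (do_cmd() is hypothetical):

#include <stddef.h>
#include <stdio.h>

static int do_cmd(int cmd, void *data, int *error)
{
	(void)cmd;
	*error = 0;
	return data ? 1 : 0; /* data == NULL means "no command buffer" */
}

int main(void)
{
	int err;

	/* NULL, not 0, for a pointer argument: same value at runtime,
	 * but the type intent is visible to readers and static checkers.
	 */
	printf("%d\n", do_cmd(0, NULL, &err));
	return 0;
}
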
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 4b6642a25df5..1c6cbda56afe 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -512,7 +512,7 @@ static int __init padlock_init(void)
printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
- if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
+ if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) {
ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 188f44b7eb27..5d64c08b7f47 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -1922,15 +1922,21 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
uint32_t aes_control;
unsigned long flags;
int err;
+ u8 *iv;
aes_control = SSS_AES_KEY_CHANGE_MODE;
if (mode & FLAGS_AES_DECRYPT)
aes_control |= SSS_AES_MODE_DECRYPT;
- if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC)
+ if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) {
aes_control |= SSS_AES_CHAIN_MODE_CBC;
- else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR)
+ iv = req->info;
+ } else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) {
aes_control |= SSS_AES_CHAIN_MODE_CTR;
+ iv = req->info;
+ } else {
+ iv = NULL; /* AES_ECB */
+ }
if (dev->ctx->keylen == AES_KEYSIZE_192)
aes_control |= SSS_AES_KEY_SIZE_192;
@@ -1961,7 +1967,7 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
goto outdata_error;
SSS_AES_WRITE(dev, AES_CONTROL, aes_control);
- s5p_set_aes(dev, dev->ctx->aes_key, req->info, dev->ctx->keylen);
+ s5p_set_aes(dev, dev->ctx->aes_key, iv, dev->ctx->keylen);
s5p_set_dma_indata(dev, dev->sg_src);
s5p_set_dma_outdata(dev, dev->sg_dst);
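
The fix above programs an IV only for chaining modes; ECB passes NULL so stale request data never reaches the IV registers. The selection logic, as a small self-contained sketch (select_iv() is hypothetical):

#include <stdio.h>

enum aes_mode { AES_ECB, AES_CBC, AES_CTR };

static void *select_iv(enum aes_mode mode, void *req_info)
{
	switch (mode) {
	case AES_CBC:
	case AES_CTR:
		return req_info; /* chaining modes carry an IV */
	default:
		return NULL;     /* ECB has no IV to program */
	}
}

int main(void)
{
	unsigned char iv[16] = { 0 };

	printf("ECB iv: %p\n", select_iv(AES_ECB, iv));
	printf("CBC iv: %p\n", select_iv(AES_CBC, iv));
	return 0;
}
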
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
index 0d01d1624252..63d636424161 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
@@ -28,7 +28,7 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng);
ss = algt->ss;
- spin_lock(&ss->slock);
+ spin_lock_bh(&ss->slock);
writel(mode, ss->base + SS_CTL);
@@ -51,6 +51,6 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
}
writel(0, ss->base + SS_CTL);
- spin_unlock(&ss->slock);
- return dlen;
+ spin_unlock_bh(&ss->slock);
+ return 0;
}
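
Two things change above: the lock is taken _bh-safe, and generate() now returns 0 on success rather than the byte count, matching the crypto_rng convention that any nonzero return is an error. A user-space analogue of that convention (prng_generate() is a stand-in):

#include <stdio.h>
#include <string.h>

static int prng_generate(unsigned char *dst, size_t dlen)
{
	memset(dst, 0xaa, dlen); /* stand-in for the hardware PRNG */
	return 0;                /* 0 == success, as crypto_rng expects */
}

int main(void)
{
	unsigned char buf[8];

	if (prng_generate(buf, sizeof(buf)))
		fprintf(stderr, "prng failed\n");
	else
		printf("ok, first byte 0x%02x\n", buf[0]);
	return 0;
}
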
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 9c80e0cb1664..6882fa2f8bad 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1138,6 +1138,10 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
+ if (!src) {
+ to_talitos_ptr(ptr, 0, 0, is_sec1);
+ return 1;
+ }
if (sg_count == 1) {
to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
return sg_count;
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 473af694ad1c..ecdc292aa4e4 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -246,12 +246,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
{
long avail;
- /*
- * The device driver is allowed to sleep, in order to make the
- * memory directly accessible.
- */
- might_sleep();
-
if (!dax_dev)
return -EOPNOTSUPP;
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
index f652a0e0f5a2..3548caa9e933 100644
--- a/drivers/dma/mv_xor_v2.c
+++ b/drivers/dma/mv_xor_v2.c
@@ -163,6 +163,7 @@ struct mv_xor_v2_device {
void __iomem *dma_base;
void __iomem *glob_base;
struct clk *clk;
+ struct clk *reg_clk;
struct tasklet_struct irq_tasklet;
struct list_head free_sw_desc;
struct dma_device dmadev;
@@ -749,13 +750,26 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
if (ret)
return ret;
+ xor_dev->reg_clk = devm_clk_get(&pdev->dev, "reg");
+ if (PTR_ERR(xor_dev->reg_clk) != -ENOENT) {
+ if (!IS_ERR(xor_dev->reg_clk)) {
+ ret = clk_prepare_enable(xor_dev->reg_clk);
+ if (ret)
+ return ret;
+ } else {
+ return PTR_ERR(xor_dev->reg_clk);
+ }
+ }
+
xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto disable_reg_clk;
+ }
if (!IS_ERR(xor_dev->clk)) {
ret = clk_prepare_enable(xor_dev->clk);
if (ret)
- return ret;
+ goto disable_reg_clk;
}
ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
@@ -866,8 +880,9 @@ free_hw_desq:
free_msi_irqs:
platform_msi_domain_free_irqs(&pdev->dev);
disable_clk:
- if (!IS_ERR(xor_dev->clk))
- clk_disable_unprepare(xor_dev->clk);
+ clk_disable_unprepare(xor_dev->clk);
+disable_reg_clk:
+ clk_disable_unprepare(xor_dev->reg_clk);
return ret;
}
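
The probe above treats the "reg" clock as optional (-ENOENT is tolerated) and unwinds in reverse enable order on failure. A kernel-style sketch of the unwind, reusing the hunk's names and not buildable on its own; the labels need no IS_ERR() guards because clk_disable_unprepare() ignores NULL and ERR_PTR clocks:

/* cleanup runs in reverse of the enable order in probe */
free_msi_irqs:
	platform_msi_domain_free_irqs(&pdev->dev);
disable_clk:
	clk_disable_unprepare(xor_dev->clk);     /* safe on ERR_PTR/NULL */
disable_reg_clk:
	clk_disable_unprepare(xor_dev->reg_clk); /* safe on ERR_PTR/NULL */
	return ret;
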
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index e3ff162c03fc..d0cacdb0713e 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -917,7 +917,7 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
rcar_dmac_chan_configure_desc(chan, desc);
- max_chunk_size = (RCAR_DMATCR_MASK + 1) << desc->xfer_shift;
+ max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift;
/*
* Allocate and fill the transfer chunk descriptors. We own the only
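
The off-by-one fixed above: RCAR_DMATCR_MASK + 1 transfer units would wrap the 24-bit TCR field to zero, so the real ceiling is RCAR_DMATCR_MASK << xfer_shift. A runnable check (the xfer_shift value is hypothetical):

#include <stdio.h>

#define RCAR_DMATCR_MASK 0x00ffffffu /* 24-bit transfer-count field */

int main(void)
{
	unsigned int xfer_shift = 3; /* hypothetical 8-byte transfer unit */
	unsigned long long count = (unsigned long long)RCAR_DMATCR_MASK + 1;

	/* MASK + 1 transfers cannot be represented: the value wraps to 0
	 * in the 24-bit TCR, so the maximum chunk is MASK << xfer_shift.
	 */
	printf("count 0x%llx -> TCR field 0x%06llx\n",
	       count, count & RCAR_DMATCR_MASK);
	printf("max chunk = 0x%llx bytes\n",
	       (unsigned long long)RCAR_DMATCR_MASK << xfer_shift);
	return 0;
}
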
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 8b16ec595fa7..329cb96f886f 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3147,7 +3147,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
struct amd64_family_type *fam_type = NULL;
pvt->ext_model = boot_cpu_data.x86_model >> 4;
- pvt->stepping = boot_cpu_data.x86_mask;
+ pvt->stepping = boot_cpu_data.x86_stepping;
pvt->model = boot_cpu_data.x86_model;
pvt->fam = boot_cpu_data.x86;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index f34430f99fd8..872100215ca0 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -279,7 +279,7 @@ static const u32 correrrthrsld[] = {
* sbridge structs
*/
-#define NUM_CHANNELS 4 /* Max channels per MC */
+#define NUM_CHANNELS 6 /* Max channels per MC */
#define MAX_DIMMS 3 /* Max DIMMS per channel */
#define KNL_MAX_CHAS 38 /* KNL max num. of Cache Home Agents */
#define KNL_MAX_CHANNELS 6 /* KNL max num. of PCI channels */
diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c
index 0a44d43802fe..3ec4c715e240 100644
--- a/drivers/extcon/extcon-axp288.c
+++ b/drivers/extcon/extcon-axp288.c
@@ -1,7 +1,6 @@
/*
* extcon-axp288.c - X-Power AXP288 PMIC extcon cable detection driver
*
- * Copyright (C) 2016-2017 Hans de Goede <[email protected]>
* Copyright (C) 2015 Intel Corporation
* Author: Ramakrishna Pallala <[email protected]>
*
@@ -98,15 +97,13 @@ struct axp288_extcon_info {
struct device *dev;
struct regmap *regmap;
struct regmap_irq_chip_data *regmap_irqc;
- struct delayed_work det_work;
int irq[EXTCON_IRQ_END];
struct extcon_dev *edev;
unsigned int previous_cable;
- bool first_detect_done;
};
/* Power up/down reason string array */
-static char *axp288_pwr_up_down_info[] = {
+static const char * const axp288_pwr_up_down_info[] = {
"Last wake caused by user pressing the power button",
"Last wake caused by a charger insertion",
"Last wake caused by a battery insertion",
@@ -124,7 +121,7 @@ static char *axp288_pwr_up_down_info[] = {
*/
static void axp288_extcon_log_rsi(struct axp288_extcon_info *info)
{
- char **rsi;
+ const char * const *rsi;
unsigned int val, i, clear_mask = 0;
int ret;
@@ -140,25 +137,6 @@ static void axp288_extcon_log_rsi(struct axp288_extcon_info *info)
regmap_write(info->regmap, AXP288_PS_BOOT_REASON_REG, clear_mask);
}
-static void axp288_chrg_detect_complete(struct axp288_extcon_info *info)
-{
- /*
- * We depend on other drivers to do things like mux the data lines,
- * enable/disable vbus based on the id-pin, etc. Sometimes the BIOS has
- * not set these things up correctly resulting in the initial charger
- * cable type detection giving a wrong result and we end up not charging
- * or charging at only 0.5A.
- *
- * So we schedule a second cable type detection after 2 seconds to
- * give the other drivers time to load and do their thing.
- */
- if (!info->first_detect_done) {
- queue_delayed_work(system_wq, &info->det_work,
- msecs_to_jiffies(2000));
- info->first_detect_done = true;
- }
-}
-
static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info)
{
int ret, stat, cfg, pwr_stat;
@@ -223,8 +201,6 @@ no_vbus:
info->previous_cable = cable;
}
- axp288_chrg_detect_complete(info);
-
return 0;
dev_det_ret:
@@ -246,11 +222,8 @@ static irqreturn_t axp288_extcon_isr(int irq, void *data)
return IRQ_HANDLED;
}
-static void axp288_extcon_det_work(struct work_struct *work)
+static void axp288_extcon_enable(struct axp288_extcon_info *info)
{
- struct axp288_extcon_info *info =
- container_of(work, struct axp288_extcon_info, det_work.work);
-
regmap_update_bits(info->regmap, AXP288_BC_GLOBAL_REG,
BC_GLOBAL_RUN, 0);
/* Enable the charger detection logic */
@@ -272,7 +245,6 @@ static int axp288_extcon_probe(struct platform_device *pdev)
info->regmap = axp20x->regmap;
info->regmap_irqc = axp20x->regmap_irqc;
info->previous_cable = EXTCON_NONE;
- INIT_DELAYED_WORK(&info->det_work, axp288_extcon_det_work);
platform_set_drvdata(pdev, info);
@@ -318,7 +290,7 @@ static int axp288_extcon_probe(struct platform_device *pdev)
}
/* Start charger cable type detection */
- queue_delayed_work(system_wq, &info->det_work, 0);
+ axp288_extcon_enable(info);
return 0;
}
diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c
index c8691b5a9cb0..191e99f06a9a 100644
--- a/drivers/extcon/extcon-intel-int3496.c
+++ b/drivers/extcon/extcon-intel-int3496.c
@@ -153,8 +153,9 @@ static int int3496_probe(struct platform_device *pdev)
return ret;
}
- /* queue initial processing of id-pin */
+ /* process id-pin so that we start with the right status */
queue_delayed_work(system_wq, &data->work, 0);
+ flush_delayed_work(&data->work);
platform_set_drvdata(pdev, data);
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index c16600f30611..0bdea60c65dd 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -639,7 +639,7 @@ static void __exit dcdbas_exit(void)
platform_driver_unregister(&dcdbas_driver);
}
-module_init(dcdbas_init);
+subsys_initcall_sync(dcdbas_init);
module_exit(dcdbas_exit);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION " (version " DRIVER_VERSION ")");
diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c
index da661bf8cb96..13c1edd37e96 100644
--- a/drivers/firmware/efi/libstub/tpm.c
+++ b/drivers/firmware/efi/libstub/tpm.c
@@ -68,11 +68,11 @@ void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID;
efi_status_t status;
efi_physical_addr_t log_location, log_last_entry;
- struct linux_efi_tpm_eventlog *log_tbl;
+ struct linux_efi_tpm_eventlog *log_tbl = NULL;
unsigned long first_entry_addr, last_entry_addr;
size_t log_size, last_entry_size;
efi_bool_t truncated;
- void *tcg2_protocol;
+ void *tcg2_protocol = NULL;
status = efi_call_early(locate_protocol, &tcg2_guid, NULL,
&tcg2_protocol);
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index e76de57dd617..ebaea8b1594b 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -14,7 +14,6 @@
* GNU General Public License for more details.
*/
-#include <linux/clk.h>
#include <linux/err.h>
#include <linux/gpio.h>
#include <linux/init.h>
@@ -37,10 +36,9 @@ struct gpio_rcar_priv {
struct platform_device *pdev;
struct gpio_chip gpio_chip;
struct irq_chip irq_chip;
- struct clk *clk;
unsigned int irq_parent;
+ atomic_t wakeup_path;
bool has_both_edge_trigger;
- bool needs_clk;
};
#define IOINTSEL 0x00 /* General IO/Interrupt Switching Register */
@@ -186,13 +184,10 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on)
}
}
- if (!p->clk)
- return 0;
-
if (on)
- clk_enable(p->clk);
+ atomic_inc(&p->wakeup_path);
else
- clk_disable(p->clk);
+ atomic_dec(&p->wakeup_path);
return 0;
}
@@ -330,17 +325,14 @@ static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset,
struct gpio_rcar_info {
bool has_both_edge_trigger;
- bool needs_clk;
};
static const struct gpio_rcar_info gpio_rcar_info_gen1 = {
.has_both_edge_trigger = false,
- .needs_clk = false,
};
static const struct gpio_rcar_info gpio_rcar_info_gen2 = {
.has_both_edge_trigger = true,
- .needs_clk = true,
};
static const struct of_device_id gpio_rcar_of_table[] = {
@@ -403,7 +395,6 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins)
ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args);
*npins = ret == 0 ? args.args[2] : RCAR_MAX_GPIO_PER_BANK;
p->has_both_edge_trigger = info->has_both_edge_trigger;
- p->needs_clk = info->needs_clk;
if (*npins == 0 || *npins > RCAR_MAX_GPIO_PER_BANK) {
dev_warn(&p->pdev->dev,
@@ -440,16 +431,6 @@ static int gpio_rcar_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, p);
- p->clk = devm_clk_get(dev, NULL);
- if (IS_ERR(p->clk)) {
- if (p->needs_clk) {
- dev_err(dev, "unable to get clock\n");
- ret = PTR_ERR(p->clk);
- goto err0;
- }
- p->clk = NULL;
- }
-
pm_runtime_enable(dev);
irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
@@ -531,11 +512,24 @@ static int gpio_rcar_remove(struct platform_device *pdev)
return 0;
}
+static int __maybe_unused gpio_rcar_suspend(struct device *dev)
+{
+ struct gpio_rcar_priv *p = dev_get_drvdata(dev);
+
+ if (atomic_read(&p->wakeup_path))
+ device_set_wakeup_path(dev);
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(gpio_rcar_pm_ops, gpio_rcar_suspend, NULL);
+
static struct platform_driver gpio_rcar_device_driver = {
.probe = gpio_rcar_probe,
.remove = gpio_rcar_remove,
.driver = {
.name = "gpio_rcar",
+ .pm = &gpio_rcar_pm_ops,
.of_match_table = of_match_ptr(gpio_rcar_of_table),
}
};
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 564bb7a31da4..84e5a9df2344 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -241,6 +241,19 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx,
&of_flags);
+ /*
+ * -EPROBE_DEFER in our case means that we found a
+ * valid GPIO property, but no controller has been
+ * registered so far.
+ *
+ * This means we don't need to look any further for
+ * alternate name conventions, and we should really
+ * preserve the return code for our user to be able to
+ * retry probing later.
+ */
+ if (IS_ERR(desc) && PTR_ERR(desc) == -EPROBE_DEFER)
+ return desc;
+
if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT))
break;
}
@@ -250,7 +263,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
desc = of_find_spi_gpio(dev, con_id, &of_flags);
/* Special handling for regulator GPIOs if used */
- if (IS_ERR(desc))
+ if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER)
desc = of_find_regulator_gpio(dev, con_id, &of_flags);
if (IS_ERR(desc))
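
The lookup policy above distinguishes -ENOENT ("try the next naming convention") from -EPROBE_DEFER ("the property is valid but its provider is not ready"), which must reach the caller untouched. A plain-C sketch of that loop; lookup_one() is hypothetical and EPROBE_DEFER is defined locally since it is kernel-internal:

#include <errno.h>
#include <stdio.h>

#define EPROBE_DEFER 517 /* kernel-internal errno, absent from user headers */

static int lookup_one(int i) /* hypothetical per-name GPIO lookup */
{
	return i < 2 ? -ENOENT : -EPROBE_DEFER;
}

int main(void)
{
	int i, ret = -ENOENT;

	for (i = 0; i < 4; i++) {
		ret = lookup_one(i);
		if (ret == -EPROBE_DEFER)
			break; /* valid property, provider not ready: stop */
		if (ret != -ENOENT)
			break; /* found, or a hard error */
	}
	printf("ret = %d\n", ret);
	return 0;
}
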
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d5a2eefd6c3e..74edba18b159 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1156,7 +1156,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
/*
* Writeback
*/
-#define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */
struct amdgpu_wb {
struct amdgpu_bo *wb_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 57afad79f55d..8fa850a070e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -540,6 +540,9 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
size_t size;
u32 retry = 3;
+ if (amdgpu_acpi_pcie_notify_device_ready(adev))
+ return -EINVAL;
+
/* Get the device handle */
handle = ACPI_HANDLE(&adev->pdev->dev);
if (!handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index e2c3c5ec42d1..c53095b3b0fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -568,6 +568,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
/* HG _PR3 doesn't seem to work on this A+A weston board */
{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0, 0, 0, 0, 0 },
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 8ca3783f2deb..7a073ac5f9c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
/* don't do anything if sink is not display port, i.e.,
* passive dp->(dvi|hdmi) adaptor
*/
- if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
- int saved_dpms = connector->dpms;
- /* Only turn off the display if it's physically disconnected */
- if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
- } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
- /* Don't try to start link training before we
- * have the dpcd */
- if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
- return;
-
- /* set it to OFF so that drm_helper_connector_dpms()
- * won't return immediately since the current state
- * is ON at this point.
- */
- connector->dpms = DRM_MODE_DPMS_OFF;
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
- }
- connector->dpms = saved_dpms;
+ if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) &&
+ amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+ /* Don't start link training before we have the DPCD */
+ if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ return;
+
+ /* Turn the connector off and back on immediately, which
+ * will trigger link training
+ */
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
}
}
}
@@ -736,9 +729,11 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (encoder) {
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -757,8 +752,12 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
/* check acpi lid status ??? */
amdgpu_connector_update_scratch_regs(connector, ret);
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
return ret;
}
@@ -868,9 +867,11 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
encoder = amdgpu_connector_best_single_encoder(connector);
if (!encoder)
@@ -924,8 +925,10 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
@@ -988,9 +991,11 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
bool dret = false, broken_edid = false;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
@@ -1115,8 +1120,10 @@ out:
amdgpu_connector_update_scratch_regs(connector, ret);
exit:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
@@ -1359,9 +1366,11 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
@@ -1429,8 +1438,10 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 00a50cc5ec9a..66cb10cdc7c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -492,7 +492,7 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
memset(&adev->wb.used, 0, sizeof(adev->wb.used));
/* clear wb memory */
- memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t));
+ memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
}
return 0;
@@ -530,8 +530,9 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
*/
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
+ wb >>= 3;
if (wb < adev->wb.num_wb)
- __clear_bit(wb >> 3, adev->wb.used);
+ __clear_bit(wb, adev->wb.used);
}
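
Write-back slots are handed out eight at a time, one bit of used[] per unit, so both the range check and __clear_bit() must operate on wb >> 3, and the clear in wb_init covers num_wb * 8 slots. The arithmetic, runnable:

#include <stdio.h>

#define AMDGPU_MAX_WB 128 /* allocation units; each covers 8 32-bit slots */

int main(void)
{
	unsigned int wb = 24;        /* slot index as handed to a ring */
	unsigned int unit = wb >> 3; /* owning bit in the used[] bitmap */

	printf("slot %u -> bitmap bit %u (of %u)\n", wb, unit, AMDGPU_MAX_WB);
	printf("buffer to clear: %zu bytes\n",
	       (size_t)AMDGPU_MAX_WB * sizeof(unsigned int) * 8);
	return 0;
}
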
/**
@@ -1455,11 +1456,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.hw)
continue;
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
- amdgpu_free_static_csa(adev);
- amdgpu_device_wb_fini(adev);
- amdgpu_device_vram_scratch_fini(adev);
- }
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
@@ -1486,6 +1482,13 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.sw)
continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ amdgpu_free_static_csa(adev);
+ amdgpu_device_wb_fini(adev);
+ amdgpu_device_vram_scratch_fini(adev);
+ }
+
r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
/* XXX handle errors */
if (r) {
@@ -2060,9 +2063,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
DRM_INFO("amdgpu: finishing device.\n");
adev->shutdown = true;
- if (adev->mode_info.mode_config_initialized)
- drm_crtc_force_disable_all(adev->ddev);
-
+ if (adev->mode_info.mode_config_initialized) {
+ if (!amdgpu_device_has_dc_support(adev))
+ drm_crtc_force_disable_all(adev->ddev);
+ else
+ drm_atomic_helper_shutdown(adev->ddev);
+ }
amdgpu_ib_pool_fini(adev);
amdgpu_fence_driver_fini(adev);
amdgpu_fbdev_fini(adev);
@@ -2284,14 +2290,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
}
drm_modeset_unlock_all(dev);
- } else {
- /*
- * There is no equivalent atomic helper to turn on
- * display, so we defined our own function for this,
- * once suspend resume is supported by the atomic
- * framework this will be reworked
- */
- amdgpu_dm_display_resume(adev);
}
}
@@ -2726,7 +2724,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (amdgpu_device_has_dc_support(adev)) {
if (drm_atomic_helper_resume(adev->ddev, state))
dev_info(adev->dev, "drm resume failed:%d\n", r);
- amdgpu_dm_display_resume(adev);
} else {
drm_helper_resume_force_mode(adev->ddev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index e48b4ec88c8c..ca6c931dabfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
if (robj) {
- if (robj->gem_base.import_attach)
- drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
amdgpu_mn_unregister(robj);
amdgpu_bo_unref(&robj);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index e14ab34d8262..7c2be32c5aea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -75,7 +75,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man)
{
struct amdgpu_gtt_mgr *mgr = man->priv;
-
+ spin_lock(&mgr->lock);
drm_mm_takedown(&mgr->mm);
spin_unlock(&mgr->lock);
kfree(mgr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 56bcd59c3399..36483e0d3c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -257,7 +257,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
if (r) {
adev->irq.installed = false;
- flush_work(&adev->hotplug_work);
+ if (!amdgpu_device_has_dc_support(adev))
+ flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
return r;
}
@@ -282,7 +283,8 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
adev->irq.installed = false;
if (adev->irq.msi_enabled)
pci_disable_msi(adev->pdev);
- flush_work(&adev->hotplug_work);
+ if (!amdgpu_device_has_dc_support(adev))
+ flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 54f06c959340..2264c5c97009 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -352,6 +352,7 @@ struct amdgpu_mode_info {
u16 firmware_flags;
/* pointer to backlight encoder */
struct amdgpu_encoder *bl_encoder;
+ u8 bl_level; /* saved backlight level */
struct amdgpu_audio audio; /* audio stuff */
int num_crtc; /* number of crtcs */
int num_hpd; /* number of hpd pins */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5c4c3e0d527b..1220322c1680 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -56,6 +56,8 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_kunmap(bo);
+ if (bo->gem_base.import_attach)
+ drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
if (!list_empty(&bo->shadow_list)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13044e66dcaf..561d3312af32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -481,7 +481,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
result = 0;
if (*pos < 12) {
- early[0] = amdgpu_ring_get_rptr(ring);
+ early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
for (i = *pos / 4; i < 3 && size; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b2eae86bf906..5c26a8e806b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -299,12 +299,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
cancel_delayed_work_sync(&adev->uvd.idle_work);
- for (i = 0; i < adev->uvd.max_handles; ++i)
- if (atomic_read(&adev->uvd.handles[i]))
- break;
+ /* only valid for physical mode */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ if (atomic_read(&adev->uvd.handles[i]))
+ break;
- if (i == AMDGPU_MAX_UVD_HANDLES)
- return 0;
+ if (i == adev->uvd.max_handles)
+ return 0;
+ }
size = amdgpu_bo_size(adev->uvd.vcpu_bo);
ptr = adev->uvd.cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 2af26d2da127..d702fb8e3427 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -34,7 +34,7 @@
#include <linux/backlight.h>
#include "bif/bif_4_1_d.h"
-static u8
+u8
amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
{
u8 backlight_level;
@@ -48,7 +48,7 @@ amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
return backlight_level;
}
-static void
+void
amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
u8 backlight_level)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
index 2bdec40515ce..f77cbdef679e 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
@@ -25,6 +25,11 @@
#define __ATOMBIOS_ENCODER_H__
u8
+amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev);
+void
+amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
+ u8 backlight_level);
+u8
amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder);
void
amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index f34bc68aadfb..022f303463fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2921,6 +2921,11 @@ static int dce_v10_0_hw_fini(void *handle)
static int dce_v10_0_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
return dce_v10_0_hw_fini(handle);
}
@@ -2929,6 +2934,9 @@ static int dce_v10_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
ret = dce_v10_0_hw_init(handle);
/* turn on the BL */
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 26378bd6aba4..800a9f36ab4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -3047,6 +3047,11 @@ static int dce_v11_0_hw_fini(void *handle)
static int dce_v11_0_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
return dce_v11_0_hw_fini(handle);
}
@@ -3055,6 +3060,9 @@ static int dce_v11_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
ret = dce_v11_0_hw_init(handle);
/* turn on the BL */
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index bd2c4f727df6..b8368f69ce1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -2787,6 +2787,11 @@ static int dce_v6_0_hw_fini(void *handle)
static int dce_v6_0_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
return dce_v6_0_hw_fini(handle);
}
@@ -2795,6 +2800,9 @@ static int dce_v6_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
ret = dce_v6_0_hw_init(handle);
/* turn on the BL */
@@ -3093,7 +3101,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
schedule_work(&adev->hotplug_work);
- DRM_INFO("IH: HPD%d\n", hpd + 1);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index c008dc030687..012e0a9ae0ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2819,6 +2819,11 @@ static int dce_v8_0_hw_fini(void *handle)
static int dce_v8_0_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
return dce_v8_0_hw_fini(handle);
}
@@ -2827,6 +2832,9 @@ static int dce_v8_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
ret = dce_v8_0_hw_init(handle);
/* turn on the BL */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a066c5eda135..a4309698e76c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4384,34 +4384,8 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
case CHIP_KAVERI:
adev->gfx.config.max_shader_engines = 1;
adev->gfx.config.max_tile_pipes = 4;
- if ((adev->pdev->device == 0x1304) ||
- (adev->pdev->device == 0x1305) ||
- (adev->pdev->device == 0x130C) ||
- (adev->pdev->device == 0x130F) ||
- (adev->pdev->device == 0x1310) ||
- (adev->pdev->device == 0x1311) ||
- (adev->pdev->device == 0x131C)) {
- adev->gfx.config.max_cu_per_sh = 8;
- adev->gfx.config.max_backends_per_se = 2;
- } else if ((adev->pdev->device == 0x1309) ||
- (adev->pdev->device == 0x130A) ||
- (adev->pdev->device == 0x130D) ||
- (adev->pdev->device == 0x1313) ||
- (adev->pdev->device == 0x131D)) {
- adev->gfx.config.max_cu_per_sh = 6;
- adev->gfx.config.max_backends_per_se = 2;
- } else if ((adev->pdev->device == 0x1306) ||
- (adev->pdev->device == 0x1307) ||
- (adev->pdev->device == 0x130B) ||
- (adev->pdev->device == 0x130E) ||
- (adev->pdev->device == 0x1315) ||
- (adev->pdev->device == 0x131B)) {
- adev->gfx.config.max_cu_per_sh = 4;
- adev->gfx.config.max_backends_per_se = 1;
- } else {
- adev->gfx.config.max_cu_per_sh = 3;
- adev->gfx.config.max_backends_per_se = 1;
- }
+ adev->gfx.config.max_cu_per_sh = 8;
+ adev->gfx.config.max_backends_per_se = 2;
adev->gfx.config.max_sh_per_se = 1;
adev->gfx.config.max_texture_channel_caches = 4;
adev->gfx.config.max_gprs = 256;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2719937e09d6..3b7e7af09ead 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -634,7 +634,7 @@ static int gmc_v9_0_late_init(void *handle)
for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
BUG_ON(vm_inv_eng[i] > 16);
- if (adev->asic_type == CHIP_VEGA10) {
+ if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
r = gmc_v9_0_ecc_available(adev);
if (r == 1) {
DRM_INFO("ECC is active.\n");
@@ -682,7 +682,10 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
if (!adev->mc.vram_width) {
/* hbm memory channel size */
- chansize = 128;
+ if (adev->flags & AMD_IS_APU)
+ chansize = 64;
+ else
+ chansize = 128;
tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index e92fb372bc99..91cf95a8c39c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -238,31 +238,27 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u64 *wptr = NULL;
- uint64_t local_wptr = 0;
+ u64 wptr;
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
- DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
- *wptr = (*wptr) >> 2;
- DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
+ wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
u32 lowbit, highbit;
int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- wptr = &local_wptr;
lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2;
highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
me, highbit, lowbit);
- *wptr = highbit;
- *wptr = (*wptr) << 32;
- *wptr |= lowbit;
+ wptr = highbit;
+ wptr = wptr << 32;
+ wptr |= lowbit;
}
- return *wptr;
+ return wptr >> 2;
}
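
The doorbell path above now copies the shared write-back word into a local (via READ_ONCE()) and shifts the copy; the old code shifted *wptr in place, writing the modified value back into the buffer the hardware keeps updating. A user-space analogue:

#include <stdint.h>
#include <stdio.h>

static volatile uint64_t wb_slot = 0x400; /* hypothetical doorbell value */

int main(void)
{
	/* Read the shared word once into a local and shift the copy;
	 * the shared slot is left untouched for the hardware.
	 */
	uint64_t wptr = wb_slot;

	printf("wptr = 0x%llx, wb slot still 0x%llx\n",
	       (unsigned long long)(wptr >> 2),
	       (unsigned long long)wb_slot);
	return 0;
}
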
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 543101d5a5ed..2095173aaabf 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -31,6 +31,7 @@
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "atom.h"
+#include "amd_pcie.h"
#include "amdgpu_powerplay.h"
#include "sid.h"
#include "si_ih.h"
@@ -1461,8 +1462,8 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
int bridge_pos, gpu_pos;
- u32 speed_cntl, mask, current_data_rate;
- int ret, i;
+ u32 speed_cntl, current_data_rate;
+ int i;
u16 tmp16;
if (pci_is_root_bus(adev->pdev->bus))
@@ -1474,23 +1475,20 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return;
- ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
- if (ret != 0)
- return;
-
- if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+ if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
return;
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
LC_CURRENT_DATA_RATE_SHIFT;
- if (mask & DRM_PCIE_SPEED_80) {
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate == 2) {
DRM_INFO("PCIE gen 3 link speeds already enabled\n");
return;
}
DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
- } else if (mask & DRM_PCIE_SPEED_50) {
+ } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
if (current_data_rate == 1) {
DRM_INFO("PCIE gen 2 link speeds already enabled\n");
return;
@@ -1506,7 +1504,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
if (!gpu_pos)
return;
- if (mask & DRM_PCIE_SPEED_80) {
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate != 2) {
u16 bridge_cfg, gpu_cfg;
u16 bridge_cfg2, gpu_cfg2;
@@ -1589,9 +1587,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
tmp16 &= ~0xf;
- if (mask & DRM_PCIE_SPEED_80)
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
tmp16 |= 3;
- else if (mask & DRM_PCIE_SPEED_50)
+ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
tmp16 |= 2;
else
tmp16 |= 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index ce675a7f179a..22f0b7ff3ac9 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -26,6 +26,7 @@
#include "amdgpu_pm.h"
#include "amdgpu_dpm.h"
#include "amdgpu_atombios.h"
+#include "amd_pcie.h"
#include "sid.h"
#include "r600_dpm.h"
#include "si_dpm.h"
@@ -3331,29 +3332,6 @@ static void btc_apply_voltage_delta_rules(struct amdgpu_device *adev,
}
}
-static enum amdgpu_pcie_gen r600_get_pcie_gen_support(struct amdgpu_device *adev,
- u32 sys_mask,
- enum amdgpu_pcie_gen asic_gen,
- enum amdgpu_pcie_gen default_gen)
-{
- switch (asic_gen) {
- case AMDGPU_PCIE_GEN1:
- return AMDGPU_PCIE_GEN1;
- case AMDGPU_PCIE_GEN2:
- return AMDGPU_PCIE_GEN2;
- case AMDGPU_PCIE_GEN3:
- return AMDGPU_PCIE_GEN3;
- default:
- if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3))
- return AMDGPU_PCIE_GEN3;
- else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2))
- return AMDGPU_PCIE_GEN2;
- else
- return AMDGPU_PCIE_GEN1;
- }
- return AMDGPU_PCIE_GEN1;
-}
-
static void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u)
{
@@ -5028,10 +5006,11 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
table->ACPIState.levels[0].vddc.index,
&table->ACPIState.levels[0].std_vddc);
}
- table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(adev,
- si_pi->sys_pcie_mask,
- si_pi->boot_pcie_gen,
- AMDGPU_PCIE_GEN1);
+ table->ACPIState.levels[0].gen2PCIE =
+ (u8)amdgpu_get_pcie_gen_support(adev,
+ si_pi->sys_pcie_mask,
+ si_pi->boot_pcie_gen,
+ AMDGPU_PCIE_GEN1);
if (si_pi->vddc_phase_shed_control)
si_populate_phase_shedding_value(adev,
@@ -7168,10 +7147,10 @@ static void si_parse_pplib_clock_info(struct amdgpu_device *adev,
pl->vddc = le16_to_cpu(clock_info->si.usVDDC);
pl->vddci = le16_to_cpu(clock_info->si.usVDDCI);
pl->flags = le32_to_cpu(clock_info->si.ulFlags);
- pl->pcie_gen = r600_get_pcie_gen_support(adev,
- si_pi->sys_pcie_mask,
- si_pi->boot_pcie_gen,
- clock_info->si.ucPCIEGen);
+ pl->pcie_gen = amdgpu_get_pcie_gen_support(adev,
+ si_pi->sys_pcie_mask,
+ si_pi->boot_pcie_gen,
+ clock_info->si.ucPCIEGen);
/* patch up vddc if necessary */
ret = si_get_leakage_voltage_from_leakage_index(adev, pl->vddc,
@@ -7326,7 +7305,6 @@ static int si_dpm_init(struct amdgpu_device *adev)
struct si_power_info *si_pi;
struct atom_clock_dividers dividers;
int ret;
- u32 mask;
si_pi = kzalloc(sizeof(struct si_power_info), GFP_KERNEL);
if (si_pi == NULL)
@@ -7336,11 +7314,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
eg_pi = &ni_pi->eg;
pi = &eg_pi->rv7xx;
- ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
- if (ret)
- si_pi->sys_pcie_mask = 0;
- else
- si_pi->sys_pcie_mask = mask;
+ si_pi->sys_pcie_mask =
+ (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
+ CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index b2bfedaf57f1..9bab4842cd44 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -1618,7 +1618,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
.set_wptr = uvd_v6_0_enc_ring_set_wptr,
.emit_frame_size =
4 + /* uvd_v6_0_enc_ring_emit_pipeline_sync */
- 6 + /* uvd_v6_0_enc_ring_emit_vm_flush */
+ 5 + /* uvd_v6_0_enc_ring_emit_vm_flush */
5 + 5 + /* uvd_v6_0_enc_ring_emit_fence x2 vm fence */
1, /* uvd_v6_0_enc_ring_insert_end */
.emit_ib_size = 5, /* uvd_v6_0_enc_ring_emit_ib */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 1ce4c98385e3..63c67346d316 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -629,11 +629,13 @@ static int dm_resume(void *handle)
{
struct amdgpu_device *adev = handle;
struct amdgpu_display_manager *dm = &adev->dm;
+ int ret = 0;
/* power on hardware */
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
- return 0;
+ ret = amdgpu_dm_display_resume(adev);
+ return ret;
}
int amdgpu_dm_display_resume(struct amdgpu_device *adev)
@@ -1035,6 +1037,10 @@ static void handle_hpd_rx_irq(void *param)
!is_mst_root_connector) {
/* Downstream Port status changed. */
if (dc_link_detect(dc_link, DETECT_REASON_HPDRX)) {
+
+ if (aconnector->fake_enable)
+ aconnector->fake_enable = false;
+
amdgpu_dm_update_connector_after_detect(aconnector);
@@ -2010,30 +2016,32 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,
dst.width = stream->timing.h_addressable;
dst.height = stream->timing.v_addressable;
- rmx_type = dm_state->scaling;
- if (rmx_type == RMX_ASPECT || rmx_type == RMX_OFF) {
- if (src.width * dst.height <
- src.height * dst.width) {
- /* height needs less upscaling/more downscaling */
- dst.width = src.width *
- dst.height / src.height;
- } else {
- /* width needs less upscaling/more downscaling */
- dst.height = src.height *
- dst.width / src.width;
+ if (dm_state) {
+ rmx_type = dm_state->scaling;
+ if (rmx_type == RMX_ASPECT || rmx_type == RMX_OFF) {
+ if (src.width * dst.height <
+ src.height * dst.width) {
+ /* height needs less upscaling/more downscaling */
+ dst.width = src.width *
+ dst.height / src.height;
+ } else {
+ /* width needs less upscaling/more downscaling */
+ dst.height = src.height *
+ dst.width / src.width;
+ }
+ } else if (rmx_type == RMX_CENTER) {
+ dst = src;
}
- } else if (rmx_type == RMX_CENTER) {
- dst = src;
- }
- dst.x = (stream->timing.h_addressable - dst.width) / 2;
- dst.y = (stream->timing.v_addressable - dst.height) / 2;
+ dst.x = (stream->timing.h_addressable - dst.width) / 2;
+ dst.y = (stream->timing.v_addressable - dst.height) / 2;
- if (dm_state->underscan_enable) {
- dst.x += dm_state->underscan_hborder / 2;
- dst.y += dm_state->underscan_vborder / 2;
- dst.width -= dm_state->underscan_hborder;
- dst.height -= dm_state->underscan_vborder;
+ if (dm_state->underscan_enable) {
+ dst.x += dm_state->underscan_hborder / 2;
+ dst.y += dm_state->underscan_vborder / 2;
+ dst.width -= dm_state->underscan_hborder;
+ dst.height -= dm_state->underscan_vborder;
+ }
}
stream->src = src;
@@ -2358,12 +2366,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (aconnector == NULL) {
DRM_ERROR("aconnector is NULL!\n");
- goto drm_connector_null;
- }
-
- if (dm_state == NULL) {
- DRM_ERROR("dm_state is NULL!\n");
- goto dm_state_null;
+ return stream;
}
drm_connector = &aconnector->base;
@@ -2375,18 +2378,18 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
*/
if (aconnector->mst_port) {
dm_dp_mst_dc_sink_create(drm_connector);
- goto mst_dc_sink_create_done;
+ return stream;
}
if (create_fake_sink(aconnector))
- goto stream_create_fail;
+ return stream;
}
stream = dc_create_stream_for_sink(aconnector->dc_sink);
if (stream == NULL) {
DRM_ERROR("Failed to create stream for sink!\n");
- goto stream_create_fail;
+ return stream;
}
list_for_each_entry(preferred_mode, &aconnector->base.modes, head) {
@@ -2412,9 +2415,12 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
} else {
decide_crtc_timing_for_drm_display_mode(
&mode, preferred_mode,
- dm_state->scaling != RMX_OFF);
+ dm_state ? (dm_state->scaling != RMX_OFF) : false);
}
+ if (!dm_state)
+ drm_mode_set_crtcinfo(&mode, 0);
+
fill_stream_properties_from_drm_display_mode(stream,
&mode, &aconnector->base);
update_stream_scaling_settings(&mode, dm_state, stream);
@@ -2424,10 +2430,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
drm_connector,
aconnector->dc_sink);
-stream_create_fail:
-dm_state_null:
-drm_connector_null:
-mst_dc_sink_create_done:
+ update_stream_signal(stream);
+
return stream;
}
@@ -2495,6 +2499,27 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
return &state->base;
}
+
+static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
+{
+ enum dc_irq_source irq_source;
+ struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = crtc->dev->dev_private;
+
+ irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
+ return dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
+}
+
+static int dm_enable_vblank(struct drm_crtc *crtc)
+{
+ return dm_set_vblank(crtc, true);
+}
+
+static void dm_disable_vblank(struct drm_crtc *crtc)
+{
+ dm_set_vblank(crtc, false);
+}
+
/* Implemented only the options currently available for the driver */
static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
.reset = dm_crtc_reset_state,
@@ -2504,6 +2529,8 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
.page_flip = drm_atomic_helper_page_flip,
.atomic_duplicate_state = dm_crtc_duplicate_state,
.atomic_destroy_state = dm_crtc_destroy_state,
+ .enable_vblank = dm_enable_vblank,
+ .disable_vblank = dm_disable_vblank,
};
static enum drm_connector_status
@@ -2798,7 +2825,7 @@ int amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
goto fail;
}
- stream = dc_create_stream_for_sink(dc_sink);
+ stream = create_stream_for_sink(aconnector, mode, NULL);
if (stream == NULL) {
DRM_ERROR("Failed to create stream for sink!\n");
goto fail;
@@ -3058,6 +3085,9 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
if (!dm_plane_state->dc_state)
return 0;
+ if (!fill_rects_from_plane_state(state, dm_plane_state->dc_state))
+ return -EINVAL;
+
if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK)
return 0;
@@ -3104,8 +3134,6 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
switch (aplane->base.type) {
case DRM_PLANE_TYPE_PRIMARY:
- aplane->base.format_default = true;
-
res = drm_universal_plane_init(
dm->adev->ddev,
&aplane->base,
@@ -4630,8 +4658,6 @@ static int dm_update_planes_state(struct dc *dc,
bool pflip_needed = !state->allow_modeset;
int ret = 0;
- if (pflip_needed)
- return ret;
/* Add new planes */
for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
@@ -4646,6 +4672,8 @@ static int dm_update_planes_state(struct dc *dc,
/* Remove any changed/removed planes */
if (!enable) {
+ if (pflip_needed)
+ continue;
if (!old_plane_crtc)
continue;
@@ -4677,6 +4705,7 @@ static int dm_update_planes_state(struct dc *dc,
*lock_and_validation_needed = true;
} else { /* Add new planes */
+ struct dc_plane_state *dc_new_plane_state;
if (drm_atomic_plane_disabling(plane->state, new_plane_state))
continue;
@@ -4690,38 +4719,50 @@ static int dm_update_planes_state(struct dc *dc,
if (!dm_new_crtc_state->stream)
continue;
+ if (pflip_needed)
+ continue;
WARN_ON(dm_new_plane_state->dc_state);
- dm_new_plane_state->dc_state = dc_create_plane_state(dc);
-
- DRM_DEBUG_DRIVER("Enabling DRM plane: %d on DRM crtc %d\n",
- plane->base.id, new_plane_crtc->base.id);
-
- if (!dm_new_plane_state->dc_state) {
+ dc_new_plane_state = dc_create_plane_state(dc);
+ if (!dc_new_plane_state) {
ret = -EINVAL;
return ret;
}
+ DRM_DEBUG_DRIVER("Enabling DRM plane: %d on DRM crtc %d\n",
+ plane->base.id, new_plane_crtc->base.id);
+
ret = fill_plane_attributes(
new_plane_crtc->dev->dev_private,
- dm_new_plane_state->dc_state,
+ dc_new_plane_state,
new_plane_state,
new_crtc_state);
- if (ret)
+ if (ret) {
+ dc_plane_state_release(dc_new_plane_state);
return ret;
+ }
-
+ /*
+ * Any atomic check errors that occur after this will
+ * not need a release. The plane state will be attached
+ * to the stream, and therefore part of the atomic
+ * state. It'll be released when the atomic state is
+ * cleaned.
+ */
if (!dc_add_plane_to_context(
dc,
dm_new_crtc_state->stream,
- dm_new_plane_state->dc_state,
+ dc_new_plane_state,
dm_state->context)) {
+ dc_plane_state_release(dc_new_plane_state);
ret = -EINVAL;
return ret;
}
+ dm_new_plane_state->dc_state = dc_new_plane_state;
+
/* Tell DC to do a full surface update every time there
* is a plane change. Inefficient, but works for now.
*/
@@ -4735,6 +4776,33 @@ static int dm_update_planes_state(struct dc *dc,
return ret;
}
+static int dm_atomic_check_plane_state_fb(struct drm_atomic_state *state,
+ struct drm_crtc *crtc)
+{
+ struct drm_plane *plane;
+ struct drm_crtc_state *crtc_state;
+
+ WARN_ON(!drm_atomic_get_new_crtc_state(state, crtc));
+
+ drm_for_each_plane_mask(plane, state->dev, crtc->state->plane_mask) {
+ struct drm_plane_state *plane_state =
+ drm_atomic_get_plane_state(state, plane);
+
+ if (IS_ERR(plane_state))
+ return -EDEADLK;
+
+ crtc_state = drm_atomic_get_crtc_state(plane_state->state, crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ if (crtc->primary == plane && crtc_state->active) {
+ if (!plane_state->fb)
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct drm_atomic_state *state)
{
@@ -4758,6 +4826,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ ret = dm_atomic_check_plane_state_fb(state, crtc);
+ if (ret)
+ goto fail;
+
if (!drm_atomic_crtc_needs_modeset(new_crtc_state) &&
!new_crtc_state->color_mgmt_changed)
continue;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 9bd142f65f9b..e1acc10e35a2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -109,7 +109,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
struct cea_sad *sad = &sads[i];
edid_caps->audio_modes[i].format_code = sad->format;
- edid_caps->audio_modes[i].channel_count = sad->channels;
+ edid_caps->audio_modes[i].channel_count = sad->channels + 1;
edid_caps->audio_modes[i].sample_rate = sad->freq;
edid_caps->audio_modes[i].sample_size = sad->byte2;
}
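
Per CEA-861, a Short Audio Descriptor stores the maximum channel count minus one in byte 1, bits 0-2; the fix above adds the missing increment. For example:

#include <stdio.h>

int main(void)
{
	/* CEA-861 SAD byte 1, bits 0-2: (max channel count - 1). */
	unsigned char sad_channels = 1; /* raw field value */

	printf("%u channels\n", sad_channels + 1); /* prints 2 */
	return 0;
}
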
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 1874b6cee6af..422055080df4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -683,10 +683,8 @@ static const struct amdgpu_irq_src_funcs dm_hpd_irq_funcs = {
void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
{
- if (adev->mode_info.num_crtc > 0)
- adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
- else
- adev->crtc_irq.num_types = 0;
+
+ adev->crtc_irq.num_types = adev->mode_info.num_crtc;
adev->crtc_irq.funcs = &dm_crtc_irq_funcs;
adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index f3d87f418d2e..93421dad21bd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -189,6 +189,12 @@ void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
.link = aconnector->dc_link,
.sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
+ /*
+ * TODO: Figure out why ddc.algo is NULL while the MST port exists
+ */
+ if (!aconnector->port || !aconnector->port->aux.ddc.algo)
+ return;
+
edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
if (!edid) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 35e84ed031de..12868c769606 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1358,13 +1358,13 @@ enum dc_irq_source dc_interrupt_to_irq_source(
return dal_irq_service_to_irq_source(dc->res_pool->irqs, src_id, ext_id);
}
-void dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable)
+bool dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable)
{
if (dc == NULL)
- return;
+ return false;
- dal_irq_service_set(dc->res_pool->irqs, src, enable);
+ return dal_irq_service_set(dc->res_pool->irqs, src, enable);
}
void dc_interrupt_ack(struct dc *dc, enum dc_irq_source src)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index a37428271573..be5546181fa8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1749,8 +1749,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
link->link_enc,
pipe_ctx->clock_source->id,
display_color_depth,
- pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A,
- pipe_ctx->stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK,
+ pipe_ctx->stream->signal,
stream->phy_pix_clk);
if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 61e8c3e02d16..639421a00ab6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -718,7 +718,7 @@ static enum link_training_result perform_channel_equalization_sequence(
uint32_t retries_ch_eq;
enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
union lane_align_status_updated dpcd_lane_status_updated = {{0}};
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {{{0}}};;
+ union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {{{0}}};
hw_tr_pattern = get_supported_tp(link);
@@ -1465,7 +1465,7 @@ void decide_link_settings(struct dc_stream_state *stream,
/* MST doesn't perform link training for now
* TODO: add MST specific link training routine
*/
- if (is_mst_supported(link)) {
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
*link_setting = link->verified_link_cap;
return;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 95b8dd0e53c6..4d07ffebfd31 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1360,9 +1360,6 @@ bool dc_is_stream_scaling_unchanged(
return true;
}
-/* Maximum TMDS single link pixel clock 165MHz */
-#define TMDS_MAX_PIXEL_CLOCK_IN_KHZ 165000
-
static void update_stream_engine_usage(
struct resource_context *res_ctx,
const struct resource_pool *pool,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 261811e0c094..cd5819789d76 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -33,8 +33,7 @@
/*******************************************************************************
* Private functions
******************************************************************************/
-#define TMDS_MAX_PIXEL_CLOCK_IN_KHZ_UPMOST 297000
-static void update_stream_signal(struct dc_stream_state *stream)
+void update_stream_signal(struct dc_stream_state *stream)
{
struct dc_sink *dc_sink = stream->sink;
@@ -45,8 +44,9 @@ static void update_stream_signal(struct dc_stream_state *stream)
stream->signal = dc_sink->sink_signal;
if (dc_is_dvi_signal(stream->signal)) {
- if (stream->timing.pix_clk_khz > TMDS_MAX_PIXEL_CLOCK_IN_KHZ_UPMOST &&
- stream->sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
+ if (stream->ctx->dc->caps.dual_link_dvi &&
+ stream->timing.pix_clk_khz > TMDS_MAX_PIXEL_CLOCK &&
+ stream->sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK;
else
stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
@@ -193,44 +193,20 @@ bool dc_stream_set_cursor_attributes(
core_dc = stream->ctx->dc;
res_ctx = &core_dc->current_state->res_ctx;
+ stream->cursor_attributes = *attributes;
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
- if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp))
+ if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.xfm &&
+ !pipe_ctx->plane_res.dpp) || !pipe_ctx->plane_res.ipp)
continue;
if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
continue;
- if (pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes != NULL)
- pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes(
- pipe_ctx->plane_res.ipp, attributes);
-
- if (pipe_ctx->plane_res.hubp != NULL &&
- pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes != NULL)
- pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes(
- pipe_ctx->plane_res.hubp, attributes);
-
- if (pipe_ctx->plane_res.mi != NULL &&
- pipe_ctx->plane_res.mi->funcs->set_cursor_attributes != NULL)
- pipe_ctx->plane_res.mi->funcs->set_cursor_attributes(
- pipe_ctx->plane_res.mi, attributes);
-
-
- if (pipe_ctx->plane_res.xfm != NULL &&
- pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes != NULL)
- pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes(
- pipe_ctx->plane_res.xfm, attributes);
-
- if (pipe_ctx->plane_res.dpp != NULL &&
- pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes != NULL)
- pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes(
- pipe_ctx->plane_res.dpp, attributes->color_format);
+ core_dc->hwss.set_cursor_attribute(pipe_ctx);
}
-
- stream->cursor_attributes = *attributes;
-
return true;
}
@@ -254,55 +230,21 @@ bool dc_stream_set_cursor_position(
core_dc = stream->ctx->dc;
res_ctx = &core_dc->current_state->res_ctx;
+ stream->cursor_position = *position;
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
- struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp;
- struct mem_input *mi = pipe_ctx->plane_res.mi;
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- struct dpp *dpp = pipe_ctx->plane_res.dpp;
- struct dc_cursor_position pos_cpy = *position;
- struct dc_cursor_mi_param param = {
- .pixel_clk_khz = stream->timing.pix_clk_khz,
- .ref_clk_khz = core_dc->res_pool->ref_clock_inKhz,
- .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x,
- .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width,
- .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz
- };
if (pipe_ctx->stream != stream ||
(!pipe_ctx->plane_res.mi && !pipe_ctx->plane_res.hubp) ||
!pipe_ctx->plane_state ||
- (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp))
- continue;
-
- if (pipe_ctx->plane_state->address.type
- == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
- pos_cpy.enable = false;
-
- if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
- pos_cpy.enable = false;
-
-
- if (ipp != NULL && ipp->funcs->ipp_cursor_set_position != NULL)
- ipp->funcs->ipp_cursor_set_position(ipp, &pos_cpy, &param);
-
- if (mi != NULL && mi->funcs->set_cursor_position != NULL)
- mi->funcs->set_cursor_position(mi, &pos_cpy, &param);
-
- if (!hubp)
+ (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp) ||
+ !pipe_ctx->plane_res.ipp)
continue;
- if (hubp->funcs->set_cursor_position != NULL)
- hubp->funcs->set_cursor_position(hubp, &pos_cpy, &param);
-
- if (dpp != NULL && dpp->funcs->set_cursor_position != NULL)
- dpp->funcs->set_cursor_position(dpp, &pos_cpy, &param, hubp->curs_attr.width);
-
+ core_dc->hwss.set_cursor_position(pipe_ctx);
}
- stream->cursor_position = *position;
-
return true;
}
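Both stream-level cursor functions above now just cache the requested state on the stream and call through the new hw_sequencer hooks, so dc_stream.c no longer hardcodes which hardware blocks (DCE IPP/MI/XFM vs. DCN HUBP/DPP) own the cursor. A standalone model of that dispatch, with illustrative names:

#include <stdio.h>

/* Toy model of the hwss indirection introduced above: generic code calls
 * through function pointers; each ASIC family fills them in. */
struct pipe_ctx { const char *name; };

struct hw_sequencer_funcs {
	void (*set_cursor_position)(struct pipe_ctx *pipe);
	void (*set_cursor_attribute)(struct pipe_ctx *pipe);
};

static void dce110_pos(struct pipe_ctx *p)  { printf("%s: DCE pos\n", p->name); }
static void dce110_attr(struct pipe_ctx *p) { printf("%s: DCE attr\n", p->name); }

int main(void)
{
	struct hw_sequencer_funcs hwss = {
		.set_cursor_position = dce110_pos,
		.set_cursor_attribute = dce110_attr,
	};
	struct pipe_ctx pipe = { .name = "pipe0" };

	hwss.set_cursor_attribute(&pipe);
	hwss.set_cursor_position(&pipe);
	return 0;
}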
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index e2e3c9df79ea..d6d56611604e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -62,6 +62,7 @@ struct dc_caps {
bool dcc_const_color;
bool dynamic_audio;
bool is_apu;
+ bool dual_link_dvi;
};
struct dc_dcc_surface_param {
@@ -672,7 +673,7 @@ enum dc_irq_source dc_interrupt_to_irq_source(
struct dc *dc,
uint32_t src_id,
uint32_t ext_id);
-void dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable);
+bool dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable);
void dc_interrupt_ack(struct dc *dc, enum dc_irq_source src);
enum dc_irq_source dc_get_hpd_irq_source_at_index(
struct dc *dc, uint32_t link_index);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 01c60f11b2bd..456e4d29eadd 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -237,6 +237,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
*/
struct dc_stream_state *dc_create_stream_for_sink(struct dc_sink *dc_sink);
+void update_stream_signal(struct dc_stream_state *stream);
+
void dc_stream_retain(struct dc_stream_state *dc_stream);
void dc_stream_release(struct dc_stream_state *dc_stream);
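The update_stream_signal() hunk earlier in this series makes dual-link DVI opt-in per ASIC via the new dc_caps.dual_link_dvi flag, alongside the existing pixel-clock and sink-type conditions. A standalone model of the decision (clock values illustrative):

#include <stdbool.h>
#include <stdio.h>

#define TMDS_MAX_PIXEL_CLOCK 165000	/* kHz, single-link TMDS limit */

/* Toy model of the dual-link DVI selection in update_stream_signal():
 * promote to dual link only if the ASIC supports it, the pixel clock
 * exceeds the single-link limit, and the sink is not single-link only. */
static bool use_dual_link(bool cap_dual_link, int pix_clk_khz,
			  bool sink_single_link_only)
{
	return cap_dual_link && pix_clk_khz > TMDS_MAX_PIXEL_CLOCK &&
	       !sink_single_link_only;
}

int main(void)
{
	printf("268.5 MHz, capable ASIC: %d\n", use_dual_link(true, 268500, false));
	printf("148.5 MHz, capable ASIC: %d\n", use_dual_link(true, 148500, false));
	printf("268.5 MHz, no cap:       %d\n", use_dual_link(false, 268500, false));
	return 0;
}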
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
index b73db9e78437..f11f17fe08f9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
@@ -236,6 +236,7 @@
SR(D2VGA_CONTROL), \
SR(D3VGA_CONTROL), \
SR(D4VGA_CONTROL), \
+ SR(VGA_TEST_CONTROL), \
SR(DC_IP_REQUEST_CNTL), \
BL_REG_LIST()
@@ -337,6 +338,7 @@ struct dce_hwseq_registers {
uint32_t D2VGA_CONTROL;
uint32_t D3VGA_CONTROL;
uint32_t D4VGA_CONTROL;
+ uint32_t VGA_TEST_CONTROL;
/* MMHUB registers. read only. temporary hack */
uint32_t VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32;
uint32_t VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
@@ -493,6 +495,12 @@ struct dce_hwseq_registers {
HWS_SF(, DOMAIN6_PG_STATUS, DOMAIN6_PGFSM_PWR_STATUS, mask_sh), \
HWS_SF(, DOMAIN7_PG_STATUS, DOMAIN7_PGFSM_PWR_STATUS, mask_sh), \
HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
+ HWS_SF(, D1VGA_CONTROL, D1VGA_MODE_ENABLE, mask_sh),\
+ HWS_SF(, D2VGA_CONTROL, D2VGA_MODE_ENABLE, mask_sh),\
+ HWS_SF(, D3VGA_CONTROL, D3VGA_MODE_ENABLE, mask_sh),\
+ HWS_SF(, D4VGA_CONTROL, D4VGA_MODE_ENABLE, mask_sh),\
+ HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_ENABLE, mask_sh),\
+ HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_RENDER_START, mask_sh),\
HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh), \
HWS_SF(, LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh)
@@ -583,7 +591,13 @@ struct dce_hwseq_registers {
type DCFCLK_GATE_DIS; \
type DCHUBBUB_GLOBAL_TIMER_REFDIV; \
type DENTIST_DPPCLK_WDIVIDER; \
- type DENTIST_DISPCLK_WDIVIDER;
+ type DENTIST_DISPCLK_WDIVIDER; \
+ type VGA_TEST_ENABLE; \
+ type VGA_TEST_RENDER_START; \
+ type D1VGA_MODE_ENABLE; \
+ type D2VGA_MODE_ENABLE; \
+ type D3VGA_MODE_ENABLE; \
+ type D4VGA_MODE_ENABLE;
struct dce_hwseq_shift {
HWSEQ_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index a266e3f5e75f..e4741f1a2b01 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -82,13 +82,6 @@
#define DCE110_DIG_FE_SOURCE_SELECT_DIGF 0x20
#define DCE110_DIG_FE_SOURCE_SELECT_DIGG 0x40
-/* Minimum pixel clock, in KHz. For TMDS signal is 25.00 MHz */
-#define TMDS_MIN_PIXEL_CLOCK 25000
-/* Maximum pixel clock, in KHz. For TMDS signal is 165.00 MHz */
-#define TMDS_MAX_PIXEL_CLOCK 165000
-/* For current ASICs pixel clock - 600MHz */
-#define MAX_ENCODER_CLOCK 600000
-
enum {
DP_MST_UPDATE_MAX_RETRY = 50
};
@@ -683,6 +676,7 @@ void dce110_link_encoder_construct(
{
struct bp_encoder_cap_info bp_cap_info = {0};
const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+ enum bp_result result = BP_RESULT_OK;
enc110->base.funcs = &dce110_lnk_enc_funcs;
enc110->base.ctx = init_data->ctx;
@@ -757,15 +751,24 @@ void dce110_link_encoder_construct(
enc110->base.preferred_engine = ENGINE_ID_UNKNOWN;
}
+ /* default to one to mirror Windows behavior */
+ enc110->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+ result = bp_funcs->get_encoder_cap_info(enc110->base.ctx->dc_bios,
+ enc110->base.id, &bp_cap_info);
+
/* Override features with DCE-specific values */
- if (BP_RESULT_OK == bp_funcs->get_encoder_cap_info(
- enc110->base.ctx->dc_bios, enc110->base.id,
- &bp_cap_info)) {
+ if (BP_RESULT_OK == result) {
enc110->base.features.flags.bits.IS_HBR2_CAPABLE =
bp_cap_info.DP_HBR2_EN;
enc110->base.features.flags.bits.IS_HBR3_CAPABLE =
bp_cap_info.DP_HBR3_EN;
enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+ } else {
+ dm_logger_write(enc110->base.ctx->logger, LOG_WARNING,
+ "%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+ __func__,
+ result);
}
}
@@ -904,8 +907,7 @@ void dce110_link_encoder_enable_tmds_output(
struct link_encoder *enc,
enum clock_source_id clock_source,
enum dc_color_depth color_depth,
- bool hdmi,
- bool dual_link,
+ enum signal_type signal,
uint32_t pixel_clock)
{
struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc);
@@ -919,16 +921,12 @@ void dce110_link_encoder_enable_tmds_output(
cntl.engine_id = enc->preferred_engine;
cntl.transmitter = enc110->base.transmitter;
cntl.pll_id = clock_source;
- if (hdmi) {
- cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
- cntl.lanes_number = 4;
- } else if (dual_link) {
- cntl.signal = SIGNAL_TYPE_DVI_DUAL_LINK;
+ cntl.signal = signal;
+ if (cntl.signal == SIGNAL_TYPE_DVI_DUAL_LINK)
cntl.lanes_number = 8;
- } else {
- cntl.signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
+ else
cntl.lanes_number = 4;
- }
+
cntl.hpd_sel = enc110->base.hpd_source;
cntl.pixel_clock = pixel_clock;
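Passing the signal_type through to the encoder replaces the hdmi/dual_link boolean pair and keeps the lane-count decision in one place. A standalone model of that selection:

#include <stdio.h>

/* Toy model of the lane selection in
 * dce110_link_encoder_enable_tmds_output(): dual-link DVI drives eight
 * TMDS lanes, HDMI and single-link DVI use four. */
enum signal_type {
	SIGNAL_TYPE_DVI_SINGLE_LINK,
	SIGNAL_TYPE_DVI_DUAL_LINK,
	SIGNAL_TYPE_HDMI_TYPE_A,
};

static int tmds_lanes(enum signal_type signal)
{
	return signal == SIGNAL_TYPE_DVI_DUAL_LINK ? 8 : 4;
}

int main(void)
{
	printf("dual-link DVI: %d lanes\n", tmds_lanes(SIGNAL_TYPE_DVI_DUAL_LINK));
	printf("HDMI:          %d lanes\n", tmds_lanes(SIGNAL_TYPE_HDMI_TYPE_A));
	return 0;
}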
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
index 8ca9afe47a2b..0ec3433d34b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
@@ -210,8 +210,7 @@ void dce110_link_encoder_enable_tmds_output(
struct link_encoder *enc,
enum clock_source_id clock_source,
enum dc_color_depth color_depth,
- bool hdmi,
- bool dual_link,
+ enum signal_type signal,
uint32_t pixel_clock);
/* enables DP PHY output */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
index 3931412ab6d3..87093894ea9e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
@@ -128,23 +128,22 @@ static void set_truncation(
return;
}
/* for other formats: to do */
- if (params->flags.TRUNCATE_ENABLED == 0 ||
- params->flags.TRUNCATE_DEPTH == 2)
+ if (params->flags.TRUNCATE_ENABLED == 0)
return;
/*Set truncation depth and Enable truncation*/
REG_UPDATE_3(FMT_BIT_DEPTH_CONTROL,
FMT_TRUNCATE_EN, 1,
FMT_TRUNCATE_DEPTH,
- params->flags.TRUNCATE_MODE,
+ params->flags.TRUNCATE_DEPTH,
FMT_TRUNCATE_MODE,
- params->flags.TRUNCATE_DEPTH);
+ params->flags.TRUNCATE_MODE);
}
/**
* set_spatial_dither
* 1) set spatial dithering mode: pattern of seed
- * 2) set spatical dithering depth: 0 for 18bpp or 1 for 24bpp
+ * 2) set spatial dithering depth: 0 for 18bpp or 1 for 24bpp
* 3) set random seed
* 4) set random mode
* lfsr is reset every frame or not reset
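The set_truncation() hunk fixes swapped arguments: the DEPTH value was being written into the MODE field and vice versa. A standalone model showing why the swap programs a different register value (field positions here are invented, not the real FMT_BIT_DEPTH_CONTROL layout):

#include <stdint.h>
#include <stdio.h>

/* Toy model of the swapped-field bug fixed above: DEPTH and MODE occupy
 * distinct bit fields, so exchanging them yields a wrong register word. */
#define FMT_TRUNCATE_EN(v)    (((uint32_t)(v) & 0x1) << 0)
#define FMT_TRUNCATE_DEPTH(v) (((uint32_t)(v) & 0x3) << 4)
#define FMT_TRUNCATE_MODE(v)  (((uint32_t)(v) & 0x1) << 8)

int main(void)
{
	unsigned int depth = 1, mode = 0;
	uint32_t good = FMT_TRUNCATE_EN(1) | FMT_TRUNCATE_DEPTH(depth) |
			FMT_TRUNCATE_MODE(mode);
	uint32_t bad  = FMT_TRUNCATE_EN(1) | FMT_TRUNCATE_DEPTH(mode) |
			FMT_TRUNCATE_MODE(depth);

	printf("correct: 0x%08x, swapped: 0x%08x\n",
	       (unsigned)good, (unsigned)bad);
	return 0;
}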
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index 3ea43e2a9450..442dd2d93618 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -852,6 +852,7 @@ static bool construct(
dc->caps.max_downscale_ratio = 200;
dc->caps.i2c_speed_in_khz = 40;
dc->caps.max_cursor_size = 128;
+ dc->caps.dual_link_dvi = true;
for (i = 0; i < pool->base.pipe_count; i++) {
pool->base.timing_generators[i] =
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 86cdd7b4811f..6f382a3ac90f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -688,15 +688,22 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
struct dc_link *link = pipe_ctx->stream->sink->link;
- /* 1. update AVI info frame (HDMI, DP)
- * we always need to update info frame
- */
+
uint32_t active_total_with_borders;
uint32_t early_control = 0;
struct timing_generator *tg = pipe_ctx->stream_res.tg;
- /* TODOFPGA may change to hwss.update_info_frame */
+ /* For MST, multiple streams go to a single link. Connect the DIG
+ * back end to the front end in enable_stream and disconnect them in
+ * disable_stream. This keeps the separation of stream and link
+ * logically clean. */
+ link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
+ pipe_ctx->stream_res.stream_enc->id, true);
+
+ /* update AVI info frame (HDMI, DP)*/
+ /* TODO: FPGA may change to hwss.update_info_frame */
dce110_update_info_frame(pipe_ctx);
+
/* enable early control to avoid corruption on DP monitor*/
active_total_with_borders =
timing->h_addressable
@@ -717,12 +724,8 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc);
}
- /* For MST, there are multiply stream go to only one link.
- * connect DIG back_end to front_end while enable_stream and
- * disconnect them during disable_stream
- * BY this, it is logic clean to separate stream and link */
- link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
- pipe_ctx->stream_res.stream_enc->id, true);
+
+
}
@@ -1690,9 +1693,13 @@ static void apply_min_clocks(
* Check if FBC can be enabled
*/
static bool should_enable_fbc(struct dc *dc,
- struct dc_state *context)
+ struct dc_state *context,
+ uint32_t *pipe_idx)
{
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0];
+ uint32_t i;
+ struct pipe_ctx *pipe_ctx = NULL;
+ struct resource_context *res_ctx = &context->res_ctx;
+
ASSERT(dc->fbc_compressor);
@@ -1704,6 +1711,14 @@ static bool should_enable_fbc(struct dc *dc,
if (context->stream_count != 1)
return false;
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (res_ctx->pipe_ctx[i].stream) {
+ pipe_ctx = &res_ctx->pipe_ctx[i];
+ *pipe_idx = i;
+ break;
+ }
+ }
+
/* Only supports eDP */
if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP)
return false;
@@ -1729,11 +1744,14 @@ static bool should_enable_fbc(struct dc *dc,
static void enable_fbc(struct dc *dc,
struct dc_state *context)
{
- if (should_enable_fbc(dc, context)) {
+ uint32_t pipe_idx = 0;
+
+ if (should_enable_fbc(dc, context, &pipe_idx)) {
/* Program GRPH COMPRESSED ADDRESS and PITCH */
struct compr_addr_and_pitch_params params = {0, 0, 0};
struct compressor *compr = dc->fbc_compressor;
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0];
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
+
params.source_view_width = pipe_ctx->stream->timing.h_addressable;
params.source_view_height = pipe_ctx->stream->timing.v_addressable;
@@ -2915,6 +2933,49 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx,
}
}
+void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx)
+{
+ struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
+ struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp;
+ struct mem_input *mi = pipe_ctx->plane_res.mi;
+ struct dc_cursor_mi_param param = {
+ .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz,
+ .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz,
+ .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x,
+ .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width,
+ .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz
+ };
+
+ if (pipe_ctx->plane_state->address.type
+ == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
+ pos_cpy.enable = false;
+
+ if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
+ pos_cpy.enable = false;
+
+ if (ipp->funcs->ipp_cursor_set_position)
+ ipp->funcs->ipp_cursor_set_position(ipp, &pos_cpy, &param);
+ if (mi->funcs->set_cursor_position)
+ mi->funcs->set_cursor_position(mi, &pos_cpy, &param);
+}
+
+void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
+{
+ struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes;
+
+ if (pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes)
+ pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes(
+ pipe_ctx->plane_res.ipp, attributes);
+
+ if (pipe_ctx->plane_res.mi->funcs->set_cursor_attributes)
+ pipe_ctx->plane_res.mi->funcs->set_cursor_attributes(
+ pipe_ctx->plane_res.mi, attributes);
+
+ if (pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes)
+ pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes(
+ pipe_ctx->plane_res.xfm, attributes);
+}
+
static void ready_shared_resources(struct dc *dc, struct dc_state *context) {}
static void optimize_shared_resources(struct dc *dc) {}
@@ -2957,6 +3018,8 @@ static const struct hw_sequencer_funcs dce110_funcs = {
.edp_backlight_control = hwss_edp_backlight_control,
.edp_power_control = hwss_edp_power_control,
.edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready,
+ .set_cursor_position = dce110_set_cursor_position,
+ .set_cursor_attribute = dce110_set_cursor_attribute
};
void dce110_hw_sequencer_construct(struct dc *dc)
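should_enable_fbc() previously hardcoded pipe 0; it now reports the index of the first pipe that carries a stream so enable_fbc() programs the right one. A standalone model of the search:

#include <stdio.h>

/* Toy model of the pipe search added above: instead of assuming pipe 0,
 * find the first pipe that actually carries a stream. */
#define MAX_PIPES 6

struct pipe { int has_stream; };

static int first_active_pipe(const struct pipe *pipes, int count)
{
	int i;

	for (i = 0; i < count; i++)
		if (pipes[i].has_stream)
			return i;
	return -1;
}

int main(void)
{
	struct pipe pipes[MAX_PIPES] = { {0}, {0}, {1}, {0}, {0}, {0} };

	printf("FBC pipe: %d\n", first_active_pipe(pipes, MAX_PIPES)); /* 2 */
	return 0;
}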
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index 7c4779578fb7..00f18c485e1e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -846,6 +846,16 @@ static bool dce110_validate_bandwidth(
return result;
}
+enum dc_status dce110_validate_plane(const struct dc_plane_state *plane_state,
+ struct dc_caps *caps)
+{
+ if (((plane_state->dst_rect.width * 2) < plane_state->src_rect.width) ||
+ ((plane_state->dst_rect.height * 2) < plane_state->src_rect.height))
+ return DC_FAIL_SURFACE_VALIDATE;
+
+ return DC_OK;
+}
+
static bool dce110_validate_surface_sets(
struct dc_state *context)
{
@@ -869,6 +879,13 @@ static bool dce110_validate_surface_sets(
plane->src_rect.height > 1080))
return false;
+ /* We don't have the logic to support underlay-only yet,
+ * so block the use case where we get an NV12 plane as the
+ * top layer.
+ */
+ if (j == 0)
+ return false;
+
/* irrespective of plane format,
* stream should be RGB encoded
*/
@@ -1021,6 +1038,7 @@ static const struct resource_funcs dce110_res_pool_funcs = {
.link_enc_create = dce110_link_encoder_create,
.validate_guaranteed = dce110_validate_guaranteed,
.validate_bandwidth = dce110_validate_bandwidth,
+ .validate_plane = dce110_validate_plane,
.acquire_idle_pipe_for_layer = dce110_acquire_underlay,
.add_stream_to_ctx = dce110_add_stream_to_ctx,
.validate_global = dce110_validate_global
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index 663e0a047a4b..98d9cd0109e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -1103,6 +1103,8 @@ static bool construct(
dc->caps.max_downscale_ratio = 200;
dc->caps.i2c_speed_in_khz = 100;
dc->caps.max_cursor_size = 128;
+ dc->caps.dual_link_dvi = true;
+
/*************************************************
* Create resources *
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 57cd67359567..5aab01db28ee 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -835,6 +835,8 @@ static bool construct(
dc->caps.max_downscale_ratio = 200;
dc->caps.i2c_speed_in_khz = 100;
dc->caps.max_cursor_size = 128;
+ dc->caps.dual_link_dvi = true;
+
dc->debug = debug_defaults;
/*************************************************
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 8f2bd56f3461..25d7eb1567ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -793,6 +793,7 @@ static bool dce80_construct(
dc->caps.max_downscale_ratio = 200;
dc->caps.i2c_speed_in_khz = 40;
dc->caps.max_cursor_size = 128;
+ dc->caps.dual_link_dvi = true;
/*************************************************
* Create resources *
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 82572863acab..dc1e010725c1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -238,10 +238,34 @@ static void enable_power_gating_plane(
static void disable_vga(
struct dce_hwseq *hws)
{
+ unsigned int in_vga1_mode = 0;
+ unsigned int in_vga2_mode = 0;
+ unsigned int in_vga3_mode = 0;
+ unsigned int in_vga4_mode = 0;
+
+ REG_GET(D1VGA_CONTROL, D1VGA_MODE_ENABLE, &in_vga1_mode);
+ REG_GET(D2VGA_CONTROL, D2VGA_MODE_ENABLE, &in_vga2_mode);
+ REG_GET(D3VGA_CONTROL, D3VGA_MODE_ENABLE, &in_vga3_mode);
+ REG_GET(D4VGA_CONTROL, D4VGA_MODE_ENABLE, &in_vga4_mode);
+
+ if (in_vga1_mode == 0 && in_vga2_mode == 0 &&
+ in_vga3_mode == 0 && in_vga4_mode == 0)
+ return;
+
REG_WRITE(D1VGA_CONTROL, 0);
REG_WRITE(D2VGA_CONTROL, 0);
REG_WRITE(D3VGA_CONTROL, 0);
REG_WRITE(D4VGA_CONTROL, 0);
+
+ /* HW Engineer's Notes:
+ * During switch from vga->extended, if we set the VGA_TEST_ENABLE and
+ * then hit the VGA_TEST_RENDER_START, then the DCHUBP timing gets updated correctly.
+ *
+ * Then vBIOS will have it poll for the VGA_TEST_RENDER_DONE and unset
+ * VGA_TEST_ENABLE, to leave it in the same state as before.
+ */
+ REG_UPDATE(VGA_TEST_CONTROL, VGA_TEST_ENABLE, 1);
+ REG_UPDATE(VGA_TEST_CONTROL, VGA_TEST_RENDER_START, 1);
}
static void dpp_pg_control(
@@ -1761,6 +1785,11 @@ static void update_dchubp_dpp(
&pipe_ctx->plane_res.scl_data.viewport_c);
}
+ if (pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
+ dc->hwss.set_cursor_position(pipe_ctx);
+ dc->hwss.set_cursor_attribute(pipe_ctx);
+ }
+
if (plane_state->update_flags.bits.full_update) {
/*gamut remap*/
program_gamut_remap(pipe_ctx);
@@ -2296,7 +2325,7 @@ static bool dcn10_dummy_display_power_gating(
return true;
}
-void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
+static void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
{
struct dc_plane_state *plane_state = pipe_ctx->plane_state;
struct timing_generator *tg = pipe_ctx->stream_res.tg;
@@ -2316,12 +2345,46 @@ void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
}
}
-void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
+static void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
{
if (hws->ctx->dc->res_pool->hubbub != NULL)
hubbub1_update_dchub(hws->ctx->dc->res_pool->hubbub, dh_data);
}
+static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
+{
+ struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
+ struct hubp *hubp = pipe_ctx->plane_res.hubp;
+ struct dpp *dpp = pipe_ctx->plane_res.dpp;
+ struct dc_cursor_mi_param param = {
+ .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz,
+ .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz,
+ .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x,
+ .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width,
+ .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz
+ };
+
+ if (pipe_ctx->plane_state->address.type
+ == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
+ pos_cpy.enable = false;
+
+ if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
+ pos_cpy.enable = false;
+
+ hubp->funcs->set_cursor_position(hubp, &pos_cpy, &param);
+ dpp->funcs->set_cursor_position(dpp, &pos_cpy, &param, hubp->curs_attr.width);
+}
+
+static void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
+{
+ struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes;
+
+ pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes(
+ pipe_ctx->plane_res.hubp, attributes);
+ pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes(
+ pipe_ctx->plane_res.dpp, attributes->color_format);
+}
+
static const struct hw_sequencer_funcs dcn10_funcs = {
.program_gamut_remap = program_gamut_remap,
.program_csc_matrix = program_csc_matrix,
@@ -2362,6 +2425,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.edp_backlight_control = hwss_edp_backlight_control,
.edp_power_control = hwss_edp_power_control,
.edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready,
+ .set_cursor_position = dcn10_set_cursor_position,
+ .set_cursor_attribute = dcn10_set_cursor_attribute
};
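disable_vga() above now reads the four VGA mode enables first and returns early when none is set, so the workaround-laden disable sequence only runs when actually leaving VGA mode. A standalone model of that read-before-write guard, with registers modeled as plain variables:

#include <stdio.h>

/* Toy model of the guard added to disable_vga(): skip the writes (and
 * the VGA_TEST handshake) when no controller is in VGA mode. */
#define NUM_VGA 4

static int vga_mode_enable[NUM_VGA];
static int vga_test_enable, vga_test_render_start;

static void disable_vga(void)
{
	int i, any = 0;

	for (i = 0; i < NUM_VGA; i++)
		any |= vga_mode_enable[i];
	if (!any)
		return; /* nothing to do, avoid the workaround sequence */

	for (i = 0; i < NUM_VGA; i++)
		vga_mode_enable[i] = 0;

	/* kick the VGA_TEST handshake so the DCHUBP timing is updated */
	vga_test_enable = 1;
	vga_test_render_start = 1;
}

int main(void)
{
	vga_mode_enable[0] = 1;
	disable_vga();
	printf("test_enable=%d render_start=%d\n",
	       vga_test_enable, vga_test_render_start);
	return 0;
}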
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index 0fd329deacd8..54d8a1386142 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -123,8 +123,7 @@ struct link_encoder_funcs {
void (*enable_tmds_output)(struct link_encoder *enc,
enum clock_source_id clock_source,
enum dc_color_depth color_depth,
- bool hdmi,
- bool dual_link,
+ enum signal_type signal,
uint32_t pixel_clock);
void (*enable_dp_output)(struct link_encoder *enc,
const struct dc_link_settings *link_settings,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index 4c0aa56f7bae..379c6ecd271a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -198,6 +198,9 @@ struct hw_sequencer_funcs {
bool enable);
void (*edp_wait_for_hpd_ready)(struct dc_link *link, bool power_up);
+ void (*set_cursor_position)(struct pipe_ctx *pipe);
+ void (*set_cursor_attribute)(struct pipe_ctx *pipe);
+
};
void color_space_to_black_color(
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
index f7e40b292dfb..d3e1923b01a8 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
@@ -217,7 +217,7 @@ bool dce110_vblank_set(
core_dc->current_state->res_ctx.pipe_ctx[pipe_offset].stream_res.tg;
if (enable) {
- if (!tg->funcs->arm_vert_intr(tg, 2)) {
+ if (!tg || !tg->funcs->arm_vert_intr(tg, 2)) {
DC_ERROR("Failed to get VBLANK!\n");
return false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
index 57a54a7b89e5..1c079ba37c30 100644
--- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
@@ -42,8 +42,7 @@ static void virtual_link_encoder_enable_tmds_output(
struct link_encoder *enc,
enum clock_source_id clock_source,
enum dc_color_depth color_depth,
- bool hdmi,
- bool dual_link,
+ enum signal_type signal,
uint32_t pixel_clock) {}
static void virtual_link_encoder_enable_dp_output(
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
index 7a9b43f84a31..36bbad594267 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
@@ -419,11 +419,6 @@ struct bios_event_info {
bool backlight_changed;
};
-enum {
- HDMI_PIXEL_CLOCK_IN_KHZ_297 = 297000,
- TMDS_PIXEL_CLOCK_IN_KHZ_165 = 165000
-};
-
/*
* DFS-bypass flag
*/
diff --git a/drivers/gpu/drm/amd/display/include/signal_types.h b/drivers/gpu/drm/amd/display/include/signal_types.h
index b5ebde642207..199c5db67cbc 100644
--- a/drivers/gpu/drm/amd/display/include/signal_types.h
+++ b/drivers/gpu/drm/amd/display/include/signal_types.h
@@ -26,6 +26,11 @@
#ifndef __DC_SIGNAL_TYPES_H__
#define __DC_SIGNAL_TYPES_H__
+/* Minimum pixel clock, in kHz; for a TMDS signal this is 25.00 MHz */
+#define TMDS_MIN_PIXEL_CLOCK 25000
+/* Maximum pixel clock, in kHz; for a TMDS signal this is 165.00 MHz */
+#define TMDS_MAX_PIXEL_CLOCK 165000
+
enum signal_type {
SIGNAL_TYPE_NONE = 0L, /* no signal */
SIGNAL_TYPE_DVI_SINGLE_LINK = (1 << 0),
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 4c3223a4d62b..adb6e7b9280c 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -162,7 +162,7 @@ static int pp_hw_init(void *handle)
if(hwmgr->smumgr_funcs->start_smu(pp_handle->hwmgr)) {
pr_err("smc start failed\n");
hwmgr->smumgr_funcs->smu_fini(pp_handle->hwmgr);
- return -EINVAL;;
+ return -EINVAL;
}
if (ret == PP_DPM_DISABLED)
goto exit;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 41e42beff213..08e8a793714f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2756,10 +2756,13 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
- disable_mclk_switching = ((1 < info.display_count) ||
- disable_mclk_switching_for_frame_lock ||
- smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
- (mode_info.refresh_rate > 120));
+ if (info.display_count == 0)
+ disable_mclk_switching = false;
+ else
+ disable_mclk_switching = ((1 < info.display_count) ||
+ disable_mclk_switching_for_frame_lock ||
+ smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
+ (mode_info.refresh_rate > 120));
sclk = smu7_ps->performance_levels[0].engine_clock;
mclk = smu7_ps->performance_levels[0].memory_clock;
@@ -4534,13 +4537,6 @@ static int smu7_set_power_profile_state(struct pp_hwmgr *hwmgr,
int tmp_result, result = 0;
uint32_t sclk_mask = 0, mclk_mask = 0;
- if (hwmgr->chip_id == CHIP_FIJI) {
- if (request->type == AMD_PP_GFX_PROFILE)
- smu7_enable_power_containment(hwmgr);
- else if (request->type == AMD_PP_COMPUTE_PROFILE)
- smu7_disable_power_containment(hwmgr);
- }
-
if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_AUTO)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 2d55dabc77d4..5f9c3efb532f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3168,10 +3168,13 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
disable_mclk_switching_for_vr = PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR);
force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh);
- disable_mclk_switching = (info.display_count > 1) ||
- disable_mclk_switching_for_frame_lock ||
- disable_mclk_switching_for_vr ||
- force_mclk_high;
+ if (info.display_count == 0)
+ disable_mclk_switching = false;
+ else
+ disable_mclk_switching = (info.display_count > 1) ||
+ disable_mclk_switching_for_frame_lock ||
+ disable_mclk_switching_for_vr ||
+ force_mclk_high;
sclk = vega10_ps->performance_levels[0].gfx_clock;
mclk = vega10_ps->performance_levels[0].mem_clock;
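The smu7 and vega10 hunks above add the same guard: with zero active displays there is nothing mclk switching could visibly disturb, so it is always allowed; otherwise the old conditions stand. A standalone model of the decision:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the guard added in both hwmgrs above: headless means
 * memory-clock switching can never cause visible corruption. */
static bool disable_mclk_switching(int display_count, bool frame_lock,
				   bool vblank_too_short, int refresh_rate)
{
	if (display_count == 0)
		return false;
	return display_count > 1 || frame_lock || vblank_too_short ||
	       refresh_rate > 120;
}

int main(void)
{
	printf("headless:       %d\n", disable_mclk_switching(0, true, true, 144));
	printf("single, 144 Hz: %d\n", disable_mclk_switching(1, false, false, 144));
	return 0;
}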
diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h
index 5f4c2e833a65..d665dd5af5dd 100644
--- a/drivers/gpu/drm/ast/ast_tables.h
+++ b/drivers/gpu/drm/ast/ast_tables.h
@@ -97,7 +97,7 @@ static const struct ast_vbios_dclk_info dclk_table[] = {
{0x67, 0x22, 0x00}, /* 0E: VCLK157_5 */
{0x6A, 0x22, 0x00}, /* 0F: VCLK162 */
{0x4d, 0x4c, 0x80}, /* 10: VCLK154 */
- {0xa7, 0x78, 0x80}, /* 11: VCLK83.5 */
+ {0x68, 0x6f, 0x80}, /* 11: VCLK83.5 */
{0x28, 0x49, 0x80}, /* 12: VCLK106.5 */
{0x37, 0x49, 0x80}, /* 13: VCLK146.25 */
{0x1f, 0x45, 0x80}, /* 14: VCLK148.5 */
@@ -127,7 +127,7 @@ static const struct ast_vbios_dclk_info dclk_table_ast2500[] = {
{0x67, 0x22, 0x00}, /* 0E: VCLK157_5 */
{0x6A, 0x22, 0x00}, /* 0F: VCLK162 */
{0x4d, 0x4c, 0x80}, /* 10: VCLK154 */
- {0xa7, 0x78, 0x80}, /* 11: VCLK83.5 */
+ {0x68, 0x6f, 0x80}, /* 11: VCLK83.5 */
{0x28, 0x49, 0x80}, /* 12: VCLK106.5 */
{0x37, 0x49, 0x80}, /* 13: VCLK146.25 */
{0x1f, 0x45, 0x80}, /* 14: VCLK148.5 */
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index cd23b1b28259..c91b9b054e3f 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -294,22 +294,7 @@ static void cirrus_crtc_prepare(struct drm_crtc *crtc)
{
}
-/*
- * This is called after a mode is programmed. It should reverse anything done
- * by the prepare function
- */
-static void cirrus_crtc_commit(struct drm_crtc *crtc)
-{
-}
-
-/*
- * The core can pass us a set of gamma values to program. We actually only
- * use this for 8-bit mode so can't perform smooth fades on deeper modes,
- * but it's a requirement that we provide the function
- */
-static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
- u16 *blue, uint32_t size,
- struct drm_modeset_acquire_ctx *ctx)
+static void cirrus_crtc_load_lut(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
struct cirrus_device *cdev = dev->dev_private;
@@ -317,7 +302,7 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
int i;
if (!crtc->enabled)
- return 0;
+ return;
r = crtc->gamma_store;
g = r + crtc->gamma_size;
@@ -330,6 +315,27 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
WREG8(PALETTE_DATA, *g++ >> 8);
WREG8(PALETTE_DATA, *b++ >> 8);
}
+}
+
+/*
+ * This is called after a mode is programmed. It should reverse anything done
+ * by the prepare function
+ */
+static void cirrus_crtc_commit(struct drm_crtc *crtc)
+{
+ cirrus_crtc_load_lut(crtc);
+}
+
+/*
+ * The core can pass us a set of gamma values to program. We actually only
+ * use this for 8-bit mode so can't perform smooth fades on deeper modes,
+ * but it's a requirement that we provide the function
+ */
+static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ cirrus_crtc_load_lut(crtc);
return 0;
}
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index ab4032167094..ae3cbfe9e01c 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1878,6 +1878,8 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state,
new_crtc_state->event->base.completion = &commit->flip_done;
new_crtc_state->event->base.completion_release = release_crtc_commit;
drm_crtc_commit_get(commit);
+
+ commit->abort_completion = true;
}
for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) {
@@ -3421,8 +3423,21 @@ EXPORT_SYMBOL(drm_atomic_helper_crtc_duplicate_state);
void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state)
{
if (state->commit) {
+ /*
+ * In the event that a non-blocking commit returns
+ * -ERESTARTSYS before the commit_tail work is queued, we will
+ * have an extra reference to the commit object. Release it, if
+ * the event has not been consumed by the worker.
+ *
+ * state->event may be freed, so we can't directly look at
+ * state->event->base.completion.
+ */
+ if (state->event && state->commit->abort_completion)
+ drm_crtc_commit_put(state->commit);
+
kfree(state->commit->event);
state->commit->event = NULL;
+
drm_crtc_commit_put(state->commit);
}
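The new abort_completion flag records that setup took an extra commit reference on behalf of the event; if the commit is aborted before the worker consumes the event, the destroy path must drop that reference too or the commit leaks. A standalone refcount model of the fix:

#include <stdbool.h>
#include <stdio.h>

/* Toy refcount model of the fix above: setup takes an extra reference
 * for the event; destroy drops it only if the event was never consumed. */
struct commit { int refs; bool abort_completion; };

static void commit_get(struct commit *c) { c->refs++; }
static void commit_put(struct commit *c) { c->refs--; }

static void destroy_state(struct commit *c, bool event_pending)
{
	if (event_pending && c->abort_completion)
		commit_put(c);	/* reference taken for the event */
	commit_put(c);		/* reference held by the state */
}

int main(void)
{
	struct commit c = { .refs = 1, .abort_completion = false };

	commit_get(&c);			/* setup_commit: event reference */
	c.abort_completion = true;
	destroy_state(&c, true);	/* aborted before the worker ran */
	printf("refs after destroy: %d\n", c.refs); /* 0 -> no leak */
	return 0;
}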
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index ddd537914575..4f751a9d71a3 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -113,6 +113,9 @@ static const struct edid_quirk {
/* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
{ "AEO", 0, EDID_QUIRK_FORCE_6BPC },
+ /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */
+ { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC },
+
/* Belinea 10 15 55 */
{ "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
{ "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
@@ -162,6 +165,24 @@ static const struct edid_quirk {
/* HTC Vive VR Headset */
{ "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP },
+
+ /* Oculus Rift DK1, DK2, and CV1 VR Headsets */
+ { "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP },
+ { "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP },
+ { "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP },
+
+ /* Windows Mixed Reality Headsets */
+ { "ACR", 0x7fce, EDID_QUIRK_NON_DESKTOP },
+ { "HPN", 0x3515, EDID_QUIRK_NON_DESKTOP },
+ { "LEN", 0x0408, EDID_QUIRK_NON_DESKTOP },
+ { "LEN", 0xb800, EDID_QUIRK_NON_DESKTOP },
+ { "FUJ", 0x1970, EDID_QUIRK_NON_DESKTOP },
+ { "DEL", 0x7fce, EDID_QUIRK_NON_DESKTOP },
+ { "SEC", 0x144a, EDID_QUIRK_NON_DESKTOP },
+ { "AUS", 0xc102, EDID_QUIRK_NON_DESKTOP },
+
+ /* Sony PlayStation VR Headset */
+ { "SNY", 0x0704, EDID_QUIRK_NON_DESKTOP },
};
/*
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index 5a13ff29f4f0..2dc5e8bed172 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -121,6 +121,10 @@ int drm_mode_addfb(struct drm_device *dev,
r.pixel_format = drm_mode_legacy_fb_format(or->bpp, or->depth);
r.handles[0] = or->handle;
+ if (r.pixel_format == DRM_FORMAT_XRGB2101010 &&
+ dev->driver->driver_features & DRIVER_PREFER_XBGR_30BPP)
+ r.pixel_format = DRM_FORMAT_XBGR2101010;
+
ret = drm_mode_addfb2(dev, &r, file_priv);
if (ret)
return ret;
@@ -457,6 +461,12 @@ int drm_mode_getfb(struct drm_device *dev,
if (!fb)
return -ENOENT;
+ /* Multi-planar framebuffers need getfb2. */
+ if (fb->format->num_planes > 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
r->height = fb->height;
r->width = fb->width;
r->depth = fb->format->depth;
@@ -480,6 +490,7 @@ int drm_mode_getfb(struct drm_device *dev,
ret = -ENODEV;
}
+out:
drm_framebuffer_put(fb);
return ret;
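For legacy addfb, the bpp/depth pair 30-bit historically mapped to XRGB2101010; drivers that scan out the opposite channel order can now set a feature flag so the legacy path picks XBGR2101010 instead. A standalone model (the fourcc values mirror the DRM codes):

#include <stdint.h>
#include <stdio.h>

#define DRM_FORMAT_XRGB2101010 0x30335258u /* fourcc 'XR30' */
#define DRM_FORMAT_XBGR2101010 0x30334258u /* fourcc 'XB30' */

/* Toy model of the legacy-addfb tweak above: rewrite the 30 bpp format
 * before handing the request to the addfb2 path. */
static uint32_t fixup_30bpp(uint32_t fmt, int prefers_xbgr_30bpp)
{
	if (fmt == DRM_FORMAT_XRGB2101010 && prefers_xbgr_30bpp)
		return DRM_FORMAT_XBGR2101010;
	return fmt;
}

int main(void)
{
	printf("0x%08x\n", (unsigned)fixup_30bpp(DRM_FORMAT_XRGB2101010, 1));
	return 0;
}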
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 186c4e90cc1c..89eef1bb4ddc 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -836,9 +836,24 @@ struct drm_mm_node *drm_mm_scan_color_evict(struct drm_mm_scan *scan)
if (!mm->color_adjust)
return NULL;
- hole = list_first_entry(&mm->hole_stack, typeof(*hole), hole_stack);
- hole_start = __drm_mm_hole_node_start(hole);
- hole_end = hole_start + hole->hole_size;
+ /*
+ * The hole found during scanning should ideally be the first element
+ * in the hole_stack list, but due to side-effects in the driver it
+ * may not be.
+ */
+ list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
+ hole_start = __drm_mm_hole_node_start(hole);
+ hole_end = hole_start + hole->hole_size;
+
+ if (hole_start <= scan->hit_start &&
+ hole_end >= scan->hit_end)
+ break;
+ }
+
+ /* We should only be called after we found the hole previously */
+ DRM_MM_BUG_ON(&hole->hole_stack == &mm->hole_stack);
+ if (unlikely(&hole->hole_stack == &mm->hole_stack))
+ return NULL;
DRM_MM_BUG_ON(hole_start > scan->hit_start);
DRM_MM_BUG_ON(hole_end < scan->hit_end);
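Rather than trusting that the scan hole sits at the head of hole_stack, the evict path now walks the list for the hole that fully covers the recorded hit range. A standalone model of that search:

#include <stdio.h>

/* Toy model of the search above: pick the hole that covers the
 * previously recorded [hit_start, hit_end) range. */
struct hole { unsigned long start, end; };

static const struct hole *find_covering_hole(const struct hole *holes,
					     int count,
					     unsigned long hit_start,
					     unsigned long hit_end)
{
	int i;

	for (i = 0; i < count; i++)
		if (holes[i].start <= hit_start && holes[i].end >= hit_end)
			return &holes[i];
	return NULL;
}

int main(void)
{
	struct hole holes[] = { { 0, 64 }, { 128, 512 }, { 1024, 2048 } };
	const struct hole *h = find_covering_hole(holes, 3, 200, 400);

	if (h)
		printf("hole [%lu, %lu) covers the hit\n", h->start, h->end);
	return 0;
}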
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 555fbe54d6e2..00b8445ba819 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -654,6 +654,26 @@ out:
}
/**
+ * drm_kms_helper_is_poll_worker - is %current task an output poll worker?
+ *
+ * Determine if %current task is an output poll worker. This can be used
+ * to select distinct code paths for output polling versus other contexts.
+ *
+ * One use case is to avoid a deadlock between the output poll worker and
+ * the autosuspend worker wherein the latter waits for polling to finish
+ * upon calling drm_kms_helper_poll_disable(), while the former waits for
+ * runtime suspend to finish upon calling pm_runtime_get_sync() in a
+ * connector ->detect hook.
+ */
+bool drm_kms_helper_is_poll_worker(void)
+{
+ struct work_struct *work = current_work();
+
+ return work && work->func == output_poll_execute;
+}
+EXPORT_SYMBOL(drm_kms_helper_is_poll_worker);
+
+/**
* drm_kms_helper_poll_disable - disable output polling
* @dev: drm_device
*
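The new helper's intended use, per its kerneldoc, is inside a connector ->detect hook. A hedged sketch of that pattern, assuming a driver that uses runtime PM with autosuspend; example_detect and the probe body are illustrative, not from this patch:

#include <drm/drm_crtc_helper.h>
#include <linux/pm_runtime.h>

/* Sketch only: when called from the output poll worker, take a
 * runtime-PM reference without waking the device, so the poll worker
 * never waits on the autosuspend worker (the deadlock described above). */
static enum drm_connector_status
example_detect(struct drm_connector *connector, bool force)
{
	struct device *dev = connector->dev->dev;
	enum drm_connector_status status;

	if (drm_kms_helper_is_poll_worker())
		pm_runtime_get_noresume(dev);
	else
		pm_runtime_get_sync(dev);

	status = connector_status_disconnected; /* real hardware probe here */

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
	return status;
}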
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 2b8bf2dd6387..f68ef1b3a28c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -286,7 +286,6 @@ static int g2d_init_cmdlist(struct g2d_data *g2d)
node = kcalloc(G2D_CMDLIST_NUM, sizeof(*node), GFP_KERNEL);
if (!node) {
- dev_err(dev, "failed to allocate memory\n");
ret = -ENOMEM;
goto err;
}
@@ -926,7 +925,7 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no)
struct drm_device *drm_dev = g2d->subdrv.drm_dev;
struct g2d_runqueue_node *runqueue_node = g2d->runqueue_node;
struct drm_exynos_pending_g2d_event *e;
- struct timeval now;
+ struct timespec64 now;
if (list_empty(&runqueue_node->event_list))
return;
@@ -934,9 +933,9 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no)
e = list_first_entry(&runqueue_node->event_list,
struct drm_exynos_pending_g2d_event, base.link);
- do_gettimeofday(&now);
+ ktime_get_ts64(&now);
e->event.tv_sec = now.tv_sec;
- e->event.tv_usec = now.tv_usec;
+ e->event.tv_usec = now.tv_nsec / NSEC_PER_USEC;
e->event.cmdlist_no = cmdlist_no;
drm_send_event(drm_dev, &e->base);
@@ -1358,10 +1357,9 @@ int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data,
return -EFAULT;
runqueue_node = kmem_cache_alloc(g2d->runqueue_slab, GFP_KERNEL);
- if (!runqueue_node) {
- dev_err(dev, "failed to allocate memory\n");
+ if (!runqueue_node)
return -ENOMEM;
- }
+
run_cmdlist = &runqueue_node->run_cmdlist;
event_list = &runqueue_node->event_list;
INIT_LIST_HEAD(run_cmdlist);
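do_gettimeofday() and struct timeval are being retired for y2038 safety; the replacement above reads a timespec64 and scales nanoseconds down to the microseconds the uevent ABI still expects. A standalone userspace analogue of that conversion:

#include <stdio.h>
#include <time.h>

#define NSEC_PER_USEC 1000L

/* Userspace analogue of the conversion above: take a timespec and
 * derive the seconds/microseconds pair the event structure expects. */
int main(void)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	printf("tv_sec=%lld tv_usec=%ld\n",
	       (long long)now.tv_sec, now.tv_nsec / NSEC_PER_USEC);
	return 0;
}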
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.h b/drivers/gpu/drm/exynos/exynos_drm_rotator.h
deleted file mode 100644
index 71a0b4c0c1e8..000000000000
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- * YoungJun Cho <[email protected]>
- * Eunchul Kim <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_ROTATOR_H_
-#define _EXYNOS_DRM_ROTATOR_H_
-
-/* TODO */
-
-#endif
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index a4b75a46f946..abd84cbcf1c2 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -1068,10 +1068,13 @@ static void hdmi_audio_config(struct hdmi_context *hdata)
/* Configuration I2S input ports. Configure I2S_PIN_SEL_0~4 */
hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_0, HDMI_I2S_SEL_SCLK(5)
| HDMI_I2S_SEL_LRCK(6));
- hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(1)
- | HDMI_I2S_SEL_SDATA2(4));
+
+ hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(3)
+ | HDMI_I2S_SEL_SDATA0(4));
+
hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_2, HDMI_I2S_SEL_SDATA3(1)
| HDMI_I2S_SEL_SDATA2(2));
+
hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_3, HDMI_I2S_SEL_DSD(0));
/* I2S_CON_1 & 2 */
diff --git a/drivers/gpu/drm/exynos/regs-fimc.h b/drivers/gpu/drm/exynos/regs-fimc.h
index 30496134a3d0..d7cbe53c4c01 100644
--- a/drivers/gpu/drm/exynos/regs-fimc.h
+++ b/drivers/gpu/drm/exynos/regs-fimc.h
@@ -569,7 +569,7 @@
#define EXYNOS_CIIMGEFF_FIN_EMBOSSING (4 << 26)
#define EXYNOS_CIIMGEFF_FIN_SILHOUETTE (5 << 26)
#define EXYNOS_CIIMGEFF_FIN_MASK (7 << 26)
-#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK ((0xff < 13) | (0xff < 0))
+#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK ((0xff << 13) | (0xff << 0))
/* Real input DMA size register */
#define EXYNOS_CIREAL_ISIZE_AUTOLOAD_ENABLE (1 << 31)
diff --git a/drivers/gpu/drm/exynos/regs-hdmi.h b/drivers/gpu/drm/exynos/regs-hdmi.h
index 04be0f7e8193..4420c203ac85 100644
--- a/drivers/gpu/drm/exynos/regs-hdmi.h
+++ b/drivers/gpu/drm/exynos/regs-hdmi.h
@@ -464,7 +464,7 @@
/* I2S_PIN_SEL_1 */
#define HDMI_I2S_SEL_SDATA1(x) (((x) & 0x7) << 4)
-#define HDMI_I2S_SEL_SDATA2(x) ((x) & 0x7)
+#define HDMI_I2S_SEL_SDATA0(x) ((x) & 0x7)
/* I2S_PIN_SEL_2 */
#define HDMI_I2S_SEL_SDATA3(x) (((x) & 0x7) << 4)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index c8454ac43fae..db6b94dda5df 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -471,6 +471,7 @@ struct parser_exec_state {
* used when ret from 2nd level batch buffer
*/
int saved_buf_addr_type;
+ bool is_ctx_wa;
struct cmd_info *info;
@@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s)
bb->accessing = true;
bb->bb_start_cmd_va = s->ip_va;
+ if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
+ bb->bb_offset = s->ip_va - s->rb_va;
+ else
+ bb->bb_offset = 0;
+
/*
* ip_va saves the virtual address of the shadow batch buffer, while
* ip_gma saves the graphics address of the original batch buffer.
@@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload)
s.ring_tail = gma_tail;
s.rb_va = workload->shadow_ring_buffer_va;
s.workload = workload;
+ s.is_ctx_wa = false;
if ((bypass_scan_mask & (1 << workload->ring_id)) ||
gma_head == gma_tail)
@@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
s.ring_tail = gma_tail;
s.rb_va = wa_ctx->indirect_ctx.shadow_va;
s.workload = workload;
+ s.is_ctx_wa = true;
if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 909499b73d03..021f722e2481 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -733,6 +733,25 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
return ret == 0 ? count : ret;
}
+static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
+{
+ struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
+ unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+ struct intel_gvt *gvt = vgpu->gvt;
+ int offset;
+
+ /* Only allow MMIO GGTT entry access */
+ if (index != PCI_BASE_ADDRESS_0)
+ return false;
+
+ offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
+ intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+
+ return (offset >= gvt->device_info.gtt_start_offset &&
+ offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
+ true : false;
+}
+
static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -742,7 +761,21 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
while (count) {
size_t filled;
- if (count >= 4 && !(*ppos % 4)) {
+ /* Only support 8-byte reads of GGTT entries */
+ if (count >= 8 && !(*ppos % 8) &&
+ gtt_entry(mdev, ppos)) {
+ u64 val;
+
+ ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+ ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 8;
+ } else if (count >= 4 && !(*ppos % 4)) {
u32 val;
ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
@@ -802,7 +835,21 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev,
while (count) {
size_t filled;
- if (count >= 4 && !(*ppos % 4)) {
+ /* Only support 8-byte writes of GGTT entries */
+ if (count >= 8 && !(*ppos % 8) &&
+ gtt_entry(mdev, ppos)) {
+ u64 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+ ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 8;
+ } else if (count >= 4 && !(*ppos % 4)) {
u32 val;
if (copy_from_user(&val, buf, sizeof(val)))
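A 64-bit GGTT PTE should be read or written in a single 8-byte access so the vGPU never observes a half-updated entry; the new paths above therefore accept 8-byte accesses only when they land inside the GGTT window of the MMIO BAR. A standalone model of that range check (offsets illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model of the gtt_entry() check above: treat an access as a GGTT
 * entry only if it is 8-byte sized/aligned and falls inside the GGTT
 * window of the BAR. The window bounds here are made up. */
static bool is_ggtt_entry(uint64_t offset, size_t count,
			  uint64_t gtt_start, uint64_t gtt_size)
{
	if (count < 8 || (offset % 8))
		return false;
	return offset >= gtt_start && offset < gtt_start + gtt_size;
}

int main(void)
{
	printf("%d\n", is_ggtt_entry(0x800008, 8, 0x800000, 0x800000)); /* 1 */
	printf("%d\n", is_ggtt_entry(0x800004, 8, 0x800000, 0x800000)); /* 0 */
	return 0;
}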
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 73ad6e90e49d..152df3d0291e 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -118,6 +118,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
{RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
{RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
+ {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
{RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
{RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
{RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */
@@ -393,9 +394,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
* performance for batch mmio read/write, so we need
* to handle forcewake manually.
*/
+ intel_runtime_pm_get(dev_priv);
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
switch_mmio(pre, next, ring_id);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ intel_runtime_pm_put(dev_priv);
}
/**
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index b55b3580ca1d..d74d6f05c62c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -52,6 +52,54 @@ static void set_context_pdp_root_pointer(
pdp_pair[i].val = pdp[7 - i];
}
+/*
+ * When populating the shadow ctx from the guest, we should not override
+ * OA-related registers, so that they are not overwritten by the guest's
+ * OA configs. This makes it possible to capture OA data from the host
+ * for both host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+ u32 *reg_state, bool save)
+{
+ struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+ u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+ u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+ int i = 0;
+ u32 flex_mmio[] = {
+ i915_mmio_reg_offset(EU_PERF_CNTL0),
+ i915_mmio_reg_offset(EU_PERF_CNTL1),
+ i915_mmio_reg_offset(EU_PERF_CNTL2),
+ i915_mmio_reg_offset(EU_PERF_CNTL3),
+ i915_mmio_reg_offset(EU_PERF_CNTL4),
+ i915_mmio_reg_offset(EU_PERF_CNTL5),
+ i915_mmio_reg_offset(EU_PERF_CNTL6),
+ };
+
+ if (!workload || !reg_state || workload->ring_id != RCS)
+ return;
+
+ if (save) {
+ workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+ for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+ u32 state_offset = ctx_flexeu0 + i * 2;
+
+ workload->flex_mmio[i] = reg_state[state_offset + 1];
+ }
+ } else {
+ reg_state[ctx_oactxctrl] =
+ i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+ reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+ for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+ u32 state_offset = ctx_flexeu0 + i * 2;
+ u32 mmio = flex_mmio[i];
+
+ reg_state[state_offset] = mmio;
+ reg_state[state_offset + 1] = workload->flex_mmio[i];
+ }
+ }
+}
+
static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
@@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
shadow_ring_context = kmap(page);
+ sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \
intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
@@ -122,6 +171,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
sizeof(*shadow_ring_context),
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+ sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
kunmap(page);
return 0;
}
@@ -376,6 +426,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
goto err;
}
+ /* For a privileged batch buffer that is not wa_ctx, bb_start_cmd_va
+ * only points into ring_scan_buffer, not the real ring address
+ * allocated later in copy_workload_to_ring_buffer. Please note that
+ * shadow_ring_buffer_va points to the real ring buffer va
+ * in copy_workload_to_ring_buffer.
+ */
+
+ if (bb->bb_offset)
+ bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+ + bb->bb_offset;
+
/* relocate shadow batch buffer */
bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
if (gmadr_bytes == 8)
@@ -1044,10 +1105,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
- s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
- sizeof(struct intel_vgpu_workload), 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
+ s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
+ sizeof(struct intel_vgpu_workload), 0,
+ SLAB_HWCACHE_ALIGN,
+ offsetof(struct intel_vgpu_workload, rb_tail),
+ sizeof_field(struct intel_vgpu_workload, rb_tail),
+ NULL);
if (!s->workloads) {
ret = -ENOMEM;
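
kmem_cache_create_usercopy() whitelists one region of the slab object for copy_to_user()/copy_from_user(); hardened usercopy then rejects copies that touch anything outside it. Here only rb_tail is exposed. A sketch of how the useroffset/usersize pair is computed, using a hypothetical stand-in struct rather than the real intel_vgpu_workload layout:

#include <stddef.h>
#include <stdio.h>

/* sizeof_field() as the kernel defines it */
#define sizeof_field(t, m) (sizeof(((t *)0)->m))

struct workload {               /* hypothetical stand-in layout */
        long head;
        unsigned int rb_tail;   /* the only usercopy-exposed field */
        long other;
};

int main(void)
{
        /* These two numbers are what the _usercopy variant whitelists. */
        printf("useroffset=%zu usersize=%zu\n",
               offsetof(struct workload, rb_tail),
               sizeof_field(struct workload, rb_tail));
        return 0;
}
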
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index ff175a98b19e..a79a4f60637e 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -110,6 +110,10 @@ struct intel_vgpu_workload {
/* shadow batch buffer */
struct list_head shadow_bb;
struct intel_shadow_wa_ctx wa_ctx;
+
+ /* oa registers */
+ u32 oactxctrl;
+ u32 flex_mmio[7];
};
struct intel_vgpu_shadow_bb {
@@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb {
u32 *bb_start_cmd_va;
unsigned int clflush;
bool accessing;
+ unsigned long bb_offset;
};
#define workload_q_head(vgpu, ring_id) \
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index 7a2511538f34..736bd2bc5127 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -333,7 +333,7 @@ TRACE_EVENT(render_mmio,
TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
unsigned int old_val, unsigned int new_val),
- TP_ARGS(old_id, new_id, action, reg, new_val, old_val),
+ TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
TP_STRUCT__entry(
__field(int, old_id)
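
The TP_ARGS fix above corrects swapped arguments: the tracepoint recorded new_val in the old_val field and vice versa. A trivial userspace analogue of the same bug:

#include <stdio.h>

static void trace_render_mmio(unsigned int old_val, unsigned int new_val)
{
        printf("old=%#x new=%#x\n", old_val, new_val);
}

int main(void)
{
        unsigned int old_val = 0x1, new_val = 0x2;

        trace_render_mmio(old_val, new_val);    /* correct ordering */
        trace_render_mmio(new_val, old_val);    /* the swapped-args bug */
        return 0;
}
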
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 173d0095e3b2..2f5209de0391 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1433,19 +1433,7 @@ void i915_driver_unload(struct drm_device *dev)
intel_modeset_cleanup(dev);
- /*
- * free the memory space allocated for the child device
- * config parsed from VBT
- */
- if (dev_priv->vbt.child_dev && dev_priv->vbt.child_dev_num) {
- kfree(dev_priv->vbt.child_dev);
- dev_priv->vbt.child_dev = NULL;
- dev_priv->vbt.child_dev_num = 0;
- }
- kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
- dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
- kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
- dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
+ intel_bios_cleanup(dev_priv);
vga_switcheroo_unregister_client(pdev);
vga_client_register(pdev, NULL, NULL, NULL);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a42deebedb0f..d307429a5ae0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1349,6 +1349,7 @@ struct intel_vbt_data {
u32 size;
u8 *data;
const u8 *sequence[MIPI_SEQ_MAX];
+ u8 *deassert_seq; /* Used by fixup_mipi_sequences() */
} dsi;
int crt_ddc_pin;
@@ -3657,6 +3658,7 @@ extern void intel_i2c_reset(struct drm_i915_private *dev_priv);
/* intel_bios.c */
void intel_bios_init(struct drm_i915_private *dev_priv);
+void intel_bios_cleanup(struct drm_i915_private *dev_priv);
bool intel_bios_is_valid_vbt(const void *buf, size_t size);
bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dd89abd2263d..6ff5d655c202 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -434,20 +434,28 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
dma_fence_put(shared[i]);
kfree(shared);
+ /*
+ * If both shared fences and an exclusive fence exist,
+ * then by construction the shared fences must be later
+ * than the exclusive fence. If we successfully wait for
+ * all the shared fences, we know that the exclusive fence
+ * must also be signaled. If all the shared fences are
+ * signaled, we can prune the array and recover the
+ * floating references on the fences/requests.
+ */
prune_fences = count && timeout >= 0;
} else {
excl = reservation_object_get_excl_rcu(resv);
}
- if (excl && timeout >= 0) {
+ if (excl && timeout >= 0)
timeout = i915_gem_object_wait_fence(excl, flags, timeout,
rps_client);
- prune_fences = timeout >= 0;
- }
dma_fence_put(excl);
- /* Oportunistically prune the fences iff we know they have *all* been
+ /*
+ * Opportunistically prune the fences iff we know they have *all* been
* signaled and that the reservation object has not been changed (i.e.
* no new fences have been added).
*/
@@ -3205,8 +3213,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
* rolling the global seqno forward (since this would complete requests
* for which we haven't set the fence error to EIO yet).
*/
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, i915, id) {
+ i915_gem_reset_prepare_engine(engine);
engine->submit_request = nop_submit_request;
+ }
/*
* Make sure no one is running the old callback before we proceed with
@@ -3244,6 +3254,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
intel_engine_init_global_seqno(engine,
intel_engine_last_submit(engine));
spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+ i915_gem_reset_finish_engine(engine);
}
set_bit(I915_WEDGED, &i915->gpu_error.flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 648e7536ff51..0c963fcf31ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -803,7 +803,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
{
- int priority = args->value;
+ s64 priority = args->value;
if (args->size)
ret = -EINVAL;
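
args->value is a u64; storing it in an int truncated the upper 32 bits, so an out-of-range priority could slip past the later range check. A small demonstration of the truncation (the int result of the narrowing cast is implementation-defined, but is 0 on the usual two's-complement targets):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t value = 0x100000000ULL;        /* bit 32 set, low bits clear */
        int as_int = (int)value;                /* truncates to 0 */
        int64_t as_s64 = (int64_t)value;        /* keeps the full value */

        /* The int copy silently passes a 32-bit range check; the s64 fails it. */
        printf("int=%d s64=%lld\n", as_int, (long long)as_s64);
        return 0;
}
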
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4401068ff468..3ab1ace2a6bd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -505,6 +505,8 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
list_add_tail(&vma->exec_link, &eb->unbound);
if (drm_mm_node_allocated(&vma->node))
err = i915_vma_unbind(vma);
+ if (unlikely(err))
+ vma->exec_flags = NULL;
}
return err;
}
@@ -2410,7 +2412,7 @@ err_request:
if (out_fence) {
if (err == 0) {
fd_install(out_fence_fd, out_fence->file);
- args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
+ args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
args->rsvd2 |= (u64)out_fence_fd << 32;
out_fence_fd = -1;
} else {
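
GENMASK_ULL(h, l) takes the high bit first; with the arguments reversed the shifts cancel and the mask collapses, so the in-fence fd bits were being cleared rather than kept. A self-contained check using the same macro shape as include/linux/bits.h:

#include <stdio.h>

/* GENMASK_ULL(h, l): bits h..l inclusive, high bit first */
#define GENMASK_ULL(h, l) \
        (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
        printf("%#llx\n", GENMASK_ULL(31, 0));  /* 0xffffffff: fd bits kept */
        printf("%#llx\n", GENMASK_ULL(0, 31));  /* 0: the mask vanishes */
        return 0;
}
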
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e09d18df8b7f..a3e93d46316a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -476,8 +476,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->timeline->lock);
- trace_i915_gem_request_execute(request);
-
/* Transfer from per-context onto the global per-engine timeline */
timeline = engine->timeline;
GEM_BUG_ON(timeline == request->timeline);
@@ -501,6 +499,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
list_move_tail(&request->link, &timeline->requests);
spin_unlock(&request->timeline->lock);
+ trace_i915_gem_request_execute(request);
+
wake_up_all(&request->execute);
}
diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt3.c b/drivers/gpu/drm/i915/i915_oa_cflgt3.c
index 42ff06fe54a3..792facdb6702 100644
--- a/drivers/gpu/drm/i915/i915_oa_cflgt3.c
+++ b/drivers/gpu/drm/i915/i915_oa_cflgt3.c
@@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
void
i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv)
{
- strncpy(dev_priv->perf.oa.test_config.uuid,
+ strlcpy(dev_priv->perf.oa.test_config.uuid,
"577e8e2c-3fa0-4875-8743-3538d585e3b0",
- UUID_STRING_LEN);
+ sizeof(dev_priv->perf.oa.test_config.uuid));
dev_priv->perf.oa.test_config.id = 1;
dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
diff --git a/drivers/gpu/drm/i915/i915_oa_cnl.c b/drivers/gpu/drm/i915/i915_oa_cnl.c
index ff0ac3627cc4..ba9140c87cc0 100644
--- a/drivers/gpu/drm/i915/i915_oa_cnl.c
+++ b/drivers/gpu/drm/i915/i915_oa_cnl.c
@@ -96,9 +96,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
void
i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv)
{
- strncpy(dev_priv->perf.oa.test_config.uuid,
+ strlcpy(dev_priv->perf.oa.test_config.uuid,
"db41edd4-d8e7-4730-ad11-b9a2d6833503",
- UUID_STRING_LEN);
+ sizeof(dev_priv->perf.oa.test_config.uuid));
dev_priv->perf.oa.test_config.id = 1;
dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
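
In both strncpy-to-strlcpy conversions above, the uuid field holds exactly UUID_STRING_LEN characters plus a terminator; strncpy() with a UUID_STRING_LEN bound fills the field without NUL-terminating it, while strlcpy() bounded by the full field size always terminates. A userspace sketch (with a local strlcpy, since glibc does not provide one):

#include <stdio.h>
#include <string.h>

/* Minimal strlcpy for the demo */
static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
        size_t len = strlen(src);

        if (size) {
                size_t n = len < size - 1 ? len : size - 1;

                memcpy(dst, src, n);
                dst[n] = '\0';
        }
        return len;
}

int main(void)
{
        char uuid[37];  /* UUID_STRING_LEN (36) + 1 for the terminator */

        /* strncpy(uuid, src, 36) would fill all 36 bytes and leave the
         * buffer unterminated; the l-variant always terminates. */
        my_strlcpy(uuid, "577e8e2c-3fa0-4875-8743-3538d585e3b0", sizeof(uuid));
        puts(uuid);
        return 0;
}
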
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0be50e43507d..f8fe5ffcdcff 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1303,9 +1303,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
*/
mutex_lock(&dev_priv->drm.struct_mutex);
dev_priv->perf.oa.exclusive_stream = NULL;
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
free_oa_buffer(dev_priv);
@@ -1756,22 +1755,13 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
* Note: it's only the RCS/Render context that has any OA state.
*/
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
- const struct i915_oa_config *oa_config,
- bool interruptible)
+ const struct i915_oa_config *oa_config)
{
struct i915_gem_context *ctx;
int ret;
unsigned int wait_flags = I915_WAIT_LOCKED;
- if (interruptible) {
- ret = i915_mutex_lock_interruptible(&dev_priv->drm);
- if (ret)
- return ret;
-
- wait_flags |= I915_WAIT_INTERRUPTIBLE;
- } else {
- mutex_lock(&dev_priv->drm.struct_mutex);
- }
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
/* Switch away from any user context. */
ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
@@ -1819,8 +1809,6 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
}
out:
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
return ret;
}
@@ -1863,7 +1851,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
+ ret = gen8_configure_all_contexts(dev_priv, oa_config);
if (ret)
return ret;
@@ -1878,7 +1866,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL, false);
+ gen8_configure_all_contexts(dev_priv, NULL);
I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
~GT_NOA_ENABLE));
@@ -1888,7 +1876,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL, false);
+ gen8_configure_all_contexts(dev_priv, NULL);
/* Make sure we disable noa to save power. */
I915_WRITE(RPM_CONFIG1,
@@ -2138,6 +2126,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (ret)
goto err_oa_buf_alloc;
+ ret = i915_mutex_lock_interruptible(&dev_priv->drm);
+ if (ret)
+ goto err_lock;
+
ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
stream->oa_config);
if (ret)
@@ -2145,23 +2137,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->ops = &i915_oa_stream_ops;
- /* Lock device for exclusive_stream access late because
- * enable_metric_set() might lock as well on gen8+.
- */
- ret = i915_mutex_lock_interruptible(&dev_priv->drm);
- if (ret)
- goto err_lock;
-
dev_priv->perf.oa.exclusive_stream = stream;
mutex_unlock(&dev_priv->drm.struct_mutex);
return 0;
-err_lock:
+err_enable:
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
-err_enable:
+err_lock:
free_oa_buffer(dev_priv);
err_oa_buf_alloc:
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 55a8a1e29424..0e9b98c32b62 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -285,26 +285,41 @@ static u64 count_interrupts(struct drm_i915_private *i915)
return sum;
}
-static void i915_pmu_event_destroy(struct perf_event *event)
+static void engine_event_destroy(struct perf_event *event)
{
- WARN_ON(event->parent);
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ struct intel_engine_cs *engine;
+
+ engine = intel_engine_lookup_user(i915,
+ engine_event_class(event),
+ engine_event_instance(event));
+ if (WARN_ON_ONCE(!engine))
+ return;
+
+ if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+ intel_engine_supports_stats(engine))
+ intel_disable_engine_stats(engine);
}
-static int engine_event_init(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
{
- struct drm_i915_private *i915 =
- container_of(event->pmu, typeof(*i915), pmu.base);
+ WARN_ON(event->parent);
- if (!intel_engine_lookup_user(i915, engine_event_class(event),
- engine_event_instance(event)))
- return -ENODEV;
+ if (is_engine_event(event))
+ engine_event_destroy(event);
+}
- switch (engine_event_sample(event)) {
+static int
+engine_event_status(struct intel_engine_cs *engine,
+ enum drm_i915_pmu_engine_sample sample)
+{
+ switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
break;
case I915_SAMPLE_SEMA:
- if (INTEL_GEN(i915) < 6)
+ if (INTEL_GEN(engine->i915) < 6)
return -ENODEV;
break;
default:
@@ -314,6 +329,30 @@ static int engine_event_init(struct perf_event *event)
return 0;
}
+static int engine_event_init(struct perf_event *event)
+{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ struct intel_engine_cs *engine;
+ u8 sample;
+ int ret;
+
+ engine = intel_engine_lookup_user(i915, engine_event_class(event),
+ engine_event_instance(event));
+ if (!engine)
+ return -ENODEV;
+
+ sample = engine_event_sample(event);
+ ret = engine_event_status(engine, sample);
+ if (ret)
+ return ret;
+
+ if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+ ret = intel_enable_engine_stats(engine);
+
+ return ret;
+}
+
static int i915_pmu_event_init(struct perf_event *event)
{
struct drm_i915_private *i915 =
@@ -370,7 +409,94 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
}
-static u64 __i915_pmu_event_read(struct perf_event *event)
+static u64 __get_rc6(struct drm_i915_private *i915)
+{
+ u64 val;
+
+ val = intel_rc6_residency_ns(i915,
+ IS_VALLEYVIEW(i915) ?
+ VLV_GT_RENDER_RC6 :
+ GEN6_GT_GFX_RC6);
+
+ if (HAS_RC6p(i915))
+ val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+ if (HAS_RC6pp(i915))
+ val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+ return val;
+}
+
+static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+{
+#if IS_ENABLED(CONFIG_PM)
+ unsigned long flags;
+ u64 val;
+
+ if (intel_runtime_pm_get_if_in_use(i915)) {
+ val = __get_rc6(i915);
+ intel_runtime_pm_put(i915);
+
+ /*
+ * If we are coming back from being runtime suspended we must
+ * be careful not to report a smaller value than reported
+ * previously.
+ */
+
+ if (!locked)
+ spin_lock_irqsave(&i915->pmu.lock, flags);
+
+ if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+ i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+ i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+ } else {
+ val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+ }
+
+ if (!locked)
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
+ } else {
+ struct pci_dev *pdev = i915->drm.pdev;
+ struct device *kdev = &pdev->dev;
+ unsigned long flags2;
+
+ /*
+ * We are runtime suspended.
+ *
+ * Report the delta from when the device was suspended to now,
+ * on top of the last known real value, as the approximated RC6
+ * counter value.
+ */
+ if (!locked)
+ spin_lock_irqsave(&i915->pmu.lock, flags);
+
+ spin_lock_irqsave(&kdev->power.lock, flags2);
+
+ if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+ i915->pmu.suspended_jiffies_last =
+ kdev->power.suspended_jiffies;
+
+ val = kdev->power.suspended_jiffies -
+ i915->pmu.suspended_jiffies_last;
+ val += jiffies - kdev->power.accounting_timestamp;
+
+ spin_unlock_irqrestore(&kdev->power.lock, flags2);
+
+ val = jiffies_to_nsecs(val);
+ val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+ i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+ if (!locked)
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
+ }
+
+ return val;
+#else
+ return __get_rc6(i915);
+#endif
+}
+
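
While the device is runtime suspended the RC6 counter cannot be read, so get_rc6() extrapolates an estimate from the PM core's suspend timestamps and, on resume, only trusts hardware readings again once they have caught up with the estimate, keeping the reported value monotonic. A toy model of just that clamp:

#include <stdint.h>
#include <stdio.h>

static uint64_t rc6_estimated;

/* A fresh hardware reading is only accepted once it has caught up
 * with the last estimate; until then the estimate is re-reported. */
static uint64_t sample_rc6(uint64_t hw_val)
{
        if (hw_val >= rc6_estimated) {
                rc6_estimated = 0;
                return hw_val;
        }
        return rc6_estimated;   /* never report a smaller value */
}

int main(void)
{
        rc6_estimated = 1000;   /* estimate built while suspended */
        printf("%llu\n", (unsigned long long)sample_rc6(900));  /* 1000 */
        printf("%llu\n", (unsigned long long)sample_rc6(1100)); /* 1100 */
        return 0;
}
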
+static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
@@ -387,7 +513,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
if (WARN_ON_ONCE(!engine)) {
/* Do nothing */
} else if (sample == I915_SAMPLE_BUSY &&
- engine->pmu.busy_stats) {
+ intel_engine_supports_stats(engine)) {
val = ktime_to_ns(intel_engine_get_busy_time(engine));
} else {
val = engine->pmu.sample[sample].cur;
@@ -408,18 +534,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = count_interrupts(i915);
break;
case I915_PMU_RC6_RESIDENCY:
- intel_runtime_pm_get(i915);
- val = intel_rc6_residency_ns(i915,
- IS_VALLEYVIEW(i915) ?
- VLV_GT_RENDER_RC6 :
- GEN6_GT_GFX_RC6);
- if (HAS_RC6p(i915))
- val += intel_rc6_residency_ns(i915,
- GEN6_GT_GFX_RC6p);
- if (HAS_RC6pp(i915))
- val += intel_rc6_residency_ns(i915,
- GEN6_GT_GFX_RC6pp);
- intel_runtime_pm_put(i915);
+ val = get_rc6(i915, locked);
break;
}
}
@@ -434,7 +549,7 @@ static void i915_pmu_event_read(struct perf_event *event)
again:
prev = local64_read(&hwc->prev_count);
- new = __i915_pmu_event_read(event);
+ new = __i915_pmu_event_read(event, false);
if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
goto again;
@@ -442,12 +557,6 @@ again:
local64_add(new - prev, &event->count);
}
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
- return intel_engine_supports_stats(engine) &&
- (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
static void i915_pmu_enable(struct perf_event *event)
{
struct drm_i915_private *i915 =
@@ -487,21 +596,7 @@ static void i915_pmu_enable(struct perf_event *event)
GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
- if (engine->pmu.enable_count[sample]++ == 0) {
- /*
- * Enable engine busy stats tracking if needed or
- * alternatively cancel the scheduled disable.
- *
- * If the delayed disable was pending, cancel it and
- * in this case do not enable since it already is.
- */
- if (engine_needs_busy_stats(engine) &&
- !engine->pmu.busy_stats) {
- engine->pmu.busy_stats = true;
- if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
- intel_enable_engine_stats(engine);
- }
- }
+ engine->pmu.enable_count[sample]++;
}
/*
@@ -509,19 +604,11 @@ static void i915_pmu_enable(struct perf_event *event)
* for all listeners. Even when the event was already enabled and has
* an existing non-zero value.
*/
- local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
+ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
-static void __disable_busy_stats(struct work_struct *work)
-{
- struct intel_engine_cs *engine =
- container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
- intel_disable_engine_stats(engine);
-}
-
static void i915_pmu_disable(struct perf_event *event)
{
struct drm_i915_private *i915 =
@@ -545,26 +632,8 @@ static void i915_pmu_disable(struct perf_event *event)
* Decrement the reference count and clear the enabled
* bitmask when the last listener on an event goes away.
*/
- if (--engine->pmu.enable_count[sample] == 0) {
+ if (--engine->pmu.enable_count[sample] == 0)
engine->pmu.enable &= ~BIT(sample);
- if (!engine_needs_busy_stats(engine) &&
- engine->pmu.busy_stats) {
- engine->pmu.busy_stats = false;
- /*
- * We request a delayed disable to handle the
- * rapid on/off cycles on events, which can
- * happen when tools like perf stat start, in a
- * nicer way.
- *
- * In addition, this also helps with busy stats
- * accuracy with background CPU offline/online
- * migration events.
- */
- queue_delayed_work(system_wq,
- &engine->pmu.disable_busy_stats,
- round_jiffies_up_relative(HZ));
- }
- }
}
GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -797,8 +866,6 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
void i915_pmu_register(struct drm_i915_private *i915)
{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
int ret;
if (INTEL_GEN(i915) <= 2) {
@@ -820,10 +887,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
i915->pmu.timer.function = i915_sample;
- for_each_engine(engine, i915, id)
- INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
- __disable_busy_stats);
-
ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
if (ret)
goto err;
@@ -843,9 +906,6 @@ err:
void i915_pmu_unregister(struct drm_i915_private *i915)
{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
if (!i915->pmu.base.event_init)
return;
@@ -853,11 +913,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
hrtimer_cancel(&i915->pmu.timer);
- for_each_engine(engine, i915, id) {
- GEM_BUG_ON(engine->pmu.busy_stats);
- flush_delayed_work(&engine->pmu.disable_busy_stats);
- }
-
i915_pmu_unregister_cpuhp_state(i915);
perf_pmu_unregister(&i915->pmu.base);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 40c154d13565..bb62df15afa4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -27,6 +27,8 @@
enum {
__I915_SAMPLE_FREQ_ACT = 0,
__I915_SAMPLE_FREQ_REQ,
+ __I915_SAMPLE_RC6,
+ __I915_SAMPLE_RC6_ESTIMATED,
__I915_NUM_PMU_SAMPLERS
};
@@ -94,6 +96,10 @@ struct i915_pmu {
* struct intel_engine_cs.
*/
struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
+ /**
+ * @suspended_jiffies_last: Cached suspend time from PM core.
+ */
+ unsigned long suspended_jiffies_last;
};
#ifdef CONFIG_PERF_EVENTS
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a2108e35c599..33eb0c5b1d32 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2027,7 +2027,7 @@ enum i915_power_well_id {
#define _CNL_PORT_TX_DW5_LN0_AE 0x162454
#define _CNL_PORT_TX_DW5_LN0_B 0x162654
#define _CNL_PORT_TX_DW5_LN0_C 0x162C54
-#define _CNL_PORT_TX_DW5_LN0_D 0x162ED4
+#define _CNL_PORT_TX_DW5_LN0_D 0x162E54
#define _CNL_PORT_TX_DW5_LN0_F 0x162854
#define CNL_PORT_TX_DW5_GRP(port) _MMIO_PORT6(port, \
_CNL_PORT_TX_DW5_GRP_AE, \
@@ -2058,7 +2058,7 @@ enum i915_power_well_id {
#define _CNL_PORT_TX_DW7_LN0_AE 0x16245C
#define _CNL_PORT_TX_DW7_LN0_B 0x16265C
#define _CNL_PORT_TX_DW7_LN0_C 0x162C5C
-#define _CNL_PORT_TX_DW7_LN0_D 0x162EDC
+#define _CNL_PORT_TX_DW7_LN0_D 0x162E5C
#define _CNL_PORT_TX_DW7_LN0_F 0x16285C
#define CNL_PORT_TX_DW7_GRP(port) _MMIO_PORT6(port, \
_CNL_PORT_TX_DW7_GRP_AE, \
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index b33d2158c234..e5e6f6bb2b05 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -304,8 +304,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
struct intel_rps *rps = &dev_priv->gt_pm.rps;
- u32 val;
+ bool boost = false;
ssize_t ret;
+ u32 val;
ret = kstrtou32(buf, 0, &val);
if (ret)
@@ -317,8 +318,13 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
return -EINVAL;
mutex_lock(&dev_priv->pcu_lock);
- rps->boost_freq = val;
+ if (val != rps->boost_freq) {
+ rps->boost_freq = val;
+ boost = atomic_read(&rps->num_waiters);
+ }
mutex_unlock(&dev_priv->pcu_lock);
+ if (boost)
+ schedule_work(&rps->work);
return count;
}
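
The store path now only kicks the RPS worker when the boost frequency actually changed and waiters are recorded, rather than unconditionally rewriting the value. A minimal model of the conditional kick:

#include <stdbool.h>
#include <stdio.h>

static unsigned int boost_freq;
static int num_waiters;

/* Toy version of the store: report whether the caller should
 * schedule_work() on the RPS worker. */
static bool store_boost(unsigned int val)
{
        bool kick = false;

        if (val != boost_freq) {
                boost_freq = val;
                kick = num_waiters > 0;
        }
        return kick;
}

int main(void)
{
        num_waiters = 1;
        printf("%d\n", store_boost(300));       /* 1: changed, with waiters */
        printf("%d\n", store_boost(300));       /* 0: unchanged, no kick */
        return 0;
}
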
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 522d54fecb53..4a01f62a392d 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -779,11 +779,11 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv,
{
struct intel_encoder *encoder;
- if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map)))
- return NULL;
-
/* MST */
if (pipe >= 0) {
+ if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map)))
+ return NULL;
+
encoder = dev_priv->av_enc_map[pipe];
/*
* when bootup, audio driver may not know it is
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index f7f771749e48..b49a2df44430 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -947,6 +947,86 @@ static int goto_next_sequence_v3(const u8 *data, int index, int total)
return 0;
}
+/*
+ * Get the length of the deassert fragment prefixed to a v1 init OTP
+ * sequence: skip all delay + gpio operands and stop at the first DSI
+ * packet op.
+ */
+static int get_init_otp_deassert_fragment_len(struct drm_i915_private *dev_priv)
+{
+ const u8 *data = dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
+ int index, len;
+
+ if (WARN_ON(!data || dev_priv->vbt.dsi.seq_version != 1))
+ return 0;
+
+ /* index = 1 to skip sequence byte */
+ for (index = 1; data[index] != MIPI_SEQ_ELEM_END; index += len) {
+ switch (data[index]) {
+ case MIPI_SEQ_ELEM_SEND_PKT:
+ return index == 1 ? 0 : index;
+ case MIPI_SEQ_ELEM_DELAY:
+ len = 5; /* 1 byte for operand + uint32 */
+ break;
+ case MIPI_SEQ_ELEM_GPIO:
+ len = 3; /* 1 byte for op, 1 for gpio_nr, 1 for value */
+ break;
+ default:
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
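
The scan walks the sequence payload element by element, skipping the fixed-size delay and GPIO elements and returning the index of the first DSI packet element as the fragment length. A self-contained re-implementation over a byte array; the opcode values below mirror the v1 MIPI_SEQ_ELEM_* numbering but are assumptions of this demo:

#include <stdio.h>

enum { ELEM_END = 0, ELEM_SEND_PKT = 1, ELEM_DELAY = 2, ELEM_GPIO = 3 };

static int deassert_fragment_len(const unsigned char *data)
{
        int index, len;

        /* index = 1 skips the sequence byte, as in the original */
        for (index = 1; data[index] != ELEM_END; index += len) {
                switch (data[index]) {
                case ELEM_SEND_PKT:
                        return index == 1 ? 0 : index;
                case ELEM_DELAY:
                        len = 5;        /* opcode + u32 delay */
                        break;
                case ELEM_GPIO:
                        len = 3;        /* opcode + gpio_nr + value */
                        break;
                default:
                        return 0;
                }
        }
        return 0;
}

int main(void)
{
        /* seq byte, one GPIO element, then the first DSI packet */
        const unsigned char seq[] = {
                0, ELEM_GPIO, 1, 1, ELEM_SEND_PKT, ELEM_END
        };

        printf("%d\n", deassert_fragment_len(seq));     /* prints 4 */
        return 0;
}
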
+/*
+ * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence.
+ * The deassert must be done before calling intel_dsi_device_ready, so for
+ * these devices we split the init OTP sequence into a deassert sequence and
+ * the actual init OTP part.
+ */
+static void fixup_mipi_sequences(struct drm_i915_private *dev_priv)
+{
+ u8 *init_otp;
+ int len;
+
+ /* Limit this to VLV for now. */
+ if (!IS_VALLEYVIEW(dev_priv))
+ return;
+
+ /* Limit this to v1 vid-mode sequences */
+ if (dev_priv->vbt.dsi.config->is_cmd_mode ||
+ dev_priv->vbt.dsi.seq_version != 1)
+ return;
+
+ /* Only do this if there are otp and assert seqs and no deassert seq */
+ if (!dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] ||
+ !dev_priv->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET] ||
+ dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET])
+ return;
+
+ /* The deassert-sequence ends at the first DSI packet */
+ len = get_init_otp_deassert_fragment_len(dev_priv);
+ if (!len)
+ return;
+
+ DRM_DEBUG_KMS("Using init OTP fragment to deassert reset\n");
+
+ /* Copy the fragment, update seq byte and terminate it */
+ init_otp = (u8 *)dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
+ dev_priv->vbt.dsi.deassert_seq = kmemdup(init_otp, len + 1, GFP_KERNEL);
+ if (!dev_priv->vbt.dsi.deassert_seq)
+ return;
+ dev_priv->vbt.dsi.deassert_seq[0] = MIPI_SEQ_DEASSERT_RESET;
+ dev_priv->vbt.dsi.deassert_seq[len] = MIPI_SEQ_ELEM_END;
+ /* Use the copy for deassert */
+ dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET] =
+ dev_priv->vbt.dsi.deassert_seq;
+ /* Replace the last byte of the fragment with init OTP seq byte */
+ init_otp[len - 1] = MIPI_SEQ_INIT_OTP;
+ /* And make MIPI_SEQ_INIT_OTP point to it */
+ dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
+}
+
static void
parse_mipi_sequence(struct drm_i915_private *dev_priv,
const struct bdb_header *bdb)
@@ -1016,6 +1096,8 @@ parse_mipi_sequence(struct drm_i915_private *dev_priv,
dev_priv->vbt.dsi.size = seq_size;
dev_priv->vbt.dsi.seq_version = sequence->version;
+ fixup_mipi_sequences(dev_priv);
+
DRM_DEBUG_DRIVER("MIPI related VBT parsing complete\n");
return;
@@ -1589,6 +1671,29 @@ out:
}
/**
+ * intel_bios_cleanup - Free any resources allocated by intel_bios_init()
+ * @dev_priv: i915 device instance
+ */
+void intel_bios_cleanup(struct drm_i915_private *dev_priv)
+{
+ kfree(dev_priv->vbt.child_dev);
+ dev_priv->vbt.child_dev = NULL;
+ dev_priv->vbt.child_dev_num = 0;
+ kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
+ dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
+ kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
+ dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
+ kfree(dev_priv->vbt.dsi.data);
+ dev_priv->vbt.dsi.data = NULL;
+ kfree(dev_priv->vbt.dsi.pps);
+ dev_priv->vbt.dsi.pps = NULL;
+ kfree(dev_priv->vbt.dsi.config);
+ dev_priv->vbt.dsi.config = NULL;
+ kfree(dev_priv->vbt.dsi.deassert_seq);
+ dev_priv->vbt.dsi.deassert_seq = NULL;
+}
+
+/**
* intel_bios_is_tv_present - is integrated TV present in VBT
* @dev_priv: i915 device instance
*
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index bd40fea16b4f..f54ddda9fdad 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -594,29 +594,16 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
spin_unlock_irq(&b->rb_lock);
}
-static bool signal_valid(const struct drm_i915_gem_request *request)
-{
- return intel_wait_check_request(&request->signaling.wait, request);
-}
-
static bool signal_complete(const struct drm_i915_gem_request *request)
{
if (!request)
return false;
- /* If another process served as the bottom-half it may have already
- * signalled that this wait is already completed.
- */
- if (intel_wait_complete(&request->signaling.wait))
- return signal_valid(request);
-
- /* Carefully check if the request is complete, giving time for the
+ /*
+ * Carefully check if the request is complete, giving time for the
* seqno to be visible or if the GPU hung.
*/
- if (__i915_request_irq_complete(request))
- return true;
-
- return false;
+ return __i915_request_irq_complete(request);
}
static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
@@ -659,9 +646,13 @@ static int intel_breadcrumbs_signaler(void *arg)
request = i915_gem_request_get_rcu(request);
rcu_read_unlock();
if (signal_complete(request)) {
- local_bh_disable();
- dma_fence_signal(&request->fence);
- local_bh_enable(); /* kick start the tasklets */
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &request->fence.flags)) {
+ local_bh_disable();
+ dma_fence_signal(&request->fence);
+ GEM_BUG_ON(!i915_gem_request_completed(request));
+ local_bh_enable(); /* kick start the tasklets */
+ }
spin_lock_irq(&b->rb_lock);
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 5dc118f26b51..1704c8897afd 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1952,6 +1952,14 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9)
min_cdclk = max(2 * 96000, min_cdclk);
+ /*
+ * On Valleyview some DSI panels lose (v|h)sync when the clock is lower
+ * than 320000 kHz.
+ */
+ if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) &&
+ IS_VALLEYVIEW(dev_priv))
+ min_cdclk = max(320000, min_cdclk);
+
if (min_cdclk > dev_priv->max_cdclk_freq) {
DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n",
min_cdclk, dev_priv->max_cdclk_freq);
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index f51645a08dca..6aff9d096e13 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2175,8 +2175,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
intel_prepare_dp_ddi_buffers(encoder, crtc_state);
intel_ddi_init_dp_buf_reg(encoder);
- if (!is_mst)
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
intel_dp_start_link_train(intel_dp);
if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
intel_dp_stop_link_train(intel_dp);
@@ -2274,14 +2273,12 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
struct intel_dp *intel_dp = &dig_port->dp;
- bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST);
/*
* Power down sink before disabling the port, otherwise we end
* up getting interrupts from the sink on detecting link loss.
*/
- if (!is_mst)
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
intel_disable_ddi_buf(encoder);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 35c5299feab6..a29868cd30c7 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -620,19 +620,15 @@ static int
bxt_power_sequencer_idx(struct intel_dp *intel_dp)
{
struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
+ int backlight_controller = dev_priv->vbt.backlight.controller;
lockdep_assert_held(&dev_priv->pps_mutex);
/* We should never land here with regular DP ports */
WARN_ON(!intel_dp_is_edp(intel_dp));
- /*
- * TODO: BXT has 2 PPS instances. The correct port->PPS instance
- * mapping needs to be retrieved from VBT, for now just hard-code to
- * use instance #0 always.
- */
if (!intel_dp->pps_reset)
- return 0;
+ return backlight_controller;
intel_dp->pps_reset = false;
@@ -642,7 +638,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp)
*/
intel_dp_init_panel_power_sequencer_registers(intel_dp, false);
- return 0;
+ return backlight_controller;
}
typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d790bdc227ff..fa960cfd2764 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1458,7 +1458,9 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915;
bool idle = true;
- intel_runtime_pm_get(dev_priv);
+ /* If the whole device is asleep, the engine must be idle */
+ if (!intel_runtime_pm_get_if_in_use(dev_priv))
+ return true;
/* First check that no commands are left in the ring */
if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
@@ -1943,16 +1945,22 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
*/
int intel_enable_engine_stats(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists *execlists = &engine->execlists;
unsigned long flags;
+ int err = 0;
if (!intel_engine_supports_stats(engine))
return -ENODEV;
+ tasklet_disable(&execlists->tasklet);
spin_lock_irqsave(&engine->stats.lock, flags);
- if (engine->stats.enabled == ~0)
- goto busy;
+
+ if (unlikely(engine->stats.enabled == ~0)) {
+ err = -EBUSY;
+ goto unlock;
+ }
+
if (engine->stats.enabled++ == 0) {
- struct intel_engine_execlists *execlists = &engine->execlists;
const struct execlist_port *port = execlists->port;
unsigned int num_ports = execlists_num_ports(execlists);
@@ -1967,14 +1975,12 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (engine->stats.active)
engine->stats.start = engine->stats.enabled_at;
}
- spin_unlock_irqrestore(&engine->stats.lock, flags);
-
- return 0;
-busy:
+unlock:
spin_unlock_irqrestore(&engine->stats.lock, flags);
+ tasklet_enable(&execlists->tasklet);
- return -EBUSY;
+ return err;
}
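
intel_enable_engine_stats() is reference counted, and the hunk above additionally disables the execlists tasklet while the first enable snapshots port state, so the ports cannot change underneath the sample; a saturated counter reports -EBUSY. A toy model of just the counting:

#include <stdio.h>

#define DEMO_EBUSY 16   /* stand-in for the kernel's EBUSY */

static unsigned int enabled;

static int enable_stats(void)
{
        if (enabled == ~0u)
                return -DEMO_EBUSY;     /* refuse to wrap the counter */
        if (enabled++ == 0) {
                /* first user: snapshot current port state here */
        }
        return 0;
}

int main(void)
{
        printf("%d\n", enable_stats()); /* 0: first enable, snapshot taken */
        printf("%d\n", enable_stats()); /* 0: nested enable, count only */
        return 0;
}
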
static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 348a4f7ffb67..53747318f4a7 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -246,7 +246,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
*/
tmp = I915_READ_CTL(engine);
if (tmp & RING_WAIT) {
- i915_handle_error(dev_priv, 0,
+ i915_handle_error(dev_priv, BIT(engine->id),
"Kicking stuck wait on %s",
engine->name);
I915_WRITE_CTL(engine, tmp);
@@ -258,7 +258,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
default:
return ENGINE_DEAD;
case 1:
- i915_handle_error(dev_priv, 0,
+ i915_handle_error(dev_priv, ALL_ENGINES,
"Kicking stuck semaphore on %s",
engine->name);
I915_WRITE_CTL(engine, tmp);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7ece2f061b9e..e0fca035ff78 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -719,6 +719,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
struct rb_node *rb;
unsigned long flags;
+ GEM_TRACE("%s\n", engine->name);
+
spin_lock_irqsave(&engine->timeline->lock, flags);
/* Cancel the requests on the HW and clear the ELSP tracker. */
@@ -765,6 +767,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
*/
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ /* Mark all CS interrupts as complete */
+ execlists->active = 0;
+
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..a0e7a6c2a57c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -366,20 +366,6 @@ struct intel_engine_cs {
*/
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
- /**
- * @busy_stats: Has enablement of engine stats tracking been
- * requested.
- */
- bool busy_stats;
- /**
- * @disable_busy_stats: Work item for busy stats disabling.
- *
- * Same as with @enable_busy_stats action, with the difference
- * that we delay it in case there are rapid enable-disable
- * actions, which can happen during tool startup (like perf
- * stat).
- */
- struct delayed_work disable_busy_stats;
} pmu;
/*
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 9a9961802f5c..e83af0f2be86 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -225,7 +225,11 @@ static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
struct drm_crtc_state *old_crtc_state)
{
drm_crtc_vblank_on(crtc);
+}
+static void ipu_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_crtc_state *old_crtc_state)
+{
spin_lock_irq(&crtc->dev->event_lock);
if (crtc->state->event) {
WARN_ON(drm_crtc_vblank_get(crtc));
@@ -293,6 +297,7 @@ static const struct drm_crtc_helper_funcs ipu_helper_funcs = {
.mode_set_nofb = ipu_crtc_mode_set_nofb,
.atomic_check = ipu_crtc_atomic_check,
.atomic_begin = ipu_crtc_atomic_begin,
+ .atomic_flush = ipu_crtc_atomic_flush,
.atomic_disable = ipu_crtc_atomic_disable,
.atomic_enable = ipu_crtc_atomic_enable,
};
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 57ed56d8623f..d9113faaa62f 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -22,6 +22,7 @@
#include <drm/drm_plane_helper.h>
#include "video/imx-ipu-v3.h"
+#include "imx-drm.h"
#include "ipuv3-plane.h"
struct ipu_plane_state {
@@ -272,7 +273,7 @@ static void ipu_plane_destroy(struct drm_plane *plane)
kfree(ipu_plane);
}
-void ipu_plane_state_reset(struct drm_plane *plane)
+static void ipu_plane_state_reset(struct drm_plane *plane)
{
struct ipu_plane_state *ipu_state;
@@ -292,7 +293,8 @@ void ipu_plane_state_reset(struct drm_plane *plane)
plane->state = &ipu_state->base;
}
-struct drm_plane_state *ipu_plane_duplicate_state(struct drm_plane *plane)
+static struct drm_plane_state *
+ipu_plane_duplicate_state(struct drm_plane *plane)
{
struct ipu_plane_state *state;
@@ -306,8 +308,8 @@ struct drm_plane_state *ipu_plane_duplicate_state(struct drm_plane *plane)
return &state->base;
}
-void ipu_plane_destroy_state(struct drm_plane *plane,
- struct drm_plane_state *state)
+static void ipu_plane_destroy_state(struct drm_plane *plane,
+ struct drm_plane_state *state)
{
struct ipu_plane_state *ipu_state = to_ipu_plane_state(state);
diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
index 5155f0179b61..05520202c967 100644
--- a/drivers/gpu/drm/meson/meson_crtc.c
+++ b/drivers/gpu/drm/meson/meson_crtc.c
@@ -36,6 +36,7 @@
#include "meson_venc.h"
#include "meson_vpp.h"
#include "meson_viu.h"
+#include "meson_canvas.h"
#include "meson_registers.h"
/* CRTC definition */
@@ -192,6 +193,11 @@ void meson_crtc_irq(struct meson_drm *priv)
} else
meson_vpp_disable_interlace_vscaler_osd1(priv);
+ meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1,
+ priv->viu.osd1_addr, priv->viu.osd1_stride,
+ priv->viu.osd1_height, MESON_CANVAS_WRAP_NONE,
+ MESON_CANVAS_BLKMODE_LINEAR);
+
/* Enable OSD1 */
writel_bits_relaxed(VPP_OSD1_POSTBLEND, VPP_OSD1_POSTBLEND,
priv->io_base + _REG(VPP_MISC));
diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
index 5e8b392b9d1f..8450d6ac8c9b 100644
--- a/drivers/gpu/drm/meson/meson_drv.h
+++ b/drivers/gpu/drm/meson/meson_drv.h
@@ -43,6 +43,9 @@ struct meson_drm {
bool osd1_commit;
uint32_t osd1_ctrl_stat;
uint32_t osd1_blk0_cfg[5];
+ uint32_t osd1_addr;
+ uint32_t osd1_stride;
+ uint32_t osd1_height;
} viu;
struct {
diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
index d0a6ac8390f3..27bd3503e1e4 100644
--- a/drivers/gpu/drm/meson/meson_plane.c
+++ b/drivers/gpu/drm/meson/meson_plane.c
@@ -164,10 +164,9 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
/* Update Canvas with buffer address */
gem = drm_fb_cma_get_gem_obj(fb, 0);
- meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1,
- gem->paddr, fb->pitches[0],
- fb->height, MESON_CANVAS_WRAP_NONE,
- MESON_CANVAS_BLKMODE_LINEAR);
+ priv->viu.osd1_addr = gem->paddr;
+ priv->viu.osd1_stride = fb->pitches[0];
+ priv->viu.osd1_height = fb->height;
spin_unlock_irqrestore(&priv->drm->event_lock, flags);
}
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index 3e9bba4d6624..6d8e3a9a6fc0 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -680,7 +680,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
} else {
dev_info(&pdev->dev,
"no iommu, fallback to phys contig buffers for scanout\n");
- aspace = NULL;;
+ aspace = NULL;
}
pm_runtime_put_sync(&pdev->dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index 380f340204e8..debbbf0fd4bd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -134,7 +134,7 @@ nv50_get_intensity(struct backlight_device *bd)
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
- int or = nv_encoder->or;
+ int or = ffs(nv_encoder->dcb->or) - 1;
u32 div = 1025;
u32 val;
@@ -149,7 +149,7 @@ nv50_set_intensity(struct backlight_device *bd)
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
- int or = nv_encoder->or;
+ int or = ffs(nv_encoder->dcb->or) - 1;
u32 div = 1025;
u32 val = (bd->props.brightness * div) / 100;
@@ -170,7 +170,7 @@ nva3_get_intensity(struct backlight_device *bd)
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
- int or = nv_encoder->or;
+ int or = ffs(nv_encoder->dcb->or) - 1;
u32 div, val;
div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
@@ -188,7 +188,7 @@ nva3_set_intensity(struct backlight_device *bd)
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
- int or = nv_encoder->or;
+ int or = ffs(nv_encoder->dcb->or) - 1;
u32 div, val;
div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
@@ -228,7 +228,7 @@ nv50_backlight_init(struct drm_connector *connector)
return -ENODEV;
}
- if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
+ if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)))
return 0;
if (drm->client.device.info.chipset <= 0xa0 ||
@@ -268,13 +268,13 @@ nouveau_backlight_init(struct drm_device *dev)
struct nvif_device *device = &drm->client.device;
struct drm_connector *connector;
+ INIT_LIST_HEAD(&drm->bl_connectors);
+
if (apple_gmux_present()) {
NV_INFO(drm, "Apple GMUX detected: not registering Nouveau backlight interface\n");
return 0;
}
- INIT_LIST_HEAD(&drm->bl_connectors);
-
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS &&
connector->connector_type != DRM_MODE_CONNECTOR_eDP)
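
The ffs(nv_encoder->dcb->or) - 1 conversions above recover a register index from a bitmask: dcb->or is a mask of output resources, while the PWM registers are indexed by output number, and ffs() returns the 1-based position of the lowest set bit. The conversion in isolation:

#include <stdio.h>
#include <strings.h>    /* ffs() */

int main(void)
{
        unsigned int or_mask = 1 << 2;          /* mask for output resource 2 */

        printf("%d\n", ffs(or_mask) - 1);       /* prints 2 */
        return 0;
}
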
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 69d6e61a01ec..6ed9cb053dfa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -570,9 +570,15 @@ nouveau_connector_detect(struct drm_connector *connector, bool force)
nv_connector->edid = NULL;
}
- ret = pm_runtime_get_sync(connector->dev->dev);
- if (ret < 0 && ret != -EACCES)
- return conn_status;
+ /* Outputs are only polled while runtime active, so acquiring a
+ * runtime PM ref here is unnecessary (and would deadlock upon
+ * runtime suspend because it waits for polling to finish).
+ */
+ if (!drm_kms_helper_is_poll_worker()) {
+ ret = pm_runtime_get_sync(connector->dev->dev);
+ if (ret < 0 && ret != -EACCES)
+ return conn_status;
+ }
nv_encoder = nouveau_connector_ddc_detect(connector);
if (nv_encoder && (i2c = nv_encoder->i2c) != NULL) {
@@ -647,8 +653,10 @@ detect_analog:
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return conn_status;
}
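
Output polling only runs while the device is runtime active, and runtime suspend waits for polling to stop, so pm_runtime_get_sync() from inside the poll worker deadlocks: each side waits on the other. The detect paths therefore skip the runtime PM reference when invoked from the worker. A sketch of the pattern; the helpers below are stand-ins for drm_kms_helper_is_poll_worker() and the PM core calls:

#include <stdbool.h>
#include <stdio.h>

static bool in_poll_worker;
static bool is_poll_worker(void) { return in_poll_worker; }
static int rpm_get_sync(void) { puts("rpm get"); return 0; }
static void rpm_put(void) { puts("rpm put"); }

static int detect(void)
{
        /* The poll worker only runs while runtime active, so it must
         * not take a reference of its own. */
        if (!is_poll_worker() && rpm_get_sync() < 0)
                return -1;

        /* ... probe the connector ... */

        if (!is_poll_worker())
                rpm_put();
        return 0;
}

int main(void)
{
        detect();               /* normal path: takes and drops the ref */
        in_poll_worker = true;
        detect();               /* poll worker: skips runtime PM */
        return 0;
}
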
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index dd8d4352ed99..caddce88d2d8 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -4477,6 +4477,7 @@ nv50_display_create(struct drm_device *dev)
nouveau_display(dev)->fini = nv50_display_fini;
disp->disp = &nouveau_display(dev)->disp;
dev->mode_config.funcs = &nv50_disp_func;
+ dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP;
if (nouveau_atomic)
dev->driver->driver_features |= DRIVER_ATOMIC;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index 93946dcee319..1c12e58f44c2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -1354,7 +1354,7 @@ nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse,
tail = this->addr + this->size;
if (vmm->func->page_block && next && next->page != p)
- tail = ALIGN_DOWN(addr, vmm->func->page_block);
+ tail = ALIGN_DOWN(tail, vmm->func->page_block);
if (addr <= tail && tail - addr >= size) {
rb_erase(&this->tree, &vmm->free);
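
The one-character fix above clips the hole from the correct end: page_block alignment must round down the candidate tail, and aligning addr instead shrank the range from the wrong side. A quick numeric check with a power-of-two ALIGN_DOWN:

#include <stdio.h>

/* Power-of-two ALIGN_DOWN, equivalent to the kernel's for these inputs */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

int main(void)
{
        unsigned long addr = 0x1000, size = 0x3000, block = 0x2000;
        unsigned long tail = addr + size;

        printf("tail aligned: %#lx\n", ALIGN_DOWN(tail, block)); /* 0x4000 */
        printf("addr aligned: %#lx\n", ALIGN_DOWN(addr, block)); /* 0 */
        return 0;
}
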
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index bf62303571b3..3695cde669f8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -301,7 +301,7 @@ nvkm_therm_attr_set(struct nvkm_therm *therm,
void
nvkm_therm_clkgate_enable(struct nvkm_therm *therm)
{
- if (!therm->func->clkgate_enable || !therm->clkgating_enabled)
+ if (!therm || !therm->func->clkgate_enable || !therm->clkgating_enabled)
return;
nvkm_debug(&therm->subdev,
@@ -312,7 +312,7 @@ nvkm_therm_clkgate_enable(struct nvkm_therm *therm)
void
nvkm_therm_clkgate_fini(struct nvkm_therm *therm, bool suspend)
{
- if (!therm->func->clkgate_fini || !therm->clkgating_enabled)
+ if (!therm || !therm->func->clkgate_fini || !therm->clkgating_enabled)
return;
nvkm_debug(&therm->subdev,
@@ -395,7 +395,7 @@ void
nvkm_therm_clkgate_init(struct nvkm_therm *therm,
const struct nvkm_therm_clkgate_pack *p)
{
- if (!therm->func->clkgate_init || !therm->clkgating_enabled)
+ if (!therm || !therm->func->clkgate_init || !therm->clkgating_enabled)
return;
therm->func->clkgate_init(therm, p);
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index d3045a371a55..7c73bc7e2f85 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3221,35 +3221,8 @@ static void cik_gpu_init(struct radeon_device *rdev)
case CHIP_KAVERI:
rdev->config.cik.max_shader_engines = 1;
rdev->config.cik.max_tile_pipes = 4;
- if ((rdev->pdev->device == 0x1304) ||
- (rdev->pdev->device == 0x1305) ||
- (rdev->pdev->device == 0x130C) ||
- (rdev->pdev->device == 0x130F) ||
- (rdev->pdev->device == 0x1310) ||
- (rdev->pdev->device == 0x1311) ||
- (rdev->pdev->device == 0x131C)) {
- rdev->config.cik.max_cu_per_sh = 8;
- rdev->config.cik.max_backends_per_se = 2;
- } else if ((rdev->pdev->device == 0x1309) ||
- (rdev->pdev->device == 0x130A) ||
- (rdev->pdev->device == 0x130D) ||
- (rdev->pdev->device == 0x1313) ||
- (rdev->pdev->device == 0x131D)) {
- rdev->config.cik.max_cu_per_sh = 6;
- rdev->config.cik.max_backends_per_se = 2;
- } else if ((rdev->pdev->device == 0x1306) ||
- (rdev->pdev->device == 0x1307) ||
- (rdev->pdev->device == 0x130B) ||
- (rdev->pdev->device == 0x130E) ||
- (rdev->pdev->device == 0x1315) ||
- (rdev->pdev->device == 0x1318) ||
- (rdev->pdev->device == 0x131B)) {
- rdev->config.cik.max_cu_per_sh = 4;
- rdev->config.cik.max_backends_per_se = 1;
- } else {
- rdev->config.cik.max_cu_per_sh = 3;
- rdev->config.cik.max_backends_per_se = 1;
- }
+ rdev->config.cik.max_cu_per_sh = 8;
+ rdev->config.cik.max_backends_per_se = 2;
rdev->config.cik.max_sh_per_se = 1;
rdev->config.cik.max_texture_channel_caches = 4;
rdev->config.cik.max_gprs = 256;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 5012f5e47a1e..df9469a8fdb1 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -90,25 +90,18 @@ void radeon_connector_hotplug(struct drm_connector *connector)
/* don't do anything if sink is not display port, i.e.,
* passive dp->(dvi|hdmi) adaptor
*/
- if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
- int saved_dpms = connector->dpms;
- /* Only turn off the display if it's physically disconnected */
- if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
- } else if (radeon_dp_needs_link_train(radeon_connector)) {
- /* Don't try to start link training before we
- * have the dpcd */
- if (!radeon_dp_getdpcd(radeon_connector))
- return;
-
- /* set it to OFF so that drm_helper_connector_dpms()
- * won't return immediately since the current state
- * is ON at this point.
- */
- connector->dpms = DRM_MODE_DPMS_OFF;
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
- }
- connector->dpms = saved_dpms;
+ if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+ radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) &&
+ radeon_dp_needs_link_train(radeon_connector)) {
+ /* Don't start link training before we have the DPCD */
+ if (!radeon_dp_getdpcd(radeon_connector))
+ return;
+
+ /* Turn the connector off and back on immediately, which
+ * will trigger link training
+ */
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
}
}
}
@@ -899,9 +892,11 @@ radeon_lvds_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (encoder) {
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
@@ -924,8 +919,12 @@ radeon_lvds_detect(struct drm_connector *connector, bool force)
/* check acpi lid status ??? */
radeon_connector_update_scratch_regs(connector, ret);
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
return ret;
}
@@ -1039,9 +1038,11 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
encoder = radeon_best_single_encoder(connector);
if (!encoder)
@@ -1108,8 +1109,10 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
radeon_connector_update_scratch_regs(connector, ret);
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
@@ -1173,9 +1176,11 @@ radeon_tv_detect(struct drm_connector *connector, bool force)
if (!radeon_connector->dac_load_detect)
return ret;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
encoder = radeon_best_single_encoder(connector);
if (!encoder)
@@ -1187,8 +1192,12 @@ radeon_tv_detect(struct drm_connector *connector, bool force)
if (ret == connector_status_connected)
ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, false);
radeon_connector_update_scratch_regs(connector, ret);
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
return ret;
}
@@ -1251,9 +1260,11 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
bool dret = false, broken_edid = false;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (radeon_connector->detected_hpd_without_ddc) {
force = true;
@@ -1436,8 +1447,10 @@ out:
}
exit:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
@@ -1688,9 +1701,11 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
if (radeon_dig_connector->is_mst)
return connector_status_disconnected;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (!force && radeon_check_hpd_status_unchanged(connector)) {
ret = connector->status;
@@ -1777,8 +1792,10 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
}
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
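
Every hunk above applies the same guard: when a connector's ->detect() runs from the DRM output-poll worker, the poll work item already holds a runtime-PM reference, so taking another pm_runtime_get_sync() there can deadlock against runtime suspend waiting for the poll worker to finish. A minimal sketch of the pattern; my_detect() is hypothetical, not a radeon function:

#include <drm/drm_crtc_helper.h>
#include <linux/pm_runtime.h>

static enum drm_connector_status my_detect(struct drm_connector *connector,
                                           bool force)
{
        enum drm_connector_status status = connector_status_disconnected;
        int r;

        /* The poll worker already holds a runtime-PM reference. */
        if (!drm_kms_helper_is_poll_worker()) {
                r = pm_runtime_get_sync(connector->dev->dev);
                if (r < 0)
                        return connector_status_disconnected;
        }

        /* ... probe the hardware and set status ... */

        if (!drm_kms_helper_is_poll_worker()) {
                pm_runtime_mark_last_busy(connector->dev->dev);
                pm_runtime_put_autosuspend(connector->dev->dev);
        }

        return status;
}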
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 8d3e3d2e0090..7828a5e10629 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1365,6 +1365,10 @@ int radeon_device_init(struct radeon_device *rdev,
if ((rdev->flags & RADEON_IS_PCI) &&
(rdev->family <= CHIP_RS740))
rdev->need_dma32 = true;
+#ifdef CONFIG_PPC64
+ if (rdev->family == CHIP_CEDAR)
+ rdev->need_dma32 = true;
+#endif
dma_bits = rdev->need_dma32 ? 32 : 40;
r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index a9962ffba720..27d8e7dd2d06 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -34,8 +34,6 @@ void radeon_gem_object_free(struct drm_gem_object *gobj)
struct radeon_bo *robj = gem_to_radeon_bo(gobj);
if (robj) {
- if (robj->gem_base.import_attach)
- drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
radeon_mn_unregister(robj);
radeon_bo_unref(&robj);
}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 15404af9d740..31f5ad605e59 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -82,6 +82,8 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
mutex_unlock(&bo->rdev->gem.mutex);
radeon_bo_clear_surface_reg(bo);
WARN_ON_ONCE(!list_empty(&bo->va));
+ if (bo->gem_base.import_attach)
+ drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
drm_gem_object_release(&bo->gem_base);
kfree(bo);
}
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 326ad068c15a..4b6542538ff9 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -47,7 +47,6 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev);
static bool radeon_pm_debug_check_in_vbl(struct radeon_device *rdev, bool finish);
static void radeon_pm_update_profile(struct radeon_device *rdev);
static void radeon_pm_set_clocks(struct radeon_device *rdev);
-static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev);
int radeon_pm_get_type_index(struct radeon_device *rdev,
enum radeon_pm_state_type ps_type,
@@ -80,8 +79,6 @@ void radeon_pm_acpi_event_handler(struct radeon_device *rdev)
radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power);
}
mutex_unlock(&rdev->pm.mutex);
- /* allow new DPM state to be picked */
- radeon_pm_compute_clocks_dpm(rdev);
} else if (rdev->pm.pm_method == PM_METHOD_PROFILE) {
if (rdev->pm.profile == PM_PROFILE_AUTO) {
mutex_lock(&rdev->pm.mutex);
@@ -885,8 +882,7 @@ static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev,
dpm_state = POWER_STATE_TYPE_INTERNAL_3DPERF;
/* balanced states don't exist at the moment */
if (dpm_state == POWER_STATE_TYPE_BALANCED)
- dpm_state = rdev->pm.dpm.ac_power ?
- POWER_STATE_TYPE_PERFORMANCE : POWER_STATE_TYPE_BATTERY;
+ dpm_state = POWER_STATE_TYPE_PERFORMANCE;
restart_search:
/* Pick the best power state based on current conditions */
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 2c18996d59c5..0d95888ccc3e 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -461,7 +461,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
{
struct drm_sched_job *s_job;
struct drm_sched_entity *entity, *tmp;
- int i;;
+ int i;
spin_lock(&sched->job_list_lock);
list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
diff --git a/drivers/gpu/drm/sun4i/sun4i_crtc.c b/drivers/gpu/drm/sun4i/sun4i_crtc.c
index 5decae0069d0..78cbc3145e44 100644
--- a/drivers/gpu/drm/sun4i/sun4i_crtc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_crtc.c
@@ -93,6 +93,8 @@ static void sun4i_crtc_atomic_disable(struct drm_crtc *crtc,
DRM_DEBUG_DRIVER("Disabling the CRTC\n");
+ drm_crtc_vblank_off(crtc);
+
sun4i_tcon_set_status(scrtc->tcon, encoder, false);
if (crtc->state->event && !crtc->state->active) {
@@ -113,6 +115,8 @@ static void sun4i_crtc_atomic_enable(struct drm_crtc *crtc,
DRM_DEBUG_DRIVER("Enabling the CRTC\n");
sun4i_tcon_set_status(scrtc->tcon, encoder, true);
+
+ drm_crtc_vblank_on(crtc);
}
static void sun4i_crtc_mode_set_nofb(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index 023f39bda633..e36004fbe453 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -132,10 +132,13 @@ static int sun4i_dclk_get_phase(struct clk_hw *hw)
static int sun4i_dclk_set_phase(struct clk_hw *hw, int degrees)
{
struct sun4i_dclk *dclk = hw_to_dclk(hw);
+ u32 val = degrees / 120;
+
+ val <<= 28;
regmap_update_bits(dclk->regmap, SUN4I_TCON0_IO_POL_REG,
GENMASK(29, 28),
- degrees / 120);
+ val);
return 0;
}
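
The bug here is that regmap_update_bits() masks the supplied value but does not shift it into the field: `degrees / 120` landed in bits 1:0 and was masked away by GENMASK(29, 28). The patch pre-shifts by hand with `val <<= 28`; FIELD_PREP() is the equivalent idiom that derives the shift from the mask. A minimal sketch, assuming the same two-bit field:

#include <linux/bitfield.h>
#include <linux/regmap.h>

/* Sketch: pre-shift the field value; regmap_update_bits() only masks. */
static int my_set_phase(struct regmap *map, unsigned int reg, int degrees)
{
        u32 val = FIELD_PREP(GENMASK(29, 28), degrees / 120);

        return regmap_update_bits(map, reg, GENMASK(29, 28), val);
}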
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 4570da0227b4..d9a71f361b14 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -111,7 +111,7 @@ static int sun4i_drv_bind(struct device *dev)
/* drm_vblank_init calls kcalloc, which can fail */
ret = drm_vblank_init(drm, drm->mode_config.num_crtc);
if (ret)
- goto free_mem_region;
+ goto cleanup_mode_config;
drm->irq_enabled = true;
@@ -139,7 +139,6 @@ finish_poll:
sun4i_framebuffer_free(drm);
cleanup_mode_config:
drm_mode_config_cleanup(drm);
-free_mem_region:
of_reserved_mem_device_release(dev);
free_drm:
drm_dev_unref(drm);
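
The drm_vblank_init() failure path previously jumped to free_mem_region and skipped drm_mode_config_cleanup(); retargeting it at cleanup_mode_config (and dropping the now-unused label) restores the usual unwind ladder, where each label undoes exactly one successful step and falls through to the next. A generic sketch of the idiom; step_a/step_b/step_c and the undo_* helpers are hypothetical:

#include <linux/device.h>

int step_a(struct device *dev);         /* hypothetical */
int step_b(struct device *dev);         /* hypothetical */
int step_c(struct device *dev);         /* hypothetical */
void undo_step_a(struct device *dev);
void undo_step_b(struct device *dev);

static int my_bind(struct device *dev)
{
        int ret;

        ret = step_a(dev);
        if (ret)
                return ret;

        ret = step_b(dev);
        if (ret)
                goto undo_a;

        ret = step_c(dev);
        if (ret)
                goto undo_b;    /* b succeeded, so its label comes first */

        return 0;

undo_b:
        undo_step_b(dev);
undo_a:
        undo_step_a(dev);
        return ret;
}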
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index 500b6fb3e028..fa4bcd092eaf 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -538,7 +538,8 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master,
&sun4i_hdmi_regmap_config);
if (IS_ERR(hdmi->regmap)) {
dev_err(dev, "Couldn't create HDMI encoder regmap\n");
- return PTR_ERR(hdmi->regmap);
+ ret = PTR_ERR(hdmi->regmap);
+ goto err_disable_mod_clk;
}
ret = sun4i_tmds_create(hdmi);
@@ -551,7 +552,8 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master,
hdmi->ddc_parent_clk = devm_clk_get(dev, "ddc");
if (IS_ERR(hdmi->ddc_parent_clk)) {
dev_err(dev, "Couldn't get the HDMI DDC clock\n");
- return PTR_ERR(hdmi->ddc_parent_clk);
+ ret = PTR_ERR(hdmi->ddc_parent_clk);
+ goto err_disable_mod_clk;
}
} else {
hdmi->ddc_parent_clk = hdmi->tmds_clk;
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index 832f8f9bc47f..b8da5a50a61d 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -92,6 +92,8 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector,
DRM_DEBUG_DRIVER("Vertical parameters OK\n");
+ tcon->dclk_min_div = 6;
+ tcon->dclk_max_div = 127;
rounded_rate = clk_round_rate(tcon->dclk, rate);
if (rounded_rate < rate)
return MODE_CLOCK_LOW;
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 3c15cf24b503..a818ca491605 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -101,10 +101,13 @@ static void sun4i_tcon_channel_set_status(struct sun4i_tcon *tcon, int channel,
return;
}
- if (enabled)
+ if (enabled) {
clk_prepare_enable(clk);
- else
+ clk_rate_exclusive_get(clk);
+ } else {
+ clk_rate_exclusive_put(clk);
clk_disable_unprepare(clk);
+ }
}
static void sun4i_tcon_lvds_set_status(struct sun4i_tcon *tcon,
@@ -335,6 +338,9 @@ static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon,
regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG,
SUN4I_TCON_GCTL_IOMAP_MASK,
SUN4I_TCON_GCTL_IOMAP_TCON0);
+
+ /* Enable the output on the pins */
+ regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG, 0xe0000000);
}
static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
@@ -870,52 +876,56 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
return ret;
}
- /*
- * This can only be made optional since we've had DT nodes
- * without the LVDS reset properties.
- *
- * If the property is missing, just disable LVDS, and print a
- * warning.
- */
- tcon->lvds_rst = devm_reset_control_get_optional(dev, "lvds");
- if (IS_ERR(tcon->lvds_rst)) {
- dev_err(dev, "Couldn't get our reset line\n");
- return PTR_ERR(tcon->lvds_rst);
- } else if (tcon->lvds_rst) {
- has_lvds_rst = true;
- reset_control_reset(tcon->lvds_rst);
- } else {
- has_lvds_rst = false;
- }
+ if (tcon->quirks->supports_lvds) {
+ /*
+ * This can only be made optional since we've had DT
+ * nodes without the LVDS reset properties.
+ *
+ * If the property is missing, just disable LVDS, and
+ * print a warning.
+ */
+ tcon->lvds_rst = devm_reset_control_get_optional(dev, "lvds");
+ if (IS_ERR(tcon->lvds_rst)) {
+ dev_err(dev, "Couldn't get our reset line\n");
+ return PTR_ERR(tcon->lvds_rst);
+ } else if (tcon->lvds_rst) {
+ has_lvds_rst = true;
+ reset_control_reset(tcon->lvds_rst);
+ } else {
+ has_lvds_rst = false;
+ }
- /*
- * This can only be made optional since we've had DT nodes
- * without the LVDS reset properties.
- *
- * If the property is missing, just disable LVDS, and print a
- * warning.
- */
- if (tcon->quirks->has_lvds_alt) {
- tcon->lvds_pll = devm_clk_get(dev, "lvds-alt");
- if (IS_ERR(tcon->lvds_pll)) {
- if (PTR_ERR(tcon->lvds_pll) == -ENOENT) {
- has_lvds_alt = false;
+ /*
+ * This can only be made optional since we've had DT
+ * nodes without the LVDS reset properties.
+ *
+ * If the property is missing, just disable LVDS, and
+ * print a warning.
+ */
+ if (tcon->quirks->has_lvds_alt) {
+ tcon->lvds_pll = devm_clk_get(dev, "lvds-alt");
+ if (IS_ERR(tcon->lvds_pll)) {
+ if (PTR_ERR(tcon->lvds_pll) == -ENOENT) {
+ has_lvds_alt = false;
+ } else {
+ dev_err(dev, "Couldn't get the LVDS PLL\n");
+ return PTR_ERR(tcon->lvds_pll);
+ }
} else {
- dev_err(dev, "Couldn't get the LVDS PLL\n");
- return PTR_ERR(tcon->lvds_pll);
+ has_lvds_alt = true;
}
- } else {
- has_lvds_alt = true;
}
- }
- if (!has_lvds_rst || (tcon->quirks->has_lvds_alt && !has_lvds_alt)) {
- dev_warn(dev,
- "Missing LVDS properties, Please upgrade your DT\n");
- dev_warn(dev, "LVDS output disabled\n");
- can_lvds = false;
+ if (!has_lvds_rst ||
+ (tcon->quirks->has_lvds_alt && !has_lvds_alt)) {
+ dev_warn(dev, "Missing LVDS properties, Please upgrade your DT\n");
+ dev_warn(dev, "LVDS output disabled\n");
+ can_lvds = false;
+ } else {
+ can_lvds = true;
+ }
} else {
- can_lvds = true;
+ can_lvds = false;
}
ret = sun4i_tcon_init_clocks(dev, tcon);
@@ -1134,7 +1144,7 @@ static const struct sun4i_tcon_quirks sun8i_a33_quirks = {
};
static const struct sun4i_tcon_quirks sun8i_a83t_lcd_quirks = {
- /* nothing is supported */
+ .supports_lvds = true,
};
static const struct sun4i_tcon_quirks sun8i_v3s_quirks = {
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h
index b761c7b823c5..278700c7bf9f 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.h
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h
@@ -175,6 +175,7 @@ struct sun4i_tcon_quirks {
bool has_channel_1; /* a33 does not have channel 1 */
bool has_lvds_alt; /* Does the LVDS clock have a parent other than the TCON clock? */
bool needs_de_be_mux; /* sun6i needs mux to select backend */
+ bool supports_lvds; /* Does the TCON support an LVDS output? */
/* callback to handle tcon muxing options */
int (*set_mux)(struct sun4i_tcon *, const struct drm_encoder *);
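
The TCON change also pins the channel clock with clk_rate_exclusive_get() while it is enabled, so no other consumer can re-rate or reparent the dotclock mid-scanout; the release on disable must mirror the acquire in reverse order. A minimal sketch, with error handling elided as in the hunk above:

#include <linux/clk.h>

static void my_channel_set_status(struct clk *clk, bool enabled)
{
        if (enabled) {
                clk_prepare_enable(clk);
                clk_rate_exclusive_get(clk);    /* pin the current rate */
        } else {
                clk_rate_exclusive_put(clk);    /* reverse order on teardown */
                clk_disable_unprepare(clk);
        }
}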
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index b8403ed48285..fbffe1948b3b 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1903,8 +1903,12 @@ cleanup:
if (!IS_ERR(primary))
drm_plane_cleanup(primary);
- if (group && tegra->domain) {
- iommu_detach_group(tegra->domain, group);
+ if (group && dc->domain) {
+ if (group == tegra->group) {
+ iommu_detach_group(dc->domain, group);
+ tegra->group = NULL;
+ }
+
dc->domain = NULL;
}
@@ -1913,8 +1917,10 @@ cleanup:
static int tegra_dc_exit(struct host1x_client *client)
{
+ struct drm_device *drm = dev_get_drvdata(client->parent);
struct iommu_group *group = iommu_group_get(client->dev);
struct tegra_dc *dc = host1x_client_to_dc(client);
+ struct tegra_drm *tegra = drm->dev_private;
int err;
devm_free_irq(dc->dev, dc->irq, dc);
@@ -1926,7 +1932,11 @@ static int tegra_dc_exit(struct host1x_client *client)
}
if (group && dc->domain) {
- iommu_detach_group(dc->domain, group);
+ if (group == tegra->group) {
+ iommu_detach_group(dc->domain, group);
+ tegra->group = NULL;
+ }
+
dc->domain = NULL;
}
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index d50bddb2e447..7fcf4a242840 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -250,6 +250,7 @@ static void tegra_drm_unload(struct drm_device *drm)
drm_kms_helper_poll_fini(drm);
tegra_drm_fb_exit(drm);
+ drm_atomic_helper_shutdown(drm);
drm_mode_config_cleanup(drm);
err = host1x_device_exit(device);
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 4d2ed966f9e3..87c5d89bc9ba 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -1072,7 +1072,6 @@ static int tegra_dsi_exit(struct host1x_client *client)
struct tegra_dsi *dsi = host1x_client_to_dsi(client);
tegra_output_exit(&dsi->output);
- regulator_disable(dsi->vdd);
return 0;
}
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 36a06a993698..94dac79ac3c9 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -297,6 +297,10 @@ int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha)
case WIN_COLOR_DEPTH_B8G8R8X8:
*alpha = WIN_COLOR_DEPTH_B8G8R8A8;
return 0;
+
+ case WIN_COLOR_DEPTH_B5G6R5:
+ *alpha = opaque;
+ return 0;
}
return -EINVAL;
@@ -330,9 +334,6 @@ void tegra_plane_check_dependent(struct tegra_plane *tegra,
unsigned int zpos[2];
unsigned int i;
- for (i = 0; i < 3; i++)
- state->dependent[i] = false;
-
for (i = 0; i < 2; i++)
zpos[i] = 0;
@@ -346,6 +347,8 @@ void tegra_plane_check_dependent(struct tegra_plane *tegra,
index = tegra_plane_get_overlap_index(tegra, p);
+ state->dependent[index] = false;
+
/*
* If any of the other planes is on top of this plane and uses
* a format with an alpha component, mark this plane as being
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index b5b335c9b2bb..2ebdc6d5a76e 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -159,10 +159,15 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
{
unsigned long start = vma->vm_start;
unsigned long size = vma->vm_end - vma->vm_start;
- unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long offset;
unsigned long page, pos;
- if (offset + size > info->fix.smem_len)
+ if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
+ return -EINVAL;
+
+ offset = vma->vm_pgoff << PAGE_SHIFT;
+
+ if (offset > info->fix.smem_len || size > info->fix.smem_len - offset)
return -EINVAL;
pos = (unsigned long)info->fix.smem_start + offset;
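
The mmap fix closes two overflows in user-controlled arithmetic: `vm_pgoff << PAGE_SHIFT` can wrap when pgoff is huge, and `offset + size` can wrap past the framebuffer length. Rewriting the bound as `size > len - offset` (after checking `offset > len`) means no addition can overflow. A minimal sketch of the check in isolation:

#include <linux/errno.h>
#include <linux/mm.h>

/* Sketch: overflow-safe bounds check for a user-controlled mmap offset. */
static int my_check_mmap(unsigned long pgoff, unsigned long size,
                         unsigned long len)
{
        unsigned long offset;

        if (pgoff > (~0UL >> PAGE_SHIFT))       /* the shift would wrap */
                return -EINVAL;

        offset = pgoff << PAGE_SHIFT;

        /* written without "offset + size" so nothing can overflow */
        if (offset > len || size > len - offset)
                return -EINVAL;

        return 0;
}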
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 5720a0d4ac0a..677ac16c8a6d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -197,6 +197,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data,
case VIRTGPU_PARAM_3D_FEATURES:
value = vgdev->has_virgl_3d == true ? 1 : 0;
break;
+ case VIRTGPU_PARAM_CAPSET_QUERY_FIX:
+ value = 1;
+ break;
default:
return -EINVAL;
}
@@ -472,7 +475,7 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
{
struct virtio_gpu_device *vgdev = dev->dev_private;
struct drm_virtgpu_get_caps *args = data;
- int size;
+ unsigned size, host_caps_size;
int i;
int found_valid = -1;
int ret;
@@ -481,6 +484,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
if (vgdev->num_capsets == 0)
return -ENOSYS;
+ /* don't allow userspace to pass 0 */
+ if (args->size == 0)
+ return -EINVAL;
+
spin_lock(&vgdev->display_info_lock);
for (i = 0; i < vgdev->num_capsets; i++) {
if (vgdev->capsets[i].id == args->cap_set_id) {
@@ -496,11 +503,9 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
return -EINVAL;
}
- size = vgdev->capsets[found_valid].max_size;
- if (args->size > size) {
- spin_unlock(&vgdev->display_info_lock);
- return -EINVAL;
- }
+ host_caps_size = vgdev->capsets[found_valid].max_size;
+ /* only copy to user the minimum of the host caps size or the guest caps size */
+ size = min(args->size, host_caps_size);
list_for_each_entry(cache_ent, &vgdev->cap_cache, head) {
if (cache_ent->id == args->cap_set_id &&
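
Instead of rejecting a guest buffer larger than the host capset, the ioctl now rejects only a zero size and copies min(guest, host) bytes; clamping a user-supplied length before copy_to_user() is the general pattern. A minimal sketch with hypothetical arguments:

#include <linux/kernel.h>
#include <linux/uaccess.h>

/* Sketch: clamp a user-supplied length before copying out. */
static int my_copy_caps(void __user *dst, u32 user_size,
                        const void *src, u32 host_size)
{
        u32 size;

        if (user_size == 0)
                return -EINVAL;

        size = min(user_size, host_size);
        if (copy_to_user(dst, src, size))
                return -EFAULT;

        return 0;
}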
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 184340d486c3..86d25f18aa99 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1337,6 +1337,19 @@ static void __vmw_svga_disable(struct vmw_private *dev_priv)
*/
void vmw_svga_disable(struct vmw_private *dev_priv)
{
+ /*
+ * Disabling SVGA will turn off device modesetting capabilities, so
+ * notify KMS about that so that it doesn't cache atomic state that
+ * isn't valid anymore, for example crtcs turned on.
+ * Strictly we'd want to do this under the SVGA lock (or an SVGA mutex),
+ * but vmw_kms_lost_device() takes the reservation sem and thus we'll
+ * end up with lock order reversal. Thus, a master may actually perform
+ * a new modeset just after we call vmw_kms_lost_device() and race with
+ * vmw_svga_disable(), but that should at worst cause atomic KMS state
+ * to be inconsistent with the device, causing modesetting problems.
+ */
+ vmw_kms_lost_device(dev_priv->dev);
ttm_write_lock(&dev_priv->reservation_sem, false);
spin_lock(&dev_priv->svga_lock);
if (dev_priv->bdev.man[TTM_PL_VRAM].use_type) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index d08753e8fd94..9116fe8baebc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -938,6 +938,7 @@ int vmw_kms_present(struct vmw_private *dev_priv,
int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv);
+void vmw_kms_lost_device(struct drm_device *dev);
int vmw_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index ead61015cd79..3c824fd7cbf3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -31,7 +31,6 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_rect.h>
-
/* Might need a hrtimer here? */
#define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1)
@@ -2517,9 +2516,12 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
* Helper to be used if an error forces the caller to undo the actions of
* vmw_kms_helper_resource_prepare.
*/
-void vmw_kms_helper_resource_revert(struct vmw_resource *res)
+void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx)
{
- vmw_kms_helper_buffer_revert(res->backup);
+ struct vmw_resource *res = ctx->res;
+
+ vmw_kms_helper_buffer_revert(ctx->buf);
+ vmw_dmabuf_unreference(&ctx->buf);
vmw_resource_unreserve(res, false, NULL, 0);
mutex_unlock(&res->dev_priv->cmdbuf_mutex);
}
@@ -2536,10 +2538,14 @@ void vmw_kms_helper_resource_revert(struct vmw_resource *res)
* interrupted by a signal.
*/
int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
- bool interruptible)
+ bool interruptible,
+ struct vmw_validation_ctx *ctx)
{
int ret = 0;
+ ctx->buf = NULL;
+ ctx->res = res;
+
if (interruptible)
ret = mutex_lock_interruptible(&res->dev_priv->cmdbuf_mutex);
else
@@ -2558,6 +2564,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
res->dev_priv->has_mob);
if (ret)
goto out_unreserve;
+
+ ctx->buf = vmw_dmabuf_reference(res->backup);
}
ret = vmw_resource_validate(res);
if (ret)
@@ -2565,7 +2573,7 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
return 0;
out_revert:
- vmw_kms_helper_buffer_revert(res->backup);
+ vmw_kms_helper_buffer_revert(ctx->buf);
out_unreserve:
vmw_resource_unreserve(res, false, NULL, 0);
out_unlock:
@@ -2581,11 +2589,13 @@ out_unlock:
* @out_fence: Optional pointer to a fence pointer. If non-NULL, a
* ref-counted fence pointer is returned here.
*/
-void vmw_kms_helper_resource_finish(struct vmw_resource *res,
- struct vmw_fence_obj **out_fence)
+void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
+ struct vmw_fence_obj **out_fence)
{
- if (res->backup || out_fence)
- vmw_kms_helper_buffer_finish(res->dev_priv, NULL, res->backup,
+ struct vmw_resource *res = ctx->res;
+
+ if (ctx->buf || out_fence)
+ vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf,
out_fence, NULL);
vmw_resource_unreserve(res, false, NULL, 0);
@@ -2851,3 +2861,14 @@ int vmw_kms_set_config(struct drm_mode_set *set,
return drm_atomic_helper_set_config(set, ctx);
}
+
+/**
+ * vmw_kms_lost_device - Notify kms that modesetting capabilities will be lost
+ *
+ * @dev: Pointer to the drm device
+ */
+void vmw_kms_lost_device(struct drm_device *dev)
+{
+ drm_atomic_helper_shutdown(dev);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index cd9da2dd79af..3d2ca280eaa7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -240,6 +240,11 @@ struct vmw_display_unit {
int set_gui_y;
};
+struct vmw_validation_ctx {
+ struct vmw_resource *res;
+ struct vmw_dma_buffer *buf;
+};
+
#define vmw_crtc_to_du(x) \
container_of(x, struct vmw_display_unit, crtc)
#define vmw_connector_to_du(x) \
@@ -296,9 +301,10 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
struct drm_vmw_fence_rep __user *
user_fence_rep);
int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
- bool interruptible);
-void vmw_kms_helper_resource_revert(struct vmw_resource *res);
-void vmw_kms_helper_resource_finish(struct vmw_resource *res,
+ bool interruptible,
+ struct vmw_validation_ctx *ctx);
+void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx);
+void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
struct vmw_fence_obj **out_fence);
int vmw_kms_readback(struct vmw_private *dev_priv,
struct drm_file *file_priv,
@@ -439,5 +445,4 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
int vmw_kms_set_config(struct drm_mode_set *set,
struct drm_modeset_acquire_ctx *ctx);
-
#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index 63a4cd794b73..3ec9eae831b8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -909,12 +909,13 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
struct vmw_framebuffer_surface *vfbs =
container_of(framebuffer, typeof(*vfbs), base);
struct vmw_kms_sou_surface_dirty sdirty;
+ struct vmw_validation_ctx ctx;
int ret;
if (!srf)
srf = &vfbs->surface->res;
- ret = vmw_kms_helper_resource_prepare(srf, true);
+ ret = vmw_kms_helper_resource_prepare(srf, true, &ctx);
if (ret)
return ret;
@@ -933,7 +934,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips,
dest_x, dest_y, num_clips, inc,
&sdirty.base);
- vmw_kms_helper_resource_finish(srf, out_fence);
+ vmw_kms_helper_resource_finish(&ctx, out_fence);
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index b68d74888ab1..6b969e5dea2a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -980,12 +980,13 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
struct vmw_framebuffer_surface *vfbs =
container_of(framebuffer, typeof(*vfbs), base);
struct vmw_stdu_dirty sdirty;
+ struct vmw_validation_ctx ctx;
int ret;
if (!srf)
srf = &vfbs->surface->res;
- ret = vmw_kms_helper_resource_prepare(srf, true);
+ ret = vmw_kms_helper_resource_prepare(srf, true, &ctx);
if (ret)
return ret;
@@ -1008,7 +1009,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
dest_x, dest_y, num_clips, inc,
&sdirty.base);
out_finish:
- vmw_kms_helper_resource_finish(srf, out_fence);
+ vmw_kms_helper_resource_finish(&ctx, out_fence);
return ret;
}
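
The prepare/revert/finish helpers used to re-read res->backup at finish time, but the backup buffer can be replaced between prepare and finish; the new vmw_validation_ctx takes a reference on the buffer at prepare time and hands exactly that buffer back to revert/finish. A sketch of the shape, reusing the driver's own vmw_dmabuf_reference()/vmw_dmabuf_unreference() helpers seen in the hunks above; my_prepare()/my_finish() are hypothetical:

struct my_validation_ctx {
        struct vmw_resource *res;
        struct vmw_dma_buffer *buf;
};

static int my_prepare(struct vmw_resource *res, struct my_validation_ctx *ctx)
{
        ctx->res = res;
        ctx->buf = NULL;

        /* ... reserve and validate the resource ... */

        if (res->backup)
                ctx->buf = vmw_dmabuf_reference(res->backup);   /* pin it */
        return 0;
}

static void my_finish(struct my_validation_ctx *ctx)
{
        /* operate on the buffer captured at prepare time, not res->backup */
        if (ctx->buf)
                vmw_dmabuf_unreference(&ctx->buf);
}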
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 658fa2d3e40c..48685cddbad1 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1089,7 +1089,7 @@ static void ipu_irq_handler(struct irq_desc *desc)
{
struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
- const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14};
+ static const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14};
chained_irq_enter(chip, desc);
@@ -1102,7 +1102,7 @@ static void ipu_err_irq_handler(struct irq_desc *desc)
{
struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
- const int int_reg[] = { 4, 5, 8, 9};
+ static const int int_reg[] = { 4, 5, 8, 9};
chained_irq_enter(chip, desc);
diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index bb9c087e6c0d..9f2d9ec42add 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -788,12 +788,14 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image)
case V4L2_PIX_FMT_SGBRG8:
case V4L2_PIX_FMT_SGRBG8:
case V4L2_PIX_FMT_SRGGB8:
+ case V4L2_PIX_FMT_GREY:
offset = image->rect.left + image->rect.top * pix->bytesperline;
break;
case V4L2_PIX_FMT_SBGGR16:
case V4L2_PIX_FMT_SGBRG16:
case V4L2_PIX_FMT_SGRBG16:
case V4L2_PIX_FMT_SRGGB16:
+ case V4L2_PIX_FMT_Y16:
offset = image->rect.left * 2 +
image->rect.top * pix->bytesperline;
break;
diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c
index 24e12b87a0cb..caa05b0702e1 100644
--- a/drivers/gpu/ipu-v3/ipu-csi.c
+++ b/drivers/gpu/ipu-v3/ipu-csi.c
@@ -288,6 +288,7 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code)
case MEDIA_BUS_FMT_SGBRG10_1X10:
case MEDIA_BUS_FMT_SGRBG10_1X10:
case MEDIA_BUS_FMT_SRGGB10_1X10:
+ case MEDIA_BUS_FMT_Y10_1X10:
cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER;
cfg->mipi_dt = MIPI_DT_RAW10;
cfg->data_width = IPU_CSI_DATA_WIDTH_10;
@@ -296,6 +297,7 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code)
case MEDIA_BUS_FMT_SGBRG12_1X12:
case MEDIA_BUS_FMT_SGRBG12_1X12:
case MEDIA_BUS_FMT_SRGGB12_1X12:
+ case MEDIA_BUS_FMT_Y12_1X12:
cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER;
cfg->mipi_dt = MIPI_DT_RAW12;
cfg->data_width = IPU_CSI_DATA_WIDTH_12;
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index f1cec3d70498..0f70e8847540 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -129,11 +129,14 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index)
if (pre_node == pre->dev->of_node) {
mutex_unlock(&ipu_pre_list_mutex);
device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE);
+ of_node_put(pre_node);
return pre;
}
}
mutex_unlock(&ipu_pre_list_mutex);
+ of_node_put(pre_node);
+
return NULL;
}
diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c
index 067365c733c6..83f9dd934a5d 100644
--- a/drivers/gpu/ipu-v3/ipu-prg.c
+++ b/drivers/gpu/ipu-v3/ipu-prg.c
@@ -102,11 +102,14 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id)
mutex_unlock(&ipu_prg_list_mutex);
device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE);
prg->id = ipu_id;
+ of_node_put(prg_node);
return prg;
}
}
mutex_unlock(&ipu_prg_list_mutex);
+ of_node_put(prg_node);
+
return NULL;
}
@@ -247,10 +250,14 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan)
{
int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num);
struct ipu_prg *prg = ipu_chan->ipu->prg_priv;
- struct ipu_prg_channel *chan = &prg->chan[prg_chan];
+ struct ipu_prg_channel *chan;
u32 val;
- if (!chan->enabled || prg_chan < 0)
+ if (prg_chan < 0)
+ return;
+
+ chan = &prg->chan[prg_chan];
+ if (!chan->enabled)
return;
pm_runtime_get_sync(prg->dev);
@@ -277,13 +284,15 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
{
int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num);
struct ipu_prg *prg = ipu_chan->ipu->prg_priv;
- struct ipu_prg_channel *chan = &prg->chan[prg_chan];
+ struct ipu_prg_channel *chan;
u32 val;
int ret;
if (prg_chan < 0)
return prg_chan;
+ chan = &prg->chan[prg_chan];
+
if (chan->enabled) {
ipu_pre_update(prg->pres[chan->used_pre], *eba);
return 0;
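
Both ipu-prg hunks fix the same ordering bug: `&prg->chan[prg_chan]` was computed before the `prg_chan < 0` check, so an invalid channel indexed the array before the guard could reject it. Validate the index first, then derive the pointer. A minimal sketch:

/* Sketch: validate an index before using it to form a pointer. */
static void my_channel_disable(struct ipu_prg *prg, int prg_chan)
{
        struct ipu_prg_channel *chan;

        if (prg_chan < 0)               /* reject before indexing */
                return;

        chan = &prg->chan[prg_chan];
        if (!chan->enabled)
                return;

        /* ... disable the channel ... */
}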
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 43ddcdfbd0da..9454ac134ce2 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -645,6 +645,9 @@
#define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033
#define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035
#define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038
+#define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040
+#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042
+#define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043
#define USB_DEVICE_ID_LD_JWM 0x1080
#define USB_DEVICE_ID_LD_DMMP 0x1081
#define USB_DEVICE_ID_LD_UMIP 0x1090
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 5f6035a5ce36..e92b77fa574a 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -809,6 +809,9 @@ static const struct hid_device_id hid_ignore_list[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 4bdbf77f7197..72c338eb5fae 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -269,13 +269,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) {
const struct tjmax_model *tm = &tjmax_model_table[i];
if (c->x86_model == tm->model &&
- (tm->mask == ANY || c->x86_mask == tm->mask))
+ (tm->mask == ANY || c->x86_stepping == tm->mask))
return tm->tjmax;
}
/* Early chips have no MSR for TjMax */
- if (c->x86_model == 0xf && c->x86_mask < 4)
+ if (c->x86_model == 0xf && c->x86_stepping < 4)
usemsr_ee = 0;
if (c->x86_model > 0xe && usemsr_ee) {
@@ -426,7 +426,7 @@ static int chk_ucode_version(unsigned int cpu)
* Readings might stop update when processor visited too deep sleep,
* fixed for stepping D0 (6EC).
*/
- if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) {
+ if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) {
pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n");
return -ENODEV;
}
diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c
index ef91b8a67549..84e91286fc4f 100644
--- a/drivers/hwmon/hwmon-vid.c
+++ b/drivers/hwmon/hwmon-vid.c
@@ -293,7 +293,7 @@ u8 vid_which_vrm(void)
if (c->x86 < 6) /* Any CPU with family lower than 6 */
return 0; /* doesn't have VID */
- vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor);
+ vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor);
if (vrm_ret == 134)
vrm_ret = get_via_model_d_vrm();
if (vrm_ret == 0)
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 06b4e1c78bd8..051a72eecb24 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -129,7 +129,10 @@ static ssize_t temp1_input_show(struct device *dev,
data->read_tempreg(data->pdev, &regval);
temp = (regval >> 21) * 125;
- temp -= data->temp_offset;
+ if (temp > data->temp_offset)
+ temp -= data->temp_offset;
+ else
+ temp = 0;
return sprintf(buf, "%u\n", temp);
}
@@ -227,7 +230,7 @@ static bool has_erratum_319(struct pci_dev *pdev)
* and AM3 formats, but that's the best we can do.
*/
return boot_cpu_data.x86_model < 4 ||
- (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2);
+ (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2);
}
static int k10temp_probe(struct pci_dev *pdev,
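
In temp1_input_show(), `temp` is unsigned, so subtracting a larger offset wraps: with a raw reading of 12500 m°C and a 49000 m°C offset, the old code reported 4294930796 instead of a sane floor of 0. The fix clamps before subtracting; a minimal sketch of the guard:

/* Sketch: avoid unsigned wraparound when applying an offset. */
static unsigned int my_apply_offset(unsigned int temp, unsigned int offset)
{
        return temp > offset ? temp - offset : 0;
}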
diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c
index 5a632bcf869b..e59f9113fb93 100644
--- a/drivers/hwmon/k8temp.c
+++ b/drivers/hwmon/k8temp.c
@@ -187,7 +187,7 @@ static int k8temp_probe(struct pci_dev *pdev,
return -ENOMEM;
model = boot_cpu_data.x86_model;
- stepping = boot_cpu_data.x86_mask;
+ stepping = boot_cpu_data.x86_stepping;
/* feature available since SH-C0, exclude older revisions */
if ((model == 4 && stepping == 0) ||
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index a9805c7cb305..e2954fb86d65 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -123,8 +123,10 @@ config I2C_I801
Wildcat Point (PCH)
Wildcat Point-LP (PCH)
BayTrail (SOC)
+ Braswell (SOC)
Sunrise Point-H (PCH)
Sunrise Point-LP (PCH)
+ Kaby Lake-H (PCH)
DNV (SOC)
Broxton (SOC)
Lewisburg (PCH)
diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c
index cd07a69e2e93..44deae78913e 100644
--- a/drivers/i2c/busses/i2c-bcm2835.c
+++ b/drivers/i2c/busses/i2c-bcm2835.c
@@ -50,6 +50,9 @@
#define BCM2835_I2C_S_CLKT BIT(9)
#define BCM2835_I2C_S_LEN BIT(10) /* Fake bit for SW error reporting */
+#define BCM2835_I2C_FEDL_SHIFT 16
+#define BCM2835_I2C_REDL_SHIFT 0
+
#define BCM2835_I2C_CDIV_MIN 0x0002
#define BCM2835_I2C_CDIV_MAX 0xFFFE
@@ -81,7 +84,7 @@ static inline u32 bcm2835_i2c_readl(struct bcm2835_i2c_dev *i2c_dev, u32 reg)
static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev)
{
- u32 divider;
+ u32 divider, redl, fedl;
divider = DIV_ROUND_UP(clk_get_rate(i2c_dev->clk),
i2c_dev->bus_clk_rate);
@@ -100,6 +103,22 @@ static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev)
bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DIV, divider);
+ /*
+ * Number of core clocks to wait after falling edge before
+ * outputting the next data bit. Note that both FEDL and REDL
+ * can't be greater than CDIV/2.
+ */
+ fedl = max(divider / 16, 1u);
+
+ /*
+ * Number of core clocks to wait after rising edge before
+ * sampling the next incoming data bit.
+ */
+ redl = max(divider / 4, 1u);
+
+ bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DEL,
+ (fedl << BCM2835_I2C_FEDL_SHIFT) |
+ (redl << BCM2835_I2C_REDL_SHIFT));
return 0;
}
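
The comments above explain the two delay fields: FEDL is the number of core clocks to wait after a falling edge before driving the next bit, REDL the wait after a rising edge before sampling, and both are packed into the DEL register at the shifts defined earlier (FEDL at bit 16, REDL at bit 0). Worked numbers: a 150 MHz core clock and a 100 kHz bus give divider = 1500, hence fedl = 93 and redl = 375, both safely under CDIV/2 = 750. A minimal sketch of the computation:

#include <linux/kernel.h>

/* Sketch: compute and pack the BCM2835 I2C DEL register fields. */
static u32 my_i2c_del(u32 divider)
{
        u32 fedl = max(divider / 16, 1u);  /* drive delay after falling edge */
        u32 redl = max(divider / 4, 1u);   /* sample delay after rising edge */

        return (fedl << 16) | (redl << 0);
}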
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index ae691884d071..05732531829f 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -209,7 +209,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
i2c_dw_disable_int(dev);
/* Enable the adapter */
- __i2c_dw_enable(dev, true);
+ __i2c_dw_enable_and_wait(dev, true);
/* Clear and enable interrupts */
dw_readl(dev, DW_IC_CLR_INTR);
@@ -644,7 +644,7 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev)
gpio = devm_gpiod_get(dev->dev, "scl", GPIOD_OUT_HIGH);
if (IS_ERR(gpio)) {
r = PTR_ERR(gpio);
- if (r == -ENOENT)
+ if (r == -ENOENT || r == -ENOSYS)
return 0;
return r;
}
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 8eac00efadc1..692b34125866 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -58,6 +58,7 @@
* Wildcat Point (PCH) 0x8ca2 32 hard yes yes yes
* Wildcat Point-LP (PCH) 0x9ca2 32 hard yes yes yes
* BayTrail (SOC) 0x0f12 32 hard yes yes yes
+ * Braswell (SOC) 0x2292 32 hard yes yes yes
* Sunrise Point-H (PCH) 0xa123 32 hard yes yes yes
* Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes
* DNV (SOC) 0x19df 32 hard yes yes yes
diff --git a/drivers/i2c/busses/i2c-octeon-core.c b/drivers/i2c/busses/i2c-octeon-core.c
index 1d8775799056..d9607905dc2f 100644
--- a/drivers/i2c/busses/i2c-octeon-core.c
+++ b/drivers/i2c/busses/i2c-octeon-core.c
@@ -233,6 +233,7 @@ static int octeon_i2c_check_status(struct octeon_i2c *i2c, int final_read)
return -EOPNOTSUPP;
case STAT_TXDATA_NAK:
+ case STAT_BUS_ERROR:
return -EIO;
case STAT_TXADDR_NAK:
case STAT_RXADDR_NAK:
diff --git a/drivers/i2c/busses/i2c-octeon-core.h b/drivers/i2c/busses/i2c-octeon-core.h
index a7ef19855bb8..9bb9f64fdda0 100644
--- a/drivers/i2c/busses/i2c-octeon-core.h
+++ b/drivers/i2c/busses/i2c-octeon-core.h
@@ -43,7 +43,7 @@
#define TWSI_CTL_AAK 0x04 /* Assert ACK */
/* Status values */
-#define STAT_ERROR 0x00
+#define STAT_BUS_ERROR 0x00
#define STAT_START 0x08
#define STAT_REP_START 0x10
#define STAT_TXADDR_ACK 0x18
diff --git a/drivers/i2c/busses/i2c-sirf.c b/drivers/i2c/busses/i2c-sirf.c
index 2fd8b6d00391..87197ece0f90 100644
--- a/drivers/i2c/busses/i2c-sirf.c
+++ b/drivers/i2c/busses/i2c-sirf.c
@@ -341,7 +341,7 @@ static int i2c_sirfsoc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, adap);
init_completion(&siic->done);
- /* Controller Initalisation */
+ /* Controller initialisation */
writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL);
while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET)
@@ -369,7 +369,7 @@ static int i2c_sirfsoc_probe(struct platform_device *pdev)
* but they start to affect the speed when clock is set to faster
* frequencies.
* Through the actual tests, use the different user_div value(which
- * in the divider formular 'Fio / (Fi2c * user_div)') to adapt
+ * in the divider formula 'Fio / (Fi2c * user_div)') to adapt
* the different ranges of i2c bus clock frequency, to make the SCL
* more accurate.
*/
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 17fd55af4d92..caa20eb5f26b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -928,7 +928,7 @@ static int exact_lock(dev_t dev, void *data)
{
struct gendisk *p = data;
- if (!get_disk(p))
+ if (!get_disk_and_module(p))
return -1;
return 0;
}
diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c
index 327a49ba1991..9515ca165dfd 100644
--- a/drivers/iio/adc/aspeed_adc.c
+++ b/drivers/iio/adc/aspeed_adc.c
@@ -243,7 +243,7 @@ static int aspeed_adc_probe(struct platform_device *pdev)
ASPEED_ADC_INIT_POLLING_TIME,
ASPEED_ADC_INIT_TIMEOUT);
if (ret)
- goto scaler_error;
+ goto poll_timeout_error;
}
/* Start all channels in normal mode. */
@@ -274,9 +274,10 @@ iio_register_error:
writel(ASPEED_OPERATION_MODE_POWER_DOWN,
data->base + ASPEED_REG_ENGINE_CONTROL);
clk_disable_unprepare(data->clk_scaler->clk);
-reset_error:
- reset_control_assert(data->rst);
clk_enable_error:
+poll_timeout_error:
+ reset_control_assert(data->rst);
+reset_error:
clk_hw_unregister_divider(data->clk_scaler);
scaler_error:
clk_hw_unregister_divider(data->clk_prescaler);
diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index 7f5def465340..9a2583caedaa 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -722,8 +722,6 @@ static int stm32h7_adc_enable(struct stm32_adc *adc)
int ret;
u32 val;
- /* Clear ADRDY by writing one, then enable ADC */
- stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY);
stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN);
/* Poll for ADRDY to be set (after adc startup time) */
@@ -731,8 +729,11 @@ static int stm32h7_adc_enable(struct stm32_adc *adc)
val & STM32H7_ADRDY,
100, STM32_ADC_TIMEOUT_US);
if (ret) {
- stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN);
+ stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADDIS);
dev_err(&indio_dev->dev, "Failed to enable ADC\n");
+ } else {
+ /* Clear ADRDY by writing one */
+ stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY);
}
return ret;
diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c
index 0dd5a381be64..457372f36791 100644
--- a/drivers/iio/imu/adis_trigger.c
+++ b/drivers/iio/imu/adis_trigger.c
@@ -46,6 +46,10 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
if (adis->trig == NULL)
return -ENOMEM;
+ adis->trig->dev.parent = &adis->spi->dev;
+ adis->trig->ops = &adis_trigger_ops;
+ iio_trigger_set_drvdata(adis->trig, adis);
+
ret = request_irq(adis->spi->irq,
&iio_trigger_generic_data_rdy_poll,
IRQF_TRIGGER_RISING,
@@ -54,9 +58,6 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
if (ret)
goto error_free_trig;
- adis->trig->dev.parent = &adis->spi->dev;
- adis->trig->ops = &adis_trigger_ops;
- iio_trigger_set_drvdata(adis->trig, adis);
ret = iio_trigger_register(adis->trig);
indio_dev->trig = iio_trigger_get(adis->trig);
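
The adis hunks move the trigger's field initialization ahead of request_irq(): once the IRQ is requested it can fire immediately, and the handler reaches the driver state through the trigger's drvdata, so everything the handler touches must be set first. A minimal sketch of the ordering, assuming the driver's adis_trigger_ops:

#include <linux/interrupt.h>
#include <linux/iio/imu/adis.h>
#include <linux/iio/trigger.h>

static int my_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
{
        int ret;

        adis->trig = iio_trigger_alloc("%s-dev%d", indio_dev->name,
                                       indio_dev->id);
        if (!adis->trig)
                return -ENOMEM;

        /* set up everything the IRQ handler may touch... */
        adis->trig->dev.parent = &adis->spi->dev;
        adis->trig->ops = &adis_trigger_ops;
        iio_trigger_set_drvdata(adis->trig, adis);

        /* ...only then expose the trigger to interrupts */
        ret = request_irq(adis->spi->irq, &iio_trigger_generic_data_rdy_poll,
                          IRQF_TRIGGER_RISING, indio_dev->name, adis->trig);
        if (ret)
                iio_trigger_free(adis->trig);

        return ret;
}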
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 79abf70a126d..cd5bfe39591b 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -175,7 +175,7 @@ __poll_t iio_buffer_poll(struct file *filp,
struct iio_dev *indio_dev = filp->private_data;
struct iio_buffer *rb = indio_dev->buffer;
- if (!indio_dev->info)
+ if (!indio_dev->info || rb == NULL)
return 0;
poll_wait(filp, &rb->pollq, wait);
diff --git a/drivers/iio/proximity/Kconfig b/drivers/iio/proximity/Kconfig
index fcb1c4ba5e41..f726f9427602 100644
--- a/drivers/iio/proximity/Kconfig
+++ b/drivers/iio/proximity/Kconfig
@@ -68,6 +68,8 @@ config SX9500
config SRF08
tristate "Devantech SRF02/SRF08/SRF10 ultrasonic ranger sensor"
+ select IIO_BUFFER
+ select IIO_TRIGGERED_BUFFER
depends on I2C
help
Say Y here to build a driver for Devantech SRF02/SRF08/SRF10
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index a5b4cf030c11..9183d148d644 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -550,18 +550,13 @@ static int addr_resolve(struct sockaddr *src_in,
dst_release(dst);
}
- if (ndev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip(dst_in, addr);
- /*
- * Put the loopback device and get the translated
- * device instead.
- */
+ if (ndev) {
+ if (ndev->flags & IFF_LOOPBACK)
+ ret = rdma_translate_ip(dst_in, addr);
+ else
+ addr->bound_dev_if = ndev->ifindex;
dev_put(ndev);
- ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
- } else {
- addr->bound_dev_if = ndev->ifindex;
}
- dev_put(ndev);
return ret;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e66963ca58bd..6ab1059fed66 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
IPV6_ADDR_LINKLOCAL;
struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
&src_addr->sin6_addr, net_dev->ifindex,
- strict);
+ NULL, strict);
bool ret;
if (!rt)
@@ -3069,7 +3069,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
continue;
/* different dest port -> unique */
- if (!cma_any_port(cur_daddr) &&
+ if (!cma_any_port(daddr) &&
+ !cma_any_port(cur_daddr) &&
(dport != cur_dport))
continue;
@@ -3080,7 +3081,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
continue;
/* different dst address -> unique */
- if (!cma_any_addr(cur_daddr) &&
+ if (!cma_any_addr(daddr) &&
+ !cma_any_addr(cur_daddr) &&
cma_addr_cmp(daddr, cur_daddr))
continue;
@@ -3378,13 +3380,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
}
#endif
}
+ daddr = cma_dst_addr(id_priv);
+ daddr->sa_family = addr->sa_family;
+
ret = cma_get_port(id_priv);
if (ret)
goto err2;
- daddr = cma_dst_addr(id_priv);
- daddr->sa_family = addr->sa_family;
-
return 0;
err2:
if (id_priv->cma_dev)
@@ -4173,6 +4175,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
struct cma_multicast *mc;
int ret;
+ if (!id->device)
+ return -EINVAL;
+
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
!cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index c4560d84dfae..25bb178f6074 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -305,16 +305,21 @@ void nldev_exit(void);
static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
struct ib_pd *pd,
struct ib_qp_init_attr *attr,
- struct ib_udata *udata)
+ struct ib_udata *udata,
+ struct ib_uobject *uobj)
{
struct ib_qp *qp;
+ if (!dev->create_qp)
+ return ERR_PTR(-EOPNOTSUPP);
+
qp = dev->create_qp(pd, attr, udata);
if (IS_ERR(qp))
return qp;
qp->device = dev;
qp->pd = pd;
+ qp->uobject = uobj;
/*
* We don't track XRC QPs for now, because they don't have PD
* and more importantly they are created internally by the driver,
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index bc79ca8215d7..af5ad6a56ae4 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -17,6 +17,7 @@
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
+#define IB_POLL_BATCH_DIRECT 8
/* # of WCs to iterate over before yielding */
#define IB_POLL_BUDGET_IRQ 256
@@ -25,18 +26,18 @@
#define IB_POLL_FLAGS \
(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
-static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
+static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
+ int batch)
{
int i, n, completed = 0;
- struct ib_wc *wcs = poll_wc ? : cq->wc;
/*
* budget might be (-1) if the caller does not
* want to bound this call, thus we need unsigned
* minimum here.
*/
- while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
- budget - completed), wcs)) > 0) {
+ while ((n = ib_poll_cq(cq, min_t(u32, batch,
+ budget - completed), wcs)) > 0) {
for (i = 0; i < n; i++) {
struct ib_wc *wc = &wcs[i];
@@ -48,8 +49,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
completed += n;
- if (n != IB_POLL_BATCH ||
- (budget != -1 && completed >= budget))
+ if (n != batch || (budget != -1 && completed >= budget))
break;
}
@@ -72,9 +72,9 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
- struct ib_wc wcs[IB_POLL_BATCH];
+ struct ib_wc wcs[IB_POLL_BATCH_DIRECT];
- return __ib_process_cq(cq, budget, wcs);
+ return __ib_process_cq(cq, budget, wcs, IB_POLL_BATCH_DIRECT);
}
EXPORT_SYMBOL(ib_process_cq_direct);
@@ -88,7 +88,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
int completed;
- completed = __ib_process_cq(cq, budget, NULL);
+ completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
if (completed < budget) {
irq_poll_complete(&cq->iop);
if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
@@ -108,7 +108,8 @@ static void ib_cq_poll_work(struct work_struct *work)
struct ib_cq *cq = container_of(work, struct ib_cq, work);
int completed;
- completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, NULL);
+ completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
+ IB_POLL_BATCH);
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
queue_work(ib_comp_wq, &cq->work);
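
__ib_process_cq() now takes the WC array and batch size explicitly: the IRQ and workqueue paths keep draining the preallocated cq->wc in batches of IB_POLL_BATCH (16), while ib_process_cq_direct() uses a smaller on-stack array of IB_POLL_BATCH_DIRECT (8) entries to bound the caller's stack frame. The stop condition must compare n against the caller's batch, not the global constant, so that a short return still reliably means the CQ is drained. A sketch of the loop contract:

#include <rdma/ib_verbs.h>

/* Sketch: batched CQ drain; stop when a batch comes back short. */
static int my_drain(struct ib_cq *cq, int budget, struct ib_wc *wcs, int batch)
{
        int n, completed = 0;

        /* budget may be -1 (unbounded); the unsigned min handles that */
        while ((n = ib_poll_cq(cq, min_t(u32, batch, budget - completed),
                               wcs)) > 0) {
                /* ... dispatch wcs[0..n-1] ... */
                completed += n;
                if (n != batch || (budget != -1 && completed >= budget))
                        break;
        }
        return completed;
}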
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index e8010e73a1cf..bb065c9449be 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -536,14 +536,14 @@ int ib_register_device(struct ib_device *device,
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
- goto cache_cleanup;
+ goto cg_cleanup;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
- goto cache_cleanup;
+ goto cg_cleanup;
}
device->reg_state = IB_DEV_REGISTERED;
@@ -559,6 +559,8 @@ int ib_register_device(struct ib_device *device,
mutex_unlock(&device_mutex);
return 0;
+cg_cleanup:
+ ib_device_unregister_rdmacg(device);
cache_cleanup:
ib_cache_cleanup_one(device);
ib_cache_release_one(device);
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 85b5ee4defa4..d8eead5d106d 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -141,7 +141,12 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
*/
uobj->context = context;
uobj->type = type;
- atomic_set(&uobj->usecnt, 0);
+ /*
+ * Allocated objects start out as write locked to deny any other
+ * syscalls from accessing them until they are committed. See
+ * rdma_alloc_commit_uobject
+ */
+ atomic_set(&uobj->usecnt, -1);
kref_init(&uobj->ref);
return uobj;
@@ -196,7 +201,15 @@ static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *t
goto free;
}
- uverbs_uobject_get(uobj);
+ /*
+ * The idr_find is guaranteed to return a pointer to something that
+ * isn't freed yet, or NULL, as the free after idr_remove goes through
+ * kfree_rcu(). However the object may still have been released and
+ * kfree() could be called at any time.
+ */
+ if (!kref_get_unless_zero(&uobj->ref))
+ uobj = ERR_PTR(-ENOENT);
+
free:
rcu_read_unlock();
return uobj;
@@ -399,13 +412,13 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
return ret;
}
-static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
+static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
{
#ifdef CONFIG_LOCKDEP
if (exclusive)
- WARN_ON(atomic_read(&uobj->usecnt) > 0);
+ WARN_ON(atomic_read(&uobj->usecnt) != -1);
else
- WARN_ON(atomic_read(&uobj->usecnt) == -1);
+ WARN_ON(atomic_read(&uobj->usecnt) <= 0);
#endif
}
@@ -444,7 +457,7 @@ int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
return 0;
}
- lockdep_check(uobj, true);
+ assert_uverbs_usecnt(uobj, true);
ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
up_read(&ucontext->cleanup_rwsem);
@@ -474,16 +487,17 @@ int rdma_explicit_destroy(struct ib_uobject *uobject)
WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
return 0;
}
- lockdep_check(uobject, true);
+ assert_uverbs_usecnt(uobject, true);
ret = uobject->type->type_class->remove_commit(uobject,
RDMA_REMOVE_DESTROY);
if (ret)
- return ret;
+ goto out;
uobject->type = &null_obj_type;
+out:
up_read(&ucontext->cleanup_rwsem);
- return 0;
+ return ret;
}
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
@@ -527,6 +541,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
return ret;
}
+ /* matches atomic_set(-1) in alloc_uobj */
+ assert_uverbs_usecnt(uobj, true);
+ atomic_set(&uobj->usecnt, 0);
+
uobj->type->type_class->alloc_commit(uobj);
up_read(&uobj->context->cleanup_rwsem);
@@ -561,7 +579,7 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
{
- lockdep_check(uobj, exclusive);
+ assert_uverbs_usecnt(uobj, exclusive);
uobj->type->type_class->lookup_put(uobj, exclusive);
/*
* In order to unlock an object, either decrease its usecnt for
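
The lookup fix above is the classic RCU + kref_get_unless_zero() pattern: idr_find() under rcu_read_lock() may return an object whose last reference was already dropped (the free is merely deferred by kfree_rcu()), so the reference must be taken conditionally and a zero refcount treated as "already gone". A self-contained sketch with a hypothetical my_obj:

#include <linux/err.h>
#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>

struct my_obj {
        struct kref ref;        /* freed via kfree_rcu() elsewhere */
};

static struct my_obj *my_lookup(struct idr *idr, int id)
{
        struct my_obj *obj;

        rcu_read_lock();
        obj = idr_find(idr, id);
        /*
         * The object is guaranteed not to be freed while we hold the RCU
         * read lock, but its refcount may already have hit zero; only a
         * conditional get is safe here.
         */
        if (!obj || !kref_get_unless_zero(&obj->ref))
                obj = ERR_PTR(-ENOENT);
        rcu_read_unlock();

        return obj;
}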
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 857637bf46da..3dbc4e4cca41 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -7,7 +7,6 @@
#include <rdma/restrack.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
-#include <linux/uaccess.h>
#include <linux/pid_namespace.h>
void rdma_restrack_init(struct rdma_restrack_root *res)
@@ -63,7 +62,6 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
enum rdma_restrack_type type = res->type;
struct ib_device *dev;
- struct ib_xrcd *xrcd;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
@@ -81,10 +79,6 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
qp = container_of(res, struct ib_qp, res);
dev = qp->device;
break;
- case RDMA_RESTRACK_XRCD:
- xrcd = container_of(res, struct ib_xrcd, res);
- dev = xrcd->device;
- break;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
return NULL;
@@ -93,6 +87,21 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
return dev;
}
+static bool res_is_user(struct rdma_restrack_entry *res)
+{
+ switch (res->type) {
+ case RDMA_RESTRACK_PD:
+ return container_of(res, struct ib_pd, res)->uobject;
+ case RDMA_RESTRACK_CQ:
+ return container_of(res, struct ib_cq, res)->uobject;
+ case RDMA_RESTRACK_QP:
+ return container_of(res, struct ib_qp, res)->uobject;
+ default:
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
+ return false;
+ }
+}
+
void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
@@ -100,7 +109,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
if (!dev)
return;
- if (!uaccess_kernel()) {
+ if (res_is_user(res)) {
get_task_struct(current);
res->task = current;
res->kern_name = NULL;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8cf15d4a8ac4..9f029a1ca5ea 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1291,10 +1291,9 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
resolved_dev = dev_get_by_index(dev_addr.net,
dev_addr.bound_dev_if);
- if (resolved_dev->flags & IFF_LOOPBACK) {
- dev_put(resolved_dev);
- resolved_dev = idev;
- dev_hold(resolved_dev);
+ if (!resolved_dev) {
+ dev_put(idev);
+ return -ENODEV;
}
ndev = ib_get_ndev_from_path(rec);
rcu_read_lock();
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index f015f1bf88c9..e5a1e7d81326 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -132,7 +132,7 @@ static inline struct ucma_context *_ucma_find_context(int id,
ctx = idr_find(&ctx_idr, id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file)
+ else if (ctx->file != file || !ctx->cm_id)
ctx = ERR_PTR(-EINVAL);
return ctx;
}
@@ -456,6 +456,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
struct rdma_ucm_create_id cmd;
struct rdma_ucm_create_id_resp resp;
struct ucma_context *ctx;
+ struct rdma_cm_id *cm_id;
enum ib_qp_type qp_type;
int ret;
@@ -476,10 +477,10 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type);
- if (IS_ERR(ctx->cm_id)) {
- ret = PTR_ERR(ctx->cm_id);
+ cm_id = rdma_create_id(current->nsproxy->net_ns,
+ ucma_event_handler, ctx, cmd.ps, qp_type);
+ if (IS_ERR(cm_id)) {
+ ret = PTR_ERR(cm_id);
goto err1;
}
@@ -489,14 +490,19 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
ret = -EFAULT;
goto err2;
}
+
+ ctx->cm_id = cm_id;
return 0;
err2:
- rdma_destroy_id(ctx->cm_id);
+ rdma_destroy_id(cm_id);
err1:
mutex_lock(&mut);
idr_remove(&ctx_idr, ctx->id);
mutex_unlock(&mut);
+ mutex_lock(&file->mut);
+ list_del(&ctx->list);
+ mutex_unlock(&file->mut);
kfree(ctx);
return ret;
}
@@ -664,19 +670,23 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
int in_len, int out_len)
{
struct rdma_ucm_resolve_ip cmd;
+ struct sockaddr *src, *dst;
struct ucma_context *ctx;
int ret;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ src = (struct sockaddr *) &cmd.src_addr;
+ dst = (struct sockaddr *) &cmd.dst_addr;
+ if (!rdma_addr_size(src) || !rdma_addr_size(dst))
+ return -EINVAL;
+
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
- (struct sockaddr *) &cmd.dst_addr,
- cmd.timeout_ms);
+ ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
ucma_put_ctx(ctx);
return ret;
}
@@ -1149,6 +1159,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (cmd.qp_state > IB_QPS_ERR)
+ return -EINVAL;
+
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1294,6 +1307,9 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ if (unlikely(cmd.optval > KMALLOC_MAX_SIZE))
+ return -EINVAL;
+
optval = memdup_user((void __user *) (unsigned long) cmd.optval,
cmd.optlen);
if (IS_ERR(optval)) {
@@ -1343,7 +1359,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
return -ENOSPC;
addr = (struct sockaddr *) &cmd->addr;
- if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+ if (cmd->addr_size != rdma_addr_size(addr))
return -EINVAL;
if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
@@ -1411,6 +1427,9 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
join_cmd.uid = cmd.uid;
join_cmd.id = cmd.id;
join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
+ if (!join_cmd.addr_size)
+ return -EINVAL;
+
join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
@@ -1426,6 +1445,9 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (!rdma_addr_size((struct sockaddr *)&cmd.addr))
+ return -EINVAL;
+
return ucma_process_join(file, &cmd, out_len);
}
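
A common thread in the ucma hunks: rdma_addr_size() returns 0 for an address family it does not recognize, so every user-supplied sockaddr is now validated before it reaches rdma_resolve_addr() or the multicast join paths, and ucma_process_join() insists that the caller's claimed addr_size matches the family. A simplified sketch of that validation, using the standard sockaddr types in place of the kernel helper:

#include <errno.h>
#include <netinet/in.h>
#include <sys/socket.h>

/* Rough analogue of rdma_addr_size(): 0 means "unknown family". */
static size_t addr_size(const struct sockaddr *sa)
{
    switch (sa->sa_family) {
    case AF_INET:  return sizeof(struct sockaddr_in);
    case AF_INET6: return sizeof(struct sockaddr_in6);
    default:       return 0;
    }
}

static int check_addrs(const struct sockaddr *src, const struct sockaddr *dst)
{
    /* Validate before doing any work with the addresses. */
    if (!addr_size(src) || !addr_size(dst))
        return -EINVAL;
    return 0;
}

/* Mirrors ucma_process_join(): the size the caller claims must match. */
static int check_join_addr(const struct sockaddr *addr, size_t claimed_size)
{
    size_t sz = addr_size(addr);

    return (sz && claimed_size == sz) ? 0 : -EINVAL;
}
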
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 256934d1f64f..a148de35df8d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -562,9 +562,10 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (f.file)
fdput(f);
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+
uobj_alloc_commit(&obj->uobject);
- mutex_unlock(&file->device->xrcd_tree_mutex);
return in_len;
err_copy:
@@ -603,10 +604,8 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
file->ucontext);
- if (IS_ERR(uobj)) {
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ if (IS_ERR(uobj))
return PTR_ERR(uobj);
- }
ret = uobj_remove_commit(uobj);
return ret ?: in_len;
@@ -979,6 +978,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
struct ib_uverbs_ex_create_cq_resp resp;
struct ib_cq_init_attr attr = {};
+ if (!ib_dev->create_cq)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
@@ -1030,14 +1032,14 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
resp.response_length = offsetof(typeof(resp), response_length) +
sizeof(resp.response_length);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
+
ret = cb(file, obj, &resp, ucore, context);
if (ret)
goto err_cb;
uobj_alloc_commit(&obj->uobject);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_add(&cq->res);
-
return obj;
err_cb:
@@ -1518,7 +1520,8 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = _ib_create_qp(device, pd, &attr, uhw);
+ qp = _ib_create_qp(device, pd, &attr, uhw,
+ &obj->uevent.uobject);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
@@ -1550,8 +1553,10 @@ static int create_qp(struct ib_uverbs_file *file,
atomic_inc(&attr.srq->usecnt);
if (ind_tbl)
atomic_inc(&ind_tbl->usecnt);
+ } else {
+ /* It is done in _ib_create_qp for other QP types */
+ qp->uobject = &obj->uevent.uobject;
}
- qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
@@ -1971,8 +1976,15 @@ static int modify_qp(struct ib_uverbs_file *file,
goto release_qp;
}
+ if ((cmd->base.attr_mask & IB_QP_AV) &&
+ !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
- !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) {
+ (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) ||
+ !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) {
ret = -EINVAL;
goto release_qp;
}
@@ -2941,6 +2953,11 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+
+ if (!pd->device->create_wq) {
+ err = -EOPNOTSUPP;
+ goto err_put_cq;
+ }
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
@@ -3084,7 +3101,12 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
+ if (!wq->device->modify_wq) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
+out:
uobj_put_obj_read(wq);
return ret;
}
@@ -3181,6 +3203,11 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
+
+ if (!ib_dev->create_rwq_ind_table) {
+ err = -EOPNOTSUPP;
+ goto err_uobj;
+ }
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
if (IS_ERR(rwq_ind_tbl)) {
@@ -3770,6 +3797,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
struct ib_device_attr attr = {0};
int err;
+ if (!ib_dev->query_device)
+ return -EOPNOTSUPP;
+
if (ucore->inlen < sizeof(cmd))
return -EINVAL;
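
Most of the uverbs_cmd.c changes are guards on optional driver entry points: create_cq, create_wq, modify_wq, create_rwq_ind_table and query_device are function pointers a device driver may leave NULL, so each one is probed before the call and -EOPNOTSUPP returned otherwise. The shape of the check, with a hypothetical ops table:

#include <errno.h>
#include <stddef.h>

/* Simplified ops table; a driver fills in only the methods it supports. */
struct dev_ops {
    int (*create_cq)(int nelem);
    int (*modify_wq)(int attr);
};

static int do_create_cq(const struct dev_ops *ops, int nelem)
{
    if (!ops->create_cq)          /* optional method left NULL */
        return -EOPNOTSUPP;       /* report "not supported", not a crash */
    return ops->create_cq(nelem);
}
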
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index d96dc1d17be1..339b85145044 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -59,6 +59,9 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return 0;
}
+ if (test_bit(attr_id, attr_bundle_h->valid_bitmap))
+ return -EINVAL;
+
spec = &attr_spec_bucket->attrs[attr_id];
e = &elements[attr_id];
e->uattr = uattr_ptr;
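
The new test_bit() guard rejects an ioctl bundle that names the same attribute id twice; the parser would otherwise silently let the second occurrence overwrite the first. A compact sketch of test-before-set on a single 64-bit bitmap (the kernel uses the bundle's valid_bitmap with arbitrary-width bitops):

#include <errno.h>
#include <stdint.h>

/* One bit per attribute id; duplicates within a bundle are an error. */
static int claim_attr(uint64_t *valid_bitmap, unsigned int attr_id)
{
    uint64_t bit;

    if (attr_id >= 64)            /* sketch limit: one word of ids */
        return -EINVAL;
    bit = UINT64_C(1) << attr_id;
    if (*valid_bitmap & bit)
        return -EINVAL;           /* attribute already seen */
    *valid_bitmap |= bit;
    return 0;
}
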
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index 062485f9300d..62e1eb1d2a28 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -114,6 +114,7 @@ static size_t get_elements_above_id(const void **iters,
short min = SHRT_MAX;
const void *elem;
int i, j, last_stored = -1;
+ unsigned int equal_min = 0;
for_each_element(elem, i, j, elements, num_elements, num_offset,
data_offset) {
@@ -136,6 +137,10 @@ static size_t get_elements_above_id(const void **iters,
*/
iters[last_stored == i ? num_iters - 1 : num_iters++] = elem;
last_stored = i;
+ if (min == GET_ID(id))
+ equal_min++;
+ else
+ equal_min = 1;
min = GET_ID(id);
}
@@ -146,15 +151,10 @@ static size_t get_elements_above_id(const void **iters,
* Therefore, we need to clean the beginning of the array to make sure
* all ids of final elements are equal to min.
*/
- for (i = num_iters - 1; i >= 0 &&
- GET_ID(*(u16 *)(iters[i] + id_offset)) == min; i--)
- ;
-
- num_iters -= i + 1;
- memmove(iters, iters + i + 1, sizeof(*iters) * num_iters);
+ memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min);
*min_id = min;
- return num_iters;
+ return equal_min;
}
#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
@@ -322,7 +322,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
hash = kzalloc(sizeof(*hash) +
ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1),
sizeof(long)) +
- BITS_TO_LONGS(attr_max_bucket) * sizeof(long),
+ BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long),
GFP_KERNEL);
if (!hash) {
res = -ENOMEM;
@@ -509,7 +509,7 @@ static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_
* first handler which != NULL. This also defines the
* set of flags used for this handler.
*/
- for (i = num_object_defs - 1;
+ for (i = num_method_defs - 1;
i >= 0 && !method_defs[i]->handler; i--)
;
hash->methods[min_id++] = method;
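
The equal_min counter lets get_elements_above_id() know, once the scan finishes, how many stored iterators carry the minimal id, so the final memmove no longer has to walk the array backwards looking for them. Isolated from the iterator bookkeeping, the counting idea is just:

#include <limits.h>
#include <stddef.h>

/* Return the minimum of ids[] and, via *count, how many entries equal it. */
static int min_and_count(const int *ids, size_t n, size_t *count)
{
    int min = INT_MAX;
    size_t cnt = 0;

    for (size_t i = 0; i < n; i++) {
        if (ids[i] < min) {
            min = ids[i];
            cnt = 1;              /* a new minimum resets the count */
        } else if (ids[i] == min) {
            cnt++;
        }
    }
    *count = cnt;
    return min;
}
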
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 395a3b091229..b1ca223aa380 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -650,12 +650,21 @@ static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
return -1;
}
+static bool verify_command_idx(u32 command, bool extended)
+{
+ if (extended)
+ return command < ARRAY_SIZE(uverbs_ex_cmd_table);
+
+ return command < ARRAY_SIZE(uverbs_cmd_table);
+}
+
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
+ bool extended_command;
__u32 command;
__u32 flags;
int srcu_key;
@@ -688,6 +697,15 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
}
command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+ flags = (hdr.command &
+ IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
+
+ extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED;
+ if (!verify_command_idx(command, extended_command)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (verify_command_mask(ib_dev, command)) {
ret = -EOPNOTSUPP;
goto out;
@@ -699,12 +717,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
goto out;
}
- flags = (hdr.command &
- IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
if (!flags) {
- if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[command]) {
+ if (!uverbs_cmd_table[command]) {
ret = -EINVAL;
goto out;
}
@@ -725,8 +739,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
struct ib_udata uhw;
size_t written_count = count;
- if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
- !uverbs_ex_cmd_table[command]) {
+ if (!uverbs_ex_cmd_table[command]) {
ret = -ENOSYS;
goto out;
}
@@ -942,6 +955,7 @@ static const struct file_operations uverbs_fops = {
.llseek = no_llseek,
#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = ib_uverbs_ioctl,
#endif
};
@@ -954,6 +968,7 @@ static const struct file_operations uverbs_mmap_fops = {
.llseek = no_llseek,
#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = ib_uverbs_ioctl,
#endif
};
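
verify_command_idx() moves the bounds check ahead of any use of the command index, and picks the right table (basic or extended) from the flags before verify_command_mask() runs; the per-slot NULL checks then only have to handle unimplemented commands. The dispatch discipline, sketched:

#include <errno.h>
#include <stddef.h>

typedef int (*cmd_fn)(void);

static cmd_fn cmd_table[8];       /* sparse: unimplemented slots stay NULL */

static int dispatch(unsigned int command)
{
    /* Bounds first, then the NULL-slot check. */
    if (command >= sizeof(cmd_table) / sizeof(cmd_table[0]))
        return -EINVAL;
    if (!cmd_table[command])
        return -EINVAL;
    return cmd_table[command]();
}

The same ordering matters for the compat_ioctl additions: 32-bit callers now go through the identical handler, so they get identical validation.
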
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index cab0ac3556eb..df1360e6774f 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -234,15 +234,18 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
uverbs_attr_get(ctx, UVERBS_UHW_OUT);
if (!IS_ERR(uhw_in)) {
- udata->inbuf = uhw_in->ptr_attr.ptr;
udata->inlen = uhw_in->ptr_attr.len;
+ if (uverbs_attr_ptr_is_inline(uhw_in))
+ udata->inbuf = &uhw_in->uattr->data;
+ else
+ udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
} else {
udata->inbuf = NULL;
udata->inlen = 0;
}
if (!IS_ERR(uhw_out)) {
- udata->outbuf = uhw_out->ptr_attr.ptr;
+ udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
udata->outlen = uhw_out->ptr_attr.len;
} else {
udata->outbuf = NULL;
@@ -323,7 +326,8 @@ static int uverbs_create_cq_handler(struct ib_device *ib_dev,
cq->res.type = RDMA_RESTRACK_CQ;
rdma_restrack_add(&cq->res);
- ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe);
+ ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe,
+ sizeof(cq->cqe));
if (ret)
goto err_cq;
@@ -375,7 +379,7 @@ static int uverbs_destroy_cq_handler(struct ib_device *ib_dev,
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
- return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp);
+ return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp));
}
static DECLARE_UVERBS_METHOD(
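
create_udata() now distinguishes inline attribute payloads from out-of-line ones: when the data fits in the attribute's 8-byte field it is stored there directly, otherwise the field carries a userspace address recovered with u64_to_user_ptr(). A simplified decode over a hypothetical struct attr — in the kernel the out-of-line case yields a __user pointer that must be copied, never dereferenced in place:

#include <stdint.h>

struct attr {
    uint64_t data;   /* payload itself if len <= 8, else a pointer value */
    uint16_t len;
};

/* Return the location of the payload bytes, wherever they live. */
static const void *attr_payload(const struct attr *a)
{
    if (a->len <= sizeof(a->data))
        return &a->data;                      /* inline: bytes are in the field */
    return (const void *)(uintptr_t)a->data;  /* out-of-line: field is an address */
}
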
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 16ebc6372c31..93025d2009b8 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -887,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (qp_init_attr->cap.max_rdma_ctxs)
rdma_rw_init_qp(device, qp_init_attr);
- qp = _ib_create_qp(device, pd, qp_init_attr, NULL);
+ qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
if (IS_ERR(qp))
return qp;
@@ -898,7 +898,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
}
qp->real_qp = qp;
- qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index ca32057e886f..96f76896488d 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -57,8 +57,8 @@
#define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M)
#define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G)
-#define BNXT_RE_MAX_MR_SIZE_LOW BIT(BNXT_RE_PAGE_SHIFT_1G)
-#define BNXT_RE_MAX_MR_SIZE_HIGH BIT(39)
+#define BNXT_RE_MAX_MR_SIZE_LOW BIT_ULL(BNXT_RE_PAGE_SHIFT_1G)
+#define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39)
#define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH
#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
@@ -120,7 +120,6 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5
-#define BNXT_RE_FLAG_TASK_IN_PROG 6
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;
@@ -158,6 +157,7 @@ struct bnxt_re_dev {
atomic_t srq_count;
atomic_t mr_count;
atomic_t mw_count;
+ atomic_t sched_count;
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
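
The BIT_ULL() change fixes a width bug: BIT(39) expands to 1UL << 39, which is undefined wherever unsigned long is 32 bits, while BIT_ULL() always shifts a 64-bit constant. A standalone illustration:

#include <stdio.h>

#define BIT_ULL(n) (1ULL << (n))   /* 64-bit shift regardless of target */

int main(void)
{
    /* 1UL << 39 would be undefined behaviour with a 32-bit unsigned long;
     * the ULL variant is well-defined everywhere. */
    printf("BIT_ULL(39) = 0x%llx\n", BIT_ULL(39));
    return 0;
}
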
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index ae9e9ff54826..8301d7e5fa8c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -174,10 +174,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
ib_attr->max_pd = dev_attr->max_pd;
ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom;
- if (dev_attr->is_atomic) {
- ib_attr->atomic_cap = IB_ATOMIC_HCA;
- ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
- }
+ ib_attr->atomic_cap = IB_ATOMIC_NONE;
+ ib_attr->masked_atomic_cap = IB_ATOMIC_NONE;
ib_attr->max_ee_rd_atom = 0;
ib_attr->max_res_rd_atom = 0;
@@ -787,20 +785,51 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
return 0;
}
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
+ __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->scq->cq_lock, flags);
+ if (qp->rcq != qp->scq)
+ spin_lock(&qp->rcq->cq_lock);
+ else
+ __acquire(&qp->rcq->cq_lock);
+
+ return flags;
+}
+
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
+ unsigned long flags)
+ __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
+{
+ if (qp->rcq != qp->scq)
+ spin_unlock(&qp->rcq->cq_lock);
+ else
+ __release(&qp->rcq->cq_lock);
+ spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
+}
+
/* Queue Pairs */
int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
{
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
struct bnxt_re_dev *rdev = qp->rdev;
int rc;
+ unsigned long flags;
bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
- bnxt_qplib_del_flush_qp(&qp->qplib_qp);
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
return rc;
}
+
+ flags = bnxt_re_lock_cqs(qp);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
+ bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
+
if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
&rdev->sqp_ah->qplib_ah);
@@ -810,7 +839,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
return rc;
}
- bnxt_qplib_del_flush_qp(&qp->qplib_qp);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
&rdev->qp1_sqp->qplib_qp);
if (rc) {
@@ -1069,6 +1098,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
goto fail;
}
qp->qplib_qp.scq = &cq->qplib_cq;
+ qp->scq = cq;
}
if (qp_init_attr->recv_cq) {
@@ -1080,6 +1110,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
goto fail;
}
qp->qplib_qp.rcq = &cq->qplib_cq;
+ qp->rcq = cq;
}
if (qp_init_attr->srq) {
@@ -1185,7 +1216,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to create HW QP");
- goto fail;
+ goto free_umem;
}
}
@@ -1213,6 +1244,13 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
return &qp->ib_qp;
qp_destroy:
bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+free_umem:
+ if (udata) {
+ if (qp->rumem)
+ ib_umem_release(qp->rumem);
+ if (qp->sumem)
+ ib_umem_release(qp->sumem);
+ }
fail:
kfree(qp);
return ERR_PTR(rc);
@@ -1568,6 +1606,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
int status;
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
+ unsigned long flags;
u8 nw_type;
qp->qplib_qp.modify_flags = 0;
@@ -1596,14 +1635,18 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
dev_dbg(rdev_to_dev(rdev),
"Move QP = %p to flush list\n",
qp);
+ flags = bnxt_re_lock_cqs(qp);
bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
}
if (!qp->sumem &&
qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
dev_dbg(rdev_to_dev(rdev),
"Move QP = %p out of flush list\n",
qp);
- bnxt_qplib_del_flush_qp(&qp->qplib_qp);
+ flags = bnxt_re_lock_cqs(qp);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
}
}
if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
@@ -2189,10 +2232,13 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
+ /* Need unconditional fence for local invalidate
+ * opcode to work as expected.
+ */
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+
if (wr->send_flags & IB_SEND_SIGNALED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
- if (wr->send_flags & IB_SEND_FENCE)
- wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
if (wr->send_flags & IB_SEND_SOLICITED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
@@ -2213,8 +2259,12 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
wqe->frmr.levels = qplib_frpl->hwq.level + 1;
wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
- if (wr->wr.send_flags & IB_SEND_FENCE)
- wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ /* Need unconditional fence for reg_mr
+ * opcode to function as expected.
+ */
+
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+
if (wr->wr.send_flags & IB_SEND_SIGNALED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
@@ -3548,7 +3598,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
int umem_pgs, page_shift, rc;
if (length > BNXT_RE_MAX_MR_SIZE) {
- dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%ld\n",
+ dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%lld\n",
length, BNXT_RE_MAX_MR_SIZE);
return ERR_PTR(-ENOMEM);
}
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 423ebe012f95..e62b7c2c7da6 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -89,6 +89,8 @@ struct bnxt_re_qp {
/* QP1 */
u32 send_psn;
struct ib_ud_header qp1_hdr;
+ struct bnxt_re_cq *scq;
+ struct bnxt_re_cq *rcq;
};
struct bnxt_re_cq {
@@ -220,4 +222,7 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata);
int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
#endif /* __BNXT_RE_IB_VERBS_H__ */
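
bnxt_re_lock_cqs()/bnxt_re_unlock_cqs() take both CQ locks while handling the case where the QP's send and receive CQ are the same object, which must be locked exactly once. A userspace sketch of the same shape with pthread mutexes; the kernel version additionally saves IRQ flags and carries the sparse __acquires/__releases annotations seen above:

#include <pthread.h>

struct cq { pthread_mutex_t lock; };
struct qp { struct cq *scq, *rcq; };

/* Take both CQ locks; when the QP uses one CQ for both send and
 * receive, lock it only once to avoid self-deadlock. */
static void lock_cqs(struct qp *qp)
{
    pthread_mutex_lock(&qp->scq->lock);
    if (qp->rcq != qp->scq)
        pthread_mutex_lock(&qp->rcq->lock);
}

static void unlock_cqs(struct qp *qp)
{
    if (qp->rcq != qp->scq)
        pthread_mutex_unlock(&qp->rcq->lock);
    pthread_mutex_unlock(&qp->scq->lock);
}
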
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 508d00a5a106..f6e361750466 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -656,7 +656,6 @@ static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
mutex_unlock(&bnxt_re_dev_lock);
synchronize_rcu();
- flush_workqueue(bnxt_re_wq);
ib_dealloc_device(&rdev->ibdev);
/* rdev is gone */
@@ -731,6 +730,13 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
struct bnxt_re_qp *qp)
{
struct ib_event event;
+ unsigned long flags;
+
+ if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ flags = bnxt_re_lock_cqs(qp);
+ bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
+ }
memset(&event, 0, sizeof(event));
if (qp->qplib_qp.srq) {
@@ -1417,9 +1423,12 @@ static void bnxt_re_task(struct work_struct *work)
switch (re_work->event) {
case NETDEV_REGISTER:
rc = bnxt_re_ib_reg(rdev);
- if (rc)
+ if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to register with IB: %#x", rc);
+ bnxt_re_remove_one(rdev);
+ bnxt_re_dev_unreg(rdev);
+ }
break;
case NETDEV_UP:
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
@@ -1441,7 +1450,7 @@ static void bnxt_re_task(struct work_struct *work)
break;
}
smp_mb__before_atomic();
- clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
+ atomic_dec(&rdev->sched_count);
kfree(re_work);
}
@@ -1503,7 +1512,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
/* netdev notifier will call NETDEV_UNREGISTER again later since
* we are still holding the reference to the netdev
*/
- if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags))
+ if (atomic_read(&rdev->sched_count) > 0)
goto exit;
bnxt_re_ib_unreg(rdev, false);
bnxt_re_remove_one(rdev);
@@ -1523,7 +1532,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
re_work->vlan_dev = (real_dev == netdev ?
NULL : netdev);
INIT_WORK(&re_work->work, bnxt_re_task);
- set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
+ atomic_inc(&rdev->sched_count);
queue_work(bnxt_re_wq, &re_work->work);
}
}
@@ -1578,6 +1587,11 @@ static void __exit bnxt_re_mod_exit(void)
*/
list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
dev_info(rdev_to_dev(rdev), "Unregistering Device");
+ /*
+ * Flush out any scheduled tasks before destroying the
+ * resources
+ */
+ flush_workqueue(bnxt_re_wq);
bnxt_re_dev_stop(rdev);
bnxt_re_ib_unreg(rdev, true);
bnxt_re_remove_one(rdev);
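
Swapping BNXT_RE_FLAG_TASK_IN_PROG for the sched_count atomic fixes a counting bug: a single bit is cleared by the first work item that finishes even while others are still queued, whereas a counter stays nonzero until everything queued has drained. The same idea with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int sched_count;

/* Call when queueing a work item. */
static void work_queued(void) { atomic_fetch_add(&sched_count, 1); }

/* Call when a work item finishes. */
static void work_done(void)   { atomic_fetch_sub(&sched_count, 1); }

/* A single flag would read "idle" after the first completion even with
 * other items still pending; a counter drains to zero only when all do. */
static bool work_pending(void) { return atomic_load(&sched_count) > 0; }

The NETDEV_UNREGISTER path then defers teardown while work_pending() holds, matching the atomic_read() check above.
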
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 1b0e94697fe3..3a78faba8d91 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -88,75 +88,35 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
}
}
-void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
- unsigned long *flags)
- __acquires(&qp->scq->hwq.lock) __acquires(&qp->rcq->hwq.lock)
+static void bnxt_qplib_acquire_cq_flush_locks(struct bnxt_qplib_qp *qp,
+ unsigned long *flags)
+ __acquires(&qp->scq->flush_lock) __acquires(&qp->rcq->flush_lock)
{
- spin_lock_irqsave(&qp->scq->hwq.lock, *flags);
+ spin_lock_irqsave(&qp->scq->flush_lock, *flags);
if (qp->scq == qp->rcq)
- __acquire(&qp->rcq->hwq.lock);
+ __acquire(&qp->rcq->flush_lock);
else
- spin_lock(&qp->rcq->hwq.lock);
+ spin_lock(&qp->rcq->flush_lock);
}
-void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
- unsigned long *flags)
- __releases(&qp->scq->hwq.lock) __releases(&qp->rcq->hwq.lock)
+static void bnxt_qplib_release_cq_flush_locks(struct bnxt_qplib_qp *qp,
+ unsigned long *flags)
+ __releases(&qp->scq->flush_lock) __releases(&qp->rcq->flush_lock)
{
if (qp->scq == qp->rcq)
- __release(&qp->rcq->hwq.lock);
+ __release(&qp->rcq->flush_lock);
else
- spin_unlock(&qp->rcq->hwq.lock);
- spin_unlock_irqrestore(&qp->scq->hwq.lock, *flags);
-}
-
-static struct bnxt_qplib_cq *bnxt_qplib_find_buddy_cq(struct bnxt_qplib_qp *qp,
- struct bnxt_qplib_cq *cq)
-{
- struct bnxt_qplib_cq *buddy_cq = NULL;
-
- if (qp->scq == qp->rcq)
- buddy_cq = NULL;
- else if (qp->scq == cq)
- buddy_cq = qp->rcq;
- else
- buddy_cq = qp->scq;
- return buddy_cq;
-}
-
-static void bnxt_qplib_lock_buddy_cq(struct bnxt_qplib_qp *qp,
- struct bnxt_qplib_cq *cq)
- __acquires(&buddy_cq->hwq.lock)
-{
- struct bnxt_qplib_cq *buddy_cq = NULL;
-
- buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
- if (!buddy_cq)
- __acquire(&cq->hwq.lock);
- else
- spin_lock(&buddy_cq->hwq.lock);
-}
-
-static void bnxt_qplib_unlock_buddy_cq(struct bnxt_qplib_qp *qp,
- struct bnxt_qplib_cq *cq)
- __releases(&buddy_cq->hwq.lock)
-{
- struct bnxt_qplib_cq *buddy_cq = NULL;
-
- buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
- if (!buddy_cq)
- __release(&cq->hwq.lock);
- else
- spin_unlock(&buddy_cq->hwq.lock);
+ spin_unlock(&qp->rcq->flush_lock);
+ spin_unlock_irqrestore(&qp->scq->flush_lock, *flags);
}
void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
{
unsigned long flags;
- bnxt_qplib_acquire_cq_locks(qp, &flags);
+ bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
__bnxt_qplib_add_flush_qp(qp);
- bnxt_qplib_release_cq_locks(qp, &flags);
+ bnxt_qplib_release_cq_flush_locks(qp, &flags);
}
static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
@@ -173,11 +133,11 @@ static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
}
}
-void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
{
unsigned long flags;
- bnxt_qplib_acquire_cq_locks(qp, &flags);
+ bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
__clean_cq(qp->scq, (u64)(unsigned long)qp);
qp->sq.hwq.prod = 0;
qp->sq.hwq.cons = 0;
@@ -186,7 +146,7 @@ void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
qp->rq.hwq.cons = 0;
__bnxt_qplib_del_flush_qp(qp);
- bnxt_qplib_release_cq_locks(qp, &flags);
+ bnxt_qplib_release_cq_flush_locks(qp, &flags);
}
static void bnxt_qpn_cqn_sched_task(struct work_struct *work)
@@ -283,7 +243,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
u32 sw_cons, raw_cons;
u16 type;
int budget = nq->budget;
- u64 q_handle;
+ uintptr_t q_handle;
/* Service the NQ until empty */
raw_cons = hwq->cons;
@@ -566,7 +526,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
/* Configure the request */
req.dpi = cpu_to_le32(srq->dpi->dpi);
- req.srq_handle = cpu_to_le64(srq);
+ req.srq_handle = cpu_to_le64((uintptr_t)srq);
req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements);
pbl = &srq->hwq.pbl[PBL_LVL_0];
@@ -1419,7 +1379,6 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_destroy_qp req;
struct creq_destroy_qp_resp resp;
- unsigned long flags;
u16 cmd_flags = 0;
int rc;
@@ -1437,19 +1396,12 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
return rc;
}
- /* Must walk the associated CQs to nullified the QP ptr */
- spin_lock_irqsave(&qp->scq->hwq.lock, flags);
-
- __clean_cq(qp->scq, (u64)(unsigned long)qp);
-
- if (qp->rcq && qp->rcq != qp->scq) {
- spin_lock(&qp->rcq->hwq.lock);
- __clean_cq(qp->rcq, (u64)(unsigned long)qp);
- spin_unlock(&qp->rcq->hwq.lock);
- }
-
- spin_unlock_irqrestore(&qp->scq->hwq.lock, flags);
+ return 0;
+}
+
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
bnxt_qplib_free_qp_hdr_buf(res, qp);
bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
kfree(qp->sq.swq);
@@ -1462,7 +1414,6 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
if (qp->orrq.max_elements)
bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
- return 0;
}
void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
@@ -2116,9 +2067,6 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
/* Must block new posting of SQ and RQ */
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
bnxt_qplib_cancel_phantom_processing(qp);
-
- /* Add qp to flush list of the CQ */
- __bnxt_qplib_add_flush_qp(qp);
}
/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
@@ -2294,9 +2242,9 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
sw_sq_cons, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
- bnxt_qplib_lock_buddy_cq(qp, cq);
bnxt_qplib_mark_qp_error(qp);
- bnxt_qplib_unlock_buddy_cq(qp, cq);
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_add_flush_qp(qp);
} else {
if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
/* Before we complete, do WA 9060 */
@@ -2412,9 +2360,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
/* Add qp to flush list of the CQ */
- bnxt_qplib_lock_buddy_cq(qp, cq);
- __bnxt_qplib_add_flush_qp(qp);
- bnxt_qplib_unlock_buddy_cq(qp, cq);
+ bnxt_qplib_add_flush_qp(qp);
}
}
@@ -2498,9 +2444,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
/* Add qp to flush list of the CQ */
- bnxt_qplib_lock_buddy_cq(qp, cq);
- __bnxt_qplib_add_flush_qp(qp);
- bnxt_qplib_unlock_buddy_cq(qp, cq);
+ bnxt_qplib_add_flush_qp(qp);
}
}
done:
@@ -2510,11 +2454,9 @@ done:
bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
{
struct cq_base *hw_cqe, **hw_cqe_ptr;
- unsigned long flags;
u32 sw_cons, raw_cons;
bool rc = true;
- spin_lock_irqsave(&cq->hwq.lock, flags);
raw_cons = cq->hwq.cons;
sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
@@ -2522,7 +2464,6 @@ bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
/* Check for Valid bit. If the CQE is valid, return false */
rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
- spin_unlock_irqrestore(&cq->hwq.lock, flags);
return rc;
}
@@ -2611,9 +2552,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
/* Add qp to flush list of the CQ */
- bnxt_qplib_lock_buddy_cq(qp, cq);
- __bnxt_qplib_add_flush_qp(qp);
- bnxt_qplib_unlock_buddy_cq(qp, cq);
+ bnxt_qplib_add_flush_qp(qp);
}
}
@@ -2728,9 +2667,7 @@ do_rq:
*/
/* Add qp to flush list of the CQ */
- bnxt_qplib_lock_buddy_cq(qp, cq);
- __bnxt_qplib_add_flush_qp(qp);
- bnxt_qplib_unlock_buddy_cq(qp, cq);
+ bnxt_qplib_add_flush_qp(qp);
done:
return rc;
}
@@ -2759,7 +2696,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
u32 budget = num_cqes;
unsigned long flags;
- spin_lock_irqsave(&cq->hwq.lock, flags);
+ spin_lock_irqsave(&cq->flush_lock, flags);
list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
dev_dbg(&cq->hwq.pdev->dev,
"QPLIB: FP: Flushing SQ QP= %p",
@@ -2773,7 +2710,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
qp);
__flush_rq(&qp->rq, qp, &cqe, &budget);
}
- spin_unlock_irqrestore(&cq->hwq.lock, flags);
+ spin_unlock_irqrestore(&cq->flush_lock, flags);
return num_cqes - budget;
}
@@ -2782,11 +2719,9 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
int num_cqes, struct bnxt_qplib_qp **lib_qp)
{
struct cq_base *hw_cqe, **hw_cqe_ptr;
- unsigned long flags;
u32 sw_cons, raw_cons;
int budget, rc = 0;
- spin_lock_irqsave(&cq->hwq.lock, flags);
raw_cons = cq->hwq.cons;
budget = num_cqes;
@@ -2862,20 +2797,15 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
}
exit:
- spin_unlock_irqrestore(&cq->hwq.lock, flags);
return num_cqes - budget;
}
void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cq->hwq.lock, flags);
if (arm_type)
bnxt_qplib_arm_cq(cq, arm_type);
/* Using cq->arm_state variable to track whether to issue cq handler */
atomic_set(&cq->arm_state, 1);
- spin_unlock_irqrestore(&cq->hwq.lock, flags);
}
void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 211b27a8f9e2..ade9f13c0fd1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -389,6 +389,18 @@ struct bnxt_qplib_cq {
struct list_head sqf_head, rqf_head;
atomic_t arm_state;
spinlock_t compl_lock; /* synch CQ handlers */
+/* Locking Notes:
+ * QP can move to error state from modify_qp, async error event or error
+ * CQE as part of poll_cq. When QP is moved to error state, it gets added
+ * to two flush lists, one each for SQ and RQ.
+ * Each flush list is protected by qplib_cq->flush_lock. Both scq and rcq
+ * flush_locks should be acquired when QP is moved to error. The control path
+ * operations (modify_qp and async error events) are synchronized with poll_cq
+ * using upper level CQ locks (bnxt_re_cq->cq_lock) of both SCQ and RCQ.
+ * The qplib_cq->flush_lock is required to synchronize two instances of poll_cq
+ * of the same QP while manipulating the flush list.
+ */
+ spinlock_t flush_lock; /* QP flush management */
};
#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
@@ -478,6 +490,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp);
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp);
void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_sge *sge);
void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
@@ -500,7 +515,6 @@ void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp);
-void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp);
void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
unsigned long *flags);
void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
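
The Locking Notes block above documents the new scheme: the per-CQ flush lists get a dedicated flush_lock instead of piggybacking on hwq.lock, so poll_cq no longer serializes against the hardware-queue lock just to manage flush membership. A much-reduced sketch of flush-list insertion under its own lock — single list, pthread spinlock standing in for the kernel spinlock, no SQ/RQ split:

#include <pthread.h>
#include <stdbool.h>

struct qp_node { struct qp_node *next; bool on_list; };

struct cq_flush {
    pthread_spinlock_t flush_lock;  /* protects only the flush list */
    struct qp_node *head;
};

/* Add a QP to the CQ's flush list exactly once. */
static void flush_add(struct cq_flush *cq, struct qp_node *qp)
{
    pthread_spin_lock(&cq->flush_lock);
    if (!qp->on_list) {
        qp->next = cq->head;
        cq->head = qp;
        qp->on_list = true;
    }
    pthread_spin_unlock(&cq->flush_lock);
}
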
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 8329ec6a7946..80027a494730 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -305,9 +305,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
err_event->res_err_state_reason);
if (!qp)
break;
- bnxt_qplib_acquire_cq_locks(qp, &flags);
bnxt_qplib_mark_qp_error(qp);
- bnxt_qplib_release_cq_locks(qp, &flags);
+ rcfw->aeq_handler(rcfw, qp_event, qp);
break;
default:
/* Command Response */
@@ -460,7 +459,11 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
int rc;
RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
-
+ /* Supply (log-base-2-of-host-page-size - base-page-shift)
+ * to bono to adjust the doorbell page sizes.
+ */
+ req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
+ RCFW_DBR_BASE_PAGE_SHIFT);
/*
* VFs need not setup the HW context area, PF
* shall setup this area for VF. Skipping the
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 6bee6e3636ea..c7cce2e4185e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -49,6 +49,7 @@
#define RCFW_COMM_SIZE 0x104
#define RCFW_DBR_PCI_BAR_REGION 2
+#define RCFW_DBR_BASE_PAGE_SHIFT 12
#define RCFW_CMD_PREP(req, CMD, cmd_flags) \
do { \
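
RCFW_DBR_BASE_PAGE_SHIFT anchors the new log2_dbr_pg_size field: the driver reports PAGE_SHIFT - 12 (0 for 4 KiB host pages) and the firmware recovers the doorbell page size as 4096 << field, which is also why the l2_db_size math in qplib_sp.c stops assuming PAGE_SIZE is 4K. A standalone encoder for a few host page sizes:

#include <stdint.h>
#include <stdio.h>

#define DBR_BASE_PAGE_SHIFT 12          /* encoding 0 means 4 KiB */

/* Encode a host page shift for the firmware field. */
static uint16_t encode_dbr_pg_size(unsigned int page_shift)
{
    return (uint16_t)(page_shift - DBR_BASE_PAGE_SHIFT);
}

int main(void)
{
    for (unsigned int shift = 12; shift <= 16; shift++)
        printf("page %u KiB -> field %u\n",
               (1U << shift) / 1024, encode_dbr_pg_size(shift));
    return 0;
}
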
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index c015c1861351..ee98e5efef84 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -52,18 +52,6 @@ const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0,
/* Device */
-static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
-{
- int rc;
- u16 pcie_ctl2;
-
- rc = pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2,
- &pcie_ctl2);
- if (rc)
- return false;
- return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
-}
-
static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
char *fw_ver)
{
@@ -151,7 +139,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->max_pkey = le32_to_cpu(sb->max_pkeys);
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
- attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE;
+ attr->l2_db_size = (sb->l2_db_space_size + 1) *
+ (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
attr->max_sgid = le32_to_cpu(sb->max_gid);
bnxt_qplib_query_version(rcfw, attr->fw_ver);
@@ -165,7 +154,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
}
- attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
+ attr->is_atomic = 0;
bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc;
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 2d7ea096a247..3e5a4f760d0e 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -1761,7 +1761,30 @@ struct cmdq_initialize_fw {
#define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M (0x3UL << 4)
#define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M (0x4UL << 4)
#define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G (0x5UL << 4)
- __le16 reserved16;
+ /* This value is (log-base-2-of-DBR-page-size - 12).
+ * 0 for 4KB. HW supported values are enumerated below.
+ */
+ __le16 log2_dbr_pg_size;
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_SFT 0
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4K 0x0UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8K 0x1UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16K 0x2UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32K 0x3UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64K 0x4UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128K 0x5UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_256K 0x6UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_512K 0x7UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_1M 0x8UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_2M 0x9UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4M 0xaUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8M 0xbUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16M 0xcUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32M 0xdUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64M 0xeUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M 0xfUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_LAST \
+ CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M
__le64 qpc_page_dir;
__le64 mrw_page_dir;
__le64 srq_page_dir;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 7a9d0de89d6a..e96771ddc9a7 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -1217,6 +1217,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
if (ctx->dev)
c4iw_remove(ctx);
break;
+ case CXGB4_STATE_FATAL_ERROR:
case CXGB4_STATE_START_RECOVERY:
pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
if (ctx->dev) {
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 9a566ee3ceff..82adc0d1d30e 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -601,6 +601,7 @@ static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
wc->dlid_path_bits = 0;
if (is_eth) {
+ wc->slid = 0;
wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
@@ -851,7 +852,6 @@ repoll:
}
}
- wc->slid = be16_to_cpu(cqe->rlid);
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
@@ -860,6 +860,7 @@ repoll:
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
if (is_eth) {
+ wc->slid = 0;
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
if (be32_to_cpu(cqe->vlan_my_qpn) &
MLX4_CQE_CVLAN_PRESENT_MASK) {
@@ -871,6 +872,7 @@ repoll:
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
} else {
+ wc->slid = be16_to_cpu(cqe->rlid);
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
wc->vlan_id = 0xffff;
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8d2ee9322f2e..5a0e4fc4785a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -219,8 +219,6 @@ static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
gid_tbl[i].version = 2;
if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
gid_tbl[i].type = 1;
- else
- memset(&gid_tbl[i].gid, 0, 12);
}
}
@@ -366,8 +364,13 @@ static int mlx4_ib_del_gid(struct ib_device *device,
if (!gids) {
ret = -ENOMEM;
} else {
- for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
- memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
+ memcpy(&gids[i].gid,
+ &port_gid_table->gids[i].gid,
+ sizeof(union ib_gid));
+ gids[i].gid_type =
+ port_gid_table->gids[i].gid_type;
+ }
}
}
spin_unlock_bh(&iboe->lock);
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index bc6299697dda..d42b922bede8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 5b974fb97611..94a27d89a303 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -64,14 +64,9 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
}
}
-static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
-{
- return mlx5_buf_offset(&buf->buf, n * size);
-}
-
static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
- return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+ return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}
static u8 sw_ownership_bit(int n, int nent)
@@ -226,7 +221,6 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
break;
}
- wc->slid = be16_to_cpu(cqe->slid);
wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
wc->dlid_path_bits = cqe->ml_path;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
@@ -241,10 +235,12 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
}
if (ll != IB_LINK_LAYER_ETHERNET) {
+ wc->slid = be16_to_cpu(cqe->slid);
wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
return;
}
+ wc->slid = 0;
vlan_present = cqe->l4_l3_hdr_type & 0x1;
roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
if (vlan_present) {
@@ -403,7 +399,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
- mlx5_buf_free(dev->mdev, &buf->buf);
+ mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
}
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
@@ -724,12 +720,25 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return ret;
}
-static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
- int nent, int cqe_size)
+static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_cq_buf *buf,
+ int nent,
+ int cqe_size)
{
+ struct mlx5_frag_buf_ctrl *c = &buf->fbc;
+ struct mlx5_frag_buf *frag_buf = &c->frag_buf;
+ u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
int err;
- err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
+ MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
+ MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
+
+ mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
+
+ err = mlx5_frag_buf_alloc_node(dev->mdev,
+ nent * cqe_size,
+ frag_buf,
+ dev->mdev->priv.numa_node);
if (err)
return err;
@@ -862,14 +871,15 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
ib_umem_release(cq->buf.umem);
}
-static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
+ struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
for (i = 0; i < buf->nent; i++) {
- cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe = get_cqe(cq, i);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
@@ -891,14 +901,15 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
cq->mcq.arm_db = cq->db.db + 1;
cq->mcq.cqe_sz = cqe_size;
- err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
if (err)
goto err_db;
- init_cq_buf(cq, &cq->buf);
+ init_cq_frag_buf(cq, &cq->buf);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
- MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+ cq->buf.fbc.frag_buf.npages;
*cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
@@ -906,11 +917,12 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_fill_page_array(&cq->buf.buf, pas);
+ mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ cq->buf.fbc.frag_buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
*index = dev->mdev->priv.uar->index;
@@ -1177,7 +1189,12 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
- umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+ /* check multiplication overflow */
+ if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
+ return -EINVAL;
+
+ umem = ib_umem_get(context, ucmd.buf_addr,
+ (size_t)ucmd.cqe_size * entries,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem)) {
err = PTR_ERR(umem);
@@ -1207,11 +1224,11 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (!cq->resize_buf)
return -ENOMEM;
- err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
if (err)
goto ex;
- init_cq_buf(cq, cq->resize_buf);
+ init_cq_frag_buf(cq, cq->resize_buf);
return 0;
@@ -1256,9 +1273,8 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
}
while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
- dcqe = get_cqe_from_buf(cq->resize_buf,
- (i + 1) & (cq->resize_buf->nent),
- dsize);
+ dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
+ (i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
memcpy(dcqe, scqe, dsize);
@@ -1324,8 +1340,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
if (!err) {
- npas = cq->resize_buf->buf.npages;
- page_shift = cq->resize_buf->buf.page_shift;
+ struct mlx5_frag_buf_ctrl *c;
+
+ c = &cq->resize_buf->fbc;
+ npas = c->frag_buf.npages;
+ page_shift = c->frag_buf.page_shift;
}
}
@@ -1346,7 +1365,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
pas, 0);
else
- mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+ mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
+ pas);
MLX5_SET(modify_cq_in, in,
modify_field_select_resize_field_select.resize_field_select.resize_field_select,
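
The resize path now rejects ucmd.cqe_size * entries when the product cannot fit in size_t, before the value reaches ib_umem_get(). The kernel writes the guard as SIZE_MAX / ucmd.cqe_size <= entries - 1; an equivalent form, assuming entries >= 1 and cqe_size already validated elsewhere:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>

/* Compute cqe_size * entries, refusing to overflow size_t. */
static int total_size(size_t cqe_size, size_t entries, size_t *out)
{
    if (cqe_size && entries > SIZE_MAX / cqe_size)
        return -EINVAL;
    *out = cqe_size * entries;
    return 0;
}
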
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644
index 000000000000..0e04fdddf670
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_rep_flow_db_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ if (!ibdev)
+ return -ENOMEM;
+
+ ibdev->rep = rep;
+ ibdev->mdev = dev;
+ ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+ MLX5_CAP_GEN(dev, num_vhca_ports));
+ if (!__mlx5_ib_add(ibdev, &rep_profile))
+ return -EINVAL;
+
+ rep->rep_if[REP_IB].priv = ibdev;
+
+ return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *dev;
+
+ if (!rep->rep_if[REP_IB].priv)
+ return;
+
+ dev = mlx5_ib_rep_to_dev(rep);
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_vport_rep_load;
+ rep_if.unload = mlx5_ib_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+ }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_nic_rep_load;
+ rep_if.unload = mlx5_ib_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ rep_if.priv = dev;
+
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+ mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF */
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+ return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ if (!dev->rep)
+ return 0;
+
+ flow_rule =
+ mlx5_eswitch_add_send_to_vport_rule(esw,
+ dev->rep->vport,
+ sq->base.mqp.qpn);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ sq->flow_rule = flow_rule;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644
index 000000000000..046fd942fd46
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ return 0;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4236c8086820..390e4375647e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -57,7 +57,9 @@
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
#include "cmd.h"
+#include <linux/mlx5/fs_helpers.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -130,7 +132,7 @@ static int get_port_state(struct ib_device *ibdev,
int ret;
memset(&attr, 0, sizeof(attr));
- ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ ret = ibdev->query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
@@ -154,10 +156,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
-
- if (ndev->dev.parent == &mdev->pdev->dev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
+ if (ibdev->rep) {
+ struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+ struct net_device *rep_ndev;
+
+ rep_ndev = mlx5_ib_get_rep_netdev(esw,
+ ibdev->rep->vport);
+ if (rep_ndev == ndev)
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
+ } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
+ NULL : ndev;
+ }
write_unlock(&roce->netdev_lock);
break;
@@ -245,12 +256,16 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_port *port;
+ if (!mlx5_core_mp_enabled(ibdev->mdev) ||
+ ll != IB_LINK_LAYER_ETHERNET) {
+ if (native_port_num)
+ *native_port_num = ib_port_num;
+ return ibdev->mdev;
+ }
+
if (native_port_num)
*native_port_num = 1;
- if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
- return ibdev->mdev;
-
port = &ibdev->port[ib_port_num - 1];
if (!port)
return NULL;
@@ -1268,6 +1283,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return ret;
}
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ int ret;
+
+ /* Only link layer == ethernet is valid for representors */
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ if (ret || !props)
+ return ret;
+
+ /* We don't support GIDs */
+ props->gid_tbl_len = 0;
+
+ return ret;
+}
+
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
@@ -2286,11 +2317,9 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-#define IPV4_VERSION 4
-#define IPV6_VERSION 6
static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
u32 *match_v, const union ib_flow_spec *ib_spec,
- u32 *tag_id, bool *is_drop)
+ struct mlx5_flow_act *action)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
@@ -2373,7 +2402,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ip_version, 0xf);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, IPV4_VERSION);
+ ip_version, MLX5_FS_IPV4_VERSION);
} else {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@@ -2412,7 +2441,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ip_version, 0xf);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, IPV6_VERSION);
+ ip_version, MLX5_FS_IPV6_VERSION);
} else {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@@ -2508,13 +2537,14 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
- *tag_id = ib_spec->flow_tag.tag_id;
+ action->flow_tag = ib_spec->flow_tag.tag_id;
+ action->has_flow_tag = true;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
LAST_DROP_FIELD))
return -EOPNOTSUPP;
- *is_drop = true;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
default:
return -EINVAL;
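Folding the loose tag_id/is_drop out-parameters into struct mlx5_flow_act lets parse_flow_attr() grow new action types without changing its signature; callers just test the accumulated bits. Condensed from the hunks below:

struct mlx5_flow_act flow_act = { .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG };

err = parse_flow_attr(dev->mdev, spec->match_criteria,
		      spec->match_value, ib_flow, &flow_act);

if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
	rule_dst = NULL;	/* drop rules carry no destination */
	dest_num = 0;
}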
@@ -2631,7 +2661,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rules(iter->rule);
@@ -2642,7 +2672,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
mlx5_del_flow_rules(handler->rule);
put_flow_table(dev, handler->prio, true);
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(handler);
@@ -2691,7 +2721,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db.prios[priority];
+ prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
@@ -2699,7 +2729,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
- prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
@@ -2709,7 +2739,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_SNIFFER_RX :
MLX5_FLOW_NAMESPACE_SNIFFER_TX);
- prio = &dev->flow_db.sniffer[ft_type];
+ prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
@@ -2767,13 +2797,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_flow_spec *spec;
struct mlx5_flow_destination *rule_dst = dst;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
- u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
- bool is_drop = false;
int err = 0;
int dest_num = 1;
@@ -2792,7 +2820,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
spec->match_value,
- ib_flow, &flow_tag, &is_drop);
+ ib_flow, &flow_act);
if (err < 0)
goto free;
@@ -2802,9 +2830,20 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
+ if (dev->rep) {
+ void *misc;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port,
+ dev->rep->vport);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- if (is_drop) {
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
rule_dst = NULL;
dest_num = 0;
} else {
@@ -2812,15 +2851,14 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+ if (flow_act.has_flow_tag &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- flow_tag, flow_attr->type);
+ flow_act.flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
- flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
rule_dst, dest_num);
@@ -2999,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (!dst)
return ERR_PTR(-ENOMEM);
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
@@ -3048,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
goto destroy_ft;
}
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
return &handler->ibflow;
@@ -3058,7 +3096,7 @@ destroy_ft:
if (ft_prio_tx)
put_flow_table(dev, ft_prio_tx, false);
unlock:
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
@@ -3263,7 +3301,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
struct mlx5_ib_dev *ibdev;
struct ib_event ibev;
bool fatal = false;
- u8 port = 0;
+ u8 port = (u8)work->param;
if (mlx5_core_is_mp_slave(work->dev)) {
ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
@@ -3283,8 +3321,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
case MLX5_DEV_EVENT_PORT_UP:
case MLX5_DEV_EVENT_PORT_DOWN:
case MLX5_DEV_EVENT_PORT_INITIALIZED:
- port = (u8)work->param;
-
/* In RoCE, port up/down events are handled in
* mlx5_netdev_event().
*/
@@ -3298,24 +3334,19 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
case MLX5_DEV_EVENT_LID_CHANGE:
ibev.event = IB_EVENT_LID_CHANGE;
- port = (u8)work->param;
break;
case MLX5_DEV_EVENT_PKEY_CHANGE:
ibev.event = IB_EVENT_PKEY_CHANGE;
- port = (u8)work->param;
-
schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
case MLX5_DEV_EVENT_GUID_CHANGE:
ibev.event = IB_EVENT_GID_CHANGE;
- port = (u8)work->param;
break;
case MLX5_DEV_EVENT_CLIENT_REREG:
ibev.event = IB_EVENT_CLIENT_REREGISTER;
- port = (u8)work->param;
break;
case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
schedule_work(&ibdev->delay_drop.delay_drop_work);
@@ -3327,7 +3358,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = port;
- if (port < 1 || port > ibdev->num_ports) {
+ if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
goto out;
}
@@ -3772,6 +3803,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ return 0;
+}
+
static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
@@ -3802,7 +3852,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
goto err_destroy_vport_lag;
}
- dev->flow_db.lag_demux_ft = ft;
+ dev->flow_db->lag_demux_ft = ft;
return 0;
err_destroy_vport_lag:
@@ -3814,9 +3864,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db.lag_demux_ft) {
- mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
- dev->flow_db.lag_demux_ft = NULL;
+ if (dev->flow_db->lag_demux_ft) {
+ mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+ dev->flow_db->lag_demux_ft = NULL;
mlx5_cmd_destroy_vport_lag(mdev);
}
@@ -3848,14 +3898,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
- err = mlx5_add_netdev_notifier(dev, port_num);
- if (err)
- return err;
-
if (MLX5_CAP_GEN(dev->mdev, roce)) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
- goto err_unregister_netdevice_notifier;
+ return err;
}
err = mlx5_eth_lag_init(dev);
@@ -3868,8 +3914,6 @@ err_disable_roce:
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
-err_unregister_netdevice_notifier:
- mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
@@ -4503,7 +4547,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -4512,7 +4556,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
kfree(dev->port);
}
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
const char *name;
@@ -4534,8 +4578,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
goto err_free_port;
if (!mlx5_core_mp_enabled(mdev)) {
- int i;
-
for (i = 1; i <= dev->num_ports; i++) {
err = get_port_caps(dev, i);
if (err)
@@ -4564,7 +4606,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dev.parent = &mdev->pdev->dev;
- mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
@@ -4585,7 +4626,38 @@ err_free_port:
return -ENOMEM;
}
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dev *nic_dev;
+
+ nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+ if (!nic_dev)
+ return -EINVAL;
+
+ dev->flow_db = nic_dev->flow_db;
+
+ return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
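Two flow_db stages now exist: the default one allocates and owns the steering database, while mlx5_ib_stage_rep_flow_db_init() points a vport representor at the uplink device's database instead. Only the owning profile registers the kfree() cleanup, so the shared pointer is never freed twice. The borrow pattern, in brief:

/* owner: allocates, and its profile runs flow_db_cleanup() */
nic_dev->flow_db = kzalloc(sizeof(*nic_dev->flow_db), GFP_KERNEL);

/* borrower: shares the uplink's table, registers no cleanup */
rep_dev->flow_db = nic_dev->flow_db;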
+
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
@@ -4626,7 +4698,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
@@ -4669,7 +4740,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@@ -4720,6 +4790,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return 0;
}
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ dev->ib_dev.query_port = mlx5_ib_query_port;
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
+ dev->ib_dev.query_port = mlx5_ib_rep_query_port;
+
+ return 0;
+}
+
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ int i;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->roce[i].dev = dev;
+ dev->roce[i].native_port_num = i + 1;
+ dev->roce[i].last_port_state = IB_PORT_DOWN;
+ }
+
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
+ dev->ib_dev.create_wq = mlx5_ib_create_wq;
+ dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
+ dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
+ dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+ dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+ return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ int err = 0;
+ u8 port_num;
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+ return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -4727,37 +4871,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
int port_type_cap;
u8 port_num;
int err;
- int i;
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- for (i = 0; i < dev->num_ports; i++) {
- dev->roce[i].dev = dev;
- dev->roce[i].native_port_num = i + 1;
- dev->roce[i].last_port_state = IB_PORT_DOWN;
- }
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+ if (err)
+ return err;
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
err = mlx5_enable_eth(dev, port_num);
if (err)
- return err;
+ goto cleanup;
}
return 0;
+cleanup:
+ mlx5_ib_stage_common_roce_cleanup(dev);
+
+ return err;
}
static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@@ -4773,16 +4906,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
- mlx5_remove_netdev_notifier(dev, port_num);
+ mlx5_ib_stage_common_roce_cleanup(dev);
}
}
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
{
return create_dev_resources(&dev->devr);
}
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_dev_resources(&dev->devr);
}
@@ -4794,7 +4927,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
@@ -4806,7 +4939,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
return 0;
}
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -4837,7 +4970,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@@ -4852,30 +4985,30 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
return err;
}
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
- ib_unregister_device(&dev->ib_dev);
+ destroy_umrc_res(dev);
}
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
{
- return create_umr_res(dev);
+ ib_unregister_device(&dev->ib_dev);
}
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
- destroy_umrc_res(dev);
+ return create_umr_res(dev);
}
static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
@@ -4890,7 +5023,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
{
int err;
int i;
@@ -4905,9 +5038,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
return 0;
}
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile,
- int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_register_vport_reps(dev);
+
+ return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage)
{
/* Number of stages to cleanup */
while (stage) {
@@ -4921,23 +5066,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
{
- struct mlx5_ib_dev *dev;
int err;
int i;
printk_once(KERN_INFO "%s", mlx5_version);
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
- if (!dev)
- return NULL;
-
- dev->mdev = mdev;
- dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
- MLX5_CAP_GEN(mdev, num_vhca_ports));
-
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
@@ -4961,9 +5097,15 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
@@ -4985,12 +5127,15 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
- mlx5_ib_stage_umr_res_init,
- mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
@@ -4999,6 +5144,51 @@ static const struct mlx5_ib_profile pf_profile = {
NULL),
};
+static const struct mlx5_ib_profile nic_rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UAR,
+ mlx5_ib_stage_uar_init,
+ mlx5_ib_stage_uar_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+ mlx5_ib_stage_rep_reg_init,
+ mlx5_ib_stage_rep_reg_cleanup),
+};
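Both profiles are consumed by the same engine: __mlx5_ib_add() walks the stage array calling each init hook, and on failure (or at remove time) the cleanup hooks are replayed in reverse. A condensed sketch of the loop shown earlier in this patch:

for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
	if (profile->stage[i].init) {
		err = profile->stage[i].init(dev);
		if (err)
			goto err_unwind;
	}
}
return dev;

err_unwind:
	/* unwind only the stages that completed, last first */
	while (i--)
		if (profile->stage[i].cleanup)
			profile->stage[i].cleanup(dev);
	return NULL;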
+
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
{
struct mlx5_ib_multiport_info *mpi;
@@ -5044,8 +5234,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
enum rdma_link_layer ll;
+ struct mlx5_ib_dev *dev;
int port_type_cap;
+ printk_once(KERN_INFO "%s", mlx5_version);
+
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
@@ -5055,7 +5248,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
return mlx5_ib_add_slave_port(mdev, port_num);
}
- return __mlx5_ib_add(mdev, &pf_profile);
+ dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->mdev = mdev;
+ dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+ if (MLX5_VPORT_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+ return __mlx5_ib_add(dev, &nic_rep_profile);
+ }
+
+ return __mlx5_ib_add(dev, &pf_profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 139385129973..c33bf1523d67 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -343,6 +343,7 @@ struct mlx5_ib_sq {
struct mlx5_ib_wq *sq;
struct mlx5_ib_ubuffer ubuffer;
struct mlx5_db *doorbell;
+ struct mlx5_flow_handle *flow_rule;
u32 tisn;
u8 state;
};
@@ -371,7 +372,7 @@ struct mlx5_ib_qp {
struct mlx5_ib_rss_qp rss_qp;
struct mlx5_ib_dct dct;
};
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
@@ -413,7 +414,7 @@ struct mlx5_ib_qp {
};
struct mlx5_ib_cq_buf {
- struct mlx5_buf buf;
+ struct mlx5_frag_buf_ctrl fbc;
struct ib_umem *umem;
int cqe_size;
int nent;
@@ -495,7 +496,7 @@ struct mlx5_ib_wc {
struct mlx5_ib_srq {
struct ib_srq ibsrq;
struct mlx5_core_srq msrq;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
u64 *wrid;
	/* protect SRQ handling
@@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop {
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
+ MLX5_IB_STAGE_FLOW_DB,
MLX5_IB_STAGE_CAPS,
+ MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_ODP,
@@ -739,10 +742,12 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_CONG_DEBUGFS,
MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
+ MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_IB_REG,
- MLX5_IB_STAGE_UMR_RESOURCES,
+ MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};
@@ -797,7 +802,7 @@ struct mlx5_ib_dev {
struct srcu_struct mr_srcu;
u32 null_mkey;
#endif
- struct mlx5_ib_flow_db flow_db;
+ struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
@@ -807,6 +812,7 @@ struct mlx5_ib_dev {
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
+ struct mlx5_eswitch_rep *rep;
/* protect the user_td */
struct mutex lb_mutex;
@@ -1049,6 +1055,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
+
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
u8 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 556e015678de..95a36e9ea552 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return;
debugfs_remove_recursive(dev->cache.root);
@@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
struct mlx5_cache_ent *ent;
int i;
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return 0;
cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->rep &&
mlx5_core_is_pf(dev->mdev))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
@@ -838,7 +839,8 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
*umem = ib_umem_get(pd->uobject->context, start, length,
access_flags, 0);
err = PTR_ERR_OR_ZERO(*umem);
- if (err < 0) {
+ if (err) {
+ *umem = NULL;
mlx5_ib_err(dev, "umem get failed (%d)\n", err);
return err;
}
@@ -1415,6 +1417,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (err) {
mlx5_ib_warn(dev, "Failed to rereg UMR\n");
ib_umem_release(mr->umem);
+ mr->umem = NULL;
clean_mr(dev, mr);
return err;
}
@@ -1498,14 +1501,11 @@ static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
u32 key = mr->mmkey.key;
err = destroy_mkey(dev, mr);
- kfree(mr);
if (err) {
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
key, err);
return err;
}
- } else {
- mlx5_mr_cache_free(dev, mr);
}
return 0;
@@ -1548,6 +1548,11 @@ static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
atomic_sub(npages, &dev->mdev->priv.reg_pages);
}
+ if (!mr->allocated_from_cache)
+ kfree(mr);
+ else
+ mlx5_mr_cache_free(dev, mr);
+
return 0;
}
@@ -1816,7 +1821,6 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
- mr->ndescs = sg_nents;
for_each_sg(sgl, sg, sg_nents, i) {
if (unlikely(i >= mr->max_descs))
@@ -1828,6 +1832,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
sg_offset = 0;
}
+ mr->ndescs = i;
if (sg_offset_p)
*sg_offset_p = sg_offset;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 39d24bf694a8..85c612ac547a 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -36,6 +36,7 @@
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
/* not supported currently */
static int wq_signature;
@@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
mlx5_core_destroy_tis(dev->mdev, sq->tisn);
}
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ if (sq->flow_rule)
+ mlx5_del_flow_rules(sq->flow_rule);
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, void *qpin,
struct ib_pd *pd)
@@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
if (err)
goto err_umem;
+ err = create_flow_rule_vport_sq(dev, sq);
+ if (err)
+ goto err_flow;
+
return 0;
+err_flow:
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
err_umem:
ib_umem_release(sq->ubuffer.umem);
sq->ubuffer.umem = NULL;
@@ -1157,11 +1172,12 @@ err_umem:
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
+ destroy_flow_rule_vport_sq(dev, sq);
mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
-static int get_rq_pas_size(void *qpc)
+static size_t get_rq_pas_size(void *qpc)
{
u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
@@ -1177,7 +1193,8 @@ static int get_rq_pas_size(void *qpc)
}
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, void *qpin)
+ struct mlx5_ib_rq *rq, void *qpin,
+ size_t qpinlen)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
@@ -1186,9 +1203,12 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
void *rqc;
void *wq;
void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
- int inlen;
+ size_t rq_pas_size = get_rq_pas_size(qpc);
+ size_t inlen;
int err;
- u32 rq_pas_size = get_rq_pas_size(qpc);
+
+ if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas))
+ return -EINVAL;
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
in = kvzalloc(inlen, GFP_KERNEL);
@@ -1263,6 +1283,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
if (tunnel_offload_en)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
kvfree(in);
@@ -1277,7 +1301,7 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
}
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- u32 *in,
+ u32 *in, size_t inlen,
struct ib_pd *pd)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
@@ -1309,7 +1333,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in);
+ err = create_raw_packet_qp_rq(dev, rq, in, inlen);
if (err)
goto err_destroy_sq;
@@ -1554,6 +1578,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
if (err)
@@ -1584,6 +1612,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 uidx = MLX5_IB_DEFAULT_UIDX;
struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_qp_base *base;
+ int mlx5_st;
void *qpc;
u32 *in;
int err;
@@ -1592,6 +1621,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
+ mlx5_st = to_mlx5_st(init_attr->qp_type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
if (init_attr->rwq_ind_tbl) {
if (!udata)
return -ENOSYS;
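Hoisting the to_mlx5_st() check to the top of create_qp_common() matters because the result is later written into a narrow qpc field with MLX5_SET(); an unchecked -EINVAL would be silently truncated into a bogus service type. In brief:

int mlx5_st = to_mlx5_st(init_attr->qp_type);

if (mlx5_st < 0)			/* unsupported QP type */
	return -EINVAL;
/* ... */
MLX5_SET(qpc, qpc, st, mlx5_st);	/* guaranteed valid here */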
@@ -1753,7 +1786,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
- MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
+ MLX5_SET(qpc, qpc, st, mlx5_st);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
@@ -1867,11 +1900,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
}
+ if (inlen < 0) {
+ err = -EINVAL;
+ goto err;
+ }
+
if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) {
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
- err = create_raw_packet_qp(dev, qp, in, pd);
+ err = create_raw_packet_qp(dev, qp, in, inlen, pd);
} else {
err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
}
@@ -2129,7 +2167,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
struct ib_qp_init_attr *attr,
struct mlx5_ib_create_qp *ucmd)
{
- struct mlx5_ib_dev *dev;
struct mlx5_ib_qp *qp;
int err = 0;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
@@ -2138,8 +2175,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
if (!attr->srq || !attr->recv_cq)
return ERR_PTR(-EINVAL);
- dev = to_mdev(pd->device);
-
err = get_qp_user_index(to_mucontext(pd->uobject->context),
ucmd, sizeof(*ucmd), &uidx);
if (err)
@@ -3095,8 +3130,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
goto out;
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
- !optab[mlx5_cur][mlx5_new])
+ !optab[mlx5_cur][mlx5_new]) {
+ err = -EINVAL;
goto out;
+ }
op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6d5fadad9090..3c7522d025f2 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -241,8 +241,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_srq *srq;
- int desc_size;
- int buf_size;
+ size_t desc_size;
+ size_t buf_size;
int err;
struct mlx5_srq_attr in = {0};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
@@ -266,15 +266,18 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
+ if (desc_size == 0 || srq->msrq.max_gs > desc_size)
+ return ERR_PTR(-EINVAL);
desc_size = roundup_pow_of_two(desc_size);
- desc_size = max_t(int, 32, desc_size);
+ desc_size = max_t(size_t, 32, desc_size);
+ if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg))
+ return ERR_PTR(-EINVAL);
srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
sizeof(struct mlx5_wqe_data_seg);
srq->msrq.wqe_shift = ilog2(desc_size);
buf_size = srq->msrq.max * desc_size;
- mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
- desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
- srq->msrq.max_avail_gather);
+ if (buf_size < desc_size)
+ return ERR_PTR(-EINVAL);
in.type = init_attr->srq_type;
if (pd->uobject)
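The added checks treat desc_size and buf_size as what they are, sizes derived from user-controlled attributes: both are now size_t and every intermediate result is range-checked before use, so an oversized request fails with -EINVAL instead of wrapping around into a small allocation. The generic form of the defense:

size_t total;

if (count != 0 && elem_size > SIZE_MAX / count)
	return -EINVAL;	/* count * elem_size would overflow */
total = count * elem_size;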
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index db4bf97c0e15..eb32abb0099a 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -90,8 +90,8 @@ static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 port_num)
dev_hold(qdev->ndev);
/* The HW vendor's device driver must guarantee
- * that this function returns NULL before the net device reaches
- * NETDEV_UNREGISTER_FINAL state.
+ * that this function returns NULL before the net device has finished
+ * NETDEV_UNREGISTER state.
*/
return qdev->ndev;
}
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index 478b7317b80a..26dc374787f7 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -458,8 +458,7 @@ qedr_addr6_resolve(struct qedr_dev *dev,
}
return -EINVAL;
}
- neigh = dst_neigh_lookup(dst, &dst_in);
-
+ neigh = dst_neigh_lookup(dst, &fl6.daddr);
if (neigh) {
rcu_read_lock();
if (neigh->nud_state & NUD_VALID) {
@@ -494,10 +493,14 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
qp = idr_find(&dev->qpidr, conn_param->qpn);
- laddr = (struct sockaddr_in *)&cm_id->local_addr;
- raddr = (struct sockaddr_in *)&cm_id->remote_addr;
- laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
- raddr6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+ DP_DEBUG(dev, QEDR_MSG_IWARP, "MAPPED %d %d\n",
+ ntohs(((struct sockaddr_in *)&cm_id->remote_addr)->sin_port),
+ ntohs(raddr->sin_port));
DP_DEBUG(dev, QEDR_MSG_IWARP,
"Connect source address: %pISpc, remote address: %pISpc\n",
@@ -599,8 +602,8 @@ int qedr_iw_create_listen(struct iw_cm_id *cm_id, int backlog)
int rc;
int i;
- laddr = (struct sockaddr_in *)&cm_id->local_addr;
- laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
DP_DEBUG(dev, QEDR_MSG_IWARP,
"Create Listener address: %pISpc\n", &cm_id->local_addr);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 53f00dbf313f..875b17272d65 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -3034,6 +3034,11 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_SEND_WITH_IMM:
+ if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+ rc = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
swqe = (struct rdma_sq_send_wqe_1st *)wqe;
swqe->wqe_size = 2;
@@ -3075,6 +3080,11 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+ rc = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
@@ -3724,7 +3734,7 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qedr_cq *cq = get_qedr_cq(ibcq);
- union rdma_cqe *cqe = cq->latest_cqe;
+ union rdma_cqe *cqe;
u32 old_cons, new_cons;
unsigned long flags;
int update = 0;
@@ -3741,6 +3751,7 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return qedr_gsi_poll_cq(ibcq, num_entries, wc);
spin_lock_irqsave(&cq->cq_lock, flags);
+ cqe = cq->latest_cqe;
old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
while (num_entries && is_valid_cqe(cq, cqe)) {
struct qedr_qp *qp;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index f45e99a938e0..ca5638091b55 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -95,24 +95,6 @@ void usnic_ib_log_vf(struct usnic_ib_vf *vf)
}
/* Start of netdev section */
-static inline const char *usnic_ib_netdev_event_to_string(unsigned long event)
-{
- const char *event2str[] = {"NETDEV_NONE", "NETDEV_UP", "NETDEV_DOWN",
- "NETDEV_REBOOT", "NETDEV_CHANGE",
- "NETDEV_REGISTER", "NETDEV_UNREGISTER", "NETDEV_CHANGEMTU",
- "NETDEV_CHANGEADDR", "NETDEV_GOING_DOWN", "NETDEV_FEAT_CHANGE",
- "NETDEV_BONDING_FAILOVER", "NETDEV_PRE_UP",
- "NETDEV_PRE_TYPE_CHANGE", "NETDEV_POST_TYPE_CHANGE",
- "NETDEV_POST_INT", "NETDEV_UNREGISTER_FINAL", "NETDEV_RELEASE",
- "NETDEV_NOTIFY_PEERS", "NETDEV_JOIN"
- };
-
- if (event >= ARRAY_SIZE(event2str))
- return "UNKNOWN_NETDEV_EVENT";
- else
- return event2str[event];
-}
-
static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
{
struct usnic_ib_ucontext *ctx;
@@ -185,7 +167,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
ib_dispatch_event(&ib_event);
} else {
usnic_dbg("Ignoring %s on %s\n",
- usnic_ib_netdev_event_to_string(event),
+ netdev_cmd_to_name(event),
us_ibdev->ib_dev.name);
}
break;
@@ -222,7 +204,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
break;
default:
usnic_dbg("Ignoring event %s on %s",
- usnic_ib_netdev_event_to_string(event),
+ netdev_cmd_to_name(event),
us_ibdev->ib_dev.name);
}
mutex_unlock(&us_ibdev->usdev_lock);
@@ -264,7 +246,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
switch (event) {
case NETDEV_DOWN:
usnic_info("%s via ip notifiers",
- usnic_ib_netdev_event_to_string(event));
+ netdev_cmd_to_name(event));
usnic_fwd_del_ipaddr(us_ibdev->ufdev);
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
ib_event.event = IB_EVENT_GID_CHANGE;
@@ -275,7 +257,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
case NETDEV_UP:
usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address);
usnic_info("%s via ip notifiers: ip %pI4",
- usnic_ib_netdev_event_to_string(event),
+ netdev_cmd_to_name(event),
&us_ibdev->ufdev->inaddr);
ib_event.event = IB_EVENT_GID_CHANGE;
ib_event.device = &us_ibdev->ib_dev;
@@ -284,7 +266,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
break;
default:
usnic_info("Ignoring event %s on %s",
- usnic_ib_netdev_event_to_string(event),
+ netdev_cmd_to_name(event),
us_ibdev->ib_dev.name);
}
mutex_unlock(&us_ibdev->usdev_lock);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index faa9478c14a6..f95b97646c25 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -114,6 +114,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
+ struct pvrdma_create_cq_resp cq_resp = {0};
struct pvrdma_create_cq ucmd;
BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);
@@ -197,6 +198,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
cq->ibcq.cqe = resp->cqe;
cq->cq_handle = resp->cq_handle;
+ cq_resp.cqn = resp->cq_handle;
spin_lock_irqsave(&dev->cq_tbl_lock, flags);
dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
@@ -205,7 +207,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
cq->uar = &(to_vucontext(context)->uar);
/* Copy udata back. */
- if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) {
+ if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
dev_warn(&dev->pdev->dev,
"failed to copy back udata\n");
pvrdma_destroy_cq(&cq->ibcq);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 5acebb1ef631..af235967a9c2 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -113,6 +113,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_srq *cmd = &req.create_srq;
struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp;
+ struct pvrdma_create_srq_resp srq_resp = {0};
struct pvrdma_create_srq ucmd;
unsigned long flags;
int ret;
@@ -204,12 +205,13 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
}
srq->srq_handle = resp->srqn;
+ srq_resp.srqn = resp->srqn;
spin_lock_irqsave(&dev->srq_tbl_lock, flags);
dev->srq_tbl[srq->srq_handle % dev->dsr->caps.max_srq] = srq;
spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
/* Copy udata back. */
- if (ib_copy_to_udata(udata, &srq->srq_handle, sizeof(__u32))) {
+ if (ib_copy_to_udata(udata, &srq_resp, sizeof(srq_resp))) {
dev_warn(&dev->pdev->dev, "failed to copy back udata\n");
pvrdma_destroy_srq(&srq->ibsrq);
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index 16b96616ef7e..a51463cd2f37 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -447,6 +447,7 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
+ struct pvrdma_alloc_pd_resp pd_resp = {0};
int ret;
void *ptr;
@@ -475,9 +476,10 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
pd->privileged = !context;
pd->pd_handle = resp->pd_handle;
pd->pdn = resp->pd_handle;
+ pd_resp.pdn = resp->pd_handle;
if (context) {
- if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+ if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) {
dev_warn(&dev->pdev->dev,
"failed to copy back protection domain\n");
pvrdma_dealloc_pd(&pd->ibpd);
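The three pvrdma fixes in this patch (CQ, SRQ, PD) share one shape: rather than copying a bare __u32 handle to userspace, the driver fills a zero-initialized, ABI-defined response struct and copies the whole thing, so the layout userspace expects is honored and no uninitialized stack bytes leak through padding. Condensed from the PD case:

struct pvrdma_alloc_pd_resp pd_resp = {0};	/* zero-init covers padding */

pd_resp.pdn = resp->pd_handle;
if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) {
	dev_warn(&dev->pdev->dev,
		 "failed to copy back protection domain\n");
	/* undo the allocation before returning the error */
}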
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 1b2e5362a3ff..cc429b567d0a 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -489,11 +489,13 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
unsigned long timeout;
struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
- if (percpu_ref_is_zero(&mr->refcount))
- return 0;
- /* avoid dma mr */
- if (mr->lkey)
+ if (mr->lkey) {
+ /* avoid dma mr */
rvt_dereg_clean_qps(mr);
+ /* @mr was indexed on rcu protected @lkey_table */
+ synchronize_rcu();
+ }
+
timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
if (!timeout) {
rvt_pr_err(rdi,
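The reordering in rvt_check_refs() follows the RCU teardown rule: the MR was reachable through the RCU-protected lkey_table, so after unhooking it from the QPs a grace period must elapse before it is safe to wait for the refcount and free the region. The canonical removal sequence:

/* 1. unpublish: no new RCU reader can find the object */
rcu_assign_pointer(table[lkey], NULL);
/* 2. wait out readers that may already hold a pointer */
synchronize_rcu();
/* 3. only now tear down and free the object */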
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 11f74cbe6660..ea302b054601 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -281,8 +281,6 @@ void ipoib_delete_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
- WARN_ONCE(!priv->path_dentry, "null path debug file\n");
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
priv->mcg_dentry = priv->path_dentry = NULL;
diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c
index 1f316d66e6f7..41614c185918 100644
--- a/drivers/input/keyboard/matrix_keypad.c
+++ b/drivers/input/keyboard/matrix_keypad.c
@@ -218,8 +218,10 @@ static void matrix_keypad_stop(struct input_dev *dev)
{
struct matrix_keypad *keypad = input_get_drvdata(dev);
+ spin_lock_irq(&keypad->lock);
keypad->stopped = true;
- mb();
+ spin_unlock_irq(&keypad->lock);
+
flush_work(&keypad->work.work);
/*
* matrix_keypad_scan() will leave IRQs enabled;
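Replacing the bare mb() with the keypad spinlock works because the scan/interrupt path already tests keypad->stopped under the same lock: the lock both orders the store and serializes against a scan in flight, and flush_work() then drains anything already queued. The reader side pairs up roughly like this (a sketch, not part of this hunk):

spin_lock_irqsave(&keypad->lock, flags);
if (unlikely(keypad->stopped)) {	/* stop raced with an IRQ */
	spin_unlock_irqrestore(&keypad->lock, flags);
	return IRQ_HANDLED;
}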
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 3d2e23a0ae39..a246fc686bb7 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -173,7 +173,6 @@ static const char * const smbus_pnp_ids[] = {
"LEN0046", /* X250 */
"LEN004a", /* W541 */
"LEN200f", /* T450s */
- "LEN2018", /* T460p */
NULL
};
diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c
index db4f6bb502e3..a5ab774da4cc 100644
--- a/drivers/input/touchscreen/mms114.c
+++ b/drivers/input/touchscreen/mms114.c
@@ -1,11 +1,8 @@
-/*
- * Copyright (C) 2012 Samsung Electronics Co.Ltd
- * Author: Joonyoung Shim <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
+// Melfas MMS114/MMS152 touchscreen device driver
+//
+// Copyright (c) 2012 Samsung Electronics Co., Ltd.
+// Author: Joonyoung Shim <[email protected]>
#include <linux/module.h>
#include <linux/delay.h>
@@ -624,4 +621,4 @@ module_i2c_driver(mms114_driver);
/* Module information */
MODULE_AUTHOR("Joonyoung Shim <[email protected]>");
MODULE_DESCRIPTION("MELFAS mms114 Touchscreen driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 35a408d0ae4f..99bc9bd64b9e 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -205,7 +205,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
* for example, an "address" value of 0x12345f000 will
* flush from 0x123440000 to 0x12347ffff (256KiB). */
unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
- unsigned long mask = __rounddown_pow_of_two(address ^ last);;
+ unsigned long mask = __rounddown_pow_of_two(address ^ last);
desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
} else {
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index 55cfb986225b..faf734ff4cf3 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -339,9 +339,6 @@ int __init bcm7038_l1_of_init(struct device_node *dn,
goto out_unmap;
}
- pr_info("registered BCM7038 L1 intc (mem: 0x%p, IRQs: %d)\n",
- intc->cpus[0]->map_base, IRQS_PER_WORD * intc->n_words);
-
return 0;
out_unmap:
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index 983640eba418..8968e5e93fcb 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -318,9 +318,6 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn,
}
}
- pr_info("registered %s intc (mem: 0x%p, parent IRQ(s): %d)\n",
- intc_name, data->map_base[0], data->num_parent_irqs);
-
return 0;
out_free_domain:
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index 691d20eb0bec..0e65f609352e 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -262,9 +262,6 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np,
ct->chip.irq_set_wake = irq_gc_set_wake;
}
- pr_info("registered L2 intc (mem: 0x%p, parent irq: %d)\n",
- base, parent_irq);
-
return 0;
out_free_domain:
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 993a8426a453..1ff38aff9f29 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -94,7 +94,7 @@ static struct irq_chip gicv2m_msi_irq_chip = {
static struct msi_domain_info gicv2m_msi_domain_info = {
.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_PCI_MSIX),
+ MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
.chip = &gicv2m_msi_irq_chip,
};
@@ -155,18 +155,12 @@ static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain,
return 0;
}
-static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq)
+static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq,
+ int nr_irqs)
{
- int pos;
-
- pos = hwirq - v2m->spi_start;
- if (pos < 0 || pos >= v2m->nr_spis) {
- pr_err("Failed to teardown msi. Invalid hwirq %d\n", hwirq);
- return;
- }
-
spin_lock(&v2m_lock);
- __clear_bit(pos, v2m->bm);
+ bitmap_release_region(v2m->bm, hwirq - v2m->spi_start,
+ get_count_order(nr_irqs));
spin_unlock(&v2m_lock);
}
@@ -174,13 +168,13 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *args)
{
struct v2m_data *v2m = NULL, *tmp;
- int hwirq, offset, err = 0;
+ int hwirq, offset, i, err = 0;
spin_lock(&v2m_lock);
list_for_each_entry(tmp, &v2m_nodes, entry) {
- offset = find_first_zero_bit(tmp->bm, tmp->nr_spis);
- if (offset < tmp->nr_spis) {
- __set_bit(offset, tmp->bm);
+ offset = bitmap_find_free_region(tmp->bm, tmp->nr_spis,
+ get_count_order(nr_irqs));
+ if (offset >= 0) {
v2m = tmp;
break;
}
@@ -192,16 +186,21 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
hwirq = v2m->spi_start + offset;
- err = gicv2m_irq_gic_domain_alloc(domain, virq, hwirq);
- if (err) {
- gicv2m_unalloc_msi(v2m, hwirq);
- return err;
- }
+ for (i = 0; i < nr_irqs; i++) {
+ err = gicv2m_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
+ if (err)
+ goto fail;
- irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
- &gicv2m_irq_chip, v2m);
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &gicv2m_irq_chip, v2m);
+ }
return 0;
+
+fail:
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+ gicv2m_unalloc_msi(v2m, hwirq, nr_irqs);
+ return err;
}
static void gicv2m_irq_domain_free(struct irq_domain *domain,
@@ -210,8 +209,7 @@ static void gicv2m_irq_domain_free(struct irq_domain *domain,
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
struct v2m_data *v2m = irq_data_get_irq_chip_data(d);
- BUG_ON(nr_irqs != 1);
- gicv2m_unalloc_msi(v2m, d->hwirq);
+ gicv2m_unalloc_msi(v2m, d->hwirq, nr_irqs);
irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
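PCI Multi-MSI hands a device a block of vectors whose hwirqs must be contiguous and power-of-two aligned, which is exactly what the bitmap region API provides: get_count_order() rounds the request up and bitmap_find_free_region()/bitmap_release_region() allocate and free aligned blocks under the lock. In short:

int order = get_count_order(nr_irqs);	/* e.g. nr_irqs = 5 -> order 3 */

offset = bitmap_find_free_region(v2m->bm, v2m->nr_spis, order);
if (offset < 0)
	return -ENOSPC;		/* no aligned block available */
hwirq = v2m->spi_start + offset;
/* ... on teardown: */
bitmap_release_region(v2m->bm, hwirq - v2m->spi_start, order);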
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index 14a8c0a7e095..25a98de5cfb2 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -132,6 +132,8 @@ static int __init its_pci_of_msi_init(void)
for (np = of_find_matching_node(NULL, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller"))
continue;
diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 833a90fe33ae..8881a053c173 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -154,6 +154,8 @@ static void __init its_pmsi_of_init(void)
for (np = of_find_matching_node(NULL, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller"))
continue;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 06f025fd5726..2cbb19cddbf8 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1412,7 +1412,7 @@ static struct irq_chip its_irq_chip = {
* This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations.
*/
#define IRQS_PER_CHUNK_SHIFT 5
-#define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT)
+#define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT)
#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */
static unsigned long *lpi_bitmap;
@@ -2119,11 +2119,10 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
/*
- * At least one bit of EventID is being used, hence a minimum
- * of two entries. No, the architecture doesn't let you
- * express an ITT with a single entry.
+ * We allocate at least one chunk worth of LPIs per device,
+ * and thus that many ITEs. The device may require fewer though.
*/
- nr_ites = max(2UL, roundup_pow_of_two(nvecs));
+ nr_ites = max(IRQS_PER_CHUNK, roundup_pow_of_two(nvecs));
sz = nr_ites * its->ite_size;
sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
itt = kzalloc(sz, GFP_KERNEL);
@@ -2495,7 +2494,7 @@ static int its_vpe_set_affinity(struct irq_data *d,
static void its_vpe_schedule(struct its_vpe *vpe)
{
- void * __iomem vlpi_base = gic_data_rdist_vlpi_base();
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
u64 val;
/* Schedule the VPE */
@@ -2527,7 +2526,7 @@ static void its_vpe_schedule(struct its_vpe *vpe)
static void its_vpe_deschedule(struct its_vpe *vpe)
{
- void * __iomem vlpi_base = gic_data_rdist_vlpi_base();
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
u32 count = 1000000; /* 1s! */
bool clean;
u64 val;
@@ -3314,6 +3313,8 @@ static int __init its_of_probe(struct device_node *node)
for (np = of_find_matching_node(node, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller")) {
pr_warn("%pOF: no msi-controller property, ITS ignored\n",
np);
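
As a worked example of the ITT sizing above: with IRQS_PER_CHUNK_SHIFT = 5,
IRQS_PER_CHUNK is 32UL. A sketch of the computation (its_nr_ites() is a
hypothetical wrapper around the expression in its_create_device()):

	static unsigned long its_nr_ites(unsigned long nvecs)
	{
		/* e.g. nvecs = 5: roundup_pow_of_two(5) = 8, max(32, 8) = 32 */
		return max(IRQS_PER_CHUNK, roundup_pow_of_two(nvecs));
	}

A device requesting 5 vectors therefore gets an ITT sized for 32 entries,
where the old code would have sized it for max(2UL, 8) = 8.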
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index a57c0fbbd34a..d99cc07903ec 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -673,7 +673,7 @@ static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
MPIDR_TO_SGI_RS(cluster_id) |
tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
- pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
+ pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
gic_write_sgi1r(val);
}
@@ -688,7 +688,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
* Ensure that stores to Normal memory are visible to the
* other CPUs before issuing the IPI.
*/
- smp_wmb();
+ wmb();
for_each_cpu(cpu, mask) {
u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c
index 675eda5ff2b8..4760307ab43f 100644
--- a/drivers/irqchip/irq-imx-gpcv2.c
+++ b/drivers/irqchip/irq-imx-gpcv2.c
@@ -28,20 +28,6 @@ struct gpcv2_irqchip_data {
static struct gpcv2_irqchip_data *imx_gpcv2_instance;
-/*
- * Interface for the low level wakeup code.
- */
-u32 imx_gpcv2_get_wakeup_source(u32 **sources)
-{
- if (!imx_gpcv2_instance)
- return 0;
-
- if (sources)
- *sources = imx_gpcv2_instance->wakeup_sources;
-
- return IMR_NUM;
-}
-
static int gpcv2_wakeup_source_save(void)
{
struct gpcv2_irqchip_data *cd;
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index ef92a4d2038e..d32268cc1174 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -424,8 +424,6 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
spin_lock_irqsave(&gic_lock, flags);
write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
- gic_clear_pcpu_masks(intr);
- set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
irq_data_update_effective_affinity(data, cpumask_of(cpu));
spin_unlock_irqrestore(&gic_lock, flags);
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 62f541f968f6..07074820a167 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -375,6 +375,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
dev->ofdev.dev.of_node = np;
dev->ofdev.archdata.dma_mask = 0xffffffffUL;
dev->ofdev.dev.dma_mask = &dev->ofdev.archdata.dma_mask;
+ dev->ofdev.dev.coherent_dma_mask = dev->ofdev.archdata.dma_mask;
dev->ofdev.dev.parent = parent;
dev->ofdev.dev.bus = &macio_bus_type;
dev->ofdev.dev.release = macio_release_dev;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 1a46b41dac70..6422846b546e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -659,11 +659,11 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio)
static void search_free(struct closure *cl)
{
struct search *s = container_of(cl, struct search, cl);
- bio_complete(s);
if (s->iop.bio)
bio_put(s->iop.bio);
+ bio_complete(s);
closure_debug_destroy(cl);
mempool_free(s, s->d->c->search);
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 312895788036..f2273143b3cb 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -963,6 +963,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
uint32_t rtime = cpu_to_le32(get_seconds());
struct uuid_entry *u;
char buf[BDEVNAME_SIZE];
+ struct cached_dev *exist_dc, *t;
bdevname(dc->bdev, buf);
@@ -987,6 +988,16 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
return -EINVAL;
}
+ /* Check whether already attached */
+ list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
+ if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
+ pr_err("Tried to attach %s but duplicate UUID already attached",
+ buf);
+
+ return -EINVAL;
+ }
+ }
+
u = uuid_find(c, dc->sb.uuid);
if (u &&
@@ -1204,7 +1215,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
return;
err:
- pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+ pr_notice("error %s: %s", bdevname(bdev, name), err);
bcache_device_stop(&dc->disk);
}
@@ -1274,7 +1285,7 @@ static int flash_devs_run(struct cache_set *c)
struct uuid_entry *u;
for (u = c->uuids;
- u < c->uuids + c->devices_max_used && !ret;
+ u < c->uuids + c->nr_uuids && !ret;
u++)
if (UUID_FLASH_ONLY(u))
ret = flash_dev_run(c, u);
@@ -1883,6 +1894,8 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
const char *err = NULL; /* must be set for any error case */
int ret = 0;
+ bdevname(bdev, name);
+
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
ca->bdev->bd_holder = ca;
@@ -1891,11 +1904,12 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
get_page(sb_page);
- if (blk_queue_discard(bdev_get_queue(ca->bdev)))
+ if (blk_queue_discard(bdev_get_queue(bdev)))
ca->discard = CACHE_DISCARD(&ca->sb);
ret = cache_alloc(ca);
if (ret != 0) {
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
if (ret == -ENOMEM)
err = "cache_alloc(): -ENOMEM";
else
@@ -1918,14 +1932,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
goto out;
}
- pr_info("registered cache device %s", bdevname(bdev, name));
+ pr_info("registered cache device %s", name);
out:
kobject_put(&ca->kobj);
err:
if (err)
- pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+ pr_notice("error %s: %s", name, err);
return ret;
}
@@ -2014,6 +2028,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (err)
goto err_close;
+ err = "failed to register device";
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc)
@@ -2028,7 +2043,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
goto err_close;
if (register_cache(sb, sb_page, bdev, ca) != 0)
- goto err_close;
+ goto err;
}
out:
if (sb_page)
@@ -2041,7 +2056,7 @@ out:
err_close:
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
err:
- pr_info("error opening %s: %s", path, err);
+ pr_info("error %s: %s", path, err);
ret = -EINVAL;
goto out;
}
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 414c9af54ded..aa2032fa80d4 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -386,9 +386,6 @@ static void __cache_size_refresh(void)
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
enum data_mode *data_mode)
{
- unsigned noio_flag;
- void *ptr;
-
if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
*data_mode = DATA_MODE_SLAB;
return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@@ -412,16 +409,15 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
* all allocations done by this process (including pagetables) are done
* as if GFP_NOIO was specified.
*/
+ if (gfp_mask & __GFP_NORETRY) {
+ unsigned noio_flag = memalloc_noio_save();
+ void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
- if (gfp_mask & __GFP_NORETRY)
- noio_flag = memalloc_noio_save();
-
- ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
-
- if (gfp_mask & __GFP_NORETRY)
memalloc_noio_restore(noio_flag);
+ return ptr;
+ }
- return ptr;
+ return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
}
/*
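
The restructuring above scopes the NOIO protection to a single block: the
save/restore pair and the allocation it guards now live together. A minimal
sketch of the same pattern, assuming the 3-argument __vmalloc() of this
kernel:

	static void *alloc_noio(unsigned long size, gfp_t gfp_mask)
	{
		if (gfp_mask & __GFP_NORETRY) {
			/* Forbid I/O recursion only around this allocation. */
			unsigned int noio_flag = memalloc_noio_save();
			void *ptr = __vmalloc(size, gfp_mask, PAGE_KERNEL);

			memalloc_noio_restore(noio_flag);
			return ptr;
		}

		return __vmalloc(size, gfp_mask, PAGE_KERNEL);
	}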
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 7d3e572072f5..a05a560d3cba 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -211,29 +211,27 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
- } else if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+ } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
-
- if (m->queue_mode == DM_TYPE_BIO_BASED) {
- /*
- * bio-based doesn't support any direct scsi_dh management;
- * it just discovers if a scsi_dh is attached.
- */
- set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
- }
- }
-
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
- set_bit(MPATHF_QUEUE_IO, &m->flags);
- atomic_set(&m->pg_init_in_progress, 0);
- atomic_set(&m->pg_init_count, 0);
- m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
- init_waitqueue_head(&m->pg_init_wait);
+ /*
+ * bio-based doesn't support any direct scsi_dh management;
+ * it just discovers if a scsi_dh is attached.
+ */
+ set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
}
dm_table_set_type(ti->table, m->queue_mode);
+ /*
+ * Init fields that are only used when a scsi_dh is attached
+ * - must do this unconditionally (really doesn't hurt non-SCSI uses)
+ */
+ set_bit(MPATHF_QUEUE_IO, &m->flags);
+ atomic_set(&m->pg_init_in_progress, 0);
+ atomic_set(&m->pg_init_count, 0);
+ m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
+ init_waitqueue_head(&m->pg_init_wait);
+
return 0;
}
@@ -337,9 +335,6 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
{
m->current_pg = pg;
- if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
- return;
-
/* Must we initialise the PG first, and queue I/O till it's ready? */
if (m->hw_handler_name) {
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
@@ -385,8 +380,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
unsigned bypassed = 1;
if (!atomic_read(&m->nr_valid_paths)) {
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
- clear_bit(MPATHF_QUEUE_IO, &m->flags);
+ clear_bit(MPATHF_QUEUE_IO, &m->flags);
goto failed;
}
@@ -599,7 +593,7 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
return pgpath;
}
-static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
+static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
{
struct pgpath *pgpath;
unsigned long flags;
@@ -634,8 +628,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
{
struct pgpath *pgpath;
- if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
- pgpath = __map_bio_nvme(m, bio);
+ if (!m->hw_handler_name)
+ pgpath = __map_bio_fast(m, bio);
else
pgpath = __map_bio(m, bio);
@@ -675,8 +669,7 @@ static void process_queued_io_list(struct multipath *m)
{
if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
- else if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ else if (m->queue_mode == DM_TYPE_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
}
@@ -811,15 +804,14 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
return 0;
}
-static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, char **error)
+static int setup_scsi_dh(struct block_device *bdev, struct multipath *m,
+ const char *attached_handler_name, char **error)
{
struct request_queue *q = bdev_get_queue(bdev);
- const char *attached_handler_name;
int r;
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
retain:
- attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
if (attached_handler_name) {
/*
* Clear any hw_handler_params associated with a
@@ -873,6 +865,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
int r;
struct pgpath *p;
struct multipath *m = ti->private;
+ struct request_queue *q;
+ const char *attached_handler_name;
/* we need at least a path arg */
if (as->argc < 1) {
@@ -891,9 +885,11 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
goto bad;
}
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+ q = bdev_get_queue(p->path.dev->bdev);
+ attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
+ if (attached_handler_name) {
INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
- r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
+ r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error);
if (r) {
dm_put_device(ti, p->path.dev);
goto bad;
@@ -1001,8 +997,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
if (!hw_argc)
return 0;
- if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+ if (m->queue_mode == DM_TYPE_BIO_BASED) {
dm_consume_args(as, hw_argc);
DMERR("bio-based multipath doesn't allow hardware handler args");
return 0;
@@ -1091,8 +1086,6 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
if (!strcasecmp(queue_mode_name, "bio"))
m->queue_mode = DM_TYPE_BIO_BASED;
- else if (!strcasecmp(queue_mode_name, "nvme"))
- m->queue_mode = DM_TYPE_NVME_BIO_BASED;
else if (!strcasecmp(queue_mode_name, "rq"))
m->queue_mode = DM_TYPE_REQUEST_BASED;
else if (!strcasecmp(queue_mode_name, "mq"))
@@ -1193,7 +1186,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
- if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ if (m->queue_mode == DM_TYPE_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
@@ -1730,9 +1723,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
case DM_TYPE_BIO_BASED:
DMEMIT("queue_mode bio ");
break;
- case DM_TYPE_NVME_BIO_BASED:
- DMEMIT("queue_mode nvme ");
- break;
case DM_TYPE_MQ_REQUEST_BASED:
DMEMIT("queue_mode mq ");
break;
@@ -2030,8 +2020,9 @@ static int multipath_busy(struct dm_target *ti)
*---------------------------------------------------------------*/
static struct target_type multipath_target = {
.name = "multipath",
- .version = {1, 12, 0},
- .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
+ .version = {1, 13, 0},
+ .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
+ DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7ef469e902c6..c1d1034ff7b7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3408,9 +3408,10 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
- if (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
- test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
- test_bit(MD_RECOVERY_RUNNING, &recovery))
+ if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
+ (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
+ test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
+ test_bit(MD_RECOVERY_RUNNING, &recovery)))
r = mddev->curr_resync_completed;
else
r = mddev->recovery_cp;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5fe7ec356c33..7eb3e2a3c07d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -942,17 +942,12 @@ static int dm_table_determine_type(struct dm_table *t)
if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */
- if (t->type == DM_TYPE_BIO_BASED)
- return 0;
- else if (t->type == DM_TYPE_NVME_BIO_BASED) {
- if (!dm_table_does_not_support_partial_completion(t)) {
- DMERR("nvme bio-based is only possible with devices"
- " that don't support partial completion");
- return -EINVAL;
- }
- /* Fallthru, also verify all devices are blk-mq */
+ if (t->type == DM_TYPE_BIO_BASED) {
+ /* possibly upgrade to a variant of bio-based */
+ goto verify_bio_based;
}
BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
+ BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
goto verify_rq_based;
}
@@ -985,6 +980,7 @@ static int dm_table_determine_type(struct dm_table *t)
}
if (bio_based) {
+verify_bio_based:
/* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED;
if (dm_table_supports_dax(t) ||
@@ -1755,7 +1751,7 @@ static int device_no_partial_completion(struct dm_target *ti, struct dm_dev *dev
char b[BDEVNAME_SIZE];
/* For now, NVMe devices are the only devices of this class */
- return (strncmp(bdevname(dev->bdev, b), "nvme", 3) == 0);
+ return (strncmp(bdevname(dev->bdev, b), "nvme", 4) == 0);
}
static bool dm_table_does_not_support_partial_completion(struct dm_table *t)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d6de00f367ef..45328d8b2859 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -458,9 +458,11 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
-static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
- struct block_device **bdev,
- fmode_t *mode)
+static char *_dm_claim_ptr = "I belong to device-mapper";
+
+static int dm_get_bdev_for_ioctl(struct mapped_device *md,
+ struct block_device **bdev,
+ fmode_t *mode)
{
struct dm_target *tgt;
struct dm_table *map;
@@ -490,6 +492,10 @@ retry:
goto out;
bdgrab(*bdev);
+ r = blkdev_get(*bdev, *mode, _dm_claim_ptr);
+ if (r < 0)
+ goto out;
+
dm_put_live_table(md, srcu_idx);
return r;
@@ -508,7 +514,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
struct mapped_device *md = bdev->bd_disk->private_data;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -528,7 +534,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -708,14 +714,13 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
static int open_table_device(struct table_device *td, dev_t dev,
struct mapped_device *md)
{
- static char *_claim_ptr = "I belong to device-mapper";
struct block_device *bdev;
int r;
BUG_ON(td->dm_dev.bdev);
- bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
+ bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
@@ -903,7 +908,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- bio->bi_status = io_error;
+ if (io_error)
+ bio->bi_status = io_error;
bio_endio(bio);
}
}
@@ -3010,7 +3016,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3020,7 +3026,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3031,7 +3037,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3041,7 +3047,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3053,7 +3059,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3063,7 +3069,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3074,7 +3080,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3084,7 +3090,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
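
Every dm_get_bdev_for_ioctl() caller above must now drop a full blkdev_get()
reference with blkdev_put() using the same mode, rather than a bare bdput().
A minimal sketch of the pairing (do_pr_op() is a hypothetical stand-in for
the per-ioctl work):

	static int dm_pr_example(struct mapped_device *md)
	{
		struct block_device *bdev;
		fmode_t mode;
		int r;

		r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
		if (r < 0)
			return r;

		r = do_pr_op(bdev);	/* hypothetical */

		blkdev_put(bdev, mode);	/* pairs with the get above */
		return r;
	}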
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index e40065bdbfc8..0a7e99d62c69 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -157,7 +157,7 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
}
rcu_read_unlock();
- seq_printf (seq, "]");
+ seq_putc(seq, ']');
}
static int multipath_congested(struct mddev *mddev, int bits)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index bc67ab6844f0..254e44e44668 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -801,6 +801,9 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio;
int ff = 0;
+ if (!page)
+ return;
+
if (test_bit(Faulty, &rdev->flags))
return;
@@ -5452,6 +5455,7 @@ int md_run(struct mddev *mddev)
* the only valid external interface is through the md
* device.
*/
+ mddev->has_superblocks = false;
rdev_for_each(rdev, mddev) {
if (test_bit(Faulty, &rdev->flags))
continue;
@@ -5465,6 +5469,9 @@ int md_run(struct mddev *mddev)
set_disk_ro(mddev->gendisk, 1);
}
+ if (rdev->sb_page)
+ mddev->has_superblocks = true;
+
/* perform some consistency tests on the device.
* We don't want the data to overlap the metadata,
* Internal Bitmap issues have been handled elsewhere.
@@ -5497,8 +5504,10 @@ int md_run(struct mddev *mddev)
}
if (mddev->sync_set == NULL) {
mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!mddev->sync_set)
- return -ENOMEM;
+ if (!mddev->sync_set) {
+ err = -ENOMEM;
+ goto abort;
+ }
}
spin_lock(&pers_lock);
@@ -5511,7 +5520,8 @@ int md_run(struct mddev *mddev)
else
pr_warn("md: personality for level %s is not loaded!\n",
mddev->clevel);
- return -EINVAL;
+ err = -EINVAL;
+ goto abort;
}
spin_unlock(&pers_lock);
if (mddev->level != pers->level) {
@@ -5524,7 +5534,8 @@ int md_run(struct mddev *mddev)
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
module_put(pers->owner);
- return -EINVAL;
+ err = -EINVAL;
+ goto abort;
}
if (pers->sync_request) {
@@ -5593,7 +5604,7 @@ int md_run(struct mddev *mddev)
mddev->private = NULL;
module_put(pers->owner);
bitmap_destroy(mddev);
- return err;
+ goto abort;
}
if (mddev->queue) {
bool nonrot = true;
@@ -5655,6 +5666,18 @@ int md_run(struct mddev *mddev)
sysfs_notify_dirent_safe(mddev->sysfs_action);
sysfs_notify(&mddev->kobj, NULL, "degraded");
return 0;
+
+abort:
+ if (mddev->bio_set) {
+ bioset_free(mddev->bio_set);
+ mddev->bio_set = NULL;
+ }
+ if (mddev->sync_set) {
+ bioset_free(mddev->sync_set);
+ mddev->sync_set = NULL;
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(md_run);
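
The abort label added to md_run() above converts scattered early returns into
a single unwind point so neither bioset leaks. A generic sketch of that
shape, with make_a()/make_b()/configure()/free_a()/free_b() as hypothetical
helpers:

	static int setup_two(void)
	{
		void *a, *b = NULL;
		int err;

		a = make_a();
		if (!a)
			return -ENOMEM;

		b = make_b();
		if (!b) {
			err = -ENOMEM;
			goto abort;
		}

		if (configure(a, b)) {	/* hypothetical */
			err = -EINVAL;
			goto abort;
		}
		return 0;

	abort:
		if (b)
			free_b(b);
		free_a(a);	/* every failure path frees what exists */
		return err;
	}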
@@ -8049,6 +8072,7 @@ EXPORT_SYMBOL(md_done_sync);
bool md_write_start(struct mddev *mddev, struct bio *bi)
{
int did_change = 0;
+
if (bio_data_dir(bi) != WRITE)
return true;
@@ -8081,6 +8105,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
rcu_read_unlock();
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
+ if (!mddev->has_superblocks)
+ return true;
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
mddev->suspended);
@@ -8543,6 +8569,19 @@ void md_do_sync(struct md_thread *thread)
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+ mddev->delta_disks > 0 &&
+ mddev->pers->finish_reshape &&
+ mddev->pers->size &&
+ mddev->queue) {
+ mddev_lock_nointr(mddev);
+ md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
+ mddev_unlock(mddev);
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ revalidate_disk(mddev->gendisk);
+ }
+
spin_lock(&mddev->lock);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* We completed so min/max setting can be forgotten if used. */
@@ -8569,6 +8608,10 @@ static int remove_and_add_spares(struct mddev *mddev,
int removed = 0;
bool remove_some = false;
+ if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ /* Mustn't remove devices when resync thread is running */
+ return 0;
+
rdev_for_each(rdev, mddev) {
if ((this == NULL || rdev == this) &&
rdev->raid_disk >= 0 &&
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 58cd20a5e85e..fbc925cce810 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -468,6 +468,8 @@ struct mddev {
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */
+
+ bool has_superblocks:1;
};
enum recovery_flags {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b2eae332e1a2..fe872dc6712e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1108,7 +1108,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
bio_copy_data(behind_bio, bio);
skip_copy:
- r1_bio->behind_master_bio = behind_bio;;
+ r1_bio->behind_master_bio = behind_bio;
set_bit(R1BIO_BehindIO, &r1_bio->state);
return;
@@ -1809,6 +1809,17 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
struct md_rdev *repl =
conf->mirrors[conf->raid_disks + number].rdev;
freeze_array(conf, 0);
+ if (atomic_read(&repl->nr_pending)) {
+ /* Some queued I/O on the retry_list still holds
+ * repl, so the replacement cannot be set to NULL
+ * here without risking a NULL pointer dereference
+ * in sync_request_write and handle_write_finished.
+ */
+ err = -EBUSY;
+ unfreeze_array(conf);
+ goto abort;
+ }
clear_bit(Replacement, &repl->flags);
p->rdev = repl;
conf->mirrors[conf->raid_disks + number].rdev = NULL;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index c7294e7557e0..eb84bc68e2fd 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -26,6 +26,18 @@
#define BARRIER_BUCKETS_NR_BITS (PAGE_SHIFT - ilog2(sizeof(atomic_t)))
#define BARRIER_BUCKETS_NR (1<<BARRIER_BUCKETS_NR_BITS)
+/* Note: raid1_info.rdev can be set to NULL asynchronously by raid1_remove_disk.
+ * There are three safe ways to access raid1_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery is known to be happening - i.e. in code that is
+ * called as part of performing resync/recovery.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ * and if it is non-NULL, increment rdev->nr_pending before dropping the
+ * RCU lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and, if
+ * it has been incremented, the pointer is put back in .rdev.
+ */
+
struct raid1_info {
struct md_rdev *rdev;
sector_t head_position;
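
A minimal sketch of access rule 3/ from the comment above — pin the rdev via
nr_pending before leaving the RCU read-side critical section (pin_rdev() is a
hypothetical helper; the real callers open-code this):

	static struct md_rdev *pin_rdev(struct raid1_info *info)
	{
		struct md_rdev *rdev;

		rcu_read_lock();
		rdev = rcu_dereference(info->rdev);
		if (rdev)
			atomic_inc(&rdev->nr_pending);
		rcu_read_unlock();

		/* Caller decrements nr_pending when the I/O completes. */
		return rdev;
	}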
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 99c9207899a7..c5e6c60fc0d4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -141,7 +141,7 @@ static void r10bio_pool_free(void *r10_bio, void *data)
#define RESYNC_WINDOW (1024*1024)
/* maximum number of concurrent requests, memory permitting */
#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
-#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW)
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
/*
@@ -2655,7 +2655,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
for (m = 0; m < conf->copies; m++) {
int dev = r10_bio->devs[m].devnum;
rdev = conf->mirrors[dev].rdev;
- if (r10_bio->devs[m].bio == NULL)
+ if (r10_bio->devs[m].bio == NULL ||
+ r10_bio->devs[m].bio->bi_end_io == NULL)
continue;
if (!r10_bio->devs[m].bio->bi_status) {
rdev_clear_badblocks(
@@ -2670,7 +2671,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
md_error(conf->mddev, rdev);
}
rdev = conf->mirrors[dev].replacement;
- if (r10_bio->devs[m].repl_bio == NULL)
+ if (r10_bio->devs[m].repl_bio == NULL ||
+ r10_bio->devs[m].repl_bio->bi_end_io == NULL)
continue;
if (!r10_bio->devs[m].repl_bio->bi_status) {
@@ -3782,7 +3784,7 @@ static int raid10_run(struct mddev *mddev)
if (fc > 1 || fo > 0) {
pr_err("only near layout is supported by clustered"
" raid10\n");
- goto out;
+ goto out_free_conf;
}
}
@@ -4830,17 +4832,11 @@ static void raid10_finish_reshape(struct mddev *mddev)
return;
if (mddev->delta_disks > 0) {
- sector_t size = raid10_size(mddev, 0, 0);
- md_set_array_sectors(mddev, size);
if (mddev->recovery_cp > mddev->resync_max_sectors) {
mddev->recovery_cp = mddev->resync_max_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
- mddev->resync_max_sectors = size;
- if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
- }
+ mddev->resync_max_sectors = mddev->array_sectors;
} else {
int d;
rcu_read_lock();
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index db2ac22ac1b4..e2e8840de9bf 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -2,6 +2,19 @@
#ifndef _RAID10_H
#define _RAID10_H
+/* Note: raid10_info.rdev can be set to NULL asynchronously by
+ * raid10_remove_disk.
+ * There are three safe ways to access raid10_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
+ * that is called as part of performing resync/recovery/reshape.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ * and if it is non-NULL, increment rdev->nr_pending before dropping the
+ * RCU lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and, if
+ * it has been incremented, the pointer is put back in .rdev.
+ */
+
struct raid10_info {
struct md_rdev *rdev, *replacement;
sector_t head_position;
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 0c76bcedfc1c..a001808a2b77 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -44,6 +44,7 @@ extern void ppl_write_stripe_run(struct r5conf *conf);
extern void ppl_stripe_write_finished(struct stripe_head *sh);
extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
extern void ppl_quiesce(struct r5conf *conf, int quiesce);
+extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
static inline bool raid5_has_ppl(struct r5conf *conf)
{
@@ -104,7 +105,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
if (conf->log)
ret = r5l_handle_flush_request(conf->log, bio);
else if (raid5_has_ppl(conf))
- ret = 0;
+ ret = ppl_handle_flush_request(conf->log, bio);
return ret;
}
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 2764c2290062..42890a08375b 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -693,6 +693,16 @@ void ppl_quiesce(struct r5conf *conf, int quiesce)
}
}
+int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
+{
+ if (bio->bi_iter.bi_size == 0) {
+ bio_endio(bio);
+ return 0;
+ }
+ bio->bi_opf &= ~REQ_PREFLUSH;
+ return -EAGAIN;
+}
+
void ppl_stripe_write_finished(struct stripe_head *sh)
{
struct ppl_io_unit *io;
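
The caller contract for log_handle_flush_request() above: 0 means the (empty)
flush bio was completed by the log, -EAGAIN means REQ_PREFLUSH was stripped
and the bio must continue down the normal path. A sketch of a caller
(submit_normally() is hypothetical):

	static void handle_flush(struct r5conf *conf, struct bio *bio)
	{
		int ret = log_handle_flush_request(conf, bio);

		if (ret == 0)
			return;	/* empty flush already completed */
		if (ret == -EAGAIN)
			submit_normally(conf, bio);	/* hypothetical */
	}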
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 50d01144b805..b5d2601483e3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2196,15 +2196,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
static int grow_stripes(struct r5conf *conf, int num)
{
struct kmem_cache *sc;
+ size_t namelen = sizeof(conf->cache_name[0]);
int devs = max(conf->raid_disks, conf->previous_raid_disks);
if (conf->mddev->gendisk)
- sprintf(conf->cache_name[0],
+ snprintf(conf->cache_name[0], namelen,
"raid%d-%s", conf->level, mdname(conf->mddev));
else
- sprintf(conf->cache_name[0],
+ snprintf(conf->cache_name[0], namelen,
"raid%d-%p", conf->level, conf->mddev);
- sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+ snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
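
The "%.27s-alt" precision above guarantees the derived name fits the fixed
cache_name buffer: 27 base characters + 4 for "-alt" + the NUL terminator is
exactly 32 bytes. A minimal sketch, assuming the 32-byte buffers used here:

	static void make_alt_name(char alt[32], const char *base)
	{
		/* 27 + strlen("-alt") + 1 == 32, so "-alt" is never cut. */
		snprintf(alt, 32, "%.27s-alt", base);
	}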
@@ -6764,9 +6765,7 @@ static void free_conf(struct r5conf *conf)
log_exit(conf);
- if (conf->shrinker.nr_deferred)
- unregister_shrinker(&conf->shrinker);
-
+ unregister_shrinker(&conf->shrinker);
free_thread_groups(conf);
shrink_stripes(conf);
raid5_free_percpu(conf);
@@ -8001,13 +8000,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
- if (mddev->delta_disks > 0) {
- md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
- if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
- }
- } else {
+ if (mddev->delta_disks <= 0) {
int d;
spin_lock_irq(&conf->device_lock);
mddev->degraded = raid5_calc_degraded(conf);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 2e6123825095..3f8da26032ac 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -450,6 +450,18 @@ enum {
* HANDLE gets cleared if stripe_handle leaves nothing locked.
*/
+/* Note: disk_info.rdev can be set to NULL asynchronously by raid5_remove_disk.
+ * There are three safe ways to access disk_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery/reshape is known to be happening - i.e. in code that
+ * is called as part of performing resync/recovery/reshape.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ * and if it is non-NULL, increment rdev->nr_pending before dropping the RCU
+ * lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and, if
+ * it has been incremented, the pointer is put back in .rdev.
+ */
+
struct disk_info {
struct md_rdev *rdev, *replacement;
struct page *extra_page; /* extra page to use in prexor */
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index 145e12bfb819..372c074bb1b9 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -147,6 +147,8 @@ config DVB_CORE
config DVB_MMAP
bool "Enable DVB memory-mapped API (EXPERIMENTAL)"
depends on DVB_CORE
+ depends on VIDEO_V4L2=y || VIDEO_V4L2=DVB_CORE
+ select VIDEOBUF2_VMALLOC
default n
help
This option enables DVB experimental memory-mapped API, with
diff --git a/drivers/media/common/videobuf2/Kconfig b/drivers/media/common/videobuf2/Kconfig
index 5df05250de94..17c32ea58395 100644
--- a/drivers/media/common/videobuf2/Kconfig
+++ b/drivers/media/common/videobuf2/Kconfig
@@ -3,6 +3,9 @@ config VIDEOBUF2_CORE
select DMA_SHARED_BUFFER
tristate
+config VIDEOBUF2_V4L2
+ tristate
+
config VIDEOBUF2_MEMOPS
tristate
select FRAME_VECTOR
diff --git a/drivers/media/common/videobuf2/Makefile b/drivers/media/common/videobuf2/Makefile
index 19de5ccda20b..77bebe8b202f 100644
--- a/drivers/media/common/videobuf2/Makefile
+++ b/drivers/media/common/videobuf2/Makefile
@@ -1,5 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+videobuf2-common-objs := videobuf2-core.o
-obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-core.o videobuf2-v4l2.o
+ifeq ($(CONFIG_TRACEPOINTS),y)
+ videobuf2-common-objs += vb2-trace.o
+endif
+
+obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-common.o
+obj-$(CONFIG_VIDEOBUF2_V4L2) += videobuf2-v4l2.o
obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o
obj-$(CONFIG_VIDEOBUF2_VMALLOC) += videobuf2-vmalloc.o
obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o
diff --git a/drivers/media/v4l2-core/vb2-trace.c b/drivers/media/common/videobuf2/vb2-trace.c
index 4c0f39d271f0..4c0f39d271f0 100644
--- a/drivers/media/v4l2-core/vb2-trace.c
+++ b/drivers/media/common/videobuf2/vb2-trace.c
diff --git a/drivers/media/dvb-core/Makefile b/drivers/media/dvb-core/Makefile
index 3a105d82019a..62b028ded9f7 100644
--- a/drivers/media/dvb-core/Makefile
+++ b/drivers/media/dvb-core/Makefile
@@ -4,7 +4,7 @@
#
dvb-net-$(CONFIG_DVB_NET) := dvb_net.o
-dvb-vb2-$(CONFIG_DVB_MMSP) := dvb_vb2.o
+dvb-vb2-$(CONFIG_DVB_MMAP) := dvb_vb2.o
dvb-core-objs := dvbdev.o dmxdev.o dvb_demux.o \
dvb_ca_en50221.o dvb_frontend.o \
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index 6d53af00190e..61a750fae465 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -128,11 +128,7 @@ static int dvb_dvr_open(struct inode *inode, struct file *file)
struct dvb_device *dvbdev = file->private_data;
struct dmxdev *dmxdev = dvbdev->priv;
struct dmx_frontend *front;
-#ifndef DVB_MMAP
bool need_ringbuffer = false;
-#else
- const bool need_ringbuffer = true;
-#endif
dprintk("%s\n", __func__);
@@ -144,17 +140,31 @@ static int dvb_dvr_open(struct inode *inode, struct file *file)
return -ENODEV;
}
-#ifndef DVB_MMAP
+ dmxdev->may_do_mmap = 0;
+
+ /*
+ * The logic here is a little tricky due to the ifdef.
+ *
+ * The ringbuffer is used for both read and mmap.
+ *
+ * It is not needed, however, in two situations:
+ * - Write devices (access with O_WRONLY);
+ * - For duplex device nodes, opened with O_RDWR.
+ */
+
if ((file->f_flags & O_ACCMODE) == O_RDONLY)
need_ringbuffer = true;
-#else
- if ((file->f_flags & O_ACCMODE) == O_RDWR) {
+ else if ((file->f_flags & O_ACCMODE) == O_RDWR) {
if (!(dmxdev->capabilities & DMXDEV_CAP_DUPLEX)) {
+#ifdef CONFIG_DVB_MMAP
+ dmxdev->may_do_mmap = 1;
+ need_ringbuffer = true;
+#else
mutex_unlock(&dmxdev->mutex);
return -EOPNOTSUPP;
+#endif
}
}
-#endif
if (need_ringbuffer) {
void *mem;
@@ -169,8 +179,9 @@ static int dvb_dvr_open(struct inode *inode, struct file *file)
return -ENOMEM;
}
dvb_ringbuffer_init(&dmxdev->dvr_buffer, mem, DVR_BUFFER_SIZE);
- dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr",
- file->f_flags & O_NONBLOCK);
+ if (dmxdev->may_do_mmap)
+ dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr",
+ file->f_flags & O_NONBLOCK);
dvbdev->readers--;
}
@@ -200,11 +211,6 @@ static int dvb_dvr_release(struct inode *inode, struct file *file)
{
struct dvb_device *dvbdev = file->private_data;
struct dmxdev *dmxdev = dvbdev->priv;
-#ifndef DVB_MMAP
- bool need_ringbuffer = false;
-#else
- const bool need_ringbuffer = true;
-#endif
mutex_lock(&dmxdev->mutex);
@@ -213,15 +219,14 @@ static int dvb_dvr_release(struct inode *inode, struct file *file)
dmxdev->demux->connect_frontend(dmxdev->demux,
dmxdev->dvr_orig_fe);
}
-#ifndef DVB_MMAP
- if ((file->f_flags & O_ACCMODE) == O_RDONLY)
- need_ringbuffer = true;
-#endif
- if (need_ringbuffer) {
- if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx))
- dvb_vb2_stream_off(&dmxdev->dvr_vb2_ctx);
- dvb_vb2_release(&dmxdev->dvr_vb2_ctx);
+ if (((file->f_flags & O_ACCMODE) == O_RDONLY) ||
+ dmxdev->may_do_mmap) {
+ if (dmxdev->may_do_mmap) {
+ if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx))
+ dvb_vb2_stream_off(&dmxdev->dvr_vb2_ctx);
+ dvb_vb2_release(&dmxdev->dvr_vb2_ctx);
+ }
dvbdev->readers++;
if (dmxdev->dvr_buffer.data) {
void *mem = dmxdev->dvr_buffer.data;
@@ -380,7 +385,8 @@ static void dvb_dmxdev_filter_timer(struct dmxdev_filter *dmxdevfilter)
static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len,
const u8 *buffer2, size_t buffer2_len,
- struct dmx_section_filter *filter)
+ struct dmx_section_filter *filter,
+ u32 *buffer_flags)
{
struct dmxdev_filter *dmxdevfilter = filter->priv;
int ret;
@@ -399,10 +405,12 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len,
dprintk("section callback %*ph\n", 6, buffer1);
if (dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx)) {
ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx,
- buffer1, buffer1_len);
+ buffer1, buffer1_len,
+ buffer_flags);
if (ret == buffer1_len)
ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx,
- buffer2, buffer2_len);
+ buffer2, buffer2_len,
+ buffer_flags);
} else {
ret = dvb_dmxdev_buffer_write(&dmxdevfilter->buffer,
buffer1, buffer1_len);
@@ -422,11 +430,12 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len,
static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len,
const u8 *buffer2, size_t buffer2_len,
- struct dmx_ts_feed *feed)
+ struct dmx_ts_feed *feed,
+ u32 *buffer_flags)
{
struct dmxdev_filter *dmxdevfilter = feed->priv;
struct dvb_ringbuffer *buffer;
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
struct dvb_vb2_ctx *ctx;
#endif
int ret;
@@ -440,20 +449,22 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len,
if (dmxdevfilter->params.pes.output == DMX_OUT_TAP ||
dmxdevfilter->params.pes.output == DMX_OUT_TSDEMUX_TAP) {
buffer = &dmxdevfilter->buffer;
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
ctx = &dmxdevfilter->vb2_ctx;
#endif
} else {
buffer = &dmxdevfilter->dev->dvr_buffer;
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
ctx = &dmxdevfilter->dev->dvr_vb2_ctx;
#endif
}
if (dvb_vb2_is_streaming(ctx)) {
- ret = dvb_vb2_fill_buffer(ctx, buffer1, buffer1_len);
+ ret = dvb_vb2_fill_buffer(ctx, buffer1, buffer1_len,
+ buffer_flags);
if (ret == buffer1_len)
- ret = dvb_vb2_fill_buffer(ctx, buffer2, buffer2_len);
+ ret = dvb_vb2_fill_buffer(ctx, buffer2, buffer2_len,
+ buffer_flags);
} else {
if (buffer->error) {
spin_unlock(&dmxdevfilter->dev->lock);
@@ -802,6 +813,12 @@ static int dvb_demux_open(struct inode *inode, struct file *file)
mutex_init(&dmxdevfilter->mutex);
file->private_data = dmxdevfilter;
+#ifdef CONFIG_DVB_MMAP
+ dmxdev->may_do_mmap = 1;
+#else
+ dmxdev->may_do_mmap = 0;
+#endif
+
dvb_ringbuffer_init(&dmxdevfilter->buffer, NULL, 8192);
dvb_vb2_init(&dmxdevfilter->vb2_ctx, "demux_filter",
file->f_flags & O_NONBLOCK);
@@ -1111,7 +1128,7 @@ static int dvb_demux_do_ioctl(struct file *file,
mutex_unlock(&dmxdevfilter->mutex);
break;
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
case DMX_REQBUFS:
if (mutex_lock_interruptible(&dmxdevfilter->mutex)) {
mutex_unlock(&dmxdev->mutex);
@@ -1160,7 +1177,7 @@ static int dvb_demux_do_ioctl(struct file *file,
break;
#endif
default:
- ret = -EINVAL;
+ ret = -ENOTTY;
break;
}
mutex_unlock(&dmxdev->mutex);
@@ -1199,13 +1216,16 @@ static __poll_t dvb_demux_poll(struct file *file, poll_table *wait)
return mask;
}
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
static int dvb_demux_mmap(struct file *file, struct vm_area_struct *vma)
{
struct dmxdev_filter *dmxdevfilter = file->private_data;
struct dmxdev *dmxdev = dmxdevfilter->dev;
int ret;
+ if (!dmxdev->may_do_mmap)
+ return -ENOTTY;
+
if (mutex_lock_interruptible(&dmxdev->mutex))
return -ERESTARTSYS;
@@ -1249,7 +1269,7 @@ static const struct file_operations dvb_demux_fops = {
.release = dvb_demux_release,
.poll = dvb_demux_poll,
.llseek = default_llseek,
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
.mmap = dvb_demux_mmap,
#endif
};
@@ -1280,7 +1300,7 @@ static int dvb_dvr_do_ioctl(struct file *file,
ret = dvb_dvr_set_buffer_size(dmxdev, arg);
break;
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
case DMX_REQBUFS:
ret = dvb_vb2_reqbufs(&dmxdev->dvr_vb2_ctx, parg);
break;
@@ -1304,7 +1324,7 @@ static int dvb_dvr_do_ioctl(struct file *file,
break;
#endif
default:
- ret = -EINVAL;
+ ret = -ENOTTY;
break;
}
mutex_unlock(&dmxdev->mutex);
@@ -1322,11 +1342,6 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait)
struct dvb_device *dvbdev = file->private_data;
struct dmxdev *dmxdev = dvbdev->priv;
__poll_t mask = 0;
-#ifndef DVB_MMAP
- bool need_ringbuffer = false;
-#else
- const bool need_ringbuffer = true;
-#endif
dprintk("%s\n", __func__);
@@ -1337,11 +1352,8 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait)
poll_wait(file, &dmxdev->dvr_buffer.queue, wait);
-#ifndef DVB_MMAP
- if ((file->f_flags & O_ACCMODE) == O_RDONLY)
- need_ringbuffer = true;
-#endif
- if (need_ringbuffer) {
+ if (((file->f_flags & O_ACCMODE) == O_RDONLY) ||
+ dmxdev->may_do_mmap) {
if (dmxdev->dvr_buffer.error)
mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI | EPOLLERR);
@@ -1353,13 +1365,16 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait)
return mask;
}
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
static int dvb_dvr_mmap(struct file *file, struct vm_area_struct *vma)
{
struct dvb_device *dvbdev = file->private_data;
struct dmxdev *dmxdev = dvbdev->priv;
int ret;
+ if (!dmxdev->may_do_mmap)
+ return -ENOTTY;
+
if (dmxdev->exit)
return -ENODEV;
@@ -1381,7 +1396,7 @@ static const struct file_operations dvb_dvr_fops = {
.release = dvb_dvr_release,
.poll = dvb_dvr_poll,
.llseek = default_llseek,
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
.mmap = dvb_dvr_mmap,
#endif
};
diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c
index 210eed0269b0..f45091246bdc 100644
--- a/drivers/media/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb-core/dvb_demux.c
@@ -55,6 +55,17 @@ MODULE_PARM_DESC(dvb_demux_feed_err_pkts,
dprintk(x); \
} while (0)
+#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
+# define dprintk_sect_loss(x...) dprintk(x)
+#else
+# define dprintk_sect_loss(x...)
+#endif
+
+#define set_buf_flags(__feed, __flag) \
+ do { \
+ (__feed)->buffer_flags |= (__flag); \
+ } while (0)
+
/******************************************************************************
* static inlined helper functions
******************************************************************************/
@@ -104,31 +115,30 @@ static inline int dvb_dmx_swfilter_payload(struct dvb_demux_feed *feed,
{
int count = payload(buf);
int p;
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
int ccok;
u8 cc;
-#endif
if (count == 0)
return -1;
p = 188 - count;
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
cc = buf[3] & 0x0f;
ccok = ((feed->cc + 1) & 0x0f) == cc;
feed->cc = cc;
- if (!ccok)
- dprintk("missed packet: %d instead of %d!\n",
- cc, (feed->cc + 1) & 0x0f);
-#endif
+ if (!ccok) {
+ set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+ dprintk_sect_loss("missed packet: %d instead of %d!\n",
+ cc, (feed->cc + 1) & 0x0f);
+ }
if (buf[1] & 0x40) // PUSI ?
feed->peslen = 0xfffa;
feed->peslen += count;
- return feed->cb.ts(&buf[p], count, NULL, 0, &feed->feed.ts);
+ return feed->cb.ts(&buf[p], count, NULL, 0, &feed->feed.ts,
+ &feed->buffer_flags);
}
static int dvb_dmx_swfilter_sectionfilter(struct dvb_demux_feed *feed,
@@ -150,7 +160,7 @@ static int dvb_dmx_swfilter_sectionfilter(struct dvb_demux_feed *feed,
return 0;
return feed->cb.sec(feed->feed.sec.secbuf, feed->feed.sec.seclen,
- NULL, 0, &f->filter);
+ NULL, 0, &f->filter, &feed->buffer_flags);
}
static inline int dvb_dmx_swfilter_section_feed(struct dvb_demux_feed *feed)
@@ -169,8 +179,10 @@ static inline int dvb_dmx_swfilter_section_feed(struct dvb_demux_feed *feed)
if (sec->check_crc) {
section_syntax_indicator = ((sec->secbuf[1] & 0x80) != 0);
if (section_syntax_indicator &&
- demux->check_crc32(feed, sec->secbuf, sec->seclen))
+ demux->check_crc32(feed, sec->secbuf, sec->seclen)) {
+ set_buf_flags(feed, DMX_BUFFER_FLAG_HAD_CRC32_DISCARD);
return -1;
+ }
}
do {
@@ -187,7 +199,6 @@ static void dvb_dmx_swfilter_section_new(struct dvb_demux_feed *feed)
{
struct dmx_section_feed *sec = &feed->feed.sec;
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
if (sec->secbufp < sec->tsfeedp) {
int n = sec->tsfeedp - sec->secbufp;
@@ -197,12 +208,13 @@ static void dvb_dmx_swfilter_section_new(struct dvb_demux_feed *feed)
* but just first and last.
*/
if (sec->secbuf[0] != 0xff || sec->secbuf[n - 1] != 0xff) {
- dprintk("section ts padding loss: %d/%d\n",
- n, sec->tsfeedp);
- dprintk("pad data: %*ph\n", n, sec->secbuf);
+ set_buf_flags(feed,
+ DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+ dprintk_sect_loss("section ts padding loss: %d/%d\n",
+ n, sec->tsfeedp);
+ dprintk_sect_loss("pad data: %*ph\n", n, sec->secbuf);
}
}
-#endif
sec->tsfeedp = sec->secbufp = sec->seclen = 0;
sec->secbuf = sec->secbuf_base;
@@ -237,11 +249,10 @@ static int dvb_dmx_swfilter_section_copy_dump(struct dvb_demux_feed *feed,
return 0;
if (sec->tsfeedp + len > DMX_MAX_SECFEED_SIZE) {
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
- dprintk("section buffer full loss: %d/%d\n",
- sec->tsfeedp + len - DMX_MAX_SECFEED_SIZE,
- DMX_MAX_SECFEED_SIZE);
-#endif
+ set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+ dprintk_sect_loss("section buffer full loss: %d/%d\n",
+ sec->tsfeedp + len - DMX_MAX_SECFEED_SIZE,
+ DMX_MAX_SECFEED_SIZE);
len = DMX_MAX_SECFEED_SIZE - sec->tsfeedp;
}
@@ -269,12 +280,13 @@ static int dvb_dmx_swfilter_section_copy_dump(struct dvb_demux_feed *feed,
sec->seclen = seclen;
sec->crc_val = ~0;
/* dump [secbuf .. secbuf+seclen) */
- if (feed->pusi_seen)
+ if (feed->pusi_seen) {
dvb_dmx_swfilter_section_feed(feed);
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
- else
- dprintk("pusi not seen, discarding section data\n");
-#endif
+ } else {
+ set_buf_flags(feed,
+ DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+ dprintk_sect_loss("pusi not seen, discarding section data\n");
+ }
sec->secbufp += seclen; /* secbufp and secbuf moving together is */
sec->secbuf += seclen; /* redundant but saves pointer arithmetic */
}
@@ -307,18 +319,22 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
}
if (!ccok || dc_i) {
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
- if (dc_i)
- dprintk("%d frame with disconnect indicator\n",
+ if (dc_i) {
+ set_buf_flags(feed,
+ DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR);
+ dprintk_sect_loss("%d frame with disconnect indicator\n",
cc);
- else
- dprintk("discontinuity: %d instead of %d. %d bytes lost\n",
+ } else {
+ set_buf_flags(feed,
+ DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+ dprintk_sect_loss("discontinuity: %d instead of %d. %d bytes lost\n",
cc, (feed->cc + 1) & 0x0f, count + 4);
+ }
/*
- * those bytes under sume circumstances will again be reported
+ * those bytes under some circumstances will again be reported
* in the following dvb_dmx_swfilter_section_new
*/
-#endif
+
/*
* Discontinuity detected. Reset pusi_seen to
* stop feeding of suspicious data until next PUSI=1 arrives
@@ -326,6 +342,7 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
* FIXME: does it make sense if the MPEG-TS is the one
* reporting discontinuity?
*/
+
feed->pusi_seen = false;
dvb_dmx_swfilter_section_new(feed);
}
@@ -345,11 +362,11 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
dvb_dmx_swfilter_section_new(feed);
dvb_dmx_swfilter_section_copy_dump(feed, after,
after_len);
+ } else if (count > 0) {
+ set_buf_flags(feed,
+ DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+ dprintk_sect_loss("PUSI=1 but %d bytes lost\n", count);
}
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
- else if (count > 0)
- dprintk("PUSI=1 but %d bytes lost\n", count);
-#endif
} else {
/* PUSI=0 (is not set), no section boundary */
dvb_dmx_swfilter_section_copy_dump(feed, &buf[p], count);
@@ -369,7 +386,8 @@ static inline void dvb_dmx_swfilter_packet_type(struct dvb_demux_feed *feed,
if (feed->ts_type & TS_PAYLOAD_ONLY)
dvb_dmx_swfilter_payload(feed, buf);
else
- feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts);
+ feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts,
+ &feed->buffer_flags);
}
/* Used only on full-featured devices */
if (feed->ts_type & TS_DECODER)
@@ -430,6 +448,11 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
}
if (buf[1] & 0x80) {
+ list_for_each_entry(feed, &demux->feed_list, list_head) {
+ if ((feed->pid != pid) && (feed->pid != 0x2000))
+ continue;
+ set_buf_flags(feed, DMX_BUFFER_FLAG_TEI);
+ }
dprintk_tscheck("TEI detected. PID=0x%x data1=0x%x\n",
pid, buf[1]);
/* data in this packet can't be trusted - drop it unless
@@ -445,6 +468,13 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
(demux->cnt_storage[pid] + 1) & 0xf;
if ((buf[3] & 0xf) != demux->cnt_storage[pid]) {
+ list_for_each_entry(feed, &demux->feed_list, list_head) {
+ if ((feed->pid != pid) && (feed->pid != 0x2000))
+ continue;
+ set_buf_flags(feed,
+ DMX_BUFFER_PKT_COUNTER_MISMATCH);
+ }
+
dprintk_tscheck("TS packet counter mismatch. PID=0x%x expected 0x%x got 0x%x\n",
pid, demux->cnt_storage[pid],
buf[3] & 0xf);
@@ -466,7 +496,8 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
if (feed->pid == pid)
dvb_dmx_swfilter_packet_type(feed, buf);
else if (feed->pid == 0x2000)
- feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts);
+ feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts,
+ &feed->buffer_flags);
}
}
@@ -585,7 +616,8 @@ void dvb_dmx_swfilter_raw(struct dvb_demux *demux, const u8 *buf, size_t count)
spin_lock_irqsave(&demux->lock, flags);
- demux->feed->cb.ts(buf, count, NULL, 0, &demux->feed->feed.ts);
+ demux->feed->cb.ts(buf, count, NULL, 0, &demux->feed->feed.ts,
+ &demux->feed->buffer_flags);
spin_unlock_irqrestore(&demux->lock, flags);
}
@@ -785,6 +817,7 @@ static int dvbdmx_allocate_ts_feed(struct dmx_demux *dmx,
feed->demux = demux;
feed->pid = 0xffff;
feed->peslen = 0xfffa;
+ feed->buffer_flags = 0;
(*ts_feed) = &feed->feed.ts;
(*ts_feed)->parent = dmx;
@@ -1042,6 +1075,7 @@ static int dvbdmx_allocate_section_feed(struct dmx_demux *demux,
dvbdmxfeed->cb.sec = callback;
dvbdmxfeed->demux = dvbdmx;
dvbdmxfeed->pid = 0xffff;
+ dvbdmxfeed->buffer_flags = 0;
dvbdmxfeed->feed.sec.secbuf = dvbdmxfeed->feed.sec.secbuf_base;
dvbdmxfeed->feed.sec.secbufp = dvbdmxfeed->feed.sec.seclen = 0;
dvbdmxfeed->feed.sec.tsfeedp = 0;
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index b6c7eec863b9..ba39f9942e1d 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -883,7 +883,8 @@ static void dvb_net_ule(struct net_device *dev, const u8 *buf, size_t buf_len)
static int dvb_net_ts_callback(const u8 *buffer1, size_t buffer1_len,
const u8 *buffer2, size_t buffer2_len,
- struct dmx_ts_feed *feed)
+ struct dmx_ts_feed *feed,
+ u32 *buffer_flags)
{
struct net_device *dev = feed->priv;
@@ -992,7 +993,7 @@ static void dvb_net_sec(struct net_device *dev,
static int dvb_net_sec_callback(const u8 *buffer1, size_t buffer1_len,
const u8 *buffer2, size_t buffer2_len,
- struct dmx_section_filter *filter)
+ struct dmx_section_filter *filter, u32 *buffer_flags)
{
struct net_device *dev = filter->priv;
diff --git a/drivers/media/dvb-core/dvb_vb2.c b/drivers/media/dvb-core/dvb_vb2.c
index 763145d74e83..b811adf88afa 100644
--- a/drivers/media/dvb-core/dvb_vb2.c
+++ b/drivers/media/dvb-core/dvb_vb2.c
@@ -256,7 +256,8 @@ int dvb_vb2_is_streaming(struct dvb_vb2_ctx *ctx)
}
int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx,
- const unsigned char *src, int len)
+ const unsigned char *src, int len,
+ enum dmx_buffer_flags *buffer_flags)
{
unsigned long flags = 0;
void *vbuf = NULL;
@@ -264,15 +265,17 @@ int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx,
unsigned char *psrc = (unsigned char *)src;
int ll = 0;
- dprintk(3, "[%s] %d bytes are rcvd\n", ctx->name, len);
- if (!src) {
- dprintk(3, "[%s]:NULL pointer src\n", ctx->name);
- /**normal case: This func is called twice from demux driver
- * once with valid src pointer, second time with NULL pointer
- */
+ /*
+ * Normal case: this function is called twice by the demux driver,
+ * once with a valid src pointer and a second time with NULL.
+ */
+ if (!src || !len)
return 0;
- }
spin_lock_irqsave(&ctx->slock, flags);
+ if (buffer_flags && *buffer_flags) {
+ ctx->flags |= *buffer_flags;
+ *buffer_flags = 0;
+ }
while (todo) {
if (!ctx->buf) {
if (list_empty(&ctx->dvb_q)) {
@@ -395,6 +398,7 @@ int dvb_vb2_qbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b)
int dvb_vb2_dqbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b)
{
+ unsigned long flags;
int ret;
ret = vb2_core_dqbuf(&ctx->vb_q, &b->index, b, ctx->nonblocking);
@@ -402,7 +406,16 @@ int dvb_vb2_dqbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b)
dprintk(1, "[%s] errno=%d\n", ctx->name, ret);
return ret;
}
- dprintk(5, "[%s] index=%d\n", ctx->name, b->index);
+
+ spin_lock_irqsave(&ctx->slock, flags);
+ b->count = ctx->count++;
+ b->flags = ctx->flags;
+ ctx->flags = 0;
+ spin_unlock_irqrestore(&ctx->slock, flags);
+
+ dprintk(5, "[%s] index=%d, count=%d, flags=%d\n",
+ ctx->name, b->index, ctx->count, b->flags);
+
return 0;
}
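
With the change above, a dequeued buffer carries the flags accumulated since
the previous dequeue plus a running count. A userspace-side sketch, assuming
an already-open demux fd and the DMX_DQBUF ioctl with the new struct
dmx_buffer fields:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/dmx.h>

	static int dqbuf_check(int fd)
	{
		struct dmx_buffer b = {0};

		if (ioctl(fd, DMX_DQBUF, &b) < 0)
			return -1;
		/* Flags report TEI, CRC discards and discontinuities. */
		if (b.flags & DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED)
			fprintf(stderr, "discontinuity before buffer %u\n",
				b.count);
		return 0;
	}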
diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c
index 50bce68ffd66..65d157fe76d1 100644
--- a/drivers/media/dvb-frontends/m88ds3103.c
+++ b/drivers/media/dvb-frontends/m88ds3103.c
@@ -1262,11 +1262,12 @@ static int m88ds3103_select(struct i2c_mux_core *muxc, u32 chan)
* New users must use I2C client binding directly!
*/
struct dvb_frontend *m88ds3103_attach(const struct m88ds3103_config *cfg,
- struct i2c_adapter *i2c, struct i2c_adapter **tuner_i2c_adapter)
+ struct i2c_adapter *i2c,
+ struct i2c_adapter **tuner_i2c_adapter)
{
struct i2c_client *client;
struct i2c_board_info board_info;
- struct m88ds3103_platform_data pdata;
+ struct m88ds3103_platform_data pdata = {};
pdata.clk = cfg->clock;
pdata.i2c_wr_max = cfg->i2c_wr_max;
@@ -1409,6 +1410,8 @@ static int m88ds3103_probe(struct i2c_client *client,
case M88DS3103_CHIP_ID:
break;
default:
+ ret = -ENODEV;
+ dev_err(&client->dev, "Unknown device. Chip_id=%02x\n", dev->chip_id);
goto err_kfree;
}
diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c
index 3c1851984b90..2476d812f669 100644
--- a/drivers/media/i2c/tvp5150.c
+++ b/drivers/media/i2c/tvp5150.c
@@ -505,80 +505,77 @@ static struct i2c_vbi_ram_value vbi_ram_default[] =
/* FIXME: Current api doesn't handle all VBI types, those not
yet supported are placed under #if 0 */
#if 0
- {0x010, /* Teletext, SECAM, WST System A */
+ [0] = {0x010, /* Teletext, SECAM, WST System A */
{V4L2_SLICED_TELETEXT_SECAM,6,23,1},
{ 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x26,
0xe6, 0xb4, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00 }
},
#endif
- {0x030, /* Teletext, PAL, WST System B */
+ [1] = {0x030, /* Teletext, PAL, WST System B */
{V4L2_SLICED_TELETEXT_B,6,22,1},
{ 0xaa, 0xaa, 0xff, 0xff, 0x27, 0x2e, 0x20, 0x2b,
0xa6, 0x72, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00 }
},
#if 0
- {0x050, /* Teletext, PAL, WST System C */
+ [2] = {0x050, /* Teletext, PAL, WST System C */
{V4L2_SLICED_TELETEXT_PAL_C,6,22,1},
{ 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x22,
0xa6, 0x98, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 }
},
- {0x070, /* Teletext, NTSC, WST System B */
+ [3] = {0x070, /* Teletext, NTSC, WST System B */
{V4L2_SLICED_TELETEXT_NTSC_B,10,21,1},
{ 0xaa, 0xaa, 0xff, 0xff, 0x27, 0x2e, 0x20, 0x23,
0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 }
},
- {0x090, /* Tetetext, NTSC NABTS System C */
+ [4] = {0x090, /* Teletext, NTSC NABTS System C */
{V4L2_SLICED_TELETEXT_NTSC_C,10,21,1},
{ 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x22,
0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x15, 0x00 }
},
- {0x0b0, /* Teletext, NTSC-J, NABTS System D */
+ [5] = {0x0b0, /* Teletext, NTSC-J, NABTS System D */
{V4L2_SLICED_TELETEXT_NTSC_D,10,21,1},
{ 0xaa, 0xaa, 0xff, 0xff, 0xa7, 0x2e, 0x20, 0x23,
0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 }
},
- {0x0d0, /* Closed Caption, PAL/SECAM */
+ [6] = {0x0d0, /* Closed Caption, PAL/SECAM */
{V4L2_SLICED_CAPTION_625,22,22,1},
{ 0xaa, 0x2a, 0xff, 0x3f, 0x04, 0x51, 0x6e, 0x02,
0xa6, 0x7b, 0x09, 0x00, 0x00, 0x00, 0x27, 0x00 }
},
#endif
- {0x0f0, /* Closed Caption, NTSC */
+ [7] = {0x0f0, /* Closed Caption, NTSC */
{V4L2_SLICED_CAPTION_525,21,21,1},
{ 0xaa, 0x2a, 0xff, 0x3f, 0x04, 0x51, 0x6e, 0x02,
0x69, 0x8c, 0x09, 0x00, 0x00, 0x00, 0x27, 0x00 }
},
- {0x110, /* Wide Screen Signal, PAL/SECAM */
+ [8] = {0x110, /* Wide Screen Signal, PAL/SECAM */
{V4L2_SLICED_WSS_625,23,23,1},
{ 0x5b, 0x55, 0xc5, 0xff, 0x00, 0x71, 0x6e, 0x42,
0xa6, 0xcd, 0x0f, 0x00, 0x00, 0x00, 0x3a, 0x00 }
},
#if 0
- {0x130, /* Wide Screen Signal, NTSC C */
+ [9] = {0x130, /* Wide Screen Signal, NTSC C */
{V4L2_SLICED_WSS_525,20,20,1},
{ 0x38, 0x00, 0x3f, 0x00, 0x00, 0x71, 0x6e, 0x43,
0x69, 0x7c, 0x08, 0x00, 0x00, 0x00, 0x39, 0x00 }
},
- {0x150, /* Vertical Interval Timecode (VITC), PAL/SECAM */
+ [10] = {0x150, /* Vertical Interval Timecode (VITC), PAL/SECAM */
{V4l2_SLICED_VITC_625,6,22,0},
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x8f, 0x6d, 0x49,
0xa6, 0x85, 0x08, 0x00, 0x00, 0x00, 0x4c, 0x00 }
},
- {0x170, /* Vertical Interval Timecode (VITC), NTSC */
+ [11] = {0x170, /* Vertical Interval Timecode (VITC), NTSC */
{V4l2_SLICED_VITC_525,10,20,0},
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x8f, 0x6d, 0x49,
0x69, 0x94, 0x08, 0x00, 0x00, 0x00, 0x4c, 0x00 }
},
#endif
- {0x190, /* Video Program System (VPS), PAL */
+ [12] = {0x190, /* Video Program System (VPS), PAL */
{V4L2_SLICED_VPS,16,16,0},
{ 0xaa, 0xaa, 0xff, 0xff, 0xba, 0xce, 0x2b, 0x0d,
0xa6, 0xda, 0x0b, 0x00, 0x00, 0x00, 0x60, 0x00 }
},
/* 0x1d0 User programmable */
-
- /* End of struct */
- { (u16)-1 }
};
static int tvp5150_write_inittab(struct v4l2_subdev *sd,
@@ -591,10 +588,10 @@ static int tvp5150_write_inittab(struct v4l2_subdev *sd,
return 0;
}
-static int tvp5150_vdp_init(struct v4l2_subdev *sd,
- const struct i2c_vbi_ram_value *regs)
+static int tvp5150_vdp_init(struct v4l2_subdev *sd)
{
unsigned int i;
+ int j;
/* Disable Full Field */
tvp5150_write(sd, TVP5150_FULL_FIELD_ENA, 0);
@@ -604,14 +601,17 @@ static int tvp5150_vdp_init(struct v4l2_subdev *sd,
tvp5150_write(sd, i, 0xff);
/* Load Ram Table */
- while (regs->reg != (u16)-1) {
+ for (j = 0; j < ARRAY_SIZE(vbi_ram_default); j++) {
+ const struct i2c_vbi_ram_value *regs = &vbi_ram_default[j];
+
+ if (!regs->type.vbi_type)
+ continue;
+
tvp5150_write(sd, TVP5150_CONF_RAM_ADDR_HIGH, regs->reg >> 8);
tvp5150_write(sd, TVP5150_CONF_RAM_ADDR_LOW, regs->reg);
for (i = 0; i < 16; i++)
tvp5150_write(sd, TVP5150_VDP_CONF_RAM_DATA, regs->values[i]);
-
- regs++;
}
return 0;
}
@@ -620,19 +620,23 @@ static int tvp5150_vdp_init(struct v4l2_subdev *sd,
static int tvp5150_g_sliced_vbi_cap(struct v4l2_subdev *sd,
struct v4l2_sliced_vbi_cap *cap)
{
- const struct i2c_vbi_ram_value *regs = vbi_ram_default;
- int line;
+ int line, i;
dev_dbg_lvl(sd->dev, 1, debug, "g_sliced_vbi_cap\n");
memset(cap, 0, sizeof *cap);
- while (regs->reg != (u16)-1 ) {
- for (line=regs->type.ini_line;line<=regs->type.end_line;line++) {
+ for (i = 0; i < ARRAY_SIZE(vbi_ram_default); i++) {
+ const struct i2c_vbi_ram_value *regs = &vbi_ram_default[i];
+
+ if (!regs->type.vbi_type)
+ continue;
+
+ for (line = regs->type.ini_line;
+ line <= regs->type.end_line;
+ line++) {
cap->service_lines[0][line] |= regs->type.vbi_type;
}
cap->service_set |= regs->type.vbi_type;
-
- regs++;
}
return 0;
}
@@ -651,14 +655,13 @@ static int tvp5150_g_sliced_vbi_cap(struct v4l2_subdev *sd,
* MSB = field2
*/
static int tvp5150_set_vbi(struct v4l2_subdev *sd,
- const struct i2c_vbi_ram_value *regs,
unsigned int type,u8 flags, int line,
const int fields)
{
struct tvp5150 *decoder = to_tvp5150(sd);
v4l2_std_id std = decoder->norm;
u8 reg;
- int pos = 0;
+ int i, pos = 0;
if (std == V4L2_STD_ALL) {
dev_err(sd->dev, "VBI can't be configured without knowing number of lines\n");
@@ -671,19 +674,19 @@ static int tvp5150_set_vbi(struct v4l2_subdev *sd,
if (line < 6 || line > 27)
return 0;
- while (regs->reg != (u16)-1) {
+ for (i = 0; i < ARRAY_SIZE(vbi_ram_default); i++) {
+ const struct i2c_vbi_ram_value *regs = &vbi_ram_default[i];
+
+ if (!regs->type.vbi_type)
+ continue;
+
if ((type & regs->type.vbi_type) &&
(line >= regs->type.ini_line) &&
(line <= regs->type.end_line))
break;
-
- regs++;
pos++;
}
- if (regs->reg == (u16)-1)
- return 0;
-
type = pos | (flags & 0xf0);
reg = ((line - 6) << 1) + TVP5150_LINE_MODE_INI;
@@ -696,8 +699,7 @@ static int tvp5150_set_vbi(struct v4l2_subdev *sd,
return type;
}
-static int tvp5150_get_vbi(struct v4l2_subdev *sd,
- const struct i2c_vbi_ram_value *regs, int line)
+static int tvp5150_get_vbi(struct v4l2_subdev *sd, int line)
{
struct tvp5150 *decoder = to_tvp5150(sd);
v4l2_std_id std = decoder->norm;
@@ -726,8 +728,8 @@ static int tvp5150_get_vbi(struct v4l2_subdev *sd,
return 0;
}
pos = ret & 0x0f;
- if (pos < 0x0f)
- type |= regs[pos].type.vbi_type;
+ if (pos < ARRAY_SIZE(vbi_ram_default))
+ type |= vbi_ram_default[pos].type.vbi_type;
}
return type;
@@ -788,7 +790,7 @@ static int tvp5150_reset(struct v4l2_subdev *sd, u32 val)
tvp5150_write_inittab(sd, tvp5150_init_default);
/* Initializes VDP registers */
- tvp5150_vdp_init(sd, vbi_ram_default);
+ tvp5150_vdp_init(sd);
/* Selects decoder input */
tvp5150_selmux(sd);
@@ -1121,8 +1123,8 @@ static int tvp5150_s_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_f
for (i = 0; i <= 23; i++) {
svbi->service_lines[1][i] = 0;
svbi->service_lines[0][i] =
- tvp5150_set_vbi(sd, vbi_ram_default,
- svbi->service_lines[0][i], 0xf0, i, 3);
+ tvp5150_set_vbi(sd, svbi->service_lines[0][i],
+ 0xf0, i, 3);
}
/* Enables FIFO */
tvp5150_write(sd, TVP5150_FIFO_OUT_CTRL, 1);
@@ -1148,7 +1150,7 @@ static int tvp5150_g_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_f
for (i = 0; i <= 23; i++) {
svbi->service_lines[0][i] =
- tvp5150_get_vbi(sd, vbi_ram_default, i);
+ tvp5150_get_vbi(sd, i);
mask |= svbi->service_lines[0][i];
}
svbi->service_set = mask;
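The conversion above leans on a C idiom: entries skipped by the #if 0 blocks leave holes in the designated-initializer array, and those holes are zero-filled, which is why every loop now tests regs->type.vbi_type instead of scanning for a (u16)-1 terminator. A standalone sketch of the idiom:

	/* Gaps in a designated-initializer array are zero-initialized,
	 * so a zero field can mark an unused slot. */
	struct entry { int type; };

	static const struct entry tbl[] = {
		[1] = { .type = 42 },
		[3] = { .type = 7 },
	};

	static int count_used(void)
	{
		int i, n = 0;

		for (i = 0; i < ARRAY_SIZE(tbl); i++)
			if (tbl[i].type)	/* skip the zeroed holes */
				n++;
		return n;			/* yields 2 */
	}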
diff --git a/drivers/media/pci/ttpci/av7110.c b/drivers/media/pci/ttpci/av7110.c
index dc8e577b2f74..d6816effb878 100644
--- a/drivers/media/pci/ttpci/av7110.c
+++ b/drivers/media/pci/ttpci/av7110.c
@@ -324,14 +324,15 @@ static int DvbDmxFilterCallback(u8 *buffer1, size_t buffer1_len,
}
return dvbdmxfilter->feed->cb.sec(buffer1, buffer1_len,
buffer2, buffer2_len,
- &dvbdmxfilter->filter);
+ &dvbdmxfilter->filter, NULL);
case DMX_TYPE_TS:
if (!(dvbdmxfilter->feed->ts_type & TS_PACKET))
return 0;
if (dvbdmxfilter->feed->ts_type & TS_PAYLOAD_ONLY)
return dvbdmxfilter->feed->cb.ts(buffer1, buffer1_len,
buffer2, buffer2_len,
- &dvbdmxfilter->feed->feed.ts);
+ &dvbdmxfilter->feed->feed.ts,
+ NULL);
else
av7110_p2t_write(buffer1, buffer1_len,
dvbdmxfilter->feed->pid,
diff --git a/drivers/media/pci/ttpci/av7110_av.c b/drivers/media/pci/ttpci/av7110_av.c
index 4daba76ec240..ef1bc17cdc4d 100644
--- a/drivers/media/pci/ttpci/av7110_av.c
+++ b/drivers/media/pci/ttpci/av7110_av.c
@@ -99,7 +99,7 @@ int av7110_record_cb(struct dvb_filter_pes2ts *p2t, u8 *buf, size_t len)
buf[4] = buf[5] = 0;
if (dvbdmxfeed->ts_type & TS_PAYLOAD_ONLY)
return dvbdmxfeed->cb.ts(buf, len, NULL, 0,
- &dvbdmxfeed->feed.ts);
+ &dvbdmxfeed->feed.ts, NULL);
else
return dvb_filter_pes2ts(p2t, buf, len, 1);
}
@@ -109,7 +109,7 @@ static int dvb_filter_pes2ts_cb(void *priv, unsigned char *data)
struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *) priv;
dvbdmxfeed->cb.ts(data, 188, NULL, 0,
- &dvbdmxfeed->feed.ts);
+ &dvbdmxfeed->feed.ts, NULL);
return 0;
}
@@ -814,7 +814,7 @@ static void p_to_t(u8 const *buf, long int length, u16 pid, u8 *counter,
memcpy(obuf + l, buf + c, TS_SIZE - l);
c = length;
}
- feed->cb.ts(obuf, 188, NULL, 0, &feed->feed.ts);
+ feed->cb.ts(obuf, 188, NULL, 0, &feed->feed.ts, NULL);
pes_start = 0;
}
}
diff --git a/drivers/media/usb/au0828/Kconfig b/drivers/media/usb/au0828/Kconfig
index 70521e0b4c53..bfaa806633df 100644
--- a/drivers/media/usb/au0828/Kconfig
+++ b/drivers/media/usb/au0828/Kconfig
@@ -1,7 +1,7 @@
config VIDEO_AU0828
tristate "Auvitek AU0828 support"
- depends on I2C && INPUT && DVB_CORE && USB
+ depends on I2C && INPUT && DVB_CORE && USB && VIDEO_V4L2
select I2C_ALGOBIT
select VIDEO_TVEEPROM
select VIDEOBUF2_VMALLOC
diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c
index a8900f5571f7..44ca66cb9b8f 100644
--- a/drivers/media/usb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c
@@ -428,7 +428,7 @@ static int ttusb_dec_audio_pes2ts_cb(void *priv, unsigned char *data)
struct ttusb_dec *dec = priv;
dec->audio_filter->feed->cb.ts(data, 188, NULL, 0,
- &dec->audio_filter->feed->feed.ts);
+ &dec->audio_filter->feed->feed.ts, NULL);
return 0;
}
@@ -438,7 +438,7 @@ static int ttusb_dec_video_pes2ts_cb(void *priv, unsigned char *data)
struct ttusb_dec *dec = priv;
dec->video_filter->feed->cb.ts(data, 188, NULL, 0,
- &dec->video_filter->feed->feed.ts);
+ &dec->video_filter->feed->feed.ts, NULL);
return 0;
}
@@ -490,7 +490,7 @@ static void ttusb_dec_process_pva(struct ttusb_dec *dec, u8 *pva, int length)
if (output_pva) {
dec->video_filter->feed->cb.ts(pva, length, NULL, 0,
- &dec->video_filter->feed->feed.ts);
+ &dec->video_filter->feed->feed.ts, NULL);
return;
}
@@ -551,7 +551,7 @@ static void ttusb_dec_process_pva(struct ttusb_dec *dec, u8 *pva, int length)
case 0x02: /* MainAudioStream */
if (output_pva) {
dec->audio_filter->feed->cb.ts(pva, length, NULL, 0,
- &dec->audio_filter->feed->feed.ts);
+ &dec->audio_filter->feed->feed.ts, NULL);
return;
}
@@ -589,7 +589,7 @@ static void ttusb_dec_process_filter(struct ttusb_dec *dec, u8 *packet,
if (filter)
filter->feed->cb.sec(&packet[2], length - 2, NULL, 0,
- &filter->filter);
+ &filter->filter, NULL);
}
static void ttusb_dec_process_packet(struct ttusb_dec *dec)
diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
index bf52fbd07aed..8e37e7c5e0f7 100644
--- a/drivers/media/v4l2-core/Kconfig
+++ b/drivers/media/v4l2-core/Kconfig
@@ -7,6 +7,7 @@ config VIDEO_V4L2
tristate
depends on (I2C || I2C=n) && VIDEO_DEV
select RATIONAL
+ select VIDEOBUF2_V4L2 if VIDEOBUF2_CORE
default (I2C || I2C=n) && VIDEO_DEV
config VIDEO_ADV_DEBUG
diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
index 80de2cb9c476..7df54582e956 100644
--- a/drivers/media/v4l2-core/Makefile
+++ b/drivers/media/v4l2-core/Makefile
@@ -13,7 +13,7 @@ ifeq ($(CONFIG_COMPAT),y)
endif
obj-$(CONFIG_V4L2_FWNODE) += v4l2-fwnode.o
ifeq ($(CONFIG_TRACEPOINTS),y)
- videodev-objs += vb2-trace.o v4l2-trace.o
+ videodev-objs += v4l2-trace.o
endif
videodev-$(CONFIG_MEDIA_CONTROLLER) += v4l2-mc.o
@@ -35,4 +35,3 @@ obj-$(CONFIG_VIDEOBUF_DVB) += videobuf-dvb.o
ccflags-y += -I$(srctree)/drivers/media/dvb-frontends
ccflags-y += -I$(srctree)/drivers/media/tuners
-
diff --git a/drivers/memory/brcmstb_dpfe.c b/drivers/memory/brcmstb_dpfe.c
index 0a7bdbed3a6f..e9c1485c32b9 100644
--- a/drivers/memory/brcmstb_dpfe.c
+++ b/drivers/memory/brcmstb_dpfe.c
@@ -45,8 +45,16 @@
#define REG_TO_DCPU_MBOX 0x10
#define REG_TO_HOST_MBOX 0x14
+/* Macros to process offsets returned by the DCPU */
+#define DRAM_MSG_ADDR_OFFSET 0x0
+#define DRAM_MSG_TYPE_OFFSET 0x1c
+#define DRAM_MSG_ADDR_MASK ((1UL << DRAM_MSG_TYPE_OFFSET) - 1)
+#define DRAM_MSG_TYPE_MASK ((1UL << \
+ (BITS_PER_LONG - DRAM_MSG_TYPE_OFFSET)) - 1)
+
/* Message RAM */
-#define DCPU_MSG_RAM(x) (0x100 + (x) * sizeof(u32))
+#define DCPU_MSG_RAM_START 0x100
+#define DCPU_MSG_RAM(x) (DCPU_MSG_RAM_START + (x) * sizeof(u32))
/* DRAM Info Offsets & Masks */
#define DRAM_INFO_INTERVAL 0x0
@@ -255,6 +263,40 @@ static unsigned int get_msg_chksum(const u32 msg[])
return sum;
}
+static void __iomem *get_msg_ptr(struct private_data *priv, u32 response,
+ char *buf, ssize_t *size)
+{
+ unsigned int msg_type;
+ unsigned int offset;
+ void __iomem *ptr = NULL;
+
+ msg_type = (response >> DRAM_MSG_TYPE_OFFSET) & DRAM_MSG_TYPE_MASK;
+ offset = (response >> DRAM_MSG_ADDR_OFFSET) & DRAM_MSG_ADDR_MASK;
+
+ /*
+ * msg_type == 1: the offset is relative to the message RAM
+ * msg_type == 0: the offset is relative to the data RAM (this is the
+ * previous way of passing data)
+ * any other msg_type: there's a critical hardware problem
+ */
+ switch (msg_type) {
+ case 1:
+ ptr = priv->regs + DCPU_MSG_RAM_START + offset;
+ break;
+ case 0:
+ ptr = priv->dmem + offset;
+ break;
+ default:
+ dev_emerg(priv->dev, "invalid message reply from DCPU: %#x\n",
+ response);
+ if (buf && size)
+ *size = sprintf(buf,
+ "FATAL: communication error with DCPU\n");
+ }
+
+ return ptr;
+}
+
static int __send_command(struct private_data *priv, unsigned int cmd,
u32 result[])
{
@@ -507,7 +549,7 @@ static ssize_t show_info(struct device *dev, struct device_attribute *devattr,
{
u32 response[MSG_FIELD_MAX];
unsigned int info;
- int ret;
+ ssize_t ret;
ret = generic_show(DPFE_CMD_GET_INFO, response, dev, buf);
if (ret)
@@ -528,18 +570,19 @@ static ssize_t show_refresh(struct device *dev,
u32 response[MSG_FIELD_MAX];
void __iomem *info;
struct private_data *priv;
- unsigned int offset;
u8 refresh, sr_abort, ppre, thermal_offs, tuf;
u32 mr4;
- int ret;
+ ssize_t ret;
ret = generic_show(DPFE_CMD_GET_REFRESH, response, dev, buf);
if (ret)
return ret;
priv = dev_get_drvdata(dev);
- offset = response[MSG_ARG0];
- info = priv->dmem + offset;
+
+ info = get_msg_ptr(priv, response[MSG_ARG0], buf, &ret);
+ if (!info)
+ return ret;
mr4 = readl_relaxed(info + DRAM_INFO_MR4) & DRAM_INFO_MR4_MASK;
@@ -561,7 +604,6 @@ static ssize_t store_refresh(struct device *dev, struct device_attribute *attr,
u32 response[MSG_FIELD_MAX];
struct private_data *priv;
void __iomem *info;
- unsigned int offset;
unsigned long val;
int ret;
@@ -574,8 +616,10 @@ static ssize_t store_refresh(struct device *dev, struct device_attribute *attr,
if (ret)
return ret;
- offset = response[MSG_ARG0];
- info = priv->dmem + offset;
+ info = get_msg_ptr(priv, response[MSG_ARG0], NULL, NULL);
+ if (!info)
+ return -EIO;
+
writel_relaxed(val, info + DRAM_INFO_INTERVAL);
return count;
@@ -587,23 +631,25 @@ static ssize_t show_vendor(struct device *dev, struct device_attribute *devattr,
u32 response[MSG_FIELD_MAX];
struct private_data *priv;
void __iomem *info;
- unsigned int offset;
- int ret;
+ ssize_t ret;
ret = generic_show(DPFE_CMD_GET_VENDOR, response, dev, buf);
if (ret)
return ret;
- offset = response[MSG_ARG0];
priv = dev_get_drvdata(dev);
- info = priv->dmem + offset;
+
+ info = get_msg_ptr(priv, response[MSG_ARG0], buf, &ret);
+ if (!info)
+ return ret;
return sprintf(buf, "%#x %#x %#x %#x %#x\n",
readl_relaxed(info + DRAM_VENDOR_MR5) & DRAM_VENDOR_MASK,
readl_relaxed(info + DRAM_VENDOR_MR6) & DRAM_VENDOR_MASK,
readl_relaxed(info + DRAM_VENDOR_MR7) & DRAM_VENDOR_MASK,
readl_relaxed(info + DRAM_VENDOR_MR8) & DRAM_VENDOR_MASK,
- readl_relaxed(info + DRAM_VENDOR_ERROR));
+ readl_relaxed(info + DRAM_VENDOR_ERROR) &
+ DRAM_VENDOR_MASK);
}
static int brcmstb_dpfe_resume(struct platform_device *pdev)
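A worked decode of the new response layout, assuming 32-bit longs (so DRAM_MSG_TYPE_MASK is 0xf and the low 28 bits carry the offset); the response value is illustrative:

	/* Hedged sketch: response 0x10000040 decodes as msg_type 1
	 * (message RAM) with offset 0x40, i.e. the payload lives at
	 * priv->regs + DCPU_MSG_RAM_START + 0x40. */
	static void example_decode(u32 response)
	{
		unsigned int type = (response >> DRAM_MSG_TYPE_OFFSET) &
				    DRAM_MSG_TYPE_MASK;
		unsigned int offs = (response >> DRAM_MSG_ADDR_OFFSET) &
				    DRAM_MSG_ADDR_MASK;

		pr_info("msg_type=%u offset=%#x\n", type, offs);
	}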
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index 8d12017b9893..4470630dd545 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -2687,6 +2687,8 @@ mptctl_hp_targetinfo(unsigned long arg)
__FILE__, __LINE__, iocnum);
return -ENODEV;
}
+ if (karg.hdr.id >= MPT_MAX_FC_DEVICES)
+ return -EINVAL;
dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_hp_targetinfo called.\n",
ioc->name));
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 3e5eabdae8d9..772d02922529 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -548,12 +548,6 @@ int mei_cldev_disable(struct mei_cl_device *cldev)
goto out;
}
- if (bus->dev_state == MEI_DEV_POWER_DOWN) {
- dev_dbg(bus->dev, "Device is powering down, don't bother with disconnection\n");
- err = 0;
- goto out;
- }
-
err = mei_cl_disconnect(cl);
if (err < 0)
dev_err(bus->dev, "Could not disconnect from the ME client\n");
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index be64969d986a..7e60c1817c31 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -945,6 +945,12 @@ int mei_cl_disconnect(struct mei_cl *cl)
return 0;
}
+ if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+ cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n");
+ mei_cl_set_disconnected(cl);
+ return 0;
+ }
+
rets = pm_runtime_get(dev->dev);
if (rets < 0 && rets != -EINPROGRESS) {
pm_runtime_put_noidle(dev->dev);
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 0ccccbaf530d..e4b10b2d1a08 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -132,6 +132,11 @@
#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */
#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */
+#define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */
+#define MEI_DEV_ID_CNP_LP_4 0x9DE4 /* Cannon Point LP 4 (iTouch) */
+#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */
+#define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */
+
/*
* MEI HW Section
*/
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 4a0ccda4d04b..ea4e152270a3 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -98,6 +98,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)},
+
/* required last entry */
{0, }
};
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index d9aa407db06a..038509e5d031 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -102,10 +102,32 @@ static long afu_ioctl_attach(struct ocxl_context *ctx,
return rc;
}
+static long afu_ioctl_get_metadata(struct ocxl_context *ctx,
+ struct ocxl_ioctl_metadata __user *uarg)
+{
+ struct ocxl_ioctl_metadata arg;
+
+ memset(&arg, 0, sizeof(arg));
+
+ arg.version = 0;
+
+ arg.afu_version_major = ctx->afu->config.version_major;
+ arg.afu_version_minor = ctx->afu->config.version_minor;
+ arg.pasid = ctx->pasid;
+ arg.pp_mmio_size = ctx->afu->config.pp_mmio_stride;
+ arg.global_mmio_size = ctx->afu->config.global_mmio_size;
+
+ if (copy_to_user(uarg, &arg, sizeof(arg)))
+ return -EFAULT;
+
+ return 0;
+}
+
#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \
x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \
x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \
x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" : \
+ x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" : \
"UNKNOWN")
static long afu_ioctl(struct file *file, unsigned int cmd,
@@ -133,8 +155,10 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
if (!rc) {
rc = copy_to_user((u64 __user *) args, &irq_offset,
sizeof(irq_offset));
- if (rc)
+ if (rc) {
ocxl_afu_irq_free(ctx, irq_offset);
+ return -EFAULT;
+ }
}
break;
@@ -157,6 +181,11 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
irq_fd.eventfd);
break;
+ case OCXL_IOCTL_GET_METADATA:
+ rc = afu_ioctl_get_metadata(ctx,
+ (struct ocxl_ioctl_metadata __user *) args);
+ break;
+
default:
rc = -EINVAL;
}
@@ -277,7 +306,7 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
struct ocxl_context *ctx = file->private_data;
struct ocxl_kernel_event_header header;
ssize_t rc;
- size_t used = 0;
+ ssize_t used = 0;
DEFINE_WAIT(event_wait);
memset(&header, 0, sizeof(header));
@@ -329,7 +358,7 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
used += sizeof(header);
- rc = (ssize_t) used;
+ rc = used;
return rc;
}
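A minimal user-space sketch of the new ioctl, assuming the uapi header exports struct ocxl_ioctl_metadata with the fields populated above; the header path and device node are illustrative:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <misc/ocxl.h>		/* assumed uapi header location */

	int main(void)
	{
		struct ocxl_ioctl_metadata md;
		int fd = open("/dev/ocxl/afu0.0", O_RDWR);	/* illustrative */

		if (fd < 0 || ioctl(fd, OCXL_IOCTL_GET_METADATA, &md))
			return 1;
		printf("AFU v%u.%u pasid=%u pp_mmio=%llu global_mmio=%llu\n",
		       md.afu_version_major, md.afu_version_minor, md.pasid,
		       (unsigned long long)md.pp_mmio_size,
		       (unsigned long long)md.global_mmio_size);
		return 0;
	}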
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 20135a5de748..2cfb963d9f37 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -72,6 +72,7 @@ MODULE_ALIAS("mmc:block");
#define MMC_BLK_TIMEOUT_MS (10 * 1000)
#define MMC_SANITIZE_REQ_TIMEOUT 240000
#define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16)
+#define MMC_EXTRACT_VALUE_FROM_ARG(x) ((x & 0x0000FF00) >> 8)
#define mmc_req_rel_wr(req) ((req->cmd_flags & REQ_FUA) && \
(rq_data_dir(req) == WRITE))
@@ -587,6 +588,24 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
}
/*
+ * Make sure the cache of the PARTITION_CONFIG register and
+ * PARTITION_ACCESS bits is updated in case the ioctl ext_csd write
+ * changed it successfully.
+ */
+ if ((MMC_EXTRACT_INDEX_FROM_ARG(cmd.arg) == EXT_CSD_PART_CONFIG) &&
+ (cmd.opcode == MMC_SWITCH)) {
+ struct mmc_blk_data *main_md = dev_get_drvdata(&card->dev);
+ u8 value = MMC_EXTRACT_VALUE_FROM_ARG(cmd.arg);
+
+ /*
+ * Update cache so the next mmc_blk_part_switch call operates
+ * on up-to-date data.
+ */
+ card->ext_csd.part_config = value;
+ main_md->part_curr = value & EXT_CSD_PART_CONFIG_ACC_MASK;
+ }
+
+ /*
* According to the SD specs, some commands require a delay after
* issuing the command.
*/
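A hedged worked example of the two extraction macros for a typical MMC_SWITCH argument (the argument value is illustrative):

	/* arg 0x03b30100: index (bits 23:16) is 0xb3 == EXT_CSD_PART_CONFIG,
	 * value (bits 15:8) is 0x01, so the cached part_config becomes 0x01
	 * and part_curr its EXT_CSD_PART_CONFIG_ACC_MASK bits. */
	static void example_extract(void)
	{
		u32 arg = 0x03b30100;

		WARN_ON(MMC_EXTRACT_INDEX_FROM_ARG(arg) != EXT_CSD_PART_CONFIG);
		WARN_ON(MMC_EXTRACT_VALUE_FROM_ARG(arg) != 0x01);
	}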
diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
index 79a5b985ccf5..9c821eedd156 100644
--- a/drivers/mmc/core/card.h
+++ b/drivers/mmc/core/card.h
@@ -82,6 +82,7 @@ struct mmc_fixup {
#define CID_MANFID_APACER 0x27
#define CID_MANFID_KINGSTON 0x70
#define CID_MANFID_HYNIX 0x90
+#define CID_MANFID_NUMONYX 0xFE
#define END_FIXUP { NULL }
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 908e4db03535..42d6aa89a48a 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -848,7 +848,6 @@ int mmc_interrupt_hpi(struct mmc_card *card)
return 1;
}
- mmc_claim_host(card->host);
err = mmc_send_status(card, &status);
if (err) {
pr_err("%s: Get card status fail\n", mmc_hostname(card->host));
@@ -890,7 +889,6 @@ int mmc_interrupt_hpi(struct mmc_card *card)
} while (!err);
out:
- mmc_release_host(card->host);
return err;
}
@@ -932,9 +930,7 @@ static int mmc_read_bkops_status(struct mmc_card *card)
int err;
u8 *ext_csd;
- mmc_claim_host(card->host);
err = mmc_get_ext_csd(card, &ext_csd);
- mmc_release_host(card->host);
if (err)
return err;
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index 75d317623852..5153577754f0 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -109,6 +109,12 @@ static const struct mmc_fixup mmc_ext_csd_fixups[] = {
*/
MMC_FIXUP_EXT_CSD_REV(CID_NAME_ANY, CID_MANFID_HYNIX,
0x014a, add_quirk, MMC_QUIRK_BROKEN_HPI, 5),
+ /*
+ * Certain Micron (Numonyx) eMMC 4.5 cards might get broken when the HPI
+ * feature is used, so disable HPI for such buggy cards.
+ */
+ MMC_FIXUP_EXT_CSD_REV(CID_NAME_ANY, CID_MANFID_NUMONYX,
+ 0x014e, add_quirk, MMC_QUIRK_BROKEN_HPI, 6),
END_FIXUP
};
diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index 229dc18f0581..768972af8b85 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -1265,7 +1265,8 @@ static int bcm2835_add_host(struct bcm2835_host *host)
char pio_limit_string[20];
int ret;
- mmc->f_max = host->max_clk;
+ if (!mmc->f_max || mmc->f_max > host->max_clk)
+ mmc->f_max = host->max_clk;
mmc->f_min = host->max_clk / SDCDIV_MAX_CDIV;
mmc->max_busy_timeout = ~0 / (mmc->f_max / 1000);
diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index 35026795be28..a84aa3f1ae85 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c
@@ -165,9 +165,15 @@ static void dw_mci_exynos_set_clksel_timing(struct dw_mci *host, u32 timing)
static int dw_mci_exynos_runtime_resume(struct device *dev)
{
struct dw_mci *host = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dw_mci_runtime_resume(dev);
+ if (ret)
+ return ret;
dw_mci_exynos_config_smu(host);
- return dw_mci_runtime_resume(dev);
+
+ return ret;
}
/**
@@ -487,6 +493,7 @@ static unsigned long exynos_dwmmc_caps[4] = {
static const struct dw_mci_drv_data exynos_drv_data = {
.caps = exynos_dwmmc_caps,
+ .num_caps = ARRAY_SIZE(exynos_dwmmc_caps),
.init = dw_mci_exynos_priv_init,
.set_ios = dw_mci_exynos_set_ios,
.parse_dt = dw_mci_exynos_parse_dt,
diff --git a/drivers/mmc/host/dw_mmc-k3.c b/drivers/mmc/host/dw_mmc-k3.c
index 73fd75c3c824..89cdb3d533bb 100644
--- a/drivers/mmc/host/dw_mmc-k3.c
+++ b/drivers/mmc/host/dw_mmc-k3.c
@@ -135,6 +135,9 @@ static int dw_mci_hi6220_parse_dt(struct dw_mci *host)
if (priv->ctrl_id < 0)
priv->ctrl_id = 0;
+ if (priv->ctrl_id >= TIMING_MODE)
+ return -EINVAL;
+
host->priv = priv;
return 0;
}
@@ -207,6 +210,7 @@ static int dw_mci_hi6220_execute_tuning(struct dw_mci_slot *slot, u32 opcode)
static const struct dw_mci_drv_data hi6220_data = {
.caps = dw_mci_hi6220_caps,
+ .num_caps = ARRAY_SIZE(dw_mci_hi6220_caps),
.switch_voltage = dw_mci_hi6220_switch_voltage,
.set_ios = dw_mci_hi6220_set_ios,
.parse_dt = dw_mci_hi6220_parse_dt,
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index a3f1c2b30145..339295212935 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -319,6 +319,7 @@ static const struct dw_mci_drv_data rk2928_drv_data = {
static const struct dw_mci_drv_data rk3288_drv_data = {
.caps = dw_mci_rk3288_dwmmc_caps,
+ .num_caps = ARRAY_SIZE(dw_mci_rk3288_dwmmc_caps),
.set_ios = dw_mci_rk3288_set_ios,
.execute_tuning = dw_mci_rk3288_execute_tuning,
.parse_dt = dw_mci_rk3288_parse_dt,
diff --git a/drivers/mmc/host/dw_mmc-zx.c b/drivers/mmc/host/dw_mmc-zx.c
index d38e94ae2b85..c06b5393312f 100644
--- a/drivers/mmc/host/dw_mmc-zx.c
+++ b/drivers/mmc/host/dw_mmc-zx.c
@@ -195,6 +195,7 @@ static unsigned long zx_dwmmc_caps[3] = {
static const struct dw_mci_drv_data zx_drv_data = {
.caps = zx_dwmmc_caps,
+ .num_caps = ARRAY_SIZE(zx_dwmmc_caps),
.execute_tuning = dw_mci_zx_execute_tuning,
.prepare_hs400_tuning = dw_mci_zx_prepare_hs400_tuning,
.parse_dt = dw_mci_zx_parse_dt,
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 0aa39975f33b..06d47414d0c1 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -165,6 +165,8 @@ static int dw_mci_regs_show(struct seq_file *s, void *v)
{
struct dw_mci *host = s->private;
+ pm_runtime_get_sync(host->dev);
+
seq_printf(s, "STATUS:\t0x%08x\n", mci_readl(host, STATUS));
seq_printf(s, "RINTSTS:\t0x%08x\n", mci_readl(host, RINTSTS));
seq_printf(s, "CMD:\t0x%08x\n", mci_readl(host, CMD));
@@ -172,6 +174,8 @@ static int dw_mci_regs_show(struct seq_file *s, void *v)
seq_printf(s, "INTMASK:\t0x%08x\n", mci_readl(host, INTMASK));
seq_printf(s, "CLKENA:\t0x%08x\n", mci_readl(host, CLKENA));
+ pm_runtime_put_autosuspend(host->dev);
+
return 0;
}
@@ -409,7 +413,9 @@ static inline void dw_mci_set_cto(struct dw_mci *host)
cto_div = (mci_readl(host, CLKDIV) & 0xff) * 2;
if (cto_div == 0)
cto_div = 1;
- cto_ms = DIV_ROUND_UP(MSEC_PER_SEC * cto_clks * cto_div, host->bus_hz);
+
+ cto_ms = DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * cto_clks * cto_div,
+ host->bus_hz);
/* add a bit of spare time */
cto_ms += 10;
@@ -558,6 +564,7 @@ static int dw_mci_idmac_init(struct dw_mci *host)
(sizeof(struct idmac_desc_64addr) *
(i + 1))) >> 32;
/* Initialize reserved and buffer size fields to "0" */
+ p->des0 = 0;
p->des1 = 0;
p->des2 = 0;
p->des3 = 0;
@@ -580,6 +587,7 @@ static int dw_mci_idmac_init(struct dw_mci *host)
i++, p++) {
p->des3 = cpu_to_le32(host->sg_dma +
(sizeof(struct idmac_desc) * (i + 1)));
+ p->des0 = 0;
p->des1 = 0;
}
@@ -1795,8 +1803,8 @@ static bool dw_mci_reset(struct dw_mci *host)
}
if (host->use_dma == TRANS_MODE_IDMAC)
- /* It is also recommended that we reset and reprogram idmac */
- dw_mci_idmac_reset(host);
+ /* It is also required that we reinit idmac */
+ dw_mci_idmac_init(host);
ret = true;
@@ -1944,8 +1952,9 @@ static void dw_mci_set_drto(struct dw_mci *host)
drto_div = (mci_readl(host, CLKDIV) & 0xff) * 2;
if (drto_div == 0)
drto_div = 1;
- drto_ms = DIV_ROUND_UP(MSEC_PER_SEC * drto_clks * drto_div,
- host->bus_hz);
+
+ drto_ms = DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * drto_clks * drto_div,
+ host->bus_hz);
/* add a bit of spare time */
drto_ms += 10;
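The DIV_ROUND_UP_ULL conversions above guard a genuine 32-bit overflow: assuming drto_clks is a 24-bit register field, the worst-case product is roughly 1000 * 0xffffff * 510, about 8.6e12, far beyond the ~4.3e9 a u32 can hold. A sketch of the safe form:

	/* Hedged sketch (worst-case field widths assumed): the u64 cast
	 * keeps the multiply exact before the rounding division. */
	static u32 example_drto_ms(u32 drto_clks, u32 drto_div, u32 bus_hz)
	{
		return DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * drto_clks * drto_div,
					bus_hz);
	}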
@@ -2778,12 +2787,57 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static int dw_mci_init_slot_caps(struct dw_mci_slot *slot)
+{
+ struct dw_mci *host = slot->host;
+ const struct dw_mci_drv_data *drv_data = host->drv_data;
+ struct mmc_host *mmc = slot->mmc;
+ int ctrl_id;
+
+ if (host->pdata->caps)
+ mmc->caps = host->pdata->caps;
+
+ /*
+ * Support MMC_CAP_ERASE by default.
+ * It needs to use trim/discard/erase commands.
+ */
+ mmc->caps |= MMC_CAP_ERASE;
+
+ if (host->pdata->pm_caps)
+ mmc->pm_caps = host->pdata->pm_caps;
+
+ if (host->dev->of_node) {
+ ctrl_id = of_alias_get_id(host->dev->of_node, "mshc");
+ if (ctrl_id < 0)
+ ctrl_id = 0;
+ } else {
+ ctrl_id = to_platform_device(host->dev)->id;
+ }
+
+ if (drv_data && drv_data->caps) {
+ if (ctrl_id >= drv_data->num_caps) {
+ dev_err(host->dev, "invalid controller id %d\n",
+ ctrl_id);
+ return -EINVAL;
+ }
+ mmc->caps |= drv_data->caps[ctrl_id];
+ }
+
+ if (host->pdata->caps2)
+ mmc->caps2 = host->pdata->caps2;
+
+ /* Process SDIO IRQs through the sdio_irq_work. */
+ if (mmc->caps & MMC_CAP_SDIO_IRQ)
+ mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
+
+ return 0;
+}
+
static int dw_mci_init_slot(struct dw_mci *host)
{
struct mmc_host *mmc;
struct dw_mci_slot *slot;
- const struct dw_mci_drv_data *drv_data = host->drv_data;
- int ctrl_id, ret;
+ int ret;
u32 freq[2];
mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), host->dev);
@@ -2817,38 +2871,13 @@ static int dw_mci_init_slot(struct dw_mci *host)
if (!mmc->ocr_avail)
mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
- if (host->pdata->caps)
- mmc->caps = host->pdata->caps;
-
- /*
- * Support MMC_CAP_ERASE by default.
- * It needs to use trim/discard/erase commands.
- */
- mmc->caps |= MMC_CAP_ERASE;
-
- if (host->pdata->pm_caps)
- mmc->pm_caps = host->pdata->pm_caps;
-
- if (host->dev->of_node) {
- ctrl_id = of_alias_get_id(host->dev->of_node, "mshc");
- if (ctrl_id < 0)
- ctrl_id = 0;
- } else {
- ctrl_id = to_platform_device(host->dev)->id;
- }
- if (drv_data && drv_data->caps)
- mmc->caps |= drv_data->caps[ctrl_id];
-
- if (host->pdata->caps2)
- mmc->caps2 = host->pdata->caps2;
-
ret = mmc_of_parse(mmc);
if (ret)
goto err_host_allocated;
- /* Process SDIO IRQs through the sdio_irq_work. */
- if (mmc->caps & MMC_CAP_SDIO_IRQ)
- mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
+ ret = dw_mci_init_slot_caps(slot);
+ if (ret)
+ goto err_host_allocated;
/* Useful defaults if platform data is unset. */
if (host->use_dma == TRANS_MODE_IDMAC) {
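With num_caps in place, a driver that exposes per-controller caps sizes both fields from the same array, mirroring the exynos/hi6220/rk3288/zx updates above; a minimal sketch:

	/* Minimal drv_data sketch that satisfies the new bounds check. */
	static unsigned long example_dwmmc_caps[2] = {
		MMC_CAP_4_BIT_DATA,
		MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA,
	};

	static const struct dw_mci_drv_data example_drv_data = {
		.caps		= example_dwmmc_caps,
		.num_caps	= ARRAY_SIZE(example_dwmmc_caps),
	};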
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index e3124f06a47e..1424bd490dd1 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -543,6 +543,7 @@ struct dw_mci_slot {
/**
* dw_mci driver data - dw-mshc implementation specific driver data.
* @caps: mmc subsystem specified capabilities of the controller(s).
+ * @num_caps: number of capabilities specified by @caps.
* @init: early implementation specific initialization.
* @set_ios: handle bus specific extensions.
* @parse_dt: parse implementation specific device tree properties.
@@ -554,6 +555,7 @@ struct dw_mci_slot {
*/
struct dw_mci_drv_data {
unsigned long *caps;
+ u32 num_caps;
int (*init)(struct dw_mci *host);
void (*set_ios)(struct dw_mci *host, struct mmc_ios *ios);
int (*parse_dt)(struct dw_mci *host);
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 22438ebfe4e6..4f972b879fe6 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -717,22 +717,6 @@ static int meson_mmc_clk_phase_tuning(struct mmc_host *mmc, u32 opcode,
static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
{
struct meson_host *host = mmc_priv(mmc);
- int ret;
-
- /*
- * If this is the initial tuning, try to get a sane Rx starting
- * phase before doing the actual tuning.
- */
- if (!mmc->doing_retune) {
- ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk);
-
- if (ret)
- return ret;
- }
-
- ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->tx_clk);
- if (ret)
- return ret;
return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk);
}
@@ -763,9 +747,8 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
if (!IS_ERR(mmc->supply.vmmc))
mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
- /* Reset phases */
+ /* Reset rx phase */
clk_set_phase(host->rx_clk, 0);
- clk_set_phase(host->tx_clk, 270);
break;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 4065da58789d..32321bd596d8 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -680,7 +680,7 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
host->hw_name = "ACPI";
host->ops = &sdhci_acpi_ops_dflt;
host->irq = platform_get_irq(pdev, 0);
- if (host->irq <= 0) {
+ if (host->irq < 0) {
err = -EINVAL;
goto err_free;
}
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 6d1a983e6227..82c4f05f91d8 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -654,9 +654,36 @@ static void byt_read_dsm(struct sdhci_pci_slot *slot)
slot->chip->rpm_retune = intel_host->d3_retune;
}
-static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
+static int intel_execute_tuning(struct mmc_host *mmc, u32 opcode)
+{
+ int err = sdhci_execute_tuning(mmc, opcode);
+ struct sdhci_host *host = mmc_priv(mmc);
+
+ if (err)
+ return err;
+
+ /*
+ * Tuning can leave the IP in an active state (Buffer Read Enable bit
+ * set), which prevents entry to low-power states (e.g. S0i3). A data
+ * reset will clear it.
+ */
+ sdhci_reset(host, SDHCI_RESET_DATA);
+
+ return 0;
+}
+
+static void byt_probe_slot(struct sdhci_pci_slot *slot)
{
+ struct mmc_host_ops *ops = &slot->host->mmc_host_ops;
+
byt_read_dsm(slot);
+
+ ops->execute_tuning = intel_execute_tuning;
+}
+
+static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
+{
+ byt_probe_slot(slot);
slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR |
MMC_CAP_CMD_DURING_TFR |
@@ -779,7 +806,7 @@ static int ni_byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
{
int err;
- byt_read_dsm(slot);
+ byt_probe_slot(slot);
err = ni_set_max_freq(slot);
if (err)
@@ -792,7 +819,7 @@ static int ni_byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
{
- byt_read_dsm(slot);
+ byt_probe_slot(slot);
slot->host->mmc->caps |= MMC_CAP_POWER_OFF_CARD | MMC_CAP_NONREMOVABLE |
MMC_CAP_WAIT_WHILE_BUSY;
return 0;
@@ -800,7 +827,7 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
static int byt_sd_probe_slot(struct sdhci_pci_slot *slot)
{
- byt_read_dsm(slot);
+ byt_probe_slot(slot);
slot->host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY |
MMC_CAP_AGGRESSIVE_PM | MMC_CAP_CD_WAKE;
slot->cd_idx = 0;
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index e6b8c59f2c0d..736ac887303c 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -328,7 +328,7 @@ config MTD_NAND_MARVELL
tristate "NAND controller support on Marvell boards"
depends on PXA3xx || ARCH_MMP || PLAT_ORION || ARCH_MVEBU || \
COMPILE_TEST
- depends on HAS_IOMEM
+ depends on HAS_IOMEM && HAS_DMA
help
This enables the NAND flash controller driver for Marvell boards,
including:
diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
index 80d31a58e558..f367144f3c6f 100644
--- a/drivers/mtd/nand/vf610_nfc.c
+++ b/drivers/mtd/nand/vf610_nfc.c
@@ -752,10 +752,8 @@ static int vf610_nfc_probe(struct platform_device *pdev)
if (mtd->oobsize > 64)
mtd->oobsize = 64;
- /*
- * mtd->ecclayout is not specified here because we're using the
- * default large page ECC layout defined in NAND core.
- */
+ /* Use default large page ECC layout defined in NAND core */
+ mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
if (chip->ecc.strength == 32) {
nfc->ecc_mode = ECC_60_BYTE;
chip->ecc.bytes = 60;
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 944ec3c9282c..08b85215c2be 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,9 +149,9 @@ config MACVTAP
config IPVLAN
tristate "IP-VLAN support"
depends on INET
- depends on IPV6
+ depends on IPV6 || !IPV6
depends on NETFILTER
- depends on NET_L3_MASTER_DEV
+ select NET_L3_MASTER_DEV
---help---
This allows one to create virtual devices off of a main interface
and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 11fe71278f40..3afda6561434 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -114,12 +114,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */
{mvme147lance_probe, 0},
#endif
-#ifdef CONFIG_MAC8390 /* NuBus NS8390-based cards */
- {mac8390_probe, 0},
-#endif
-#ifdef CONFIG_MAC89x0
- {mac89x0_probe, 0},
-#endif
{NULL, 0},
};
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index f7799321dffb..01059f1a7bca 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -287,7 +287,7 @@ void bond_create_proc_entry(struct bonding *bond)
if (bn->proc_dir) {
bond->proc_entry = proc_create_data(bond_dev->name,
- S_IRUGO, bn->proc_dir,
+ 0444, bn->proc_dir,
&bond_info_fops, bond);
if (bond->proc_entry == NULL)
netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n",
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 040b493f60ae..6096440e96ea 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -147,7 +147,7 @@ err_no_cmd:
static const struct class_attribute class_attr_bonding_masters = {
.attr = {
.name = "bonding_masters",
- .mode = S_IWUSR | S_IRUGO,
+ .mode = 0644,
},
.show = bonding_show_bonds,
.store = bonding_store_bonds,
@@ -202,7 +202,7 @@ static ssize_t bonding_show_slaves(struct device *d,
return res;
}
-static DEVICE_ATTR(slaves, S_IRUGO | S_IWUSR, bonding_show_slaves,
+static DEVICE_ATTR(slaves, 0644, bonding_show_slaves,
bonding_sysfs_store_option);
/* Show the bonding mode. */
@@ -216,8 +216,7 @@ static ssize_t bonding_show_mode(struct device *d,
return sprintf(buf, "%s %d\n", val->string, BOND_MODE(bond));
}
-static DEVICE_ATTR(mode, S_IRUGO | S_IWUSR,
- bonding_show_mode, bonding_sysfs_store_option);
+static DEVICE_ATTR(mode, 0644, bonding_show_mode, bonding_sysfs_store_option);
/* Show the bonding transmit hash method. */
static ssize_t bonding_show_xmit_hash(struct device *d,
@@ -231,7 +230,7 @@ static ssize_t bonding_show_xmit_hash(struct device *d,
return sprintf(buf, "%s %d\n", val->string, bond->params.xmit_policy);
}
-static DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(xmit_hash_policy, 0644,
bonding_show_xmit_hash, bonding_sysfs_store_option);
/* Show arp_validate. */
@@ -247,7 +246,7 @@ static ssize_t bonding_show_arp_validate(struct device *d,
return sprintf(buf, "%s %d\n", val->string, bond->params.arp_validate);
}
-static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate,
+static DEVICE_ATTR(arp_validate, 0644, bonding_show_arp_validate,
bonding_sysfs_store_option);
/* Show arp_all_targets. */
@@ -263,7 +262,7 @@ static ssize_t bonding_show_arp_all_targets(struct device *d,
return sprintf(buf, "%s %d\n",
val->string, bond->params.arp_all_targets);
}
-static DEVICE_ATTR(arp_all_targets, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(arp_all_targets, 0644,
bonding_show_arp_all_targets, bonding_sysfs_store_option);
/* Show fail_over_mac. */
@@ -279,7 +278,7 @@ static ssize_t bonding_show_fail_over_mac(struct device *d,
return sprintf(buf, "%s %d\n", val->string, bond->params.fail_over_mac);
}
-static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(fail_over_mac, 0644,
bonding_show_fail_over_mac, bonding_sysfs_store_option);
/* Show the arp timer interval. */
@@ -291,7 +290,7 @@ static ssize_t bonding_show_arp_interval(struct device *d,
return sprintf(buf, "%d\n", bond->params.arp_interval);
}
-static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(arp_interval, 0644,
bonding_show_arp_interval, bonding_sysfs_store_option);
/* Show the arp targets. */
@@ -312,7 +311,7 @@ static ssize_t bonding_show_arp_targets(struct device *d,
return res;
}
-static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(arp_ip_target, 0644,
bonding_show_arp_targets, bonding_sysfs_store_option);
/* Show the up and down delays. */
@@ -324,7 +323,7 @@ static ssize_t bonding_show_downdelay(struct device *d,
return sprintf(buf, "%d\n", bond->params.downdelay * bond->params.miimon);
}
-static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(downdelay, 0644,
bonding_show_downdelay, bonding_sysfs_store_option);
static ssize_t bonding_show_updelay(struct device *d,
@@ -336,7 +335,7 @@ static ssize_t bonding_show_updelay(struct device *d,
return sprintf(buf, "%d\n", bond->params.updelay * bond->params.miimon);
}
-static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(updelay, 0644,
bonding_show_updelay, bonding_sysfs_store_option);
/* Show the LACP interval. */
@@ -351,7 +350,7 @@ static ssize_t bonding_show_lacp(struct device *d,
return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
}
-static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(lacp_rate, 0644,
bonding_show_lacp, bonding_sysfs_store_option);
static ssize_t bonding_show_min_links(struct device *d,
@@ -362,7 +361,7 @@ static ssize_t bonding_show_min_links(struct device *d,
return sprintf(buf, "%u\n", bond->params.min_links);
}
-static DEVICE_ATTR(min_links, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(min_links, 0644,
bonding_show_min_links, bonding_sysfs_store_option);
static ssize_t bonding_show_ad_select(struct device *d,
@@ -376,7 +375,7 @@ static ssize_t bonding_show_ad_select(struct device *d,
return sprintf(buf, "%s %d\n", val->string, bond->params.ad_select);
}
-static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(ad_select, 0644,
bonding_show_ad_select, bonding_sysfs_store_option);
/* Show the number of peer notifications to send after a failover event. */
@@ -387,9 +386,9 @@ static ssize_t bonding_show_num_peer_notif(struct device *d,
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.num_peer_notif);
}
-static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(num_grat_arp, 0644,
bonding_show_num_peer_notif, bonding_sysfs_store_option);
-static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(num_unsol_na, 0644,
bonding_show_num_peer_notif, bonding_sysfs_store_option);
/* Show the MII monitor interval. */
@@ -401,7 +400,7 @@ static ssize_t bonding_show_miimon(struct device *d,
return sprintf(buf, "%d\n", bond->params.miimon);
}
-static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(miimon, 0644,
bonding_show_miimon, bonding_sysfs_store_option);
/* Show the primary slave. */
@@ -421,7 +420,7 @@ static ssize_t bonding_show_primary(struct device *d,
return count;
}
-static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(primary, 0644,
bonding_show_primary, bonding_sysfs_store_option);
/* Show the primary_reselect flag. */
@@ -438,7 +437,7 @@ static ssize_t bonding_show_primary_reselect(struct device *d,
return sprintf(buf, "%s %d\n",
val->string, bond->params.primary_reselect);
}
-static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(primary_reselect, 0644,
bonding_show_primary_reselect, bonding_sysfs_store_option);
/* Show the use_carrier flag. */
@@ -450,7 +449,7 @@ static ssize_t bonding_show_carrier(struct device *d,
return sprintf(buf, "%d\n", bond->params.use_carrier);
}
-static DEVICE_ATTR(use_carrier, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(use_carrier, 0644,
bonding_show_carrier, bonding_sysfs_store_option);
@@ -471,7 +470,7 @@ static ssize_t bonding_show_active_slave(struct device *d,
return count;
}
-static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(active_slave, 0644,
bonding_show_active_slave, bonding_sysfs_store_option);
/* Show link status of the bond interface. */
@@ -484,7 +483,7 @@ static ssize_t bonding_show_mii_status(struct device *d,
return sprintf(buf, "%s\n", active ? "up" : "down");
}
-static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL);
+static DEVICE_ATTR(mii_status, 0444, bonding_show_mii_status, NULL);
/* Show current 802.3ad aggregator ID. */
static ssize_t bonding_show_ad_aggregator(struct device *d,
@@ -503,7 +502,7 @@ static ssize_t bonding_show_ad_aggregator(struct device *d,
return count;
}
-static DEVICE_ATTR(ad_aggregator, S_IRUGO, bonding_show_ad_aggregator, NULL);
+static DEVICE_ATTR(ad_aggregator, 0444, bonding_show_ad_aggregator, NULL);
/* Show number of active 802.3ad ports. */
@@ -523,7 +522,7 @@ static ssize_t bonding_show_ad_num_ports(struct device *d,
return count;
}
-static DEVICE_ATTR(ad_num_ports, S_IRUGO, bonding_show_ad_num_ports, NULL);
+static DEVICE_ATTR(ad_num_ports, 0444, bonding_show_ad_num_ports, NULL);
/* Show current 802.3ad actor key. */
@@ -543,7 +542,7 @@ static ssize_t bonding_show_ad_actor_key(struct device *d,
return count;
}
-static DEVICE_ATTR(ad_actor_key, S_IRUGO, bonding_show_ad_actor_key, NULL);
+static DEVICE_ATTR(ad_actor_key, 0444, bonding_show_ad_actor_key, NULL);
/* Show current 802.3ad partner key. */
@@ -563,7 +562,7 @@ static ssize_t bonding_show_ad_partner_key(struct device *d,
return count;
}
-static DEVICE_ATTR(ad_partner_key, S_IRUGO, bonding_show_ad_partner_key, NULL);
+static DEVICE_ATTR(ad_partner_key, 0444, bonding_show_ad_partner_key, NULL);
/* Show current 802.3ad partner mac. */
@@ -582,7 +581,7 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,
return count;
}
-static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL);
+static DEVICE_ATTR(ad_partner_mac, 0444, bonding_show_ad_partner_mac, NULL);
/* Show the queue_ids of the slaves in the current bond. */
static ssize_t bonding_show_queue_id(struct device *d,
@@ -615,7 +614,7 @@ static ssize_t bonding_show_queue_id(struct device *d,
return res;
}
-static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id,
+static DEVICE_ATTR(queue_id, 0644, bonding_show_queue_id,
bonding_sysfs_store_option);
@@ -628,7 +627,7 @@ static ssize_t bonding_show_slaves_active(struct device *d,
return sprintf(buf, "%d\n", bond->params.all_slaves_active);
}
-static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(all_slaves_active, 0644,
bonding_show_slaves_active, bonding_sysfs_store_option);
/* Show the number of IGMP membership reports to send on link failure */
@@ -640,7 +639,7 @@ static ssize_t bonding_show_resend_igmp(struct device *d,
return sprintf(buf, "%d\n", bond->params.resend_igmp);
}
-static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(resend_igmp, 0644,
bonding_show_resend_igmp, bonding_sysfs_store_option);
@@ -652,7 +651,7 @@ static ssize_t bonding_show_lp_interval(struct device *d,
return sprintf(buf, "%d\n", bond->params.lp_interval);
}
-static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(lp_interval, 0644,
bonding_show_lp_interval, bonding_sysfs_store_option);
static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
@@ -662,7 +661,7 @@ static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb);
}
-static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(tlb_dynamic_lb, 0644,
bonding_show_tlb_dynamic_lb, bonding_sysfs_store_option);
static ssize_t bonding_show_packets_per_slave(struct device *d,
@@ -674,7 +673,7 @@ static ssize_t bonding_show_packets_per_slave(struct device *d,
return sprintf(buf, "%u\n", packets_per_slave);
}
-static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(packets_per_slave, 0644,
bonding_show_packets_per_slave, bonding_sysfs_store_option);
static ssize_t bonding_show_ad_actor_sys_prio(struct device *d,
@@ -688,7 +687,7 @@ static ssize_t bonding_show_ad_actor_sys_prio(struct device *d,
return 0;
}
-static DEVICE_ATTR(ad_actor_sys_prio, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(ad_actor_sys_prio, 0644,
bonding_show_ad_actor_sys_prio, bonding_sysfs_store_option);
static ssize_t bonding_show_ad_actor_system(struct device *d,
@@ -703,7 +702,7 @@ static ssize_t bonding_show_ad_actor_system(struct device *d,
return 0;
}
-static DEVICE_ATTR(ad_actor_system, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(ad_actor_system, 0644,
bonding_show_ad_actor_system, bonding_sysfs_store_option);
static ssize_t bonding_show_ad_user_port_key(struct device *d,
@@ -717,7 +716,7 @@ static ssize_t bonding_show_ad_user_port_key(struct device *d,
return 0;
}
-static DEVICE_ATTR(ad_user_port_key, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(ad_user_port_key, 0644,
bonding_show_ad_user_port_key, bonding_sysfs_store_option);
static struct attribute *per_bond_attrs[] = {
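The S_I* to octal conversions in this file are mechanical; a compile-time sketch of the equivalences being relied on:

	#include <linux/build_bug.h>
	#include <linux/stat.h>

	/* Each converted mode is bit-for-bit identical to the macro form. */
	static void check_mode_equivalence(void)
	{
		BUILD_BUG_ON(S_IRUGO != 0444);			/* r--r--r-- */
		BUILD_BUG_ON(S_IRUSR != 0400);			/* r-------- */
		BUILD_BUG_ON((S_IRUGO | S_IWUSR) != 0644);	/* rw-r--r-- */
	}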
diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c
index 7d16c51e6913..2f120b2ffef0 100644
--- a/drivers/net/bonding/bond_sysfs_slave.c
+++ b/drivers/net/bonding/bond_sysfs_slave.c
@@ -25,8 +25,8 @@ const struct slave_attribute slave_attr_##_name = { \
.mode = _mode }, \
.show = _show, \
};
-#define SLAVE_ATTR_RO(_name) \
- SLAVE_ATTR(_name, S_IRUGO, _name##_show)
+#define SLAVE_ATTR_RO(_name) \
+ SLAVE_ATTR(_name, 0444, _name##_show)
static ssize_t state_show(struct slave *slave, char *buf)
{
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 709838e4c062..a0f954f36c09 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -40,20 +40,20 @@ static LIST_HEAD(ser_list);
static LIST_HEAD(ser_release_list);
static bool ser_loop;
-module_param(ser_loop, bool, S_IRUGO);
+module_param(ser_loop, bool, 0444);
MODULE_PARM_DESC(ser_loop, "Run in simulated loopback mode.");
static bool ser_use_stx = true;
-module_param(ser_use_stx, bool, S_IRUGO);
+module_param(ser_use_stx, bool, 0444);
MODULE_PARM_DESC(ser_use_stx, "STX enabled or not.");
static bool ser_use_fcs = true;
-module_param(ser_use_fcs, bool, S_IRUGO);
+module_param(ser_use_fcs, bool, 0444);
MODULE_PARM_DESC(ser_use_fcs, "FCS enabled or not.");
static int ser_write_chunk = MAX_WRITE_CHUNK;
-module_param(ser_write_chunk, int, S_IRUGO);
+module_param(ser_write_chunk, int, 0444);
MODULE_PARM_DESC(ser_write_chunk, "Maximum size of data written to UART.");
@@ -97,21 +97,21 @@ static inline void debugfs_init(struct ser_device *ser, struct tty_struct *tty)
ser->debugfs_tty_dir =
debugfs_create_dir(tty->name, debugfsdir);
if (!IS_ERR(ser->debugfs_tty_dir)) {
- debugfs_create_blob("last_tx_msg", S_IRUSR,
- ser->debugfs_tty_dir,
- &ser->tx_blob);
+ debugfs_create_blob("last_tx_msg", 0400,
+ ser->debugfs_tty_dir,
+ &ser->tx_blob);
- debugfs_create_blob("last_rx_msg", S_IRUSR,
- ser->debugfs_tty_dir,
- &ser->rx_blob);
+ debugfs_create_blob("last_rx_msg", 0400,
+ ser->debugfs_tty_dir,
+ &ser->rx_blob);
- debugfs_create_x32("ser_state", S_IRUSR,
- ser->debugfs_tty_dir,
- (u32 *)&ser->state);
+ debugfs_create_x32("ser_state", 0400,
+ ser->debugfs_tty_dir,
+ (u32 *)&ser->state);
- debugfs_create_x8("tty_status", S_IRUSR,
- ser->debugfs_tty_dir,
- &ser->tty_status);
+ debugfs_create_x8("tty_status", 0400,
+ ser->debugfs_tty_dir,
+ &ser->tty_status);
}
ser->tx_blob.data = ser->tx_data;
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index 980eace53d44..d28a1398c091 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -35,27 +35,27 @@ MODULE_DESCRIPTION("CAIF SPI driver");
#define PAD_POW2(x, pow) ((((x)&((pow)-1))==0) ? 0 : (((pow)-((x)&((pow)-1)))))
static bool spi_loop;
-module_param(spi_loop, bool, S_IRUGO);
+module_param(spi_loop, bool, 0444);
MODULE_PARM_DESC(spi_loop, "SPI running in loopback mode.");
/* SPI frame alignment. */
-module_param(spi_frm_align, int, S_IRUGO);
+module_param(spi_frm_align, int, 0444);
MODULE_PARM_DESC(spi_frm_align, "SPI frame alignment.");
/*
* SPI padding options.
* Warning: must be a power of 2 (& operation used) and cannot be zero!
*/
-module_param(spi_up_head_align, int, S_IRUGO);
+module_param(spi_up_head_align, int, 0444);
MODULE_PARM_DESC(spi_up_head_align, "SPI uplink head alignment.");
-module_param(spi_up_tail_align, int, S_IRUGO);
+module_param(spi_up_tail_align, int, 0444);
MODULE_PARM_DESC(spi_up_tail_align, "SPI uplink tail alignment.");
-module_param(spi_down_head_align, int, S_IRUGO);
+module_param(spi_down_head_align, int, 0444);
MODULE_PARM_DESC(spi_down_head_align, "SPI downlink head alignment.");
-module_param(spi_down_tail_align, int, S_IRUGO);
+module_param(spi_down_tail_align, int, 0444);
MODULE_PARM_DESC(spi_down_tail_align, "SPI downlink tail alignment.");
#ifdef CONFIG_ARM
@@ -250,10 +250,10 @@ static const struct file_operations dbgfs_frame_fops = {
static inline void dev_debugfs_add(struct cfspi *cfspi)
{
cfspi->dbgfs_dir = debugfs_create_dir(cfspi->pdev->name, dbgfs_root);
- cfspi->dbgfs_state = debugfs_create_file("state", S_IRUGO,
+ cfspi->dbgfs_state = debugfs_create_file("state", 0444,
cfspi->dbgfs_dir, cfspi,
&dbgfs_state_fops);
- cfspi->dbgfs_frame = debugfs_create_file("frame", S_IRUGO,
+ cfspi->dbgfs_frame = debugfs_create_file("frame", 0444,
cfspi->dbgfs_dir, cfspi,
&dbgfs_frame_fops);
}
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index c3d104feee13..2814e0dee4bb 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -629,21 +629,21 @@ static inline void debugfs_init(struct cfv_info *cfv)
if (IS_ERR(cfv->debugfs))
return;
- debugfs_create_u32("rx-napi-complete", S_IRUSR, cfv->debugfs,
+ debugfs_create_u32("rx-napi-complete", 0400, cfv->debugfs,
&cfv->stats.rx_napi_complete);
- debugfs_create_u32("rx-napi-resched", S_IRUSR, cfv->debugfs,
+ debugfs_create_u32("rx-napi-resched", 0400, cfv->debugfs,
&cfv->stats.rx_napi_resched);
- debugfs_create_u32("rx-nomem", S_IRUSR, cfv->debugfs,
+ debugfs_create_u32("rx-nomem", 0400, cfv->debugfs,
&cfv->stats.rx_nomem);
- debugfs_create_u32("rx-kicks", S_IRUSR, cfv->debugfs,
+ debugfs_create_u32("rx-kicks", 0400, cfv->debugfs,
&cfv->stats.rx_kicks);
- debugfs_create_u32("tx-full-ring", S_IRUSR, cfv->debugfs,
+ debugfs_create_u32("tx-full-ring", 0400, cfv->debugfs,
&cfv->stats.tx_full_ring);
- debugfs_create_u32("tx-no-mem", S_IRUSR, cfv->debugfs,
+ debugfs_create_u32("tx-no-mem", 0400, cfv->debugfs,
&cfv->stats.tx_no_mem);
- debugfs_create_u32("tx-kicks", S_IRUSR, cfv->debugfs,
+ debugfs_create_u32("tx-kicks", 0400, cfv->debugfs,
&cfv->stats.tx_kicks);
- debugfs_create_u32("tx-flow-on", S_IRUSR, cfv->debugfs,
+ debugfs_create_u32("tx-flow-on", 0400, cfv->debugfs,
&cfv->stats.tx_flow_on);
}
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index f37ce0e1b603..d98c69045b17 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -1224,8 +1224,7 @@ static ssize_t at91_sysfs_set_mb0_id(struct device *dev,
return ret;
}
-static DEVICE_ATTR(mb0_id, S_IWUSR | S_IRUGO,
- at91_sysfs_show_mb0_id, at91_sysfs_set_mb0_id);
+static DEVICE_ATTR(mb0_id, 0644, at91_sysfs_show_mb0_id, at91_sysfs_set_mb0_id);
static struct attribute *at91_sysfs_attrs[] = {
&dev_attr_mb0_id.attr,
diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
index 1e37313054f3..d4dd4da23997 100644
--- a/drivers/net/can/cc770/cc770.c
+++ b/drivers/net/can/cc770/cc770.c
@@ -67,12 +67,12 @@ MODULE_DESCRIPTION(KBUILD_MODNAME "CAN netdevice driver");
* otherwise 11 bit SFF messages.
*/
static int msgobj15_eff;
-module_param(msgobj15_eff, int, S_IRUGO);
+module_param(msgobj15_eff, int, 0444);
MODULE_PARM_DESC(msgobj15_eff, "Extended 29-bit frames for message object 15 "
"(default: 11-bit standard frames)");
static int i82527_compat;
-module_param(i82527_compat, int, S_IRUGO);
+module_param(i82527_compat, int, 0444);
MODULE_PARM_DESC(i82527_compat, "Strict Intel 82527 comptibility mode "
"without using additional functions");
@@ -390,37 +390,23 @@ static int cc770_get_berr_counter(const struct net_device *dev,
return 0;
}
-static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static void cc770_tx(struct net_device *dev, int mo)
{
struct cc770_priv *priv = netdev_priv(dev);
- struct net_device_stats *stats = &dev->stats;
- struct can_frame *cf = (struct can_frame *)skb->data;
- unsigned int mo = obj2msgobj(CC770_OBJ_TX);
+ struct can_frame *cf = (struct can_frame *)priv->tx_skb->data;
u8 dlc, rtr;
u32 id;
int i;
- if (can_dropped_invalid_skb(dev, skb))
- return NETDEV_TX_OK;
-
- if ((cc770_read_reg(priv,
- msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
- netdev_err(dev, "TX register is still occupied!\n");
- return NETDEV_TX_BUSY;
- }
-
- netif_stop_queue(dev);
-
dlc = cf->can_dlc;
id = cf->can_id;
- if (cf->can_id & CAN_RTR_FLAG)
- rtr = 0;
- else
- rtr = MSGCFG_DIR;
+ rtr = cf->can_id & CAN_RTR_FLAG ? 0 : MSGCFG_DIR;
+
+ cc770_write_reg(priv, msgobj[mo].ctrl0,
+ MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
cc770_write_reg(priv, msgobj[mo].ctrl1,
RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES);
- cc770_write_reg(priv, msgobj[mo].ctrl0,
- MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES);
+
if (id & CAN_EFF_FLAG) {
id &= CAN_EFF_MASK;
cc770_write_reg(priv, msgobj[mo].config,
@@ -439,22 +425,30 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
for (i = 0; i < dlc; i++)
cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]);
- /* Store echo skb before starting the transfer */
- can_put_echo_skb(skb, dev, 0);
-
cc770_write_reg(priv, msgobj[mo].ctrl1,
- RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
+ RMTPND_UNC | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
+ cc770_write_reg(priv, msgobj[mo].ctrl0,
+ MSGVAL_SET | TXIE_SET | RXIE_SET | INTPND_UNC);
+}
- stats->tx_bytes += dlc;
+static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct cc770_priv *priv = netdev_priv(dev);
+ unsigned int mo = obj2msgobj(CC770_OBJ_TX);
+ if (can_dropped_invalid_skb(dev, skb))
+ return NETDEV_TX_OK;
- /*
- * HM: We had some cases of repeated IRQs so make sure the
- * INT is acknowledged I know it's already further up, but
- * doing again fixed the issue
- */
- cc770_write_reg(priv, msgobj[mo].ctrl0,
- MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES);
+ netif_stop_queue(dev);
+
+ if ((cc770_read_reg(priv,
+ msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
+ netdev_err(dev, "TX register is still occupied!\n");
+ return NETDEV_TX_BUSY;
+ }
+
+ priv->tx_skb = skb;
+ cc770_tx(dev, mo);
return NETDEV_TX_OK;
}
@@ -680,19 +674,46 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o)
struct cc770_priv *priv = netdev_priv(dev);
struct net_device_stats *stats = &dev->stats;
unsigned int mo = obj2msgobj(o);
+ struct can_frame *cf;
+ u8 ctrl1;
+
+ ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
- /* Nothing more to send, switch off interrupts */
cc770_write_reg(priv, msgobj[mo].ctrl0,
MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
- /*
- * We had some cases of repeated IRQ so make sure the
- * INT is acknowledged
+ cc770_write_reg(priv, msgobj[mo].ctrl1,
+ RMTPND_RES | TXRQST_RES | MSGLST_RES | NEWDAT_RES);
+
+ if (unlikely(!priv->tx_skb)) {
+ netdev_err(dev, "missing tx skb in tx interrupt\n");
+ return;
+ }
+
+ if (unlikely(ctrl1 & MSGLST_SET)) {
+ stats->rx_over_errors++;
+ stats->rx_errors++;
+ }
+
+ /* When the CC770 is sending an RTR message and it receives a regular
+ * message that matches the id of the RTR message, it will overwrite the
+ * outgoing message in the TX register. When this happens we must
+ * process the received message and try to transmit the outgoing skb
+ * again.
*/
- cc770_write_reg(priv, msgobj[mo].ctrl0,
- MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES);
+ if (unlikely(ctrl1 & NEWDAT_SET)) {
+ cc770_rx(dev, mo, ctrl1);
+ cc770_tx(dev, mo);
+ return;
+ }
+ cf = (struct can_frame *)priv->tx_skb->data;
+ stats->tx_bytes += cf->can_dlc;
stats->tx_packets++;
+
+ can_put_echo_skb(priv->tx_skb, dev, 0);
can_get_echo_skb(dev, 0);
+ priv->tx_skb = NULL;
+
netif_wake_queue(dev);
}
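
In outline, the reworked cc770 path defers the echo until the TX interrupt confirms the frame actually left the controller (sketch of the flow established by the two hunks above):

/*
 * cc770_start_xmit()                     cc770_tx_interrupt()
 *   netif_stop_queue(dev);                 ctrl1 = read(msgobj.ctrl1);
 *   priv->tx_skb = skb;                    if (MSGLST_SET)  -> count rx overflow
 *   cc770_tx(dev, mo);   --- IRQ --->      if (NEWDAT_SET)  -> cc770_rx(); cc770_tx(); return;
 *                                          can_put_echo_skb(priv->tx_skb, dev, 0);
 *                                          can_get_echo_skb(dev, 0);
 *                                          priv->tx_skb = NULL;
 *                                          netif_wake_queue(dev);
 */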
@@ -804,6 +825,7 @@ struct net_device *alloc_cc770dev(int sizeof_priv)
priv->can.do_set_bittiming = cc770_set_bittiming;
priv->can.do_set_mode = cc770_set_mode;
priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
+ priv->tx_skb = NULL;
memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags));
diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h
index a1739db98d91..95752e1d1283 100644
--- a/drivers/net/can/cc770/cc770.h
+++ b/drivers/net/can/cc770/cc770.h
@@ -193,6 +193,8 @@ struct cc770_priv {
u8 cpu_interface; /* CPU interface register */
u8 clkout; /* Clock out register */
u8 bus_config;	/* Bus configuration register */
+
+ struct sk_buff *tx_skb;
};
struct net_device *alloc_cc770dev(int sizeof_priv);
diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c
index 3a30fd3b4498..fcd34698074f 100644
--- a/drivers/net/can/cc770/cc770_isa.c
+++ b/drivers/net/can/cc770/cc770_isa.c
@@ -82,29 +82,29 @@ static u8 cor[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
static u8 bcr[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
static int indirect[MAXDEV] = {[0 ... (MAXDEV - 1)] = -1};
-module_param_hw_array(port, ulong, ioport, NULL, S_IRUGO);
+module_param_hw_array(port, ulong, ioport, NULL, 0444);
MODULE_PARM_DESC(port, "I/O port number");
-module_param_hw_array(mem, ulong, iomem, NULL, S_IRUGO);
+module_param_hw_array(mem, ulong, iomem, NULL, 0444);
MODULE_PARM_DESC(mem, "I/O memory address");
-module_param_hw_array(indirect, int, ioport, NULL, S_IRUGO);
+module_param_hw_array(indirect, int, ioport, NULL, 0444);
MODULE_PARM_DESC(indirect, "Indirect access via address and data port");
-module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, 0444);
MODULE_PARM_DESC(irq, "IRQ number");
-module_param_array(clk, int, NULL, S_IRUGO);
+module_param_array(clk, int, NULL, 0444);
MODULE_PARM_DESC(clk, "External oscillator clock frequency "
"(default=16000000 [16 MHz])");
-module_param_array(cir, byte, NULL, S_IRUGO);
+module_param_array(cir, byte, NULL, 0444);
MODULE_PARM_DESC(cir, "CPU interface register (default=0x40 [DSC])");
-module_param_array(cor, byte, NULL, S_IRUGO);
+module_param_array(cor, byte, NULL, 0444);
MODULE_PARM_DESC(cor, "Clockout register (default=0x00)");
-module_param_array(bcr, byte, NULL, S_IRUGO);
+module_param_array(bcr, byte, NULL, 0444);
MODULE_PARM_DESC(bcr, "Bus configuration register (default=0x40 [CBY])");
#define CC770_IOSIZE 0x20
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index 897c6b113d3f..2d3046afa80d 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -1484,7 +1484,7 @@ static netdev_tx_t grcan_start_xmit(struct sk_buff *skb,
} \
} \
module_param_named(name, grcan_module_config.name, \
- mtype, S_IRUGO); \
+ mtype, 0444); \
MODULE_PARM_DESC(name, desc)
#define GRCAN_CONFIG_ATTR(name, desc) \
@@ -1513,7 +1513,7 @@ static netdev_tx_t grcan_start_xmit(struct sk_buff *skb,
struct grcan_priv *priv = netdev_priv(dev); \
return sprintf(buf, "%d\n", priv->config.name); \
} \
- static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, \
+ static DEVICE_ATTR(name, 0644, \
grcan_show_##name, \
grcan_store_##name); \
GRCAN_MODULE_PARAM(name, ushort, GRCAN_NOT_BOOL, desc)
diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c
index 2772d05ff11c..fedd927ba6ed 100644
--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
+++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
@@ -30,6 +30,7 @@
#define IFI_CANFD_STCMD_ERROR_ACTIVE BIT(2)
#define IFI_CANFD_STCMD_ERROR_PASSIVE BIT(3)
#define IFI_CANFD_STCMD_BUSOFF BIT(4)
+#define IFI_CANFD_STCMD_ERROR_WARNING BIT(5)
#define IFI_CANFD_STCMD_BUSMONITOR BIT(16)
#define IFI_CANFD_STCMD_LOOPBACK BIT(18)
#define IFI_CANFD_STCMD_DISABLE_CANFD BIT(24)
@@ -52,7 +53,10 @@
#define IFI_CANFD_TXSTCMD_OVERFLOW BIT(13)
#define IFI_CANFD_INTERRUPT 0xc
+#define IFI_CANFD_INTERRUPT_ERROR_BUSOFF BIT(0)
#define IFI_CANFD_INTERRUPT_ERROR_WARNING BIT(1)
+#define IFI_CANFD_INTERRUPT_ERROR_STATE_CHG BIT(2)
+#define IFI_CANFD_INTERRUPT_ERROR_REC_TEC_INC BIT(3)
#define IFI_CANFD_INTERRUPT_ERROR_COUNTER BIT(10)
#define IFI_CANFD_INTERRUPT_TXFIFO_EMPTY BIT(16)
#define IFI_CANFD_INTERRUPT_TXFIFO_REMOVE BIT(22)
@@ -61,6 +65,10 @@
#define IFI_CANFD_INTERRUPT_SET_IRQ ((u32)BIT(31))
#define IFI_CANFD_IRQMASK 0x10
+#define IFI_CANFD_IRQMASK_ERROR_BUSOFF BIT(0)
+#define IFI_CANFD_IRQMASK_ERROR_WARNING BIT(1)
+#define IFI_CANFD_IRQMASK_ERROR_STATE_CHG BIT(2)
+#define IFI_CANFD_IRQMASK_ERROR_REC_TEC_INC BIT(3)
#define IFI_CANFD_IRQMASK_SET_ERR BIT(7)
#define IFI_CANFD_IRQMASK_SET_TS BIT(15)
#define IFI_CANFD_IRQMASK_TXFIFO_EMPTY BIT(16)
@@ -136,6 +144,8 @@
#define IFI_CANFD_SYSCLOCK 0x50
#define IFI_CANFD_VER 0x54
+#define IFI_CANFD_VER_REV_MASK 0xff
+#define IFI_CANFD_VER_REV_MIN_SUPPORTED 0x15
#define IFI_CANFD_IP_ID 0x58
#define IFI_CANFD_IP_ID_VALUE 0xD073CAFD
@@ -220,7 +230,10 @@ static void ifi_canfd_irq_enable(struct net_device *ndev, bool enable)
if (enable) {
enirq = IFI_CANFD_IRQMASK_TXFIFO_EMPTY |
- IFI_CANFD_IRQMASK_RXFIFO_NEMPTY;
+ IFI_CANFD_IRQMASK_RXFIFO_NEMPTY |
+ IFI_CANFD_IRQMASK_ERROR_STATE_CHG |
+ IFI_CANFD_IRQMASK_ERROR_WARNING |
+ IFI_CANFD_IRQMASK_ERROR_BUSOFF;
if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
enirq |= IFI_CANFD_INTERRUPT_ERROR_COUNTER;
}
@@ -361,12 +374,13 @@ static int ifi_canfd_handle_lost_msg(struct net_device *ndev)
return 1;
}
-static int ifi_canfd_handle_lec_err(struct net_device *ndev, const u32 errctr)
+static int ifi_canfd_handle_lec_err(struct net_device *ndev)
{
struct ifi_canfd_priv *priv = netdev_priv(ndev);
struct net_device_stats *stats = &ndev->stats;
struct can_frame *cf;
struct sk_buff *skb;
+ u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR);
const u32 errmask = IFI_CANFD_ERROR_CTR_OVERLOAD_FIRST |
IFI_CANFD_ERROR_CTR_ACK_ERROR_FIRST |
IFI_CANFD_ERROR_CTR_BIT0_ERROR_FIRST |
@@ -449,6 +463,11 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev,
switch (new_state) {
case CAN_STATE_ERROR_ACTIVE:
+ /* error active state */
+ priv->can.can_stats.error_warning++;
+ priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ break;
+ case CAN_STATE_ERROR_WARNING:
/* error warning state */
priv->can.can_stats.error_warning++;
priv->can.state = CAN_STATE_ERROR_WARNING;
@@ -477,7 +496,7 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev,
ifi_canfd_get_berr_counter(ndev, &bec);
switch (new_state) {
- case CAN_STATE_ERROR_ACTIVE:
+ case CAN_STATE_ERROR_WARNING:
/* error warning state */
cf->can_id |= CAN_ERR_CRTL;
cf->data[1] = (bec.txerr > bec.rxerr) ?
@@ -510,22 +529,21 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev,
return 1;
}
-static int ifi_canfd_handle_state_errors(struct net_device *ndev, u32 stcmd)
+static int ifi_canfd_handle_state_errors(struct net_device *ndev)
{
struct ifi_canfd_priv *priv = netdev_priv(ndev);
+ u32 stcmd = readl(priv->base + IFI_CANFD_STCMD);
int work_done = 0;
- u32 isr;
- /*
- * The ErrWarn condition is a little special, since the bit is
- * located in the INTERRUPT register instead of STCMD register.
- */
- isr = readl(priv->base + IFI_CANFD_INTERRUPT);
- if ((isr & IFI_CANFD_INTERRUPT_ERROR_WARNING) &&
+ if ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) &&
+ (priv->can.state != CAN_STATE_ERROR_ACTIVE)) {
+ netdev_dbg(ndev, "Error, entered active state\n");
+ work_done += ifi_canfd_handle_state_change(ndev,
+ CAN_STATE_ERROR_ACTIVE);
+ }
+
+ if ((stcmd & IFI_CANFD_STCMD_ERROR_WARNING) &&
(priv->can.state != CAN_STATE_ERROR_WARNING)) {
- /* Clear the interrupt */
- writel(IFI_CANFD_INTERRUPT_ERROR_WARNING,
- priv->base + IFI_CANFD_INTERRUPT);
netdev_dbg(ndev, "Error, entered warning state\n");
work_done += ifi_canfd_handle_state_change(ndev,
CAN_STATE_ERROR_WARNING);
@@ -552,18 +570,11 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota)
{
struct net_device *ndev = napi->dev;
struct ifi_canfd_priv *priv = netdev_priv(ndev);
- const u32 stcmd_state_mask = IFI_CANFD_STCMD_ERROR_PASSIVE |
- IFI_CANFD_STCMD_BUSOFF;
- int work_done = 0;
-
- u32 stcmd = readl(priv->base + IFI_CANFD_STCMD);
u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD);
- u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR);
+ int work_done = 0;
/* Handle bus state changes */
- if ((stcmd & stcmd_state_mask) ||
- ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) == 0))
- work_done += ifi_canfd_handle_state_errors(ndev, stcmd);
+ work_done += ifi_canfd_handle_state_errors(ndev);
/* Handle lost messages on RX */
if (rxstcmd & IFI_CANFD_RXSTCMD_OVERFLOW)
@@ -571,7 +582,7 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota)
/* Handle lec errors on the bus */
if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
- work_done += ifi_canfd_handle_lec_err(ndev, errctr);
+ work_done += ifi_canfd_handle_lec_err(ndev);
/* Handle normal messages on RX */
if (!(rxstcmd & IFI_CANFD_RXSTCMD_EMPTY))
@@ -592,12 +603,13 @@ static irqreturn_t ifi_canfd_isr(int irq, void *dev_id)
struct net_device_stats *stats = &ndev->stats;
const u32 rx_irq_mask = IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY |
IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY_PER |
+ IFI_CANFD_INTERRUPT_ERROR_COUNTER |
+ IFI_CANFD_INTERRUPT_ERROR_STATE_CHG |
IFI_CANFD_INTERRUPT_ERROR_WARNING |
- IFI_CANFD_INTERRUPT_ERROR_COUNTER;
+ IFI_CANFD_INTERRUPT_ERROR_BUSOFF;
const u32 tx_irq_mask = IFI_CANFD_INTERRUPT_TXFIFO_EMPTY |
IFI_CANFD_INTERRUPT_TXFIFO_REMOVE;
- const u32 clr_irq_mask = ~((u32)(IFI_CANFD_INTERRUPT_SET_IRQ |
- IFI_CANFD_INTERRUPT_ERROR_WARNING));
+ const u32 clr_irq_mask = ~((u32)IFI_CANFD_INTERRUPT_SET_IRQ);
u32 isr;
isr = readl(priv->base + IFI_CANFD_INTERRUPT);
@@ -933,7 +945,7 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev)
struct resource *res;
void __iomem *addr;
int irq, ret;
- u32 id;
+ u32 id, rev;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
addr = devm_ioremap_resource(dev, res);
@@ -947,6 +959,13 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev)
return -EINVAL;
}
+ rev = readl(addr + IFI_CANFD_VER) & IFI_CANFD_VER_REV_MASK;
+ if (rev < IFI_CANFD_VER_REV_MIN_SUPPORTED) {
+ dev_err(dev, "This block is too old (rev %i), minimum supported is rev %i\n",
+ rev, IFI_CANFD_VER_REV_MIN_SUPPORTED);
+ return -EINVAL;
+ }
+
ndev = alloc_candev(sizeof(*priv), 1);
if (!ndev)
return -ENOMEM;
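
The new revision gate appears tied to the register additions above: the warning-state bit in STCMD (BIT(5)) and the extra error interrupts are only read on cores that pass the check. In outline, assuming the definitions from the earlier hunks:

rev = readl(addr + IFI_CANFD_VER) & IFI_CANFD_VER_REV_MASK;
if (rev < IFI_CANFD_VER_REV_MIN_SUPPORTED)	/* below 0x15 */
	return -EINVAL;	/* core predates the STCMD error-state bits */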
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index 12a53c8e8e1d..adfdb66a486e 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -1865,9 +1865,9 @@ static ssize_t ican3_sysfs_show_fwinfo(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%s\n", mod->fwinfo);
}
-static DEVICE_ATTR(termination, S_IWUSR | S_IRUGO, ican3_sysfs_show_term,
- ican3_sysfs_set_term);
-static DEVICE_ATTR(fwinfo, S_IRUSR | S_IRUGO, ican3_sysfs_show_fwinfo, NULL);
+static DEVICE_ATTR(termination, 0644, ican3_sysfs_show_term,
+ ican3_sysfs_set_term);
+static DEVICE_ATTR(fwinfo, 0444, ican3_sysfs_show_fwinfo, NULL);
static struct attribute *ican3_sysfs_attrs[] = {
&dev_attr_termination.attr,
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 2594f7779c6f..b397a33f3d32 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -26,6 +26,7 @@
#include <linux/pm_runtime.h>
#include <linux/iopoll.h>
#include <linux/can/dev.h>
+#include <linux/pinctrl/consumer.h>
/* napi related */
#define M_CAN_NAPI_WEIGHT 64
@@ -253,7 +254,7 @@ enum m_can_mram_cfg {
/* Rx FIFO 0/1 Configuration (RXF0C/RXF1C) */
#define RXFC_FWM_SHIFT 24
-#define RXFC_FWM_MASK (0x7f < RXFC_FWM_SHIFT)
+#define RXFC_FWM_MASK (0x7f << RXFC_FWM_SHIFT)
#define RXFC_FS_SHIFT 16
#define RXFC_FS_MASK (0x7f << RXFC_FS_SHIFT)
@@ -1700,6 +1701,8 @@ static __maybe_unused int m_can_suspend(struct device *dev)
m_can_clk_stop(priv);
}
+ pinctrl_pm_select_sleep_state(dev);
+
priv->can.state = CAN_STATE_SLEEPING;
return 0;
@@ -1710,6 +1713,8 @@ static __maybe_unused int m_can_resume(struct device *dev)
struct net_device *ndev = dev_get_drvdata(dev);
struct m_can_priv *priv = netdev_priv(ndev);
+ pinctrl_pm_select_default_state(dev);
+
m_can_init_ram(priv);
priv->can.state = CAN_STATE_ERROR_ACTIVE;
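
The pinctrl PM helpers used above come from <linux/pinctrl/consumer.h> and select the pin states named "sleep" and "default" in the device tree. A minimal usage sketch (the example_ names are illustrative, not from this patch):

#include <linux/pinctrl/consumer.h>

static int example_suspend(struct device *dev)
{
	pinctrl_pm_select_sleep_state(dev);	/* pins -> "sleep" state   */
	return 0;
}

static int example_resume(struct device *dev)
{
	pinctrl_pm_select_default_state(dev);	/* pins -> "default" state */
	return 0;
}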
diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c
index 55513411a82e..ed8561d4a90f 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -262,7 +262,6 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
spin_lock_irqsave(&priv->echo_lock, flags);
can_get_echo_skb(priv->ndev, msg->client);
- spin_unlock_irqrestore(&priv->echo_lock, flags);
/* count bytes of the echo instead of skb */
stats->tx_bytes += cf_len;
@@ -271,6 +270,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
/* restart tx queue (a slot is free) */
netif_wake_queue(priv->ndev);
+ spin_unlock_irqrestore(&priv->echo_lock, flags);
return 0;
}
@@ -333,7 +333,6 @@ static int pucan_handle_status(struct peak_canfd_priv *priv,
/* this STATUS is the CNF of the RX_BARRIER: Tx path can be setup */
if (pucan_status_is_rx_barrier(msg)) {
- unsigned long flags;
if (priv->enable_tx_path) {
int err = priv->enable_tx_path(priv);
@@ -342,16 +341,8 @@ static int pucan_handle_status(struct peak_canfd_priv *priv,
return err;
}
- /* restart network queue only if echo skb array is free */
- spin_lock_irqsave(&priv->echo_lock, flags);
-
- if (!priv->can.echo_skb[priv->echo_idx]) {
- spin_unlock_irqrestore(&priv->echo_lock, flags);
-
- netif_wake_queue(ndev);
- } else {
- spin_unlock_irqrestore(&priv->echo_lock, flags);
- }
+ /* start network queue (echo_skb array is empty) */
+ netif_start_queue(ndev);
return 0;
}
@@ -726,11 +717,6 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb,
*/
should_stop_tx_queue = !!(priv->can.echo_skb[priv->echo_idx]);
- spin_unlock_irqrestore(&priv->echo_lock, flags);
-
- /* write the skb on the interface */
- priv->write_tx_msg(priv, msg);
-
/* stop network tx queue if not enough room to save one more msg too */
if (priv->can.ctrlmode & CAN_CTRLMODE_FD)
should_stop_tx_queue |= (room_left <
@@ -742,6 +728,11 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb,
if (should_stop_tx_queue)
netif_stop_queue(ndev);
+ spin_unlock_irqrestore(&priv->echo_lock, flags);
+
+ /* write the skb on the interface */
+ priv->write_tx_msg(priv, msg);
+
return NETDEV_TX_OK;
}
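
After this change the echo-slot test and the queue stop/wake decision sit entirely under echo_lock, while the hardware write is pushed outside the critical section; in outline:

spin_lock_irqsave(&priv->echo_lock, flags);
/* ... reserve the echo slot, compute should_stop_tx_queue ... */
if (should_stop_tx_queue)
	netif_stop_queue(ndev);
spin_unlock_irqrestore(&priv->echo_lock, flags);

priv->write_tx_msg(priv, msg);	/* hardware write outside the lock */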
diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c
index 788c3464a3b0..3c51a884db87 100644
--- a/drivers/net/can/peak_canfd/peak_pciefd_main.c
+++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c
@@ -349,8 +349,12 @@ static irqreturn_t pciefd_irq_handler(int irq, void *arg)
priv->tx_pages_free++;
spin_unlock_irqrestore(&priv->tx_lock, flags);
- /* wake producer up */
- netif_wake_queue(priv->ucan.ndev);
+ /* wake producer up (only if enough room in echo_skb array) */
+ spin_lock_irqsave(&priv->ucan.echo_lock, flags);
+ if (!priv->ucan.can.echo_skb[priv->ucan.echo_idx])
+ netif_wake_queue(priv->ucan.ndev);
+
+ spin_unlock_irqrestore(&priv->ucan.echo_lock, flags);
}
/* re-enable Rx DMA transfer for this CAN */
diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c
index a89c1e92554d..1a2ae6ce8d87 100644
--- a/drivers/net/can/sja1000/sja1000_isa.c
+++ b/drivers/net/can/sja1000/sja1000_isa.c
@@ -48,27 +48,27 @@ static unsigned char ocr[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
static int indirect[MAXDEV] = {[0 ... (MAXDEV - 1)] = -1};
static spinlock_t indirect_lock[MAXDEV]; /* lock for indirect access mode */
-module_param_hw_array(port, ulong, ioport, NULL, S_IRUGO);
+module_param_hw_array(port, ulong, ioport, NULL, 0444);
MODULE_PARM_DESC(port, "I/O port number");
-module_param_hw_array(mem, ulong, iomem, NULL, S_IRUGO);
+module_param_hw_array(mem, ulong, iomem, NULL, 0444);
MODULE_PARM_DESC(mem, "I/O memory address");
-module_param_hw_array(indirect, int, ioport, NULL, S_IRUGO);
+module_param_hw_array(indirect, int, ioport, NULL, 0444);
MODULE_PARM_DESC(indirect, "Indirect access via address and data port");
-module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, 0444);
MODULE_PARM_DESC(irq, "IRQ number");
-module_param_array(clk, int, NULL, S_IRUGO);
+module_param_array(clk, int, NULL, 0444);
MODULE_PARM_DESC(clk, "External oscillator clock frequency "
"(default=16000000 [16 MHz])");
-module_param_array(cdr, byte, NULL, S_IRUGO);
+module_param_array(cdr, byte, NULL, 0444);
MODULE_PARM_DESC(cdr, "Clock divider register "
"(default=0x48 [CDR_CBP | CDR_CLK_OFF])");
-module_param_array(ocr, byte, NULL, S_IRUGO);
+module_param_array(ocr, byte, NULL, 0444);
MODULE_PARM_DESC(ocr, "Output control register "
"(default=0x18 [OCR_TX0_PUSHPULL])");
diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index 5f64deec9f6c..e22696190583 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c
@@ -601,8 +601,8 @@ static ssize_t store_output(struct device *dev, struct device_attribute *attr,
return count;
}
-static const DEVICE_ATTR(chip, S_IRUGO, show_chip, NULL);
-static const DEVICE_ATTR(output, S_IRUGO | S_IWUSR, show_output, store_output);
+static const DEVICE_ATTR(chip, 0444, show_chip, NULL);
+static const DEVICE_ATTR(output, 0644, show_output, store_output);
static const struct attribute *const netdev_sysfs_attrs[] = {
&dev_attr_chip.attr,
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 98d118b3aaf4..e90817608645 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -220,7 +220,7 @@
#define DEVICE_NAME "mcp251x"
static int mcp251x_enable_dma; /* Enable SPI DMA. Default: 0 (Off) */
-module_param(mcp251x_enable_dma, int, S_IRUGO);
+module_param(mcp251x_enable_dma, int, 0444);
MODULE_PARM_DESC(mcp251x_enable_dma, "Enable SPI DMA. Default: 0 (Off)");
static const struct can_bittiming_const mcp251x_bittiming_const = {
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index c6dcf93675c0..5820fd5b69b5 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -496,7 +496,7 @@ static ssize_t show_firmware(struct device *d,
(dev->version >> 8) & 0xf,
dev->version & 0xff);
}
-static DEVICE_ATTR(firmware, S_IRUGO, show_firmware, NULL);
+static DEVICE_ATTR(firmware, 0444, show_firmware, NULL);
static ssize_t show_hardware(struct device *d,
struct device_attribute *attr, char *buf)
@@ -509,7 +509,7 @@ static ssize_t show_hardware(struct device *d,
(dev->version >> 24) & 0xf,
(dev->version >> 16) & 0xff);
}
-static DEVICE_ATTR(hardware, S_IRUGO, show_hardware, NULL);
+static DEVICE_ATTR(hardware, 0444, show_hardware, NULL);
static ssize_t show_nets(struct device *d,
struct device_attribute *attr, char *buf)
@@ -519,7 +519,7 @@ static ssize_t show_nets(struct device *d,
return sprintf(buf, "%d", dev->net_count);
}
-static DEVICE_ATTR(nets, S_IRUGO, show_nets, NULL);
+static DEVICE_ATTR(nets, 0444, show_nets, NULL);
static int esd_usb2_send_msg(struct esd_usb2 *dev, struct esd_usb2_msg *msg)
{
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index c2b04f505e16..d200a5b0651c 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -65,7 +65,7 @@ MODULE_ALIAS_RTNL_LINK(DRV_NAME);
*/
static bool echo; /* echo testing. Default: 0 (Off) */
-module_param(echo, bool, S_IRUGO);
+module_param(echo, bool, 0444);
MODULE_PARM_DESC(echo, "Echo sent frames (for testing). Default: 0 (Off)");
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index d040aeb45172..15c2a831edf1 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_NET_DSA_BCM_SF2) += bcm-sf2.o
bcm-sf2-objs := bcm_sf2.o bcm_sf2_cfp.o
-obj-$(CONFIG_NET_DSA_LOOP) += dsa_loop.o dsa_loop_bdinfo.o
+obj-$(CONFIG_NET_DSA_LOOP) += dsa_loop.o
+ifdef CONFIG_NET_DSA_LOOP
+obj-$(CONFIG_FIXED_PHY) += dsa_loop_bdinfo.o
+endif
obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o
obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index db830a1141d9..78616787f2a3 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -814,8 +814,8 @@ void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
unsigned int i;
for (i = 0; i < mib_size; i++)
- memcpy(data + i * ETH_GSTRING_LEN,
- mibs[i].name, ETH_GSTRING_LEN);
+ strlcpy(data + i * ETH_GSTRING_LEN,
+ mibs[i].name, ETH_GSTRING_LEN);
}
EXPORT_SYMBOL(b53_get_strings);
@@ -852,7 +852,7 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
}
EXPORT_SYMBOL(b53_get_ethtool_stats);
-int b53_get_sset_count(struct dsa_switch *ds)
+int b53_get_sset_count(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index d954cf36ecd8..1187ebd79287 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -288,7 +288,7 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port);
int b53_configure_vlan(struct dsa_switch *ds);
void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-int b53_get_sset_count(struct dsa_switch *ds);
+int b53_get_sset_count(struct dsa_switch *ds, int port);
int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index 7aa84ee4e771..f77be9f85cb3 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -86,7 +86,7 @@ static int dsa_loop_setup(struct dsa_switch *ds)
return 0;
}
-static int dsa_loop_get_sset_count(struct dsa_switch *ds)
+static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port)
{
return __DSA_LOOP_CNT_MAX;
}
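
All of these drivers are converted to a per-port statistics count; the corresponding callback change in struct dsa_switch_ops (made in net/dsa, outside this excerpt) has this shape:

/* was: int (*get_sset_count)(struct dsa_switch *ds); */
int	(*get_sset_count)(struct dsa_switch *ds, int port);

The extra argument lets a switch report additional strings and counters for ports that have SERDES lanes, as mv88e6xxx does further down.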
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 6171c0853ff1..fefa454f3e56 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1007,7 +1007,7 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
}
}
-static int lan9303_get_sset_count(struct dsa_switch *ds)
+static int lan9303_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(lan9303_mib);
}
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 663b0d5b982b..bcb3e6c734f2 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -439,7 +439,7 @@ static void ksz_disable_port(struct dsa_switch *ds, int port,
ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
}
-static int ksz_sset_count(struct dsa_switch *ds)
+static int ksz_sset_count(struct dsa_switch *ds, int port)
{
return TOTAL_SWITCH_COUNTER_NUM;
}
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 8a0bb000d056..511ca134f13f 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -604,7 +604,7 @@ mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
}
static int
-mt7530_get_sset_count(struct dsa_switch *ds)
+mt7530_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(mt7530_mib);
}
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index 1aaa7a95ebc4..ae9e7f7cb31c 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -18,3 +18,13 @@ config NET_DSA_MV88E6XXX_GLOBAL2
It is required on most chips. If the chip you compile the support for
doesn't have such registers set, say N here. In doubt, say Y.
+
+config NET_DSA_MV88E6XXX_PTP
+ bool "PTP support for Marvell 88E6xxx"
+ default n
+ depends on NET_DSA_MV88E6XXX_GLOBAL2
+ imply NETWORK_PHY_TIMESTAMPING
+ imply PTP_1588_CLOCK
+ help
+ Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
+ chips that support it.
diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 58a4a0014e59..50de304abe2f 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -5,6 +5,10 @@ mv88e6xxx-objs += global1.o
mv88e6xxx-objs += global1_atu.o
mv88e6xxx-objs += global1_vtu.o
mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o
mv88e6xxx-objs += phy.o
mv88e6xxx-objs += port.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
mv88e6xxx-objs += serdes.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index eb328bade225..9a5d786b4885 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -36,8 +36,10 @@
#include "chip.h"
#include "global1.h"
#include "global2.h"
+#include "hwtstamp.h"
#include "phy.h"
#include "port.h"
+#include "ptp.h"
#include "serdes.h"
static void assert_reg_lock(struct mv88e6xxx_chip *chip)
@@ -251,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d)
chip->g1_irq.masked &= ~(1 << n);
}
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
{
- struct mv88e6xxx_chip *chip = dev_id;
unsigned int nhandled = 0;
unsigned int sub_irq;
unsigned int n;
@@ -278,6 +279,13 @@ out:
return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
}
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+ struct mv88e6xxx_chip *chip = dev_id;
+
+ return mv88e6xxx_g1_irq_thread_work(chip);
+}
+
static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
{
struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -333,7 +341,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = {
.xlate = irq_domain_xlate_twocell,
};
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
{
int irq, virq;
u16 mask;
@@ -342,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
- free_irq(chip->irq, chip);
-
for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
virq = irq_find_mapping(chip->g1_irq.domain, irq);
irq_dispose_mapping(virq);
@@ -352,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
irq_domain_remove(chip->g1_irq.domain);
}
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+ mv88e6xxx_g1_irq_free_common(chip);
+
+ free_irq(chip->irq, chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
{
int err, irq, virq;
u16 reg, mask;
@@ -385,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
if (err)
goto out_disable;
- err = request_threaded_irq(chip->irq, NULL,
- mv88e6xxx_g1_irq_thread_fn,
- IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
- dev_name(chip->dev), chip);
- if (err)
- goto out_disable;
-
return 0;
out_disable:
@@ -409,6 +415,64 @@ out_mapping:
return err;
}
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+
+ err = mv88e6xxx_g1_irq_setup_common(chip);
+ if (err)
+ return err;
+
+ err = request_threaded_irq(chip->irq, NULL,
+ mv88e6xxx_g1_irq_thread_fn,
+ IRQF_ONESHOT,
+ dev_name(chip->dev), chip);
+ if (err)
+ mv88e6xxx_g1_irq_free_common(chip);
+
+ return err;
+}
+
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+ struct mv88e6xxx_chip *chip = container_of(work,
+ struct mv88e6xxx_chip,
+ irq_poll_work.work);
+ mv88e6xxx_g1_irq_thread_work(chip);
+
+ kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+ msecs_to_jiffies(100));
+}
+
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+
+ err = mv88e6xxx_g1_irq_setup_common(chip);
+ if (err)
+ return err;
+
+ kthread_init_delayed_work(&chip->irq_poll_work,
+ mv88e6xxx_irq_poll);
+
+ chip->kworker = kthread_create_worker(0, dev_name(chip->dev));
+ if (IS_ERR(chip->kworker))
+ return PTR_ERR(chip->kworker);
+
+ kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+ msecs_to_jiffies(100));
+
+ return 0;
+}
+
+static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
+{
+ mv88e6xxx_g1_irq_free_common(chip);
+
+ kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
+ kthread_destroy_worker(chip->kworker);
+}
+
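+/*
+ * When no interrupt line is wired up (chip->irq <= 0), the chip is now
+ * serviced by a self-rearming kthread worker instead; the cycle set up
+ * above is (sketch):
+ *
+ *   mv88e6xxx_irq_poll():
+ *     mv88e6xxx_g1_irq_thread_work(chip);          service pending sources
+ *     kthread_queue_delayed_work(chip->kworker,    re-arm in 100 ms
+ *                                &chip->irq_poll_work,
+ *                                msecs_to_jiffies(100));
+ */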
int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
{
int i;
@@ -604,7 +668,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
return UINT64_MAX;
low = reg;
- if (s->sizeof_stat == 4) {
+ if (s->size == 4) {
err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
if (err)
return UINT64_MAX;
@@ -617,7 +681,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
case STATS_TYPE_BANK0:
reg |= s->reg | histogram;
mv88e6xxx_g1_stats_read(chip, reg, &low);
- if (s->sizeof_stat == 8)
+ if (s->size == 8)
mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
break;
default:
@@ -627,8 +691,8 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
return value;
}
-static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data, int types)
+static int mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data, int types)
{
struct mv88e6xxx_hw_stat *stat;
int i, j;
@@ -641,29 +705,41 @@ static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
j++;
}
}
+
+ return j;
}
-static void mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data)
+static int mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data)
{
- mv88e6xxx_stats_get_strings(chip, data,
- STATS_TYPE_BANK0 | STATS_TYPE_PORT);
+ return mv88e6xxx_stats_get_strings(chip, data,
+ STATS_TYPE_BANK0 | STATS_TYPE_PORT);
}
-static void mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data)
+static int mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data)
{
- mv88e6xxx_stats_get_strings(chip, data,
- STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
+ return mv88e6xxx_stats_get_strings(chip, data,
+ STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
}
static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
uint8_t *data)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ int count = 0;
+
+ mutex_lock(&chip->reg_lock);
if (chip->info->ops->stats_get_strings)
- chip->info->ops->stats_get_strings(chip, data);
+ count = chip->info->ops->stats_get_strings(chip, data);
+
+ if (chip->info->ops->serdes_get_strings) {
+ data += count * ETH_GSTRING_LEN;
+ chip->info->ops->serdes_get_strings(chip, port, data);
+ }
+
+ mutex_unlock(&chip->reg_lock);
}
static int mv88e6xxx_stats_get_sset_count(struct mv88e6xxx_chip *chip,
@@ -692,19 +768,34 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
STATS_TYPE_BANK1);
}
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ int serdes_count = 0;
+ int count = 0;
+ mutex_lock(&chip->reg_lock);
if (chip->info->ops->stats_get_sset_count)
- return chip->info->ops->stats_get_sset_count(chip);
+ count = chip->info->ops->stats_get_sset_count(chip);
+ if (count < 0)
+ goto out;
- return 0;
+ if (chip->info->ops->serdes_get_sset_count)
+ serdes_count = chip->info->ops->serdes_get_sset_count(chip,
+ port);
+ if (serdes_count < 0)
+ count = serdes_count;
+ else
+ count += serdes_count;
+out:
+ mutex_unlock(&chip->reg_lock);
+
+ return count;
}
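
The string/stat getters now return how many entries they emitted so SERDES statistics can be appended after the MAC statistics; the resulting per-port layout is (sketch):

/*
 * ethtool string/stat arrays, per port:
 *
 *   [0 .. count-1]                MAC stats (stats_get_strings/_stats)
 *   [count .. count+serdes-1]     SERDES stats (serdes_get_strings/_stats)
 *
 * data += count * ETH_GSTRING_LEN   advances the strings cursor
 * data += count                     advances the u64 stats cursor
 */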
-static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data, int types,
- u16 bank1_select, u16 histogram)
+static int mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data, int types,
+ u16 bank1_select, u16 histogram)
{
struct mv88e6xxx_hw_stat *stat;
int i, j;
@@ -712,24 +803,28 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
stat = &mv88e6xxx_hw_stats[i];
if (stat->type & types) {
+ mutex_lock(&chip->reg_lock);
data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
bank1_select,
histogram);
+ mutex_unlock(&chip->reg_lock);
+
j++;
}
}
+ return j;
}
-static void mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_PORT,
0, MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
}
-static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -737,8 +832,8 @@ static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
}
-static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -749,8 +844,17 @@ static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
uint64_t *data)
{
+ int count = 0;
+
if (chip->info->ops->stats_get_stats)
- chip->info->ops->stats_get_stats(chip, port, data);
+ count = chip->info->ops->stats_get_stats(chip, port, data);
+
+ if (chip->info->ops->serdes_get_stats) {
+ data += count;
+ mutex_lock(&chip->reg_lock);
+ chip->info->ops->serdes_get_stats(chip, port, data);
+ mutex_unlock(&chip->reg_lock);
+ }
}
static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
@@ -762,14 +866,13 @@ static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
mutex_lock(&chip->reg_lock);
ret = mv88e6xxx_stats_snapshot(chip, port);
- if (ret < 0) {
- mutex_unlock(&chip->reg_lock);
+ mutex_unlock(&chip->reg_lock);
+
+ if (ret < 0)
return;
- }
mv88e6xxx_get_stats(chip, port, data);
- mutex_unlock(&chip->reg_lock);
}
static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip)
@@ -1433,7 +1536,9 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
eth_broadcast_addr(addr.mac);
do {
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr);
+ mutex_unlock(&chip->reg_lock);
if (err)
return err;
@@ -1466,7 +1571,10 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
int err;
/* Dump port's default Filtering Information Database (VLAN ID 0) */
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_port_get_fid(chip, port, &fid);
+ mutex_unlock(&chip->reg_lock);
+
if (err)
return err;
@@ -1476,7 +1584,9 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
/* Dump VLANs' Filtering Information Databases */
do {
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_vtu_getnext(chip, &vlan);
+ mutex_unlock(&chip->reg_lock);
if (err)
return err;
@@ -1496,13 +1606,8 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
struct mv88e6xxx_chip *chip = ds->priv;
- int err;
-
- mutex_lock(&chip->reg_lock);
- err = mv88e6xxx_port_db_dump(chip, port, cb, data);
- mutex_unlock(&chip->reg_lock);
- return err;
+ return mv88e6xxx_port_db_dump(chip, port, cb, data);
}
static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip,
@@ -2092,6 +2197,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
if (err)
goto unlock;
+ /* Setup PTP Hardware Clock and timestamping */
+ if (chip->info->ptp_support) {
+ err = mv88e6xxx_ptp_setup(chip);
+ if (err)
+ goto unlock;
+
+ err = mv88e6xxx_hwtstamp_setup(chip);
+ if (err)
+ goto unlock;
+ }
+
unlock:
mutex_unlock(&chip->reg_lock);
@@ -2148,6 +2264,15 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
struct mii_bus *bus;
int err;
+ if (external) {
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_g2_scratch_gpio_set_smi(chip, true);
+ mutex_unlock(&chip->reg_lock);
+
+ if (err)
+ return err;
+ }
+
bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus));
if (!bus)
return -ENOMEM;
@@ -2170,12 +2295,19 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
bus->write = mv88e6xxx_mdio_write;
bus->parent = chip->dev;
+ if (!external) {
+ err = mv88e6xxx_g2_irq_mdio_setup(chip, bus);
+ if (err)
+ return err;
+ }
+
if (np)
err = of_mdiobus_register(bus, np);
else
err = mdiobus_register(bus);
if (err) {
dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err);
+ mv88e6xxx_g2_irq_mdio_free(chip, bus);
return err;
}
@@ -2202,6 +2334,9 @@ static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
list_for_each_entry(mdio_bus, &chip->mdios, list) {
bus = mdio_bus->bus;
+ if (!mdio_bus->external)
+ mv88e6xxx_g2_irq_mdio_free(chip, bus);
+
mdiobus_unregister(bus);
}
}
@@ -2472,6 +2607,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6161_ops = {
@@ -2602,6 +2738,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6175_ops = {
@@ -2673,6 +2810,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6185_ops = {
@@ -2736,6 +2874,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6190x_ops = {
@@ -2771,6 +2910,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6191_ops = {
@@ -2843,6 +2983,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -2879,6 +3021,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -2913,6 +3057,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6321_ops = {
@@ -2945,6 +3091,8 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6341_ops = {
@@ -2981,6 +3129,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -3049,6 +3199,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6352_ops = {
@@ -3086,6 +3237,11 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
+ .serdes_get_sset_count = mv88e6352_serdes_get_sset_count,
+ .serdes_get_strings = mv88e6352_serdes_get_strings,
+ .serdes_get_stats = mv88e6352_serdes_get_stats,
};
static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -3124,6 +3280,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -3162,6 +3320,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -3171,6 +3331,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6085",
.num_databases = 4096,
.num_ports = 10,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3191,6 +3352,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6095/88E6095F",
.num_databases = 256,
.num_ports = 11,
+ .num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3209,6 +3371,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6097/88E6097F",
.num_databases = 4096,
.num_ports = 11,
+ .num_internal_phys = 8,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3229,6 +3392,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6123",
.num_databases = 4096,
.num_ports = 3,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3249,6 +3413,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6131",
.num_databases = 256,
.num_ports = 8,
+ .num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3264,15 +3429,18 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
[MV88E6141] = {
.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6141,
.family = MV88E6XXX_FAMILY_6341,
- .name = "Marvell 88E6341",
+ .name = "Marvell 88E6141",
.num_databases = 4096,
.num_ports = 6,
+ .num_internal_phys = 5,
+ .num_gpio = 11,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
.atu_move_port_mask = 0x1f,
+ .g1_irqs = 9,
.g2_irqs = 10,
.pvt = true,
.multi_chip = true,
@@ -3286,6 +3454,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6161",
.num_databases = 4096,
.num_ports = 6,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3306,6 +3475,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6165",
.num_databases = 4096,
.num_ports = 6,
+ .num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3326,6 +3496,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6171",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3346,6 +3517,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6172",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3366,6 +3539,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6175",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3386,6 +3560,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6176",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3406,6 +3582,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6185",
.num_databases = 256,
.num_ports = 10,
+ .num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3424,6 +3601,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6190",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3444,6 +3623,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6190X",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3464,6 +3645,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6191",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3475,6 +3657,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6191_ops,
},
@@ -3484,6 +3667,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6240",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3495,6 +3680,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6240_ops,
},
@@ -3504,6 +3690,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6290",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3515,6 +3703,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6290_ops,
},
@@ -3524,16 +3713,20 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6320",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6320_ops,
},
@@ -3543,15 +3736,19 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6321",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6321_ops,
},
@@ -3560,17 +3757,21 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.family = MV88E6XXX_FAMILY_6341,
.name = "Marvell 88E6341",
.num_databases = 4096,
+ .num_internal_phys = 5,
.num_ports = 6,
+ .num_gpio = 11,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
.atu_move_port_mask = 0x1f,
+ .g1_irqs = 9,
.g2_irqs = 10,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6341_ops,
},
@@ -3580,6 +3781,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6350",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3600,6 +3802,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6351",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3620,6 +3823,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6352",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3631,6 +3836,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6352_ops,
},
[MV88E6390] = {
@@ -3639,6 +3845,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6390",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3650,6 +3858,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6390_ops,
},
[MV88E6390X] = {
@@ -3658,6 +3867,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6390X",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3669,6 +3880,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6390x_ops,
},
};
@@ -3880,6 +4092,11 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.port_mdb_del = mv88e6xxx_port_mdb_del,
.crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join,
.crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave,
+ .port_hwtstamp_set = mv88e6xxx_port_hwtstamp_set,
+ .port_hwtstamp_get = mv88e6xxx_port_hwtstamp_get,
+ .port_txtstamp = mv88e6xxx_port_txtstamp,
+ .port_rxtstamp = mv88e6xxx_port_rxtstamp,
+ .get_ts_info = mv88e6xxx_get_ts_info,
};
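
The five new ops wire DSA into the standard SIOC{G,S}HWTSTAMP and ethtool timestamp-info paths. A minimal get_ts_info sketch follows (capability values illustrative; the real implementation lives in the new hwtstamp.c, which is not shown in this excerpt):

#include <linux/ethtool.h>
#include <linux/net_tstamp.h>

static int example_get_ts_info(struct dsa_switch *ds, int port,
			       struct ethtool_ts_info *info)
{
	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
				SOF_TIMESTAMPING_RX_HARDWARE |
				SOF_TIMESTAMPING_RAW_HARDWARE;
	info->phc_index = -1;	/* a real driver reports its PTP clock index */
	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
			   BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
	return 0;
}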
static struct dsa_switch_driver mv88e6xxx_switch_drv = {
@@ -3959,33 +4176,34 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
goto out;
}
- if (chip->irq > 0) {
- /* Has to be performed before the MDIO bus is created,
- * because the PHYs will link there interrupts to these
- * interrupt controllers
- */
- mutex_lock(&chip->reg_lock);
+ /* Has to be performed before the MDIO bus is created, because
+ * the PHYs will link their interrupts to these interrupt
+ * controllers
+ */
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
err = mv88e6xxx_g1_irq_setup(chip);
- mutex_unlock(&chip->reg_lock);
-
- if (err)
- goto out;
-
- if (chip->info->g2_irqs > 0) {
- err = mv88e6xxx_g2_irq_setup(chip);
- if (err)
- goto out_g1_irq;
- }
+ else
+ err = mv88e6xxx_irq_poll_setup(chip);
+ mutex_unlock(&chip->reg_lock);
- err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
- if (err)
- goto out_g2_irq;
+ if (err)
+ goto out;
- err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+ if (chip->info->g2_irqs > 0) {
+ err = mv88e6xxx_g2_irq_setup(chip);
if (err)
- goto out_g1_atu_prob_irq;
+ goto out_g1_irq;
}
+ err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
+ if (err)
+ goto out_g2_irq;
+
+ err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+ if (err)
+ goto out_g1_atu_prob_irq;
+
err = mv88e6xxx_mdios_register(chip, np);
if (err)
goto out_g1_vtu_prob_irq;
@@ -3999,20 +4217,19 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
out_mdio:
mv88e6xxx_mdios_unregister(chip);
out_g1_vtu_prob_irq:
- if (chip->irq > 0)
- mv88e6xxx_g1_vtu_prob_irq_free(chip);
+ mv88e6xxx_g1_vtu_prob_irq_free(chip);
out_g1_atu_prob_irq:
- if (chip->irq > 0)
- mv88e6xxx_g1_atu_prob_irq_free(chip);
+ mv88e6xxx_g1_atu_prob_irq_free(chip);
out_g2_irq:
- if (chip->info->g2_irqs > 0 && chip->irq > 0)
+ if (chip->info->g2_irqs > 0)
mv88e6xxx_g2_irq_free(chip);
out_g1_irq:
- if (chip->irq > 0) {
- mutex_lock(&chip->reg_lock);
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
mv88e6xxx_g1_irq_free(chip);
- mutex_unlock(&chip->reg_lock);
- }
+ else
+ mv88e6xxx_irq_poll_free(chip);
+ mutex_unlock(&chip->reg_lock);
out:
return err;
}
@@ -4022,19 +4239,27 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
struct mv88e6xxx_chip *chip = ds->priv;
+ if (chip->info->ptp_support) {
+ mv88e6xxx_hwtstamp_free(chip);
+ mv88e6xxx_ptp_free(chip);
+ }
+
mv88e6xxx_phy_destroy(chip);
mv88e6xxx_unregister_switch(chip);
mv88e6xxx_mdios_unregister(chip);
- if (chip->irq > 0) {
- mv88e6xxx_g1_vtu_prob_irq_free(chip);
- mv88e6xxx_g1_atu_prob_irq_free(chip);
- if (chip->info->g2_irqs > 0)
- mv88e6xxx_g2_irq_free(chip);
- mutex_lock(&chip->reg_lock);
+ mv88e6xxx_g1_vtu_prob_irq_free(chip);
+ mv88e6xxx_g1_atu_prob_irq_free(chip);
+
+ if (chip->info->g2_irqs > 0)
+ mv88e6xxx_g2_irq_free(chip);
+
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
mv88e6xxx_g1_irq_free(chip);
- mutex_unlock(&chip->reg_lock);
- }
+ else
+ mv88e6xxx_irq_poll_free(chip);
+ mutex_unlock(&chip->reg_lock);
}
static const struct of_device_id mv88e6xxx_of_match[] = {
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 3dba6e90adcf..bad211014e91 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -15,7 +15,10 @@
#include <linux/if_vlan.h>
#include <linux/irq.h>
#include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
#include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
#include <net/dsa.h>
#ifndef UINT64_MAX
@@ -39,6 +42,8 @@
#define MV88E6XXX_MAX_PVT_SWITCHES 32
#define MV88E6XXX_MAX_PVT_PORTS 16
+#define MV88E6XXX_MAX_GPIO 16
+
enum mv88e6xxx_egress_mode {
MV88E6XXX_EGRESS_MODE_UNMODIFIED,
MV88E6XXX_EGRESS_MODE_UNTAGGED,
@@ -105,6 +110,8 @@ struct mv88e6xxx_info {
const char *name;
unsigned int num_databases;
unsigned int num_ports;
+ unsigned int num_internal_phys;
+ unsigned int num_gpio;
unsigned int max_vid;
unsigned int port_base_addr;
unsigned int global1_addr;
@@ -126,6 +133,9 @@ struct mv88e6xxx_info {
*/
u8 atu_move_port_mask;
const struct mv88e6xxx_ops *ops;
+
+ /* Supports PTP */
+ bool ptp_support;
};
struct mv88e6xxx_atu_entry {
@@ -146,6 +156,8 @@ struct mv88e6xxx_vtu_entry {
struct mv88e6xxx_bus_ops;
struct mv88e6xxx_irq_ops;
+struct mv88e6xxx_gpio_ops;
+struct mv88e6xxx_avb_ops;
struct mv88e6xxx_irq {
u16 masked;
@@ -154,6 +166,36 @@ struct mv88e6xxx_irq {
unsigned int nirqs;
};
+/* state flags for mv88e6xxx_port_hwtstamp::state */
+enum {
+ MV88E6XXX_HWTSTAMP_ENABLED,
+ MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+};
+
+struct mv88e6xxx_port_hwtstamp {
+ /* Port index */
+ int port_id;
+
+ /* Timestamping state */
+ unsigned long state;
+
+ /* Resources for receive timestamping */
+ struct sk_buff_head rx_queue;
+ struct sk_buff_head rx_queue2;
+
+ /* Resources for transmit timestamping */
+ unsigned long tx_tstamp_start;
+ struct sk_buff *tx_skb;
+ u16 tx_seq_id;
+
+ /* Current timestamp configuration */
+ struct hwtstamp_config tstamp_config;
+};
+
+struct mv88e6xxx_port {
+ u64 serdes_stats[2];
+};
+
struct mv88e6xxx_chip {
const struct mv88e6xxx_info *info;
@@ -207,8 +249,34 @@ struct mv88e6xxx_chip {
int irq;
int device_irq;
int watchdog_irq;
+
int atu_prob_irq;
int vtu_prob_irq;
+ struct kthread_worker *kworker;
+ struct kthread_delayed_work irq_poll_work;
+
+ /* GPIO resources */
+ u8 gpio_data[2];
+
+ /* This cyclecounter abstracts the switch PTP time.
+ * reg_lock must be held for any operation that read()s.
+ */
+ struct cyclecounter tstamp_cc;
+ struct timecounter tstamp_tc;
+ struct delayed_work overflow_work;
+
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_clock_info;
+ struct delayed_work tai_event_work;
+ struct ptp_pin_desc pin_config[MV88E6XXX_MAX_GPIO];
+ u16 trig_config;
+ u16 evcap_config;
+
+ /* Per-port timestamping resources. */
+ struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
+
+ /* Array of port structures. */
+ struct mv88e6xxx_port ports[DSA_MAX_PORTS];
};
struct mv88e6xxx_bus_ops {
@@ -327,9 +395,9 @@ struct mv88e6xxx_ops {
/* Return the number of strings describing statistics */
int (*stats_get_sset_count)(struct mv88e6xxx_chip *chip);
- void (*stats_get_strings)(struct mv88e6xxx_chip *chip, uint8_t *data);
- void (*stats_get_stats)(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data);
+ int (*stats_get_strings)(struct mv88e6xxx_chip *chip, uint8_t *data);
+ int (*stats_get_stats)(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
int (*set_cpu_port)(struct mv88e6xxx_chip *chip, int port);
int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
const struct mv88e6xxx_irq_ops *watchdog_ops;
@@ -339,11 +407,24 @@ struct mv88e6xxx_ops {
/* Power on/off a SERDES interface */
int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, bool on);
+ /* Statistics from the SERDES interface */
+ int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
+ void (*serdes_get_strings)(struct mv88e6xxx_chip *chip, int port,
+ uint8_t *data);
+ void (*serdes_get_stats)(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
+
/* VLAN Translation Unit operations */
int (*vtu_getnext)(struct mv88e6xxx_chip *chip,
struct mv88e6xxx_vtu_entry *entry);
int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip,
struct mv88e6xxx_vtu_entry *entry);
+
+ /* GPIO operations */
+ const struct mv88e6xxx_gpio_ops *gpio_ops;
+
+ /* Interface to the AVB/PTP registers */
+ const struct mv88e6xxx_avb_ops *avb_ops;
};
struct mv88e6xxx_irq_ops {
@@ -355,13 +436,49 @@ struct mv88e6xxx_irq_ops {
void (*irq_free)(struct mv88e6xxx_chip *chip);
};
+struct mv88e6xxx_gpio_ops {
+ /* Get/set data on GPIO pin */
+ int (*get_data)(struct mv88e6xxx_chip *chip, unsigned int pin);
+ int (*set_data)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int value);
+
+ /* get/set GPIO direction */
+ int (*get_dir)(struct mv88e6xxx_chip *chip, unsigned int pin);
+ int (*set_dir)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ bool input);
+
+ /* get/set GPIO pin control */
+ int (*get_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int *func);
+ int (*set_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int func);
+};
+
+struct mv88e6xxx_avb_ops {
+ /* Access port-scoped Precision Time Protocol registers */
+ int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr,
+ u16 *data, int len);
+ int (*port_ptp_write)(struct mv88e6xxx_chip *chip, int port, int addr,
+ u16 data);
+
+ /* Access global Precision Time Protocol registers */
+ int (*ptp_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+ int len);
+ int (*ptp_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+
+ /* Access global Time Application Interface registers */
+ int (*tai_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+ int len);
+ int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+};
+
#define STATS_TYPE_PORT BIT(0)
#define STATS_TYPE_BANK0 BIT(1)
#define STATS_TYPE_BANK1 BIT(2)
struct mv88e6xxx_hw_stat {
char string[ETH_GSTRING_LEN];
- int sizeof_stat;
+ size_t size;
int reg;
int type;
};
@@ -386,6 +503,11 @@ static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip)
return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
}
+static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip)
+{
+ return chip->info->num_gpio;
+}
+
int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index af0727877825..0ce627fded48 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -20,22 +20,22 @@
#include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */
#include "global2.h"
-static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
{
return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
}
-static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
{
return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
}
-static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
{
return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
}
-static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
{
return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
}
@@ -798,6 +798,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, struct mii_bus *bus,
val);
}
+/* Offset 0x1B: Watchdog Control */
static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
{
u16 reg;
@@ -1089,7 +1090,7 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
err = request_threaded_irq(chip->device_irq, NULL,
mv88e6xxx_g2_irq_thread_fn,
- IRQF_ONESHOT, "mv88e6xxx-g1", chip);
+ IRQF_ONESHOT, "mv88e6xxx-g2", chip);
if (err)
goto out;
@@ -1106,6 +1107,38 @@ out:
return err;
}
+int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus)
+{
+ int phy, irq, err, err_phy;
+
+ for (phy = 0; phy < chip->info->num_internal_phys; phy++) {
+ irq = irq_find_mapping(chip->g2_irq.domain, phy);
+ if (irq < 0) {
+ err = irq;
+ goto out;
+ }
+ bus->irq[chip->info->port_base_addr + phy] = irq;
+ }
+ return 0;
+out:
+ err_phy = phy;
+
+ for (phy = 0; phy < err_phy; phy++)
+ irq_dispose_mapping(bus->irq[phy]);
+
+ return err;
+}
+
+void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus)
+{
+ int phy;
+
+ for (phy = 0; phy < chip->info->num_internal_phys; phy++)
+ irq_dispose_mapping(bus->irq[phy]);
+}
+
int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
{
u16 reg;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 669f59017b12..520ec70d32e8 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -149,7 +149,26 @@
#define MV88E6390_G2_EEPROM_ADDR_MASK 0xffff
/* Offset 0x16: AVB Command Register */
-#define MV88E6352_G2_AVB_CMD 0x16
+#define MV88E6352_G2_AVB_CMD 0x16
+#define MV88E6352_G2_AVB_CMD_BUSY 0x8000
+#define MV88E6352_G2_AVB_CMD_OP_READ 0x4000
+#define MV88E6352_G2_AVB_CMD_OP_READ_INCR 0x6000
+#define MV88E6352_G2_AVB_CMD_OP_WRITE 0x3000
+#define MV88E6390_G2_AVB_CMD_OP_READ 0x0000
+#define MV88E6390_G2_AVB_CMD_OP_READ_INCR 0x4000
+#define MV88E6390_G2_AVB_CMD_OP_WRITE 0x6000
+#define MV88E6352_G2_AVB_CMD_PORT_MASK 0x0f00
+#define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL 0xe
+#define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL 0xf
+#define MV88E6390_G2_AVB_CMD_PORT_MASK 0x1f00
+#define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL 0x1e
+#define MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL 0x1f
+#define MV88E6352_G2_AVB_CMD_BLOCK_PTP 0
+#define MV88E6352_G2_AVB_CMD_BLOCK_AVB 1
+#define MV88E6352_G2_AVB_CMD_BLOCK_QAV 2
+#define MV88E6352_G2_AVB_CMD_BLOCK_QVB 3
+#define MV88E6352_G2_AVB_CMD_BLOCK_MASK 0x00e0
+#define MV88E6352_G2_AVB_CMD_ADDR_MASK 0x001f
/* Offset 0x17: AVB Data Register */
#define MV88E6352_G2_AVB_DATA 0x17
@@ -223,6 +242,40 @@
#define MV88E6352_G2_NOEGR_POLICY 0x2000
#define MV88E6390_G2_LAG_ID_4 0x2000
+/* Scratch/Misc registers accessed through MV88E6XXX_G2_SCRATCH_MISC */
+/* Offset 0x02: Misc Configuration */
+#define MV88E6352_G2_SCRATCH_MISC_CFG 0x02
+#define MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI 0x80
+/* Offset 0x60-0x61: GPIO Configuration */
+#define MV88E6352_G2_SCRATCH_GPIO_CFG0 0x60
+#define MV88E6352_G2_SCRATCH_GPIO_CFG1 0x61
+/* Offset 0x62-0x63: GPIO Direction */
+#define MV88E6352_G2_SCRATCH_GPIO_DIR0 0x62
+#define MV88E6352_G2_SCRATCH_GPIO_DIR1 0x63
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_OUT 0
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_IN 1
+/* Offset 0x64-0x65: GPIO Data */
+#define MV88E6352_G2_SCRATCH_GPIO_DATA0 0x64
+#define MV88E6352_G2_SCRATCH_GPIO_DATA1 0x65
+/* Offset 0x68-0x6F: GPIO Pin Control */
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL0 0x68
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL1 0x69
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL2 0x6A
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL3 0x6B
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL4 0x6C
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL5 0x6D
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL6 0x6E
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL7 0x6F
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA0 0x70
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1 0x71
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU BIT(2)
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2 0x72
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK 0x3
+
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO 0
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ 2
+
#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -230,6 +283,11 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
return 0;
}
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update);
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+
int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
@@ -259,6 +317,11 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
+int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus);
+void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus);
+
int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
@@ -267,6 +330,14 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
+extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
+extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
+
+extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
+
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external);
+
#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -279,6 +350,26 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
return 0;
}
+static inline int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip,
int port)
{
@@ -364,6 +455,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
{
}
+static inline int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus)
+{
+ return 0;
+}
+
+static inline void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus)
+{
+}
+
static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
{
return -EOPNOTSUPP;
@@ -382,6 +484,17 @@ static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {};
static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {};
+static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {};
+static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
+
+static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {};
+
+static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
#endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global2_avb.c b/drivers/net/dsa/mv88e6xxx/global2_avb.c
new file mode 100644
index 000000000000..2e398ccb88ca
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2_avb.c
@@ -0,0 +1,193 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <[email protected]>
+ *
+ * Copyright (c) 2017 National Instruments
+ * Brandon Streiff <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "global2.h"
+
+/* Offset 0x16: AVB Command Register
+ * Offset 0x17: AVB Data Register
+ *
+ * There are two different versions of this register interface:
+ * "6352": 3-bit "op" field, 4-bit "port" field.
+ * "6390": 2-bit "op" field, 5-bit "port" field.
+ *
+ * The "op" codes are different between the two, as well as the special
+ * port fields for global PTP and TAI configuration.
+ */
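As an illustration of the two layouts (a sketch built from the MV88E6352_G2_AVB_CMD_* and MV88E6390_G2_AVB_CMD_* definitions added to global2.h below; not code from the patch), a PTP-block read of register 0x3 on port 2 packs as:

    u16 readop_6352 = MV88E6352_G2_AVB_CMD_OP_READ |   /* 3-bit op (0x4000) */
                      (2 << 8) |                        /* 4-bit port field */
                      (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
                      0x3;                              /* 5-bit register address */

    u16 readop_6390 = MV88E6390_G2_AVB_CMD_OP_READ |   /* 2-bit op (0x0000) */
                      (2 << 8) |                        /* 5-bit port field */
                      (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
                      0x3;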
+
+/* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words.
+ * The hardware supports snapshotting up to four contiguous registers.
+ */
+static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop,
+ u16 *data, int len)
+{
+ int err;
+ int i;
+
+ /* Hardware can only snapshot four words. */
+ if (len > 4)
+ return -E2BIG;
+
+ err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop);
+ if (err)
+ return err;
+
+ for (i = 0; i < len; ++i) {
+ err = mv88e6xxx_g2_read(chip, MV88E6352_G2_AVB_DATA,
+ &data[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* mv88e6xxx_g2_avb_write -- Write one 16-bit word. */
+static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop,
+ u16 data)
+{
+ int err;
+
+ err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data);
+ if (err)
+ return err;
+
+ return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop);
+}
+
+static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 *data,
+ int len)
+{
+ u16 readop = (len == 1 ? MV88E6352_G2_AVB_CMD_OP_READ :
+ MV88E6352_G2_AVB_CMD_OP_READ_INCR) |
+ (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+ addr;
+
+ return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6352_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 data)
+{
+ u16 writeop = MV88E6352_G2_AVB_CMD_OP_WRITE | (port << 8) |
+ (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+ return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6352_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6352_g2_avb_port_ptp_read(chip,
+ MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6352_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6352_g2_avb_port_ptp_write(chip,
+ MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data);
+}
+
+static int mv88e6352_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6352_g2_avb_port_ptp_read(chip,
+ MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6352_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6352_g2_avb_port_ptp_write(chip,
+ MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {
+ .port_ptp_read = mv88e6352_g2_avb_port_ptp_read,
+ .port_ptp_write = mv88e6352_g2_avb_port_ptp_write,
+ .ptp_read = mv88e6352_g2_avb_ptp_read,
+ .ptp_write = mv88e6352_g2_avb_ptp_write,
+ .tai_read = mv88e6352_g2_avb_tai_read,
+ .tai_write = mv88e6352_g2_avb_tai_write,
+};
+
+static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 *data,
+ int len)
+{
+ u16 readop = (len == 1 ? MV88E6390_G2_AVB_CMD_OP_READ :
+ MV88E6390_G2_AVB_CMD_OP_READ_INCR) |
+ (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+ addr;
+
+ return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6390_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 data)
+{
+ u16 writeop = MV88E6390_G2_AVB_CMD_OP_WRITE | (port << 8) |
+ (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+ return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6390_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6390_g2_avb_port_ptp_read(chip,
+ MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6390_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6390_g2_avb_port_ptp_write(chip,
+ MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data);
+}
+
+static int mv88e6390_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6390_g2_avb_port_ptp_read(chip,
+ MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6390_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6390_g2_avb_port_ptp_write(chip,
+ MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {
+ .port_ptp_read = mv88e6390_g2_avb_port_ptp_read,
+ .port_ptp_write = mv88e6390_g2_avb_port_ptp_write,
+ .ptp_read = mv88e6390_g2_avb_ptp_read,
+ .ptp_write = mv88e6390_g2_avb_ptp_write,
+ .tai_read = mv88e6390_g2_avb_tai_read,
+ .tai_write = mv88e6390_g2_avb_tai_write,
+};
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
new file mode 100644
index 000000000000..3f92b8892dc7
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -0,0 +1,291 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Scratch & Misc Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Brandon Streiff <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+
+/* Offset 0x1A: Scratch and Misc. Register */
+static int mv88e6xxx_g2_scratch_read(struct mv88e6xxx_chip *chip, int reg,
+ u8 *data)
+{
+ u16 value;
+ int err;
+
+ err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC,
+ reg << 8);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, &value);
+ if (err)
+ return err;
+
+ *data = (value & MV88E6XXX_G2_SCRATCH_MISC_DATA_MASK);
+
+ return 0;
+}
+
+static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
+ u8 data)
+{
+ u16 value = (reg << 8) | data;
+
+ return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value);
+}
+
+/**
+ * mv88e6xxx_g2_scratch_get_bit - get a bit
+ * @chip: chip private data
+ * @base_reg: base of scratch bits
+ * @offset: index of bit within the register
+ * @set: is bit set?
+ */
+static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip,
+ int base_reg, unsigned int offset,
+ int *set)
+{
+ int reg = base_reg + (offset / 8);
+ u8 mask = (1 << (offset & 0x7));
+ u8 val;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ *set = !!(mask & val);
+
+ return 0;
+}
+
+/**
+ * mv88e6xxx_g2_scratch_set_bit - set (or clear) a bit
+ * @chip: chip private data
+ * @base_reg: base of scratch bits
+ * @offset: index of bit within the register
+ * @set: set if true, clear if false
+ *
+ * Helper function for dealing with the direction and data registers.
+ */
+static int mv88e6xxx_g2_scratch_set_bit(struct mv88e6xxx_chip *chip,
+ int base_reg, unsigned int offset,
+ int set)
+{
+ int reg = base_reg + (offset / 8);
+ u8 mask = (1 << (offset & 0x7));
+ u8 val;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ if (set)
+ val |= mask;
+ else
+ val &= ~mask;
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
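To make the bit addressing concrete (a sketch, not part of the patch): the scratch GPIO registers pack eight pins per byte-wide register, so for example pin 11 resolves to:

    int reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + (11 / 8);  /* 0x65, i.e. GPIO_DATA1 */
    u8 mask = 1 << (11 & 0x7);                              /* bit 3 */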
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_data - get data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for low, 1 for high, negative error
+ */
+static int mv88e6352_g2_scratch_gpio_get_data(struct mv88e6xxx_chip *chip,
+ unsigned int pin)
+{
+ int val = 0;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_get_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DATA0,
+ pin, &val);
+ if (err)
+ return err;
+
+ return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_data - set data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @value: value to set
+ */
+static int mv88e6352_g2_scratch_gpio_set_data(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int value)
+{
+ u8 mask = (1 << (pin & 0x7));
+ int offset = (pin / 8);
+ int reg;
+
+ reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + offset;
+
+ if (value)
+ chip->gpio_data[offset] |= mask;
+ else
+ chip->gpio_data[offset] &= ~mask;
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, chip->gpio_data[offset]);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_dir - get direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for output, 1 for input (same as GPIOF_DIR_XXX).
+ */
+static int mv88e6352_g2_scratch_gpio_get_dir(struct mv88e6xxx_chip *chip,
+ unsigned int pin)
+{
+ int val = 0;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_get_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DIR0,
+ pin, &val);
+ if (err)
+ return err;
+
+ return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_dir - set direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @input: true to configure the pin as an input, false for output
+ */
+static int mv88e6352_g2_scratch_gpio_set_dir(struct mv88e6xxx_chip *chip,
+ unsigned int pin, bool input)
+{
+ int value = (input ? MV88E6352_G2_SCRATCH_GPIO_DIR_IN :
+ MV88E6352_G2_SCRATCH_GPIO_DIR_OUT);
+
+ return mv88e6xxx_g2_scratch_set_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DIR0,
+ pin, value);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_pctl - get pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: where to store the function number
+ *
+ * Note that the function numbers themselves may vary by chipset.
+ */
+static int mv88e6352_g2_scratch_gpio_get_pctl(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int *func)
+{
+ int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+ int offset = (pin & 0x1) ? 4 : 0;
+ u8 mask = (0x7 << offset);
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ *func = (val & mask) >> offset;
+
+ return 0;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_pctl - set pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: function number
+ */
+static int mv88e6352_g2_scratch_gpio_set_pctl(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int func)
+{
+ int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+ int offset = (pin & 0x1) ? 4 : 0;
+ u8 mask = (0x7 << offset);
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ val = (val & ~mask) | ((func << offset) & mask);
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
+ .get_data = mv88e6352_g2_scratch_gpio_get_data,
+ .set_data = mv88e6352_g2_scratch_gpio_set_data,
+ .get_dir = mv88e6352_g2_scratch_gpio_get_dir,
+ .set_dir = mv88e6352_g2_scratch_gpio_set_dir,
+ .get_pctl = mv88e6352_g2_scratch_gpio_get_pctl,
+ .set_pctl = mv88e6352_g2_scratch_gpio_set_pctl,
+};
+
+/**
+ * mv88e6xxx_g2_gpio_set_smi - set gpio muxing for external smi
+ * @chip: chip private data
+ * @external: set mux for external smi, or free for gpio usage
+ *
+ * Some mv88e6xxx models have GPIO pins that may be configured as
+ * an external SMI interface, or they may be made free for other
+ * GPIO uses.
+ */
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external)
+{
+ int misc_cfg = MV88E6352_G2_SCRATCH_MISC_CFG;
+ int config_data1 = MV88E6352_G2_SCRATCH_CONFIG_DATA1;
+ int config_data2 = MV88E6352_G2_SCRATCH_CONFIG_DATA2;
+ bool no_cpu;
+ u8 p0_mode;
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, config_data2, &val);
+ if (err)
+ return err;
+
+ p0_mode = val & MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK;
+
+ if (p0_mode == 0x01 || p0_mode == 0x02)
+ return -EBUSY;
+
+ err = mv88e6xxx_g2_scratch_read(chip, config_data1, &val);
+ if (err)
+ return err;
+
+ no_cpu = !!(val & MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU);
+
+ err = mv88e6xxx_g2_scratch_read(chip, misc_cfg, &val);
+ if (err)
+ return err;
+
+ /* NO_CPU being 0 inverts the meaning of the bit */
+ if (!no_cpu)
+ external = !external;
+
+ if (external)
+ val |= MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+ else
+ val &= ~MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+
+ return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val);
+}
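The double negation above is easier to follow as a truth table (our reading of the code, not text from the patch):

    /* no_cpu   external (requested)   NORMALSMI bit written
     *   1              1                       1
     *   1              0                       0
     *   0              1                       0     (NO_CPU = 0 inverts)
     *   0              0                       1
     */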
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
new file mode 100644
index 000000000000..ac7694c71266
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -0,0 +1,576 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <[email protected]>
+ * Brandon Streiff <[email protected]>
+ * Dane Wagner <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "hwtstamp.h"
+#include "ptp.h"
+#include <linux/ptp_classify.h>
+
+#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
+
+static int mv88e6xxx_port_ptp_read(struct mv88e6xxx_chip *chip, int port,
+ int addr, u16 *data, int len)
+{
+ if (!chip->info->ops->avb_ops->port_ptp_read)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->port_ptp_read(chip, port, addr,
+ data, len);
+}
+
+static int mv88e6xxx_port_ptp_write(struct mv88e6xxx_chip *chip, int port,
+ int addr, u16 data)
+{
+ if (!chip->info->ops->avb_ops->port_ptp_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->port_ptp_write(chip, port, addr,
+ data);
+}
+
+static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ if (!chip->info->ops->avb_ops->ptp_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->ptp_write(chip, addr, data);
+}
+
+/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
+ * timestamp. When working properly, hardware will produce a timestamp
+ * within 1ms. Software may encounter delays due to MDIO contention, so
+ * the timeout is set accordingly.
+ */
+#define TX_TSTAMP_TIMEOUT msecs_to_jiffies(20)
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->phc_index = ptp_clock_index(chip->ptp_clock);
+ info->tx_types =
+ (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+ info->rx_filters =
+ (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
+
+ return 0;
+}
+
+static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
+ struct hwtstamp_config *config)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ bool tstamp_enable = false;
+ u16 port_config0;
+ int err;
+
+ /* Prevent the TX/RX paths from trying to interact with the
+ * timestamp hardware while we reconfigure it.
+ */
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+ /* reserved for future extensions */
+ if (config->flags)
+ return -EINVAL;
+
+ switch (config->tx_type) {
+ case HWTSTAMP_TX_OFF:
+ tstamp_enable = false;
+ break;
+ case HWTSTAMP_TX_ON:
+ tstamp_enable = true;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* The switch supports timestamping both L2 and L4; one cannot be
+ * disabled independently of the other.
+ */
+ switch (config->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ tstamp_enable = false;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ default:
+ config->rx_filter = HWTSTAMP_FILTER_NONE;
+ return -ERANGE;
+ }
+
+ if (tstamp_enable) {
+ /* Disable transportSpecific value matching, so that packets
+ * with either 1588 (0) or 802.1AS (1) will be timestamped.
+ */
+ port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH;
+ } else {
+ /* Disable PTP. This disables both RX and TX timestamping. */
+ port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP;
+ }
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+ port_config0);
+ mutex_unlock(&chip->reg_lock);
+
+ if (err < 0)
+ return err;
+
+ /* Once hardware has been configured, enable timestamp checks
+ * in the RX/TX paths.
+ */
+ if (tstamp_enable)
+ set_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+ return 0;
+}
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+ struct ifreq *ifr)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ struct hwtstamp_config config;
+ int err;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ err = mv88e6xxx_set_hwtstamp_config(chip, port, &config);
+ if (err)
+ return err;
+
+ /* Save the chosen configuration to be returned later. */
+ memcpy(&ps->tstamp_config, &config, sizeof(config));
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
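A minimal user-space sketch of exercising this path (the interface name "lan0" and the open datagram socket sock are assumptions for illustration; the ioctl itself is the standard SIOCSHWTSTAMP interface):

    #include <linux/net_tstamp.h>
    #include <linux/sockios.h>
    #include <net/if.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static int enable_ptp_timestamping(int sock, const char *ifname)
    {
        struct hwtstamp_config cfg = {
            .tx_type   = HWTSTAMP_TX_ON,
            /* The only RX filter this driver accepts (after coercion) */
            .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
        };
        struct ifreq ifr;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&cfg;

        /* Reaches mv88e6xxx_port_hwtstamp_set() via the DSA core */
        return ioctl(sock, SIOCSHWTSTAMP, &ifr);
    }

    /* Usage: enable_ptp_timestamping(sock, "lan0"); */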
+
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+ struct ifreq *ifr)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ struct hwtstamp_config *config = &ps->tstamp_config;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+ -EFAULT : 0;
+}
+
+/* Get the start of the PTP header in this skb */
+static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type)
+{
+ u8 *data = skb_mac_header(skb);
+ unsigned int offset = 0;
+
+ if (type & PTP_CLASS_VLAN)
+ offset += VLAN_HLEN;
+
+ switch (type & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ offset += ETH_HLEN;
+ break;
+ default:
+ return NULL;
+ }
+
+ /* Ensure that the entire header is present in this packet. */
+ if (skb->len + ETH_HLEN < offset + 34)
+ return NULL;
+
+ return data + offset;
+}
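For reference (an assumption based on <linux/ptp_classify.h>, not stated in the patch):

    /* The 34 in the length check above is the size of a PTPv2 header;
     * OFF_PTP_SEQUENCE_ID (30) from <linux/ptp_classify.h> is the offset
     * of the sequenceId field that the timestamp paths below read from
     * the returned pointer.
     */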
+
+/* Returns a pointer to the PTP header if the caller should time stamp,
+ * or NULL if the caller should not.
+ */
+static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
+ struct sk_buff *skb, unsigned int type)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ u8 *hdr;
+
+ if (!chip->info->ptp_support)
+ return NULL;
+
+ hdr = parse_ptp_header(skb, type);
+ if (!hdr)
+ return NULL;
+
+ if (!test_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state))
+ return NULL;
+
+ return hdr;
+}
+
+static int mv88e6xxx_ts_valid(u16 status)
+{
+ if (!(status & MV88E6XXX_PTP_TS_VALID))
+ return 0;
+ if (status & MV88E6XXX_PTP_TS_STATUS_MASK)
+ return 0;
+ return 1;
+}
+
+static int seq_match(struct sk_buff *skb, u16 ts_seqid)
+{
+ unsigned int type = SKB_PTP_TYPE(skb);
+ u8 *hdr = parse_ptp_header(skb, type);
+ __be16 *seqid;
+
+ seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+ return ts_seqid == ntohs(*seqid);
+}
+
+static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps,
+ struct sk_buff *skb, u16 reg,
+ struct sk_buff_head *rxq)
+{
+ u16 buf[4] = { 0 }, status, seq_id;
+ u64 ns, timelo, timehi;
+ struct skb_shared_hwtstamps *shwt;
+ int err;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+ reg, buf, ARRAY_SIZE(buf));
+ mutex_unlock(&chip->reg_lock);
+ if (err)
+ pr_err("failed to get the receive time stamp\n");
+
+ status = buf[0];
+ timelo = buf[1];
+ timehi = buf[2];
+ seq_id = buf[3];
+
+ if (status & MV88E6XXX_PTP_TS_VALID) {
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_write(chip, ps->port_id, reg, 0);
+ mutex_unlock(&chip->reg_lock);
+ if (err)
+ pr_err("failed to clear the receive status\n");
+ }
+ /* Since the device can only handle one time stamp at a time,
+ * we purge any extra frames from the queue.
+ */
+ for ( ; skb; skb = skb_dequeue(rxq)) {
+ if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) {
+ ns = timehi << 16 | timelo;
+
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_cyc2time(&chip->tstamp_tc, ns);
+ mutex_unlock(&chip->reg_lock);
+ shwt = skb_hwtstamps(skb);
+ memset(shwt, 0, sizeof(*shwt));
+ shwt->hwtstamp = ns_to_ktime(ns);
+ status &= ~MV88E6XXX_PTP_TS_VALID;
+ }
+ netif_rx_ni(skb);
+ }
+}
+
+static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps)
+{
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&ps->rx_queue);
+
+ if (skb)
+ mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS,
+ &ps->rx_queue);
+
+ skb = skb_dequeue(&ps->rx_queue2);
+ if (skb)
+ mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS,
+ &ps->rx_queue2);
+}
+
+static int is_pdelay_resp(u8 *msgtype)
+{
+ return (*msgtype & 0xf) == 3;
+}
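The nibble values come from the IEEE 1588-2008 messageType field (for reference; not spelled out in the patch):

    /* 0x0 Sync, 0x1 Delay_Req, 0x2 Pdelay_Req, 0x3 Pdelay_Resp.
     * Pdelay_Resp frames are steered to the ARR1 capture slot by the
     * MV88E6XXX_PTP_TS_ARRIVAL_PTR write in mv88e6xxx_hwtstamp_setup(),
     * which is why mv88e6xxx_port_rxtstamp() below keeps them on the
     * separate rx_queue2.
     */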
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *skb, unsigned int type)
+{
+ struct mv88e6xxx_port_hwtstamp *ps;
+ struct mv88e6xxx_chip *chip;
+ u8 *hdr;
+
+ chip = ds->priv;
+ ps = &chip->port_hwtstamp[port];
+
+ if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT)
+ return false;
+
+ hdr = mv88e6xxx_should_tstamp(chip, port, skb, type);
+ if (!hdr)
+ return false;
+
+ SKB_PTP_TYPE(skb) = type;
+
+ if (is_pdelay_resp(hdr))
+ skb_queue_tail(&ps->rx_queue2, skb);
+ else
+ skb_queue_tail(&ps->rx_queue, skb);
+
+ ptp_schedule_worker(chip->ptp_clock, 0);
+
+ return true;
+}
+
+static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps)
+{
+ struct skb_shared_hwtstamps shhwtstamps;
+ u16 departure_block[4], status;
+ struct sk_buff *tmp_skb;
+ u32 time_raw;
+ int err;
+ u64 ns;
+
+ if (!ps->tx_skb)
+ return 0;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+ MV88E6XXX_PORT_PTP_DEP_STS,
+ departure_block,
+ ARRAY_SIZE(departure_block));
+ mutex_unlock(&chip->reg_lock);
+
+ if (err)
+ goto free_and_clear_skb;
+
+ if (!(departure_block[0] & MV88E6XXX_PTP_TS_VALID)) {
+ if (time_is_before_jiffies(ps->tx_tstamp_start +
+ TX_TSTAMP_TIMEOUT)) {
+ dev_warn(chip->dev, "p%d: clearing tx timestamp hang\n",
+ ps->port_id);
+ goto free_and_clear_skb;
+ }
+ /* The timestamp should be available quickly, while getting it
+ * is high priority and time bounded to only 10ms. A poll is
+ * warranted so restart the work.
+ */
+ return 1;
+ }
+
+ /* We have the timestamp; go ahead and clear valid now */
+ mutex_lock(&chip->reg_lock);
+ mv88e6xxx_port_ptp_write(chip, ps->port_id,
+ MV88E6XXX_PORT_PTP_DEP_STS, 0);
+ mutex_unlock(&chip->reg_lock);
+
+ status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK;
+ if (status != MV88E6XXX_PTP_TS_STATUS_NORMAL) {
+ dev_warn(chip->dev, "p%d: tx timestamp overrun\n", ps->port_id);
+ goto free_and_clear_skb;
+ }
+
+ if (departure_block[3] != ps->tx_seq_id) {
+ dev_warn(chip->dev, "p%d: unexpected seq. id\n", ps->port_id);
+ goto free_and_clear_skb;
+ }
+
+ memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+ time_raw = ((u32)departure_block[2] << 16) | departure_block[1];
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_cyc2time(&chip->tstamp_tc, time_raw);
+ mutex_unlock(&chip->reg_lock);
+ shhwtstamps.hwtstamp = ns_to_ktime(ns);
+
+ dev_dbg(chip->dev,
+ "p%d: txtstamp %llx status 0x%04x skb ID 0x%04x hw ID 0x%04x\n",
+ ps->port_id, ktime_to_ns(shhwtstamps.hwtstamp),
+ departure_block[0], ps->tx_seq_id, departure_block[3]);
+
+ /* skb_complete_tx_timestamp() will free up the client to make
+ * another timestamp-able transmit. We have to be ready for it
+ * -- by clearing the ps->tx_skb "flag" -- beforehand.
+ */
+
+ tmp_skb = ps->tx_skb;
+ ps->tx_skb = NULL;
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+ skb_complete_tx_timestamp(tmp_skb, &shhwtstamps);
+
+ return 0;
+
+free_and_clear_skb:
+ dev_kfree_skb_any(ps->tx_skb);
+ ps->tx_skb = NULL;
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+
+ return 0;
+}
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ struct dsa_switch *ds = chip->ds;
+ struct mv88e6xxx_port_hwtstamp *ps;
+ int i, restart = 0;
+
+ for (i = 0; i < ds->num_ports; i++) {
+ if (!dsa_is_user_port(ds, i))
+ continue;
+
+ ps = &chip->port_hwtstamp[i];
+ if (test_bit(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state))
+ restart |= mv88e6xxx_txtstamp_work(chip, ps);
+
+ mv88e6xxx_rxtstamp_work(chip, ps);
+ }
+
+ return restart ? 1 : -1;
+}
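The return value follows the ptp_clock_info do_aux_work convention (see <linux/ptp_clock_kernel.h>):

    /* >= 0: re-run the worker after that many jiffies;
     *  < 0: do not reschedule -- a later ptp_schedule_worker() call from
     *       the rx/tx timestamp paths starts it again.
     */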
+
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ __be16 *seq_ptr;
+ u8 *hdr;
+
+ if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
+ return false;
+
+ hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
+ if (!hdr)
+ return false;
+
+ seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+ if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+ &ps->state))
+ return false;
+
+ ps->tx_skb = clone;
+ ps->tx_tstamp_start = jiffies;
+ ps->tx_seq_id = be16_to_cpup(seq_ptr);
+
+ ptp_schedule_worker(chip->ptp_clock, 0);
+ return true;
+}
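A design note, as we read it (not stated in the patch):

    /* MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS acts as a single-slot lock: the
     * hardware holds only one departure timestamp per port, so while one
     * is pending, test_and_set_bit_lock() fails, port_txtstamp returns
     * false, and the core does not timestamp the second clone.
     */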
+
+static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+
+ ps->port_id = port;
+
+ skb_queue_head_init(&ps->rx_queue);
+ skb_queue_head_init(&ps->rx_queue2);
+
+ return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+ MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
+}
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+ int i;
+
+ /* Disable timestamping on all ports. */
+ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
+ err = mv88e6xxx_hwtstamp_port_setup(chip, i);
+ if (err)
+ return err;
+ }
+
+ /* MV88E6XXX_PTP_MSGTYPE is a mask of PTP message types to
+ * timestamp. This affects all ports that have timestamping enabled,
+ * but the timestamp config is per-port; thus we configure all events
+ * here and only support the HWTSTAMP_FILTER_*_EVENT filter types.
+ */
+ err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_MSGTYPE,
+ MV88E6XXX_PTP_MSGTYPE_ALL_EVENT);
+ if (err)
+ return err;
+
+ /* Use ARRIVAL1 for peer delay response messages. */
+ err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_TS_ARRIVAL_PTR,
+ MV88E6XXX_PTP_MSGTYPE_PDLAY_RES);
+ if (err)
+ return err;
+
+ /* 88E6341 devices default to timestamping at the PHY, but this has
+ * a hardware issue that results in unreliable timestamps. Force
+ * these devices to timestamp at the MAC.
+ */
+ if (chip->info->family == MV88E6XXX_FAMILY_6341) {
+ u16 val = MV88E6341_PTP_CFG_UPDATE |
+ MV88E6341_PTP_CFG_MODE_IDX |
+ MV88E6341_PTP_CFG_MODE_TS_AT_MAC;
+ err = mv88e6xxx_ptp_write(chip, MV88E6341_PTP_CFG, val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
new file mode 100644
index 000000000000..bc71c9212a08
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
@@ -0,0 +1,172 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <[email protected]>
+ * Brandon Streiff <[email protected]>
+ * Dane Wagner <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_HWTSTAMP_H
+#define _MV88E6XXX_HWTSTAMP_H
+
+#include "chip.h"
+
+/* Global PTP registers */
+/* Offset 0x00: PTP EtherType */
+#define MV88E6XXX_PTP_ETHERTYPE 0x00
+
+/* Offset 0x01: Message Type Timestamp Enables */
+#define MV88E6XXX_PTP_MSGTYPE 0x01
+#define MV88E6XXX_PTP_MSGTYPE_SYNC 0x0001
+#define MV88E6XXX_PTP_MSGTYPE_DELAY_REQ 0x0002
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_REQ 0x0004
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_RES 0x0008
+#define MV88E6XXX_PTP_MSGTYPE_ALL_EVENT 0x000f
+
+/* Offset 0x02: Timestamp Arrival Capture Pointers */
+#define MV88E6XXX_PTP_TS_ARRIVAL_PTR 0x02
+
+/* Offset 0x07: PTP Global Configuration */
+#define MV88E6341_PTP_CFG 0x07
+#define MV88E6341_PTP_CFG_UPDATE 0x8000
+#define MV88E6341_PTP_CFG_IDX_MASK 0x7f00
+#define MV88E6341_PTP_CFG_DATA_MASK 0x00ff
+#define MV88E6341_PTP_CFG_MODE_IDX 0x0
+#define MV88E6341_PTP_CFG_MODE_TS_AT_PHY 0x00
+#define MV88E6341_PTP_CFG_MODE_TS_AT_MAC 0x80
+
+/* Offset 0x08: PTP Interrupt Status */
+#define MV88E6XXX_PTP_IRQ_STATUS 0x08
+
+/* Per-Port PTP Registers */
+/* Offset 0x00: PTP Configuration 0 */
+#define MV88E6XXX_PORT_PTP_CFG0 0x00
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT 12
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_MASK 0xf000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_1588 0x0000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_8021AS 0x1000
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH 0x0800
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_OVERWRITE 0x0002
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP 0x0001
+
+/* Offset 0x01: PTP Configuration 1 */
+#define MV88E6XXX_PORT_PTP_CFG1 0x01
+
+/* Offset 0x02: PTP Configuration 2 */
+#define MV88E6XXX_PORT_PTP_CFG2 0x02
+#define MV88E6XXX_PORT_PTP_CFG2_EMBED_ARRIVAL 0x1000
+#define MV88E6XXX_PORT_PTP_CFG2_DEP_IRQ_EN 0x0002
+#define MV88E6XXX_PORT_PTP_CFG2_ARR_IRQ_EN 0x0001
+
+/* Offset 0x03: PTP LED Configuration */
+#define MV88E6XXX_PORT_PTP_LED_CFG 0x03
+
+/* Offset 0x08: PTP Arrival 0 Status */
+#define MV88E6XXX_PORT_PTP_ARR0_STS 0x08
+
+/* Offset 0x09/0x0A: PTP Arrival 0 Time */
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_LO 0x09
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_HI 0x0a
+
+/* Offset 0x0B: PTP Arrival 0 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR0_SEQID 0x0b
+
+/* Offset 0x0C: PTP Arrival 1 Status */
+#define MV88E6XXX_PORT_PTP_ARR1_STS 0x0c
+
+/* Offset 0x0D/0x0E: PTP Arrival 1 Time */
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_LO 0x0d
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_HI 0x0e
+
+/* Offset 0x0F: PTP Arrival 1 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR1_SEQID 0x0f
+
+/* Offset 0x10: PTP Departure Status */
+#define MV88E6XXX_PORT_PTP_DEP_STS 0x10
+
+/* Offset 0x11/0x12: PTP Departure Time */
+#define MV88E6XXX_PORT_PTP_DEP_TIME_LO 0x11
+#define MV88E6XXX_PORT_PTP_DEP_TIME_HI 0x12
+
+/* Offset 0x13: PTP Departure Sequence ID */
+#define MV88E6XXX_PORT_PTP_DEP_SEQID 0x13
+
+/* Status fields for arrival and departure timestamp status registers */
+#define MV88E6XXX_PTP_TS_STATUS_MASK 0x0006
+#define MV88E6XXX_PTP_TS_STATUS_NORMAL 0x0000
+#define MV88E6XXX_PTP_TS_STATUS_OVERWRITTEN 0x0002
+#define MV88E6XXX_PTP_TS_STATUS_DISCARDED 0x0004
+#define MV88E6XXX_PTP_TS_VALID 0x0001
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info);
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds,
+ int port, struct ifreq *ifr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds,
+ int port, struct ifreq *ifr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone,
+ unsigned int type)
+{
+ return false;
+}
+
+static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone,
+ unsigned int type)
+{
+ return false;
+}
+
+static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+ return 0;
+}
+
+static inline void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_HWTSTAMP_H */
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
new file mode 100644
index 000000000000..bd85e2c390e1
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -0,0 +1,381 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <[email protected]>
+ * Brandon Streiff <[email protected]>
+ * Dane Wagner <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "ptp.h"
+
+/* Raw timestamps are in units of 8-ns clock periods. */
+#define CC_SHIFT 28
+#define CC_MULT (8 << CC_SHIFT)
+#define CC_MULT_NUM (1 << 9)
+#define CC_MULT_DEM 15625ULL
+
+#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100)
+
+#define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc)
+#define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+ overflow_work)
+#define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+ tai_event_work)
+
+static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ if (!chip->info->ops->avb_ops->tai_read)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->tai_read(chip, addr, data, len);
+}
+
+static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data)
+{
+ if (!chip->info->ops->avb_ops->tai_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->tai_write(chip, addr, data);
+}
+
+/* TODO: places where this is called should be using pinctrl */
+static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
+ int func, int input)
+{
+ int err;
+
+ if (!chip->info->ops->gpio_ops)
+ return -EOPNOTSUPP;
+
+ err = chip->info->ops->gpio_ops->set_dir(chip, pin, input);
+ if (err)
+ return err;
+
+ return chip->info->ops->gpio_ops->set_pctl(chip, pin, func);
+}
+
+static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
+{
+ struct mv88e6xxx_chip *chip = cc_to_chip(cc);
+ u16 phc_time[2];
+ int err;
+
+ err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_TIME_LO, phc_time,
+ ARRAY_SIZE(phc_time));
+ if (err)
+ return 0;
+ else
+ return ((u32)phc_time[1] << 16) | phc_time[0];
+}
+
+/* mv88e6xxx_config_eventcap - configure TAI event capture
+ * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external)
+ * @rising: zero for falling-edge trigger, else rising-edge trigger
+ *
+ * This will also reset the capture sequence counter.
+ */
+static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event,
+ int rising)
+{
+ u16 global_config;
+ u16 cap_config;
+ int err;
+
+ chip->evcap_config = MV88E6XXX_TAI_CFG_CAP_OVERWRITE |
+ MV88E6XXX_TAI_CFG_CAP_CTR_START;
+ if (!rising)
+ chip->evcap_config |= MV88E6XXX_TAI_CFG_EVREQ_FALLING;
+
+ global_config = (chip->evcap_config | chip->trig_config);
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_CFG, global_config);
+ if (err)
+ return err;
+
+ if (event == PTP_CLOCK_PPS) {
+ cap_config = MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG;
+ } else if (event == PTP_CLOCK_EXTTS) {
+ /* if STATUS_CAP_TRIG is unset we capture PTP_EVREQ events */
+ cap_config = 0;
+ } else {
+ return -EINVAL;
+ }
+
+ /* Write the capture config; this also clears the capture counter */
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS,
+ cap_config);
+
+ return err;
+}
+
+static void mv88e6xxx_tai_event_work(struct work_struct *ugly)
+{
+ struct delayed_work *dw = to_delayed_work(ugly);
+ struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw);
+ struct ptp_clock_event ev;
+ u16 status[4];
+ u32 raw_ts;
+ int err;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_EVENT_STATUS,
+ status, ARRAY_SIZE(status));
+ mutex_unlock(&chip->reg_lock);
+
+ if (err) {
+ dev_err(chip->dev, "failed to read TAI status register\n");
+ return;
+ }
+ if (status[0] & MV88E6XXX_TAI_EVENT_STATUS_ERROR) {
+ dev_warn(chip->dev, "missed event capture\n");
+ return;
+ }
+ if (!(status[0] & MV88E6XXX_TAI_EVENT_STATUS_VALID))
+ goto out;
+
+ raw_ts = ((u32)status[2] << 16) | status[1];
+
+ /* Clear the valid bit so the next timestamp can come in */
+ status[0] &= ~MV88E6XXX_TAI_EVENT_STATUS_VALID;
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, status[0]);
+ mutex_unlock(&chip->reg_lock);
+
+ /* This is an external timestamp */
+ ev.type = PTP_CLOCK_EXTTS;
+
+ /* We only have one timestamping channel. */
+ ev.index = 0;
+ mutex_lock(&chip->reg_lock);
+ ev.timestamp = timecounter_cyc2time(&chip->tstamp_tc, raw_ts);
+ mutex_unlock(&chip->reg_lock);
+
+ ptp_clock_event(chip->ptp_clock, &ev);
+out:
+ schedule_delayed_work(&chip->tai_event_work, TAI_EVENT_WORK_INTERVAL);
+}
+
+static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ int neg_adj = 0;
+ u32 diff, mult;
+ u64 adj;
+
+ if (scaled_ppm < 0) {
+ neg_adj = 1;
+ scaled_ppm = -scaled_ppm;
+ }
+ mult = CC_MULT;
+ adj = CC_MULT_NUM;
+ adj *= scaled_ppm;
+ diff = div_u64(adj, CC_MULT_DEM);
+
+ mutex_lock(&chip->reg_lock);
+
+ timecounter_read(&chip->tstamp_tc);
+ chip->tstamp_cc.mult = neg_adj ? mult - diff : mult + diff;
+
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
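The CC_MULT* constants live near the top of ptp.c and are not visible in this hunk; assuming the usual values for the 8 ns (125 MHz) TAI tick — CC_MULT = 8 << 28, CC_MULT_NUM = 1 << 9, CC_MULT_DEM = 15625 — the division above reduces to mult * ppm / 10^6, since scaled_ppm carries a 16-bit binary fraction. A worked example under those assumptions:

/* Assumed constants (not shown in this hunk): CC_MULT = 8 << 28,
 * CC_MULT_NUM = 1 << 9, CC_MULT_DEM = 15625.
 * scaled_ppm == 65536 means exactly +1 ppm.
 */
u64 adj = (u64)(1 << 9) * 65536;	/* CC_MULT_NUM * scaled_ppm */
u32 diff = div_u64(adj, 15625);		/* = 2147 ~= (8 << 28) / 10^6 */
/* tstamp_cc.mult becomes CC_MULT + 2147, speeding the clock by 1 ppm */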
+
+static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+ mutex_lock(&chip->reg_lock);
+ timecounter_adjtime(&chip->tstamp_tc, delta);
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp,
+ struct timespec64 *ts)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ u64 ns;
+
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_read(&chip->tstamp_tc);
+ mutex_unlock(&chip->reg_lock);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ u64 ns;
+
+ ns = timespec64_to_ns(ts);
+
+ mutex_lock(&chip->reg_lock);
+ timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ns);
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip,
+ struct ptp_clock_request *rq, int on)
+{
+ int rising = (rq->extts.flags & PTP_RISING_EDGE);
+ int func;
+ int pin;
+ int err;
+
+ pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
+
+ if (pin < 0)
+ return -EBUSY;
+
+ mutex_lock(&chip->reg_lock);
+
+ if (on) {
+ func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ;
+
+ err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+ if (err)
+ goto out;
+
+ schedule_delayed_work(&chip->tai_event_work,
+ TAI_EVENT_WORK_INTERVAL);
+
+ err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising);
+ } else {
+ func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO;
+
+ err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+
+ cancel_delayed_work_sync(&chip->tai_event_work);
+ }
+
+out:
+ mutex_unlock(&chip->reg_lock);
+
+ return err;
+}
+
+static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ return mv88e6xxx_ptp_enable_extts(chip, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ switch (func) {
+ case PTP_PF_NONE:
+ case PTP_PF_EXTTS:
+ break;
+ case PTP_PF_PEROUT:
+ case PTP_PF_PHYSYNC:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3
+ * seconds; this task forces periodic reads so that a counter wrap is never missed.
+ */
+#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16)
+static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
+{
+ struct delayed_work *dw = to_delayed_work(work);
+ struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
+ struct timespec64 ts;
+
+ mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
+
+ schedule_delayed_work(&chip->overflow_work,
+ MV88E6XXX_TAI_OVERFLOW_PERIOD);
+}
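The 16-second period follows from the wrap arithmetic: a timecounter must observe the raw counter at least twice per wrap to extend it unambiguously, and the gettime call in the worker is what performs that read.

/* Wrap arithmetic for the period above (8 ns per tick at 125 MHz):
 *   wrap   = 2^32 * 8 ns ~= 34.36 s
 *   limit  = wrap / 2    ~= 17.18 s  (two reads per wrap required)
 *   period = HZ * 16     ==  16 s   -> fits with ~1.2 s of margin
 */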
+
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+ int i;
+
+ /* Set up the cycle counter */
+ memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
+ chip->tstamp_cc.read = mv88e6xxx_ptp_clock_read;
+ chip->tstamp_cc.mask = CYCLECOUNTER_MASK(32);
+ chip->tstamp_cc.mult = CC_MULT;
+ chip->tstamp_cc.shift = CC_SHIFT;
+
+ timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc,
+ ktime_to_ns(ktime_get_real()));
+
+ INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
+ INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work);
+
+ chip->ptp_clock_info.owner = THIS_MODULE;
+ snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
+ "%s", dev_name(chip->dev));
+ chip->ptp_clock_info.max_adj = 1000000;
+
+ chip->ptp_clock_info.n_ext_ts = 1;
+ chip->ptp_clock_info.n_per_out = 0;
+ chip->ptp_clock_info.n_pins = mv88e6xxx_num_gpio(chip);
+ chip->ptp_clock_info.pps = 0;
+
+ for (i = 0; i < chip->ptp_clock_info.n_pins; ++i) {
+ struct ptp_pin_desc *ppd = &chip->pin_config[i];
+
+ snprintf(ppd->name, sizeof(ppd->name), "mv88e6xxx_gpio%d", i);
+ ppd->index = i;
+ ppd->func = PTP_PF_NONE;
+ }
+ chip->ptp_clock_info.pin_config = chip->pin_config;
+
+ chip->ptp_clock_info.adjfine = mv88e6xxx_ptp_adjfine;
+ chip->ptp_clock_info.adjtime = mv88e6xxx_ptp_adjtime;
+ chip->ptp_clock_info.gettime64 = mv88e6xxx_ptp_gettime;
+ chip->ptp_clock_info.settime64 = mv88e6xxx_ptp_settime;
+ chip->ptp_clock_info.enable = mv88e6xxx_ptp_enable;
+ chip->ptp_clock_info.verify = mv88e6xxx_ptp_verify;
+ chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
+
+ chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
+ if (IS_ERR(chip->ptp_clock))
+ return PTR_ERR(chip->ptp_clock);
+
+ schedule_delayed_work(&chip->overflow_work,
+ MV88E6XXX_TAI_OVERFLOW_PERIOD);
+
+ return 0;
+}
+
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+ if (chip->ptp_clock) {
+ cancel_delayed_work_sync(&chip->overflow_work);
+ cancel_delayed_work_sync(&chip->tai_event_work);
+
+ ptp_clock_unregister(chip->ptp_clock);
+ chip->ptp_clock = NULL;
+ }
+}
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
new file mode 100644
index 000000000000..10f271ab650d
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -0,0 +1,108 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <[email protected]>
+ * Brandon Streiff <[email protected]>
+ * Dane Wagner <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_PTP_H
+#define _MV88E6XXX_PTP_H
+
+#include "chip.h"
+
+/* Offset 0x00: TAI Global Config */
+#define MV88E6XXX_TAI_CFG 0x00
+#define MV88E6XXX_TAI_CFG_CAP_OVERWRITE 0x8000
+#define MV88E6XXX_TAI_CFG_CAP_CTR_START 0x4000
+#define MV88E6XXX_TAI_CFG_EVREQ_FALLING 0x2000
+#define MV88E6XXX_TAI_CFG_TRIG_ACTIVE_LO 0x1000
+#define MV88E6XXX_TAI_CFG_IRL_ENABLE 0x0400
+#define MV88E6XXX_TAI_CFG_TRIG_IRQ_EN 0x0200
+#define MV88E6XXX_TAI_CFG_EVREQ_IRQ_EN 0x0100
+#define MV88E6XXX_TAI_CFG_TRIG_LOCK 0x0080
+#define MV88E6XXX_TAI_CFG_BLOCK_UPDATE 0x0008
+#define MV88E6XXX_TAI_CFG_MULTI_PTP 0x0004
+#define MV88E6XXX_TAI_CFG_TRIG_MODE_ONESHOT 0x0002
+#define MV88E6XXX_TAI_CFG_TRIG_ENABLE 0x0001
+
+/* Offset 0x01: Timestamp Clock Period (ps) */
+#define MV88E6XXX_TAI_CLOCK_PERIOD 0x01
+
+/* Offset 0x02/0x03: Trigger Generation Amount */
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_LO 0x02
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_HI 0x03
+
+/* Offset 0x04: Clock Compensation */
+#define MV88E6XXX_TAI_TRIG_CLOCK_COMP 0x04
+
+/* Offset 0x05: Trigger Configuration */
+#define MV88E6XXX_TAI_TRIG_CFG 0x05
+
+/* Offset 0x06: Ingress Rate Limiter Clock Generation Amount */
+#define MV88E6XXX_TAI_IRL_AMOUNT 0x06
+
+/* Offset 0x07: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP 0x07
+
+/* Offset 0x08: Ingress Rate Limiter Compensation (ps) */
+#define MV88E6XXX_TAI_IRL_COMP_PS 0x08
+
+/* Offset 0x09: Event Status */
+#define MV88E6XXX_TAI_EVENT_STATUS 0x09
+#define MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG 0x4000
+#define MV88E6XXX_TAI_EVENT_STATUS_ERROR 0x0200
+#define MV88E6XXX_TAI_EVENT_STATUS_VALID 0x0100
+#define MV88E6XXX_TAI_EVENT_STATUS_CTR_MASK 0x00ff
+
+/* Offset 0x0A/0x0B: Event Time */
+#define MV88E6XXX_TAI_EVENT_TIME_LO 0x0a
+#define MV88E6XXX_TAI_EVENT_TIME_HI 0x0b
+
+/* Offset 0x0E/0x0F: PTP Global Time */
+#define MV88E6XXX_TAI_TIME_LO 0x0e
+#define MV88E6XXX_TAI_TIME_HI 0x0f
+
+/* Offset 0x10/0x11: Trig Generation Time */
+#define MV88E6XXX_TAI_TRIG_TIME_LO 0x10
+#define MV88E6XXX_TAI_TRIG_TIME_HI 0x11
+
+/* Offset 0x12: Lock Status */
+#define MV88E6XXX_TAI_LOCK_STATUS 0x12
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp);
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
+
+#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip, \
+ ptp_clock_info)
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+ return -1;
+}
+
+static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+ return 0;
+}
+
+static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_PTP_H */
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index f3c01119b3d1..b6166424216a 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -55,18 +55,30 @@ static int mv88e6352_serdes_power_set(struct mv88e6xxx_chip *chip, bool on)
return err;
}
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
{
- int err;
u8 cmode;
+ int err;
err = mv88e6xxx_port_get_cmode(chip, port, &cmode);
- if (err)
- return err;
+ if (err) {
+ dev_err(chip->dev, "failed to read cmode\n");
+ return false;
+ }
if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASE_X) ||
(cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X) ||
- (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII)) {
+ (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
+ return true;
+
+ return false;
+}
+
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+{
+ int err;
+
+ if (mv88e6352_port_has_serdes(chip, port)) {
err = mv88e6352_serdes_power_set(chip, on);
if (err < 0)
return err;
@@ -75,6 +87,90 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
return 0;
}
+struct mv88e6352_serdes_hw_stat {
+ char string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int reg;
+};
+
+static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = {
+ { "serdes_fibre_rx_error", 16, 21 },
+ { "serdes_PRBS_error", 32, 24 },
+};
+
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
+{
+ if (mv88e6352_port_has_serdes(chip, port))
+ return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
+
+ return 0;
+}
+
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+ int port, uint8_t *data)
+{
+ struct mv88e6352_serdes_hw_stat *stat;
+ int i;
+
+ if (!mv88e6352_port_has_serdes(chip, port))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+ stat = &mv88e6352_serdes_hw_stats[i];
+ memcpy(data + i * ETH_GSTRING_LEN, stat->string,
+ ETH_GSTRING_LEN);
+ }
+}
+
+static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip,
+ struct mv88e6352_serdes_hw_stat *stat)
+{
+ u64 val = 0;
+ u16 reg;
+ int err;
+
+ err = mv88e6352_serdes_read(chip, stat->reg, &reg);
+ if (err) {
+ dev_err(chip->dev, "failed to read statistic\n");
+ return 0;
+ }
+
+ val = reg;
+
+ if (stat->sizeof_stat == 32) {
+ err = mv88e6352_serdes_read(chip, stat->reg + 1, &reg);
+ if (err) {
+ dev_err(chip->dev, "failed to read statistic\n");
+ return 0;
+ }
+ val = val << 16 | reg;
+ }
+
+ return val;
+}
+
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
+{
+ struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port];
+ struct mv88e6352_serdes_hw_stat *stat;
+ u64 value;
+ int i;
+
+ if (!mv88e6352_port_has_serdes(chip, port))
+ return;
+
+ BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
+ ARRAY_SIZE(mv88e6xxx_port->serdes_stats));
+
+ for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+ stat = &mv88e6352_serdes_hw_stats[i];
+ value = mv88e6352_serdes_get_stat(chip, stat);
+ mv88e6xxx_port->serdes_stats[i] += value;
+ data[i] = mv88e6xxx_port->serdes_stats[i];
+ }
+}
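For the one 32-bit entry in the table, the read order in mv88e6352_serdes_get_stat() makes register 24 the upper half and register 25 the lower half; compressed for reference:

/* serdes_PRBS_error (sizeof_stat == 32): two 16-bit SERDES reads */
u64 val = hi;			/* first read:  stat->reg     == 24 */
val = val << 16 | lo;		/* second read: stat->reg + 1 == 25 */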
+
/* Set the power on/off for 10GBASE-R and 10GBASE-X4/X2 */
static int mv88e6390_serdes_10g(struct mv88e6xxx_chip *chip, int addr, bool on)
{
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index 5c1cd6d8e9a5..641baa75f910 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -44,5 +44,9 @@
int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
-
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+ int port, uint8_t *data);
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
#endif
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 9df22ebee822..600d5ad1fbde 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -631,7 +631,7 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
}
static int
-qca8k_get_sset_count(struct dsa_switch *ds)
+qca8k_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(ar8327_mib);
}
diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig
index 29c3075bfb05..fdc673484add 100644
--- a/drivers/net/ethernet/8390/Kconfig
+++ b/drivers/net/ethernet/8390/Kconfig
@@ -3,7 +3,7 @@
#
config NET_VENDOR_8390
- bool "National Semi-conductor 8390 devices"
+ bool "National Semiconductor 8390 devices"
default y
depends on NET_VENDOR_NATSEMI
---help---
diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile
index f975c2fc88a3..1d650e66cc6e 100644
--- a/drivers/net/ethernet/8390/Makefile
+++ b/drivers/net/ethernet/8390/Makefile
@@ -7,8 +7,8 @@ obj-$(CONFIG_MAC8390) += mac8390.o
obj-$(CONFIG_APNE) += apne.o 8390.o
obj-$(CONFIG_ARM_ETHERH) += etherh.o
obj-$(CONFIG_AX88796) += ax88796.o
-obj-$(CONFIG_HYDRA) += hydra.o 8390.o
-obj-$(CONFIG_MCF8390) += mcf8390.o 8390.o
+obj-$(CONFIG_HYDRA) += hydra.o
+obj-$(CONFIG_MCF8390) += mcf8390.o
obj-$(CONFIG_NE2000) += ne.o 8390p.o
obj-$(CONFIG_NE2K_PCI) += ne2k-pci.o 8390.o
obj-$(CONFIG_PCMCIA_AXNET) += axnet_cs.o 8390.o
@@ -16,4 +16,4 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o
obj-$(CONFIG_STNIC) += stnic.o 8390.o
obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
obj-$(CONFIG_WD80x3) += wd.o 8390.o
-obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o
+obj-$(CONFIG_ZORRO8390) += zorro8390.o
diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c
index c56ac9ebc08f..fe6c834c422e 100644
--- a/drivers/net/ethernet/8390/apne.c
+++ b/drivers/net/ethernet/8390/apne.c
@@ -117,7 +117,7 @@ static const char version[] =
static int apne_owned; /* signal if card already owned */
static u32 apne_msg_enable;
-module_param_named(msg_enable, apne_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, apne_msg_enable, uint, 0444);
MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
struct net_device * __init apne_probe(int unit)
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 245554707163..da61cf3cb3a9 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -77,8 +77,6 @@ static unsigned char version[] = "ax88796.c: Copyright 2005,2007 Simtec Electron
#define AX_GPOC_PPDSET BIT(6)
-static u32 ax_msg_enable;
-
/* device private data */
struct ax_device {
@@ -747,7 +745,6 @@ static int ax_init_dev(struct net_device *dev)
ei_local->block_output = &ax_block_output;
ei_local->get_8390_hdr = &ax_get_8390_hdr;
ei_local->priv = 0;
- ei_local->msg_enable = ax_msg_enable;
dev->netdev_ops = &ax_netdev_ops;
dev->ethtool_ops = &ax_ethtool_ops;
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 7bddb8efb6d5..d422a124cd7c 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -104,7 +104,6 @@ static void AX88190_init(struct net_device *dev, int startp);
static int ax_open(struct net_device *dev);
static int ax_close(struct net_device *dev);
static irqreturn_t ax_interrupt(int irq, void *dev_id);
-static u32 axnet_msg_enable;
/*====================================================================*/
@@ -151,7 +150,6 @@ static int axnet_probe(struct pcmcia_device *link)
return -ENOMEM;
ei_local = netdev_priv(dev);
- ei_local->msg_enable = axnet_msg_enable;
spin_lock_init(&ei_local->page_lock);
info = PRIV(dev);
diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c
index 11cbf22ad201..32e9627e3880 100644
--- a/drivers/net/ethernet/8390/etherh.c
+++ b/drivers/net/ethernet/8390/etherh.c
@@ -64,8 +64,6 @@ static char version[] =
#include "lib8390.c"
-static u32 etherh_msg_enable;
-
struct etherh_priv {
void __iomem *ioc_fast;
void __iomem *memc;
@@ -502,18 +500,6 @@ etherh_close(struct net_device *dev)
}
/*
- * Initialisation
- */
-
-static void __init etherh_banner(void)
-{
- static int version_printed;
-
- if ((etherh_msg_enable & NETIF_MSG_DRV) && (version_printed++ == 0))
- pr_info("%s", version);
-}
-
-/*
* Read the ethernet address string from the on board rom.
* This is an ascii string...
*/
@@ -671,8 +657,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
struct etherh_priv *eh;
int ret;
- etherh_banner();
-
ret = ecard_request_resources(ec);
if (ret)
goto out;
@@ -757,7 +741,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
ei_local->block_output = etherh_block_output;
ei_local->get_8390_hdr = etherh_get_header;
ei_local->interface_num = 0;
- ei_local->msg_enable = etherh_msg_enable;
etherh_reset(dev);
__NS8390_init(dev, 0);
diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c
index 8ae249195301..941754ea78ec 100644
--- a/drivers/net/ethernet/8390/hydra.c
+++ b/drivers/net/ethernet/8390/hydra.c
@@ -66,7 +66,6 @@ static void hydra_block_input(struct net_device *dev, int count,
static void hydra_block_output(struct net_device *dev, int count,
const unsigned char *buf, int start_page);
static void hydra_remove_one(struct zorro_dev *z);
-static u32 hydra_msg_enable;
static struct zorro_device_id hydra_zorro_tbl[] = {
{ ZORRO_PROD_HYDRA_SYSTEMS_AMIGANET },
@@ -119,7 +118,6 @@ static int hydra_init(struct zorro_dev *z)
int start_page, stop_page;
int j;
int err;
- struct ei_device *ei_local;
static u32 hydra_offsets[16] = {
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
@@ -138,8 +136,6 @@ static int hydra_init(struct zorro_dev *z)
start_page = NESM_START_PG;
stop_page = NESM_STOP_PG;
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = hydra_msg_enable;
dev->base_addr = ioaddr;
dev->irq = IRQ_AMIGA_PORTS;
diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c
index 60f8e2c8e726..c9c55c9eab9f 100644
--- a/drivers/net/ethernet/8390/lib8390.c
+++ b/drivers/net/ethernet/8390/lib8390.c
@@ -113,7 +113,7 @@ static void __NS8390_init(struct net_device *dev, int startp);
static unsigned version_printed;
static u32 msg_enable;
-module_param(msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param(msg_enable, uint, 0444);
MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
/*
@@ -975,6 +975,8 @@ static void ethdev_setup(struct net_device *dev)
ether_setup(dev);
spin_lock_init(&ei_local->page_lock);
+
+ ei_local->msg_enable = msg_enable;
}
/**
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index 2f91ce8dc614..b6d735bf8011 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -123,8 +123,7 @@ enum mac8390_access {
};
extern int mac8390_memtest(struct net_device *dev);
-static int mac8390_initdev(struct net_device *dev,
- struct nubus_rsrc *ndev,
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
enum mac8390_type type);
static int mac8390_open(struct net_device *dev);
@@ -168,9 +167,8 @@ static void slow_sane_block_output(struct net_device *dev, int count,
const unsigned char *buf, int start_page);
static void word_memcpy_tocard(unsigned long tp, const void *fp, int count);
static void word_memcpy_fromcard(void *tp, unsigned long fp, int count);
-static u32 mac8390_msg_enable;
-static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
+static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres)
{
switch (fres->dr_sw) {
case NUBUS_DRSW_3COM:
@@ -236,7 +234,7 @@ static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
return MAC8390_NONE;
}
-static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
+static enum mac8390_access mac8390_testio(unsigned long membase)
{
unsigned long outdata = 0xA5A0B5B0;
unsigned long indata = 0x00000000;
@@ -254,7 +252,7 @@ static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
return ACCESS_UNKNOWN;
}
-static int __init mac8390_memsize(unsigned long membase)
+static int mac8390_memsize(unsigned long membase)
{
unsigned long flags;
int i, j;
@@ -290,36 +288,34 @@ static int __init mac8390_memsize(unsigned long membase)
return i * 0x1000;
}
-static bool __init mac8390_init(struct net_device *dev,
- struct nubus_rsrc *ndev,
- enum mac8390_type cardtype)
+static bool mac8390_rsrc_init(struct net_device *dev,
+ struct nubus_rsrc *fres,
+ enum mac8390_type cardtype)
{
+ struct nubus_board *board = fres->board;
struct nubus_dir dir;
struct nubus_dirent ent;
int offset;
volatile unsigned short *i;
- printk_once(KERN_INFO pr_fmt("%s"), version);
-
- dev->irq = SLOT2IRQ(ndev->board->slot);
+ dev->irq = SLOT2IRQ(board->slot);
/* This is getting to be a habit */
- dev->base_addr = (ndev->board->slot_addr |
- ((ndev->board->slot & 0xf) << 20));
+ dev->base_addr = board->slot_addr | ((board->slot & 0xf) << 20);
/*
* Get some Nubus info - we will trust the card's idea
* of where its memory and registers are.
*/
- if (nubus_get_func_dir(ndev, &dir) == -1) {
- pr_err("%s: Unable to get Nubus functional directory for slot %X!\n",
- dev->name, ndev->board->slot);
+ if (nubus_get_func_dir(fres, &dir) == -1) {
+ dev_err(&board->dev,
+ "Unable to get Nubus functional directory\n");
return false;
}
/* Get the MAC address */
if (nubus_find_rsrc(&dir, NUBUS_RESID_MAC_ADDRESS, &ent) == -1) {
- pr_info("%s: Couldn't get MAC address!\n", dev->name);
+ dev_info(&board->dev, "MAC address resource not found\n");
return false;
}
@@ -329,8 +325,8 @@ static bool __init mac8390_init(struct net_device *dev,
nubus_rewinddir(&dir);
if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_BASEOS,
&ent) == -1) {
- pr_err("%s: Memory offset resource for slot %X not found!\n",
- dev->name, ndev->board->slot);
+ dev_err(&board->dev,
+ "Memory offset resource not found\n");
return false;
}
nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -340,8 +336,8 @@ static bool __init mac8390_init(struct net_device *dev,
nubus_rewinddir(&dir);
if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_LENGTH,
&ent) == -1) {
- pr_info("%s: Memory length resource for slot %X not found, probing\n",
- dev->name, ndev->board->slot);
+ dev_info(&board->dev,
+ "Memory length resource not found, probing\n");
offset = mac8390_memsize(dev->mem_start);
} else {
nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -351,25 +347,25 @@ static bool __init mac8390_init(struct net_device *dev,
switch (cardtype) {
case MAC8390_KINETICS:
case MAC8390_DAYNA: /* it's the same */
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
DAYNA_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
DAYNA_8390_MEM);
dev->mem_end = dev->mem_start +
mac8390_memsize(dev->mem_start);
break;
case MAC8390_INTERLAN:
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
INTERLAN_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
INTERLAN_8390_MEM);
dev->mem_end = dev->mem_start +
mac8390_memsize(dev->mem_start);
break;
case MAC8390_CABLETRON:
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
CABLETRON_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
CABLETRON_8390_MEM);
/* The base address is unreadable if 0x00
* has been written to the command register
@@ -384,8 +380,8 @@ static bool __init mac8390_init(struct net_device *dev,
break;
default:
- pr_err("Card type %s is unsupported, sorry\n",
- ndev->board->name);
+ dev_err(&board->dev,
+ "No known base address for card type\n");
return false;
}
}
@@ -393,91 +389,83 @@ static bool __init mac8390_init(struct net_device *dev,
return true;
}
-struct net_device * __init mac8390_probe(int unit)
+static int mac8390_device_probe(struct nubus_board *board)
{
struct net_device *dev;
- struct nubus_rsrc *ndev = NULL;
int err = -ENODEV;
- struct ei_device *ei_local;
-
- static unsigned int slots;
-
- enum mac8390_type cardtype;
-
- /* probably should check for Nubus instead */
-
- if (!MACH_IS_MAC)
- return ERR_PTR(-ENODEV);
+ struct nubus_rsrc *fres;
+ enum mac8390_type cardtype = MAC8390_NONE;
dev = ____alloc_ei_netdev(0);
if (!dev)
- return ERR_PTR(-ENOMEM);
-
- if (unit >= 0)
- sprintf(dev->name, "eth%d", unit);
+ return -ENOMEM;
- for_each_func_rsrc(ndev) {
- if (ndev->category != NUBUS_CAT_NETWORK ||
- ndev->type != NUBUS_TYPE_ETHERNET)
- continue;
+ SET_NETDEV_DEV(dev, &board->dev);
- /* Have we seen it already? */
- if (slots & (1 << ndev->board->slot))
+ for_each_board_func_rsrc(board, fres) {
+ if (fres->category != NUBUS_CAT_NETWORK ||
+ fres->type != NUBUS_TYPE_ETHERNET)
continue;
- slots |= 1 << ndev->board->slot;
- cardtype = mac8390_ident(ndev);
+ cardtype = mac8390_ident(fres);
if (cardtype == MAC8390_NONE)
continue;
- if (!mac8390_init(dev, ndev, cardtype))
- continue;
-
- /* Do the nasty 8390 stuff */
- if (!mac8390_initdev(dev, ndev, cardtype))
+ if (mac8390_rsrc_init(dev, fres, cardtype))
break;
}
-
- if (!ndev)
+ if (!fres)
goto out;
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = mac8390_msg_enable;
+ err = mac8390_initdev(dev, board, cardtype);
+ if (err)
+ goto out;
err = register_netdev(dev);
if (err)
goto out;
- return dev;
+
+ nubus_set_drvdata(board, dev);
+ return 0;
out:
free_netdev(dev);
- return ERR_PTR(err);
+ return err;
+}
+
+static int mac8390_device_remove(struct nubus_board *board)
+{
+ struct net_device *dev = nubus_get_drvdata(board);
+
+ unregister_netdev(dev);
+ free_netdev(dev);
+ return 0;
}
-#ifdef MODULE
+static struct nubus_driver mac8390_driver = {
+ .probe = mac8390_device_probe,
+ .remove = mac8390_device_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .owner = THIS_MODULE,
+ }
+};
+
MODULE_AUTHOR("David Huggins-Daines <[email protected]> and others");
MODULE_DESCRIPTION("Macintosh NS8390-based Nubus Ethernet driver");
MODULE_LICENSE("GPL");
-static struct net_device *dev_mac8390;
-
-int __init init_module(void)
+static int __init mac8390_init(void)
{
- dev_mac8390 = mac8390_probe(-1);
- if (IS_ERR(dev_mac8390)) {
- pr_warn("mac8390: No card found\n");
- return PTR_ERR(dev_mac8390);
- }
- return 0;
+ return nubus_driver_register(&mac8390_driver);
}
+module_init(mac8390_init);
-void __exit cleanup_module(void)
+static void __exit mac8390_exit(void)
{
- unregister_netdev(dev_mac8390);
- free_netdev(dev_mac8390);
+ nubus_driver_unregister(&mac8390_driver);
}
-
-#endif /* MODULE */
+module_exit(mac8390_exit);
static const struct net_device_ops mac8390_netdev_ops = {
.ndo_open = mac8390_open,
@@ -493,9 +481,8 @@ static const struct net_device_ops mac8390_netdev_ops = {
#endif
};
-static int __init mac8390_initdev(struct net_device *dev,
- struct nubus_rsrc *ndev,
- enum mac8390_type type)
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
+ enum mac8390_type type)
{
static u32 fwrd4_offsets[16] = {
0, 4, 8, 12,
@@ -546,7 +533,8 @@ static int __init mac8390_initdev(struct net_device *dev,
case MAC8390_APPLE:
switch (mac8390_testio(dev->mem_start)) {
case ACCESS_UNKNOWN:
- pr_err("Don't know how to access card memory!\n");
+ dev_err(&board->dev,
+ "Don't know how to access card memory\n");
return -ENODEV;
case ACCESS_16:
@@ -612,21 +600,18 @@ static int __init mac8390_initdev(struct net_device *dev,
break;
default:
- pr_err("Card type %s is unsupported, sorry\n",
- ndev->board->name);
+ dev_err(&board->dev, "Unsupported card type\n");
return -ENODEV;
}
__NS8390_init(dev, 0);
/* Good, done, now spit out some messages */
- pr_info("%s: %s in slot %X (type %s)\n",
- dev->name, ndev->board->name, ndev->board->slot,
- cardname[type]);
- pr_info("MAC %pM IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
- dev->dev_addr, dev->irq,
- (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
- dev->mem_start, access_bitmode ? 32 : 16);
+ dev_info(&board->dev, "%s (type %s)\n", board->name, cardname[type]);
+ dev_info(&board->dev, "MAC %pM, IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
+ dev->dev_addr, dev->irq,
+ (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
+ dev->mem_start, access_bitmode ? 32 : 16);
return 0;
}
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index 4bb967bc879e..4ad8031ab669 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -38,7 +38,6 @@ static const char version[] =
#define NESM_START_PG 0x40 /* First page of TX buffer */
#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
-static u32 mcf8390_msg_enable;
#ifdef NE2000_ODDOFFSET
/*
@@ -407,7 +406,6 @@ static int mcf8390_init(struct net_device *dev)
static int mcf8390_probe(struct platform_device *pdev)
{
struct net_device *dev;
- struct ei_device *ei_local;
struct resource *mem, *irq;
resource_size_t msize;
int ret;
@@ -435,8 +433,6 @@ static int mcf8390_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = mcf8390_msg_enable;
dev->irq = irq->start;
dev->base_addr = mem->start;
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 66f47987e2a2..99a2453eb34f 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -77,7 +77,7 @@ static u32 ne_msg_enable;
module_param_hw_array(io, int, ioport, NULL, 0);
module_param_hw_array(irq, int, irq, NULL, 0);
module_param_array(bad, int, NULL, 0);
-module_param_named(msg_enable, ne_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, ne_msg_enable, uint, 0444);
MODULE_PARM_DESC(io, "I/O base address(es),required");
MODULE_PARM_DESC(irq, "IRQ number(s)");
MODULE_PARM_DESC(bad, "Accept card(s) with bad signatures");
@@ -485,7 +485,7 @@ static int __init ne_probe1(struct net_device *dev, unsigned long ioaddr)
mdelay(10); /* wait 10ms for interrupt to propagate */
outb_p(0x00, ioaddr + EN0_IMR); /* Mask it again. */
dev->irq = probe_irq_off(cookie);
- if (netif_msg_probe(ei_local))
+ if (ne_msg_enable & NETIF_MSG_PROBE)
pr_cont(" autoirq is %d", dev->irq);
} else if (dev->irq == 2)
/* Fixup for users that don't know that IRQ 2 is really IRQ 9,
diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c
index 1bdea746926c..42985a82321a 100644
--- a/drivers/net/ethernet/8390/ne2k-pci.c
+++ b/drivers/net/ethernet/8390/ne2k-pci.c
@@ -76,7 +76,7 @@ MODULE_AUTHOR("Donald Becker / Paul Gortmaker");
MODULE_DESCRIPTION("PCI NE2000 clone driver");
MODULE_LICENSE("GPL");
-module_param_named(msg_enable, ne2k_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, ne2k_msg_enable, uint, 0444);
module_param_array(options, int, NULL, 0);
module_param_array(full_duplex, int, NULL, 0);
MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index bcad4a7fac9f..61e43802b9a5 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -66,7 +66,6 @@
#define PCNET_RDC_TIMEOUT (2*HZ/100) /* Max wait in jiffies for Tx RDC */
static const char *if_names[] = { "auto", "10baseT", "10base2"};
-static u32 pcnet_msg_enable;
/*====================================================================*/
@@ -556,7 +555,6 @@ static int pcnet_config(struct pcmcia_device *link)
int start_pg, stop_pg, cm_offset;
int has_shmem = 0;
struct hw_info *local_hw_info;
- struct ei_device *ei_local;
dev_dbg(&link->dev, "pcnet_config\n");
@@ -606,8 +604,6 @@ static int pcnet_config(struct pcmcia_device *link)
mii_phy_probe(dev);
SET_NETDEV_DEV(dev, &link->dev);
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = pcnet_msg_enable;
if (register_netdev(dev) != 0) {
pr_notice("register_netdev() failed\n");
diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c
index 4e02f6a23575..3fe3b4dfa7c5 100644
--- a/drivers/net/ethernet/8390/smc-ultra.c
+++ b/drivers/net/ethernet/8390/smc-ultra.c
@@ -563,7 +563,7 @@ static int irq[MAX_ULTRA_CARDS];
module_param_hw_array(io, int, ioport, NULL, 0);
module_param_hw_array(irq, int, irq, NULL, 0);
-module_param_named(msg_enable, ultra_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, ultra_msg_enable, uint, 0444);
MODULE_PARM_DESC(io, "I/O base address(es)");
MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c
index aca957d4e121..1f0670cd3ea3 100644
--- a/drivers/net/ethernet/8390/stnic.c
+++ b/drivers/net/ethernet/8390/stnic.c
@@ -71,7 +71,7 @@ static void stnic_init (struct net_device *dev);
static u32 stnic_msg_enable;
-module_param_named(msg_enable, stnic_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, stnic_msg_enable, uint, 0444);
MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
/* SH7750 specific read/write io. */
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index 6efa2722f850..c834123560f1 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -299,7 +299,7 @@ static int __init wd_probe1(struct net_device *dev, int ioaddr)
outb_p(0x00, nic_addr+EN0_IMR); /* Mask all intrs. again. */
- if (netif_msg_drv(ei_local))
+ if (wd_msg_enable & NETIF_MSG_PROBE)
pr_cont(" autoirq is %d", dev->irq);
if (dev->irq < 2)
dev->irq = word16 ? 10 : 5;
@@ -507,7 +507,7 @@ module_param_hw_array(io, int, ioport, NULL, 0);
module_param_hw_array(irq, int, irq, NULL, 0);
module_param_hw_array(mem, int, iomem, NULL, 0);
module_param_hw_array(mem_end, int, iomem, NULL, 0);
-module_param_named(msg_enable, wd_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, wd_msg_enable, uint, 0444);
MODULE_PARM_DESC(io, "I/O base address(es)");
MODULE_PARM_DESC(irq, "IRQ number(s) (ignored for PureData boards)");
MODULE_PARM_DESC(mem, "memory base address(es)(ignored for PureData boards)");
diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c
index 6d93956b293b..35a500a21521 100644
--- a/drivers/net/ethernet/8390/zorro8390.c
+++ b/drivers/net/ethernet/8390/zorro8390.c
@@ -44,8 +44,6 @@
static const char version[] =
"8390.c:v1.10cvs 9/23/94 Donald Becker ([email protected])\n";
-static u32 zorro8390_msg_enable;
-
#include "lib8390.c"
#define DRV_NAME "zorro8390"
@@ -296,7 +294,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
int err;
unsigned char SA_prom[32];
int start_page, stop_page;
- struct ei_device *ei_local = netdev_priv(dev);
static u32 zorro8390_offsets[16] = {
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
@@ -388,8 +385,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
dev->netdev_ops = &zorro8390_netdev_ops;
__NS8390_init(dev, 0);
- ei_local->msg_enable = zorro8390_msg_enable;
-
err = register_netdev(dev);
if (err) {
free_irq(IRQ_AMIGA_PORTS, dev);
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 527908c7e384..baca8f704a45 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -56,7 +56,7 @@
static atomic_t instance_count = ATOMIC_INIT(~0);
/* Module parameters */
static int debug = -1;
-module_param(debug, int, S_IRUGO | S_IWUSR);
+module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Message Level (-1: default, 0: no output, 16: all)");
static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -65,12 +65,12 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
#define RX_DESCRIPTORS 64
static int dma_rx_num = RX_DESCRIPTORS;
-module_param(dma_rx_num, int, S_IRUGO | S_IWUSR);
+module_param(dma_rx_num, int, 0644);
MODULE_PARM_DESC(dma_rx_num, "Number of descriptors in the RX list");
#define TX_DESCRIPTORS 64
static int dma_tx_num = TX_DESCRIPTORS;
-module_param(dma_tx_num, int, S_IRUGO | S_IWUSR);
+module_param(dma_tx_num, int, 0644);
MODULE_PARM_DESC(dma_tx_num, "Number of descriptors in the TX list");
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index bf2de5298005..1b9d3130af4d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -631,8 +631,10 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
*/
wmb();
- writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+ writel_relaxed(mmio_read_reg,
+ ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+ mmiowb();
for (i = 0; i < timeout; i++) {
if (read_resp->req_id == mmio_read->seq_num)
break;
@@ -1826,7 +1828,9 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
/* write the aenq doorbell after all AENQ descriptors were read */
mb();
- writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+ writel_relaxed((u32)aenq->head,
+ dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+ mmiowb();
}
int ena_com_dev_reset(struct ena_com_dev *ena_dev,
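Both hunks in this file follow the same pattern: an explicit barrier already orders the preceding descriptor/queue writes, so the doorbell itself can use the relaxed MMIO accessor, with mmiowb() keeping the write ordered ahead of any later unlock on architectures that need it. In isolation (a generic sketch, not part of the ENA API):

#include <linux/io.h>

/* Generic sketch of the pattern used above */
static inline void ring_doorbell(void __iomem *db, u32 tail)
{
	wmb();				/* descriptors visible before doorbell */
	writel_relaxed(tail, db);	/* skip writel()'s implicit barrier */
	mmiowb();			/* order MMIO before a later unlock */
}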
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 2f7657227cfe..6fdc753d9483 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -107,7 +107,8 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
return io_sq->q_depth - 1 - cnt;
}
-static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq,
+ bool relaxed)
{
u16 tail;
@@ -116,7 +117,10 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
io_sq->qid, tail);
- writel(tail, io_sq->db_addr);
+ if (relaxed)
+ writel_relaxed(tail, io_sq->db_addr);
+ else
+ writel(tail, io_sq->db_addr);
return 0;
}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 6975150d144e..a822e70c2af3 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -556,7 +556,8 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
* issue a doorbell
*/
wmb();
- ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+ ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
+ mmiowb();
}
rx_ring->next_to_use = next_to_use;
@@ -2151,7 +2152,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (netif_xmit_stopped(txq) || !skb->xmit_more) {
/* trigger the dma engine */
- ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+ ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
u64_stats_update_begin(&tx_ring->syncp);
tx_ring->tx_stats.doorbells++;
u64_stats_update_end(&tx_ring->syncp);
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 358f7ab77c70..c99e3e845ac0 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -649,7 +649,7 @@ static void amd8111e_free_ring(struct amd8111e_priv *lp)
static int amd8111e_tx(struct net_device *dev)
{
struct amd8111e_priv *lp = netdev_priv(dev);
- int tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK;
+ int tx_index;
int status;
/* Complete all the transmit packet */
while (lp->tx_complete_idx != lp->tx_idx){
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 100adee778df..7c204f05b418 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -137,21 +137,21 @@ static unsigned int ecc_ded_period = 600;
#ifdef CONFIG_AMD_XGBE_HAVE_ECC
/* Only expose the ECC parameters if supported */
-module_param(ecc_sec_info_threshold, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_sec_info_threshold, uint, 0644);
MODULE_PARM_DESC(ecc_sec_info_threshold,
" ECC corrected error informational threshold setting");
-module_param(ecc_sec_warn_threshold, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_sec_warn_threshold, uint, 0644);
MODULE_PARM_DESC(ecc_sec_warn_threshold,
" ECC corrected error warning threshold setting");
-module_param(ecc_sec_period, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_sec_period, uint, 0644);
MODULE_PARM_DESC(ecc_sec_period, " ECC corrected error period (in seconds)");
-module_param(ecc_ded_threshold, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_ded_threshold, uint, 0644);
MODULE_PARM_DESC(ecc_ded_threshold, " ECC detected error threshold setting");
-module_param(ecc_ded_period, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_ded_period, uint, 0644);
MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)");
#endif
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index d91fa595be98..795e556d4a3f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -131,7 +131,7 @@ MODULE_VERSION(XGBE_DRV_VERSION);
MODULE_DESCRIPTION(XGBE_DRV_DESC);
static int debug = -1;
-module_param(debug, int, S_IWUSR | S_IRUGO);
+module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, " Network interface message level setting");
static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 3e5833cf1fab..eb23f9ba1a9a 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -426,6 +426,8 @@ static int xgbe_pci_resume(struct pci_dev *pdev)
struct net_device *netdev = pdata->netdev;
int ret = 0;
+ XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff);
+
pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index f17a160dbff2..137cbb470af2 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
dev->netdev_ops = &mace_netdev_ops;
dev->watchdog_timeo = TX_TIMEOUT;
- printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
- dev->name, dev->dev_addr);
+ pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+ dev->dev_addr, mp->chipid);
err = register_netdev(dev);
if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
else if (fs & (UFLO|LCOL|RTRY)) {
++dev->stats.tx_aborted_errors;
if (mb->xmtfs & UFLO) {
- printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
dev->stats.tx_fifo_errors++;
mace_txdma_reset(dev);
}
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
dev->stats.rx_errors++;
- if (frame_status & RS_OFLO) {
- printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+ if (frame_status & RS_OFLO)
dev->stats.rx_fifo_errors++;
- }
if (frame_status & RS_CLSN)
dev->stats.collisions++;
if (frame_status & RS_FRAMERR)
@@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
},
};
-static int __init mac_mace_init_module(void)
-{
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
- platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
index 0b49f1aeebd3..fc7383106946 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -36,6 +36,8 @@
#define AQ_CFG_TX_FRAME_MAX (16U * 1024U)
#define AQ_CFG_RX_FRAME_MAX (4U * 1024U)
+#define AQ_CFG_TX_CLEAN_BUDGET 256U
+
/* LRO */
#define AQ_CFG_IS_LRO_DEF 1U
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index ebbaf63eaf47..c96a92118b8b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -247,6 +247,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self)
self->ndev->hw_features |= aq_hw_caps->hw_features;
self->ndev->features = aq_hw_caps->hw_features;
self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
+ self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN;
self->ndev->max_mtu = aq_hw_caps->mtu - ETH_FCS_LEN - ETH_HLEN;
@@ -937,3 +939,23 @@ err_exit:
out:
return err;
}
+
+void aq_nic_shutdown(struct aq_nic_s *self)
+{
+ int err = 0;
+
+ if (!self->ndev)
+ return;
+
+ rtnl_lock();
+
+ netif_device_detach(self->ndev);
+
+ err = aq_nic_stop(self);
+ if (err < 0)
+ goto err_exit;
+ aq_nic_deinit(self);
+
+err_exit:
+ rtnl_unlock();
+}
\ No newline at end of file
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index d16b0f1a95aa..219b550d1665 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -118,5 +118,6 @@ struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self);
u32 aq_nic_get_fw_version(struct aq_nic_s *self);
int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg);
int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self);
+void aq_nic_shutdown(struct aq_nic_s *self);
#endif /* AQ_NIC_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 22889fc158f2..ecc6306f940f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -226,6 +226,10 @@ static int aq_pci_probe(struct pci_dev *pdev,
goto err_ioremap;
self->aq_hw = kzalloc(sizeof(*self->aq_hw), GFP_KERNEL);
+ if (!self->aq_hw) {
+ err = -ENOMEM;
+ goto err_ioremap;
+ }
self->aq_hw->aq_nic_cfg = aq_nic_get_cfg(self);
for (bar = 0; bar < 4; ++bar) {
@@ -235,19 +239,19 @@ static int aq_pci_probe(struct pci_dev *pdev,
mmio_pa = pci_resource_start(pdev, bar);
if (mmio_pa == 0U) {
err = -EIO;
- goto err_ioremap;
+ goto err_free_aq_hw;
}
reg_sz = pci_resource_len(pdev, bar);
if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) {
err = -EIO;
- goto err_ioremap;
+ goto err_free_aq_hw;
}
self->aq_hw->mmio = ioremap_nocache(mmio_pa, reg_sz);
if (!self->aq_hw->mmio) {
err = -EIO;
- goto err_ioremap;
+ goto err_free_aq_hw;
}
break;
}
@@ -255,7 +259,7 @@ static int aq_pci_probe(struct pci_dev *pdev,
if (bar == 4) {
err = -EIO;
- goto err_ioremap;
+ goto err_free_aq_hw;
}
numvecs = min((u8)AQ_CFG_VECS_DEF,
@@ -290,6 +294,8 @@ err_register:
aq_pci_free_irq_vectors(self);
err_hwinit:
iounmap(self->aq_hw->mmio);
+err_free_aq_hw:
+ kfree(self->aq_hw);
err_ioremap:
free_netdev(ndev);
err_pci_func:
@@ -317,6 +323,20 @@ static void aq_pci_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}
+static void aq_pci_shutdown(struct pci_dev *pdev)
+{
+ struct aq_nic_s *self = pci_get_drvdata(pdev);
+
+ aq_nic_shutdown(self);
+
+ pci_disable_device(pdev);
+
+ if (system_state == SYSTEM_POWER_OFF) {
+ pci_wake_from_d3(pdev, false);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+}
+
static int aq_pci_suspend(struct pci_dev *pdev, pm_message_t pm_msg)
{
struct aq_nic_s *self = pci_get_drvdata(pdev);
@@ -339,6 +359,7 @@ static struct pci_driver aq_pci_ops = {
.remove = aq_pci_remove,
.suspend = aq_pci_suspend,
.resume = aq_pci_resume,
+ .shutdown = aq_pci_shutdown,
};
module_pci_driver(aq_pci_ops);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 0be6a11370bb..b5f1f62e8e25 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -136,11 +136,12 @@ void aq_ring_queue_stop(struct aq_ring_s *ring)
netif_stop_subqueue(ndev, ring->idx);
}
-void aq_ring_tx_clean(struct aq_ring_s *self)
+bool aq_ring_tx_clean(struct aq_ring_s *self)
{
struct device *dev = aq_nic_get_dev(self->aq_nic);
+ unsigned int budget = AQ_CFG_TX_CLEAN_BUDGET;
- for (; self->sw_head != self->hw_head;
+ for (; self->sw_head != self->hw_head && budget--;
self->sw_head = aq_ring_next_dx(self, self->sw_head)) {
struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
@@ -167,6 +168,8 @@ void aq_ring_tx_clean(struct aq_ring_s *self)
buff->pa = 0U;
buff->eop_index = 0xffffU;
}
+
+ return !!budget;
}
#define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index 965fae0fb6e0..ac1329f4051d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -153,7 +153,7 @@ void aq_ring_free(struct aq_ring_s *self);
void aq_ring_update_queue_state(struct aq_ring_s *ring);
void aq_ring_queue_wake(struct aq_ring_s *ring);
void aq_ring_queue_stop(struct aq_ring_s *ring);
-void aq_ring_tx_clean(struct aq_ring_s *self);
+bool aq_ring_tx_clean(struct aq_ring_s *self);
int aq_ring_rx_clean(struct aq_ring_s *self,
struct napi_struct *napi,
int *work_done,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index f890b8a5a862..d335c334fa56 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -35,12 +35,12 @@ struct aq_vec_s {
static int aq_vec_poll(struct napi_struct *napi, int budget)
{
struct aq_vec_s *self = container_of(napi, struct aq_vec_s, napi);
+ unsigned int sw_tail_old = 0U;
struct aq_ring_s *ring = NULL;
+ bool was_tx_cleaned = true;
+ unsigned int i = 0U;
int work_done = 0;
int err = 0;
- unsigned int i = 0U;
- unsigned int sw_tail_old = 0U;
- bool was_tx_cleaned = false;
if (!self) {
err = -EINVAL;
@@ -57,9 +57,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
if (ring[AQ_VEC_TX_ID].sw_head !=
ring[AQ_VEC_TX_ID].hw_head) {
- aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
+ was_tx_cleaned = aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
aq_ring_update_queue_state(&ring[AQ_VEC_TX_ID]);
- was_tx_cleaned = true;
}
err = self->aq_hw_ops->hw_ring_rx_receive(self->aq_hw,
@@ -90,7 +89,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
}
}
- if (was_tx_cleaned)
+ if (!was_tx_cleaned)
work_done = budget;
if (work_done < budget) {
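The inverted flag is the point of the change: aq_ring_tx_clean() now returns true only when it finished within AQ_CFG_TX_CLEAN_BUDGET, and an exhausted budget makes the poll routine claim the whole budget so NAPI keeps polling rather than re-arming interrupts. The contract, sketched:

/* NAPI completion contract (sketch of the logic above) */
if (!was_tx_cleaned)			/* TX budget exhausted */
	work_done = budget;		/* tell NAPI more work is pending */
if (work_done < budget)
	napi_complete_done(napi, work_done);	/* done: re-arm interrupts */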
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 967f0fd07fcf..84d7f4dd4ce1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -21,6 +21,10 @@
#define HW_ATL_UCP_0X370_REG 0x0370U
+#define HW_ATL_MIF_CMD 0x0200U
+#define HW_ATL_MIF_ADDR 0x0208U
+#define HW_ATL_MIF_VAL 0x020CU
+
#define HW_ATL_FW_SM_RAM 0x2U
#define HW_ATL_MPI_FW_VERSION 0x18
#define HW_ATL_MPI_CONTROL_ADR 0x0368U
@@ -79,16 +83,15 @@ int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self)
{
+ u32 gsr, val;
int k = 0;
- u32 gsr;
aq_hw_write_reg(self, 0x404, 0x40e1);
AQ_HW_SLEEP(50);
/* Cleanup SPI */
- aq_hw_write_reg(self, 0x534, 0xA0);
- aq_hw_write_reg(self, 0x100, 0x9F);
- aq_hw_write_reg(self, 0x100, 0x809F);
+ val = aq_hw_read_reg(self, 0x53C);
+ aq_hw_write_reg(self, 0x53C, val | 0x10);
gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR);
aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, (gsr & 0xBFFF) | 0x8000);
@@ -97,7 +100,14 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self)
aq_hw_write_reg(self, 0x404, 0x80e0);
aq_hw_write_reg(self, 0x32a8, 0x0);
aq_hw_write_reg(self, 0x520, 0x1);
+
+ /* Reset SPI again because of possible interrupted SPI burst */
+ val = aq_hw_read_reg(self, 0x53C);
+ aq_hw_write_reg(self, 0x53C, val | 0x10);
AQ_HW_SLEEP(10);
+ /* Clear SPI reset state */
+ aq_hw_write_reg(self, 0x53C, val & ~0x10);
+
aq_hw_write_reg(self, 0x404, 0x180e0);
for (k = 0; k < 1000; k++) {
@@ -141,13 +151,15 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self)
aq_pr_err("FW kickstart failed\n");
return -EIO;
}
+ /* Old FW requires fixed delay after init */
+ AQ_HW_SLEEP(15);
return 0;
}
static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self)
{
- u32 gsr, rbl_status;
+ u32 gsr, val, rbl_status;
int k;
aq_hw_write_reg(self, 0x404, 0x40e1);
@@ -157,6 +169,10 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self)
/* Alter RBL status */
aq_hw_write_reg(self, 0x388, 0xDEAD);
+ /* Cleanup SPI */
+ val = aq_hw_read_reg(self, 0x53C);
+ aq_hw_write_reg(self, 0x53C, val | 0x10);
+
/* Global software reset*/
hw_atl_rx_rx_reg_res_dis_set(self, 0U);
hw_atl_tx_tx_reg_res_dis_set(self, 0U);
@@ -204,6 +220,8 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self)
aq_pr_err("FW kickstart failed\n");
return -EIO;
}
+ /* Old FW requires fixed delay after init */
+ AQ_HW_SLEEP(15);
return 0;
}
@@ -255,18 +273,22 @@ int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
}
}
- aq_hw_write_reg(self, 0x00000208U, a);
-
- for (++cnt; --cnt;) {
- u32 i = 0U;
+ aq_hw_write_reg(self, HW_ATL_MIF_ADDR, a);
- aq_hw_write_reg(self, 0x00000200U, 0x00008000U);
+ for (++cnt; --cnt && !err;) {
+ aq_hw_write_reg(self, HW_ATL_MIF_CMD, 0x00008000U);
- for (i = 1024U;
- (0x100U & aq_hw_read_reg(self, 0x00000200U)) && --i;) {
- }
+ if (IS_CHIP_FEATURE(REVISION_B1))
+ AQ_HW_WAIT_FOR(a != aq_hw_read_reg(self,
+ HW_ATL_MIF_ADDR),
+ 1, 1000U);
+ else
+ AQ_HW_WAIT_FOR(!(0x100 & aq_hw_read_reg(self,
+ HW_ATL_MIF_CMD)),
+ 1, 1000U);
- *(p++) = aq_hw_read_reg(self, 0x0000020CU);
+ *(p++) = aq_hw_read_reg(self, HW_ATL_MIF_VAL);
+ a += 4;
}
hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
@@ -483,7 +505,7 @@ void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self,
err_exit:;
}
-int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
+static int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
{
u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
@@ -662,14 +684,18 @@ void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p)
u32 val = hw_atl_reg_glb_mif_id_get(self);
u32 mif_rev = val & 0xFFU;
- if ((3U & mif_rev) == 1U) {
- chip_features |=
- HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 |
+ if ((0xFU & mif_rev) == 1U) {
+ chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 |
HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
HAL_ATLANTIC_UTILS_CHIP_MIPS;
- } else if ((3U & mif_rev) == 2U) {
- chip_features |=
- HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 |
+ } else if ((0xFU & mif_rev) == 2U) {
+ chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 |
+ HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
+ HAL_ATLANTIC_UTILS_CHIP_MIPS |
+ HAL_ATLANTIC_UTILS_CHIP_TPO2 |
+ HAL_ATLANTIC_UTILS_CHIP_RPF2;
+ } else if ((0xFU & mif_rev) == 0xAU) {
+ chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 |
HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
HAL_ATLANTIC_UTILS_CHIP_MIPS |
HAL_ATLANTIC_UTILS_CHIP_TPO2 |
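
A minimal decode of the widened revision check above, assuming the three mask/value pairs shown are the only ones the driver distinguishes (the helper name is illustrative, not part of the driver):

	/* Low nibble of the global MIF ID selects the silicon revision:
	 * 0x1 -> A0, 0x2 -> B0, 0xA -> B1 (newly recognized here).
	 */
	static u32 hw_atl_decode_revision(u32 mif_rev)
	{
		switch (mif_rev & 0xFU) {
		case 0x1U:
			return HAL_ATLANTIC_UTILS_CHIP_REVISION_A0;
		case 0x2U:
			return HAL_ATLANTIC_UTILS_CHIP_REVISION_B0;
		case 0xAU:
			return HAL_ATLANTIC_UTILS_CHIP_REVISION_B1;
		default:
			return 0;	/* unknown revision: no feature bits set */
		}
	}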
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index 2c690947910a..cd8f18f39c61 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -161,6 +161,7 @@ struct __packed hw_aq_atl_utils_mbox {
#define HAL_ATLANTIC_UTILS_CHIP_MPI_AQ 0x00000010U
#define HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 0x01000000U
#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 0x02000000U
+#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 0x04000000U
#define IS_CHIP_FEATURE(_F_) (HAL_ATLANTIC_UTILS_CHIP_##_F_ & \
self->chip_features)
diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h
index 5265b937677b..a445de6837a6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/ver.h
+++ b/drivers/net/ethernet/aquantia/atlantic/ver.h
@@ -13,7 +13,7 @@
#define NIC_MAJOR_DRIVER_VERSION 2
#define NIC_MINOR_DRIVER_VERSION 0
#define NIC_BUILD_DRIVER_VERSION 2
-#define NIC_REVISION_DRIVER_VERSION 0
+#define NIC_REVISION_DRIVER_VERSION 1
#define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index 16f9bee992fe..0f6576802607 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -169,8 +169,10 @@ static int emac_rockchip_probe(struct platform_device *pdev)
/* Optional regulator for PHY */
priv->regulator = devm_regulator_get_optional(dev, "phy");
if (IS_ERR(priv->regulator)) {
- if (PTR_ERR(priv->regulator) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ if (PTR_ERR(priv->regulator) == -EPROBE_DEFER) {
+ err = -EPROBE_DEFER;
+ goto out_clk_disable;
+ }
dev_err(dev, "no regulator found\n");
priv->regulator = NULL;
}
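
The point of the hunk above: the old early "return -EPROBE_DEFER" leaked the reference clock that probe had already enabled. A condensed sketch of the restored unwind pattern (the clock field name is assumed; the label matches the driver):

	priv->regulator = devm_regulator_get_optional(dev, "phy");
	if (IS_ERR(priv->regulator)) {
		if (PTR_ERR(priv->regulator) == -EPROBE_DEFER) {
			err = -EPROBE_DEFER;
			goto out_clk_disable;	/* release the clock taken earlier */
		}
		dev_err(dev, "no regulator found\n");
		priv->regulator = NULL;		/* the regulator is optional */
	}
	...
	out_clk_disable:
		clk_disable_unprepare(priv->refclk);	/* assumed clock handle */
		return err;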
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index f15a8fc6dfc9..4e26f606a7f2 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/net_dim.h>
#include <linux/etherdevice.h>
#include <linux/platform_device.h>
#include <linux/of.h>
@@ -574,21 +575,55 @@ static int bcm_sysport_set_wol(struct net_device *dev,
return 0;
}
+static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv)
+{
+ u32 reg;
+
+ reg = rdma_readl(priv, RDMA_MBDONE_INTR);
+ reg &= ~(RDMA_INTR_THRESH_MASK |
+ RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
+ reg |= priv->dim.coal_pkts;
+ reg |= DIV_ROUND_UP(priv->dim.coal_usecs * 1000, 8192) <<
+ RDMA_TIMEOUT_SHIFT;
+ rdma_writel(priv, reg, RDMA_MBDONE_INTR);
+}
+
+static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring)
+{
+ struct bcm_sysport_priv *priv = ring->priv;
+ u32 reg;
+
+ reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(ring->index));
+ reg &= ~(RING_INTR_THRESH_MASK |
+ RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
+ reg |= ring->dim.coal_pkts;
+ reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192) <<
+ RING_TIMEOUT_SHIFT;
+ tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(ring->index));
+}
+
static int bcm_sysport_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
+ struct bcm_sysport_tx_ring *ring;
+ unsigned int i;
u32 reg;
reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0));
ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 / 1000;
ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK;
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ ring = &priv->tx_rings[i];
+ ec->use_adaptive_tx_coalesce |= ring->dim.use_dim;
+ }
reg = rdma_readl(priv, RDMA_MBDONE_INTR);
ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 / 1000;
ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK;
+ ec->use_adaptive_rx_coalesce = priv->dim.use_dim;
return 0;
}
@@ -597,8 +632,8 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
+ struct bcm_sysport_tx_ring *ring;
unsigned int i;
- u32 reg;
/* Base system clock is 125 MHz, DMA timeout is this reference clock
* divided by 1024, which yields roughly 8.192 us, our maximum value has
@@ -615,22 +650,26 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
return -EINVAL;
for (i = 0; i < dev->num_tx_queues; i++) {
- reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i));
- reg &= ~(RING_INTR_THRESH_MASK |
- RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
- reg |= ec->tx_max_coalesced_frames;
- reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) <<
- RING_TIMEOUT_SHIFT;
- tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i));
+ ring = &priv->tx_rings[i];
+ ring->dim.coal_pkts = ec->tx_max_coalesced_frames;
+ ring->dim.coal_usecs = ec->tx_coalesce_usecs;
+ if (!ec->use_adaptive_tx_coalesce && ring->dim.use_dim) {
+ ring->dim.coal_pkts = 1;
+ ring->dim.coal_usecs = 0;
+ }
+ ring->dim.use_dim = ec->use_adaptive_tx_coalesce;
+ bcm_sysport_set_tx_coalesce(ring);
}
- reg = rdma_readl(priv, RDMA_MBDONE_INTR);
- reg &= ~(RDMA_INTR_THRESH_MASK |
- RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
- reg |= ec->rx_max_coalesced_frames;
- reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) <<
- RDMA_TIMEOUT_SHIFT;
- rdma_writel(priv, reg, RDMA_MBDONE_INTR);
+ priv->dim.coal_usecs = ec->rx_coalesce_usecs;
+ priv->dim.coal_pkts = ec->rx_max_coalesced_frames;
+
+ if (!ec->use_adaptive_rx_coalesce && priv->dim.use_dim) {
+ priv->dim.coal_pkts = 1;
+ priv->dim.coal_usecs = 0;
+ }
+ priv->dim.use_dim = ec->use_adaptive_rx_coalesce;
+ bcm_sysport_set_rx_coalesce(priv);
return 0;
}
@@ -709,6 +748,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
struct bcm_sysport_stats64 *stats64 = &priv->stats64;
struct net_device *ndev = priv->netdev;
unsigned int processed = 0, to_process;
+ unsigned int processed_bytes = 0;
struct bcm_sysport_cb *cb;
struct sk_buff *skb;
unsigned int p_index;
@@ -800,6 +840,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
*/
skb_pull(skb, sizeof(*rsb) + 2);
len -= (sizeof(*rsb) + 2);
+ processed_bytes += len;
/* UniMAC may forward CRC */
if (priv->crc_fwd) {
@@ -824,6 +865,9 @@ next:
priv->rx_read_ptr = 0;
}
+ priv->dim.packets = processed;
+ priv->dim.bytes = processed_bytes;
+
return processed;
}
@@ -855,10 +899,12 @@ static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring,
static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
struct bcm_sysport_tx_ring *ring)
{
- unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs;
unsigned int pkts_compl = 0, bytes_compl = 0;
struct net_device *ndev = priv->netdev;
+ unsigned int txbds_processed = 0;
struct bcm_sysport_cb *cb;
+ unsigned int txbds_ready;
+ unsigned int c_index;
u32 hw_ind;
/* Clear status before servicing to reduce spurious interrupts */
@@ -871,35 +917,31 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
/* Compute how many descriptors have been processed since last call */
hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index));
c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK;
- ring->p_index = (hw_ind & RING_PROD_INDEX_MASK);
-
- last_c_index = ring->c_index;
- num_tx_cbs = ring->size;
-
- c_index &= (num_tx_cbs - 1);
-
- if (c_index >= last_c_index)
- last_tx_cn = c_index - last_c_index;
- else
- last_tx_cn = num_tx_cbs - last_c_index + c_index;
+ txbds_ready = (c_index - ring->c_index) & RING_CONS_INDEX_MASK;
netif_dbg(priv, tx_done, ndev,
- "ring=%d c_index=%d last_tx_cn=%d last_c_index=%d\n",
- ring->index, c_index, last_tx_cn, last_c_index);
+ "ring=%d old_c_index=%u c_index=%u txbds_ready=%u\n",
+ ring->index, ring->c_index, c_index, txbds_ready);
- while (last_tx_cn-- > 0) {
- cb = ring->cbs + last_c_index;
+ while (txbds_processed < txbds_ready) {
+ cb = &ring->cbs[ring->clean_index];
bcm_sysport_tx_reclaim_one(ring, cb, &bytes_compl, &pkts_compl);
ring->desc_count++;
- last_c_index++;
- last_c_index &= (num_tx_cbs - 1);
+ txbds_processed++;
+
+ if (likely(ring->clean_index < ring->size - 1))
+ ring->clean_index++;
+ else
+ ring->clean_index = 0;
}
u64_stats_update_begin(&priv->syncp);
ring->packets += pkts_compl;
ring->bytes += bytes_compl;
u64_stats_update_end(&priv->syncp);
+ ring->dim.packets = pkts_compl;
+ ring->dim.bytes = bytes_compl;
ring->c_index = c_index;
@@ -945,6 +987,7 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
{
struct bcm_sysport_tx_ring *ring =
container_of(napi, struct bcm_sysport_tx_ring, napi);
+ struct net_dim_sample dim_sample;
unsigned int work_done = 0;
work_done = bcm_sysport_tx_reclaim(ring->priv, ring);
@@ -961,6 +1004,12 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
return 0;
}
+ if (ring->dim.use_dim) {
+ net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
+ ring->dim.bytes, &dim_sample);
+ net_dim(&ring->dim.dim, dim_sample);
+ }
+
return budget;
}
@@ -976,6 +1025,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
{
struct bcm_sysport_priv *priv =
container_of(napi, struct bcm_sysport_priv, napi);
+ struct net_dim_sample dim_sample;
unsigned int work_done = 0;
work_done = bcm_sysport_desc_rx(priv, budget);
@@ -998,6 +1048,12 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE);
}
+ if (priv->dim.use_dim) {
+ net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
+ priv->dim.bytes, &dim_sample);
+ net_dim(&priv->dim.dim, dim_sample);
+ }
+
return work_done;
}
@@ -1016,6 +1072,40 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
}
+static void bcm_sysport_dim_work(struct work_struct *work)
+{
+ struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct bcm_sysport_net_dim *ndim =
+ container_of(dim, struct bcm_sysport_net_dim, dim);
+ struct bcm_sysport_priv *priv =
+ container_of(ndim, struct bcm_sysport_priv, dim);
+ struct net_dim_cq_moder cur_profile =
+ net_dim_get_profile(dim->mode, dim->profile_ix);
+
+ priv->dim.coal_usecs = cur_profile.usec;
+ priv->dim.coal_pkts = cur_profile.pkts;
+
+ bcm_sysport_set_rx_coalesce(priv);
+ dim->state = NET_DIM_START_MEASURE;
+}
+
+static void bcm_sysport_dim_tx_work(struct work_struct *work)
+{
+ struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct bcm_sysport_net_dim *ndim =
+ container_of(dim, struct bcm_sysport_net_dim, dim);
+ struct bcm_sysport_tx_ring *ring =
+ container_of(ndim, struct bcm_sysport_tx_ring, dim);
+ struct net_dim_cq_moder cur_profile =
+ net_dim_get_profile(dim->mode, dim->profile_ix);
+
+ ring->dim.coal_usecs = cur_profile.usec;
+ ring->dim.coal_pkts = cur_profile.pkts;
+
+ bcm_sysport_set_tx_coalesce(ring);
+ dim->state = NET_DIM_START_MEASURE;
+}
+
/* RX and misc interrupt routine */
static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
{
@@ -1034,6 +1124,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
}
if (priv->irq0_stat & INTRL2_0_RDMA_MBDONE) {
+ priv->dim.event_ctr++;
if (likely(napi_schedule_prep(&priv->napi))) {
/* disable RX interrupts */
intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE);
@@ -1061,6 +1152,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
continue;
txr = &priv->tx_rings[ring];
+ txr->dim.event_ctr++;
if (likely(napi_schedule_prep(&txr->napi))) {
intrl2_0_mask_set(priv, ring_bit);
@@ -1093,6 +1185,7 @@ static irqreturn_t bcm_sysport_tx_isr(int irq, void *dev_id)
continue;
txr = &priv->tx_rings[ring];
+ txr->dim.event_ctr++;
if (likely(napi_schedule_prep(&txr->napi))) {
intrl2_1_mask_set(priv, BIT(ring));
@@ -1358,6 +1451,16 @@ out:
phy_print_status(phydev);
}
+static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim,
+ void (*cb)(struct work_struct *work))
+{
+ INIT_WORK(&dim->dim.work, cb);
+ dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ dim->event_ctr = 0;
+ dim->packets = 0;
+ dim->bytes = 0;
+}
+
static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
unsigned int index)
{
@@ -1394,6 +1497,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64);
ring->index = index;
ring->size = size;
+ ring->clean_index = 0;
ring->alloc_size = ring->size;
ring->desc_cpu = p;
ring->desc_count = ring->size;
@@ -1447,6 +1551,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
reg |= (1 << index);
tdma_writel(priv, reg, TDMA_TIER1_ARB_0_QUEUE_EN);
+ bcm_sysport_init_dim(&ring->dim, bcm_sysport_dim_tx_work);
napi_enable(&ring->napi);
netif_dbg(priv, hw, priv->netdev,
@@ -1477,6 +1582,7 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv,
return;
napi_disable(&ring->napi);
+ cancel_work_sync(&ring->dim.dim.work);
netif_napi_del(&ring->napi);
bcm_sysport_tx_clean(priv, ring);
@@ -1766,6 +1872,7 @@ static void bcm_sysport_netif_start(struct net_device *dev)
struct bcm_sysport_priv *priv = netdev_priv(dev);
/* Enable NAPI */
+ bcm_sysport_init_dim(&priv->dim, bcm_sysport_dim_work);
napi_enable(&priv->napi);
/* Enable RX interrupt and TX ring full interrupt */
@@ -1951,6 +2058,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev)
/* stop all software from updating hardware */
netif_tx_stop_all_queues(dev);
napi_disable(&priv->napi);
+ cancel_work_sync(&priv->dim.dim.work);
phy_stop(dev->phydev);
/* mask all interrupts */
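
Taken together, the SYSTEMPORT hunks wire net_dim up in three stages; a condensed view of the flow, using only the calls that appear above:

	/* 1. hard IRQ: count the event and kick NAPI */
	ring->dim.event_ctr++;

	/* 2. NAPI poll: record work done, feed the DIM algorithm */
	if (ring->dim.use_dim) {
		net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
			       ring->dim.bytes, &dim_sample);
		net_dim(&ring->dim.dim, dim_sample);	/* may queue dim.work */
	}

	/* 3. worker: apply the profile DIM selected, restart measuring */
	cur_profile = net_dim_get_profile(dim->mode, dim->profile_ix);
	ring->dim.coal_usecs = cur_profile.usec;
	ring->dim.coal_pkts = cur_profile.pkts;
	bcm_sysport_set_tx_coalesce(ring);
	dim->state = NET_DIM_START_MEASURE;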
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index f5a984c1c986..e1c97d4a82b4 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -12,6 +12,7 @@
#define __BCM_SYSPORT_H
#include <linux/if_vlan.h>
+#include <linux/net_dim.h>
/* Receive/transmit descriptor format */
#define DESC_ADDR_HI_STATUS_LEN 0x00
@@ -695,6 +696,16 @@ struct bcm_sysport_hw_params {
unsigned int num_rx_desc_words;
};
+struct bcm_sysport_net_dim {
+ u16 use_dim;
+ u16 event_ctr;
+ unsigned long packets;
+ unsigned long bytes;
+ u32 coal_usecs;
+ u32 coal_pkts;
+ struct net_dim dim;
+};
+
/* Software view of the TX ring */
struct bcm_sysport_tx_ring {
spinlock_t lock; /* Ring lock for tx reclaim/xmit */
@@ -706,12 +717,13 @@ struct bcm_sysport_tx_ring {
unsigned int desc_count; /* Number of descriptors */
unsigned int curr_desc; /* Current descriptor */
unsigned int c_index; /* Last consumer index */
- unsigned int p_index; /* Current producer index */
+ unsigned int clean_index; /* Current clean index */
struct bcm_sysport_cb *cbs; /* Transmit control blocks */
struct dma_desc *desc_cpu; /* CPU view of the descriptor */
struct bcm_sysport_priv *priv; /* private context backpointer */
unsigned long packets; /* packets statistics */
unsigned long bytes; /* bytes statistics */
+ struct bcm_sysport_net_dim dim; /* Net DIM context */
unsigned int switch_queue; /* switch port queue number */
unsigned int switch_port; /* switch port queue number */
bool inspect; /* inspect switch port and queue */
@@ -743,6 +755,8 @@ struct bcm_sysport_priv {
unsigned int rx_read_ptr;
unsigned int rx_c_index;
+ struct bcm_sysport_net_dim dim;
+
/* PHY device */
struct device_node *phy_dn;
phy_interface_t phy_interface;
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 5e34b34f7740..9ffc4a8c5fc7 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -87,7 +87,7 @@ MODULE_FIRMWARE(FW_RV2P_FILE_09_Ax);
static int disable_msi = 0;
-module_param(disable_msi, int, S_IRUGO);
+module_param(disable_msi, int, 0444);
MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
typedef enum {
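
The S_I* to octal conversions repeated through this series are mechanical; the symbolic constants expand as follows, and checkpatch now prefers the octal form:

	S_IRUGO           == 0444	/* read: user, group, other */
	S_IWUSR           == 0200	/* write: user only */
	S_IRUGO | S_IWUSR == 0644	/* world-readable, user-writable */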
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 352beff796ae..d847e1b9c37b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -166,6 +166,12 @@ do { \
#define REG_RD8(bp, offset) readb(REG_ADDR(bp, offset))
#define REG_RD16(bp, offset) readw(REG_ADDR(bp, offset))
+#define REG_WR_RELAXED(bp, offset, val) \
+ writel_relaxed((u32)val, REG_ADDR(bp, offset))
+
+#define REG_WR16_RELAXED(bp, offset, val) \
+ writew_relaxed((u16)val, REG_ADDR(bp, offset))
+
#define REG_WR(bp, offset, val) writel((u32)val, REG_ADDR(bp, offset))
#define REG_WR8(bp, offset, val) writeb((u8)val, REG_ADDR(bp, offset))
#define REG_WR16(bp, offset, val) writew((u16)val, REG_ADDR(bp, offset))
@@ -758,10 +764,8 @@ struct bnx2x_fastpath {
#if (BNX2X_DB_SHIFT < BNX2X_DB_MIN_SHIFT)
#error "Min DB doorbell stride is 8"
#endif
-#define DOORBELL(bp, cid, val) \
- do { \
- writel((u32)(val), bp->doorbells + (bp->db_size * (cid))); \
- } while (0)
+#define DOORBELL_RELAXED(bp, cid, val) \
+ writel_relaxed((u32)(val), (bp)->doorbells + ((bp)->db_size * (cid)))
/* TX CSUM helpers */
#define SKB_CS_OFF(skb) (offsetof(struct tcphdr, check) - \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index d7c98e807ca8..95871576ab92 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4153,9 +4153,10 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
wmb();
txdata->tx_db.data.prod += nbd;
- barrier();
+ /* make sure descriptor update is observed by HW */
+ wmb();
- DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
+ DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw);
mmiowb();
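
What the bnx2x doorbell conversion buys: the write ordering is paid for once with an explicit wmb() instead of implicitly inside writel(), and the doorbell itself becomes a relaxed store. Every converted call site has this shape:

	txdata->tx_db.data.prod += nbd;		/* producer/descriptor updates */

	/* make sure descriptor update is observed by HW */
	wmb();

	DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw);

	mmiowb();	/* keep the MMIO ordered against a later unlock */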
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index a5265e1344f1..a8ce5c55bbb0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -522,8 +522,8 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
wmb();
for (i = 0; i < sizeof(rx_prods)/4; i++)
- REG_WR(bp, fp->ustorm_rx_prods_offset + i*4,
- ((u32 *)&rx_prods)[i]);
+ REG_WR_RELAXED(bp, fp->ustorm_rx_prods_offset + i * 4,
+ ((u32 *)&rx_prods)[i]);
mmiowb(); /* keep prod updates ordered */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 1e33abde4a3e..da18aa239acb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -2591,8 +2591,9 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode)
wmb();
txdata->tx_db.data.prod += 2;
- barrier();
- DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
+ /* make sure descriptor update is observed by the HW */
+ wmb();
+ DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw);
mmiowb();
barrier();
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 74fc9af4aadb..c766ae23bc74 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -97,29 +97,29 @@ MODULE_FIRMWARE(FW_FILE_NAME_E1H);
MODULE_FIRMWARE(FW_FILE_NAME_E2);
int bnx2x_num_queues;
-module_param_named(num_queues, bnx2x_num_queues, int, S_IRUGO);
+module_param_named(num_queues, bnx2x_num_queues, int, 0444);
MODULE_PARM_DESC(num_queues,
" Set number of queues (default is as a number of CPUs)");
static int disable_tpa;
-module_param(disable_tpa, int, S_IRUGO);
+module_param(disable_tpa, int, 0444);
MODULE_PARM_DESC(disable_tpa, " Disable the TPA (LRO) feature");
static int int_mode;
-module_param(int_mode, int, S_IRUGO);
+module_param(int_mode, int, 0444);
MODULE_PARM_DESC(int_mode, " Force interrupt mode other than MSI-X "
"(1 INT#x; 2 MSI)");
static int dropless_fc;
-module_param(dropless_fc, int, S_IRUGO);
+module_param(dropless_fc, int, 0444);
MODULE_PARM_DESC(dropless_fc, " Pause on exhausted host ring");
static int mrrs = -1;
-module_param(mrrs, int, S_IRUGO);
+module_param(mrrs, int, 0444);
MODULE_PARM_DESC(mrrs, " Force Max Read Req Size (0..3) (for debug)");
static int debug;
-module_param(debug, int, S_IRUGO);
+module_param(debug, int, 0444);
MODULE_PARM_DESC(debug, " Default debug msglevel");
static struct workqueue_struct *bnx2x_wq;
@@ -3817,8 +3817,8 @@ static void bnx2x_sp_prod_update(struct bnx2x *bp)
*/
mb();
- REG_WR16(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
- bp->spq_prod_idx);
+ REG_WR16_RELAXED(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
+ bp->spq_prod_idx);
mmiowb();
}
@@ -13913,7 +13913,7 @@ static void bnx2x_register_phc(struct bnx2x *bp)
bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &bp->pdev->dev);
if (IS_ERR(bp->ptp_clock)) {
bp->ptp_clock = NULL;
- BNX2X_ERR("PTP clock registeration failed\n");
+ BNX2X_ERR("PTP clock registration failed\n");
}
}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 76a4668c50fe..8e0a317b31f7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -170,7 +170,9 @@ static int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping)
wmb();
/* Trigger the PF FW */
- writeb(1, &zone_data->trigger.vf_pf_channel.addr_valid);
+ writeb_relaxed(1, &zone_data->trigger.vf_pf_channel.addr_valid);
+
+ mmiowb();
/* Wait for PF to complete */
while ((tout >= 0) && (!*done)) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 1500243b9886..3ff5f65758a3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1439,7 +1439,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
(skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
u16 vlan_proto = tpa_info->metadata >>
RX_CMP_FLAGS2_METADATA_TPID_SFT;
- u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK;
+ u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK;
__vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
}
@@ -1623,7 +1623,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) &&
(skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data);
- u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK;
+ u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK;
u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT;
__vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
@@ -1922,7 +1922,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
/* Sync BD data before updating doorbell */
wmb();
- bnxt_db_write(bp, db, DB_KEY_TX | prod);
+ bnxt_db_write_relaxed(bp, db, DB_KEY_TX | prod);
}
cpr->cp_raw_cons = raw_cons;
@@ -3847,6 +3847,9 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
struct hwrm_vnic_tpa_cfg_input req = {0};
+ if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
+ return 0;
+
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_TPA_CFG, -1, -1);
if (tpa_flags) {
@@ -4558,18 +4561,17 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
return rc;
}
-static int
-bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int vnics)
+static void
+__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
+ int tx_rings, int rx_rings, int ring_grps,
+ int cp_rings, int vnics)
{
- struct hwrm_func_cfg_input req = {0};
u32 enables = 0;
- int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(0xffff);
+ bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_CFG, -1, -1);
+ req->fid = cpu_to_le16(0xffff);
enables |= tx_rings ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
- req.num_tx_rings = cpu_to_le16(tx_rings);
+ req->num_tx_rings = cpu_to_le16(tx_rings);
if (bp->flags & BNXT_FLAG_NEW_RM) {
enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
@@ -4578,16 +4580,53 @@ bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
- req.num_rx_rings = cpu_to_le16(rx_rings);
- req.num_hw_ring_grps = cpu_to_le16(ring_grps);
- req.num_cmpl_rings = cpu_to_le16(cp_rings);
- req.num_stat_ctxs = req.num_cmpl_rings;
- req.num_vnics = cpu_to_le16(vnics);
+ req->num_rx_rings = cpu_to_le16(rx_rings);
+ req->num_hw_ring_grps = cpu_to_le16(ring_grps);
+ req->num_cmpl_rings = cpu_to_le16(cp_rings);
+ req->num_stat_ctxs = req->num_cmpl_rings;
+ req->num_vnics = cpu_to_le16(vnics);
}
- if (!enables)
+ req->enables = cpu_to_le32(enables);
+}
+
+static void
+__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
+ struct hwrm_func_vf_cfg_input *req, int tx_rings,
+ int rx_rings, int ring_grps, int cp_rings,
+ int vnics)
+{
+ u32 enables = 0;
+
+ bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_VF_CFG, -1, -1);
+ enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
+ enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
+ enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
+ FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
+ enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
+
+ req->num_tx_rings = cpu_to_le16(tx_rings);
+ req->num_rx_rings = cpu_to_le16(rx_rings);
+ req->num_hw_ring_grps = cpu_to_le16(ring_grps);
+ req->num_cmpl_rings = cpu_to_le16(cp_rings);
+ req->num_stat_ctxs = req->num_cmpl_rings;
+ req->num_vnics = cpu_to_le16(vnics);
+
+ req->enables = cpu_to_le32(enables);
+}
+
+static int
+bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
+ int ring_grps, int cp_rings, int vnics)
+{
+ struct hwrm_func_cfg_input req = {0};
+ int rc;
+
+ __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
+ cp_rings, vnics);
+ if (!req.enables)
return 0;
- req.enables = cpu_to_le32(enables);
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
return -ENOMEM;
@@ -4604,7 +4643,6 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
int ring_grps, int cp_rings, int vnics)
{
struct hwrm_func_vf_cfg_input req = {0};
- u32 enables = 0;
int rc;
if (!(bp->flags & BNXT_FLAG_NEW_RM)) {
@@ -4612,22 +4650,8 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
return 0;
}
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
- enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
- enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
- enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
- enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
- enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
-
- req.num_tx_rings = cpu_to_le16(tx_rings);
- req.num_rx_rings = cpu_to_le16(rx_rings);
- req.num_hw_ring_grps = cpu_to_le16(ring_grps);
- req.num_cmpl_rings = cpu_to_le16(cp_rings);
- req.num_stat_ctxs = req.num_cmpl_rings;
- req.num_vnics = cpu_to_le16(vnics);
-
- req.enables = cpu_to_le32(enables);
+ __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
+ cp_rings, vnics);
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
return -ENOMEM;
@@ -4743,39 +4767,25 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
}
static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings)
+ int ring_grps, int cp_rings, int vnics)
{
struct hwrm_func_vf_cfg_input req = {0};
- u32 flags, enables;
+ u32 flags;
int rc;
if (!(bp->flags & BNXT_FLAG_NEW_RM))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
+ __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
+ cp_rings, vnics);
flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST;
- enables = FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS;
req.flags = cpu_to_le32(flags);
- req.enables = cpu_to_le32(enables);
- req.num_tx_rings = cpu_to_le16(tx_rings);
- req.num_rx_rings = cpu_to_le16(rx_rings);
- req.num_cmpl_rings = cpu_to_le16(cp_rings);
- req.num_hw_ring_grps = cpu_to_le16(ring_grps);
- req.num_stat_ctxs = cpu_to_le16(cp_rings);
- req.num_vnics = cpu_to_le16(1);
- if (bp->flags & BNXT_FLAG_RFS)
- req.num_vnics = cpu_to_le16(rx_rings + 1);
rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
return -ENOMEM;
@@ -4783,38 +4793,23 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
}
static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings)
+ int ring_grps, int cp_rings, int vnics)
{
struct hwrm_func_cfg_input req = {0};
- u32 flags, enables;
+ u32 flags;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(0xffff);
+ __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
+ cp_rings, vnics);
flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST;
- enables = FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS;
- req.num_tx_rings = cpu_to_le16(tx_rings);
- if (bp->flags & BNXT_FLAG_NEW_RM) {
+ if (bp->flags & BNXT_FLAG_NEW_RM)
flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST |
FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST |
FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST |
FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST;
- enables |= FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS |
- FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS |
- FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS |
- FUNC_CFG_REQ_ENABLES_NUM_VNICS;
- req.num_rx_rings = cpu_to_le16(rx_rings);
- req.num_cmpl_rings = cpu_to_le16(cp_rings);
- req.num_hw_ring_grps = cpu_to_le16(ring_grps);
- req.num_stat_ctxs = cpu_to_le16(cp_rings);
- req.num_vnics = cpu_to_le16(1);
- if (bp->flags & BNXT_FLAG_RFS)
- req.num_vnics = cpu_to_le16(rx_rings + 1);
- }
+
req.flags = cpu_to_le32(flags);
- req.enables = cpu_to_le32(enables);
rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
return -ENOMEM;
@@ -4822,17 +4817,17 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
}
static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings)
+ int ring_grps, int cp_rings, int vnics)
{
if (bp->hwrm_spec_code < 0x10801)
return 0;
if (BNXT_PF(bp))
return bnxt_hwrm_check_pf_rings(bp, tx_rings, rx_rings,
- ring_grps, cp_rings);
+ ring_grps, cp_rings, vnics);
return bnxt_hwrm_check_vf_rings(bp, tx_rings, rx_rings, ring_grps,
- cp_rings);
+ cp_rings, vnics);
}
static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal,
@@ -5865,7 +5860,6 @@ static int bnxt_init_msix(struct bnxt *bp)
if (rc)
goto msix_setup_exit;
- bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
bp->cp_nr_rings = (min == 1) ?
max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
bp->tx_nr_rings + bp->rx_nr_rings;
@@ -5897,7 +5891,6 @@ static int bnxt_init_inta(struct bnxt *bp)
bp->rx_nr_rings = 1;
bp->tx_nr_rings = 1;
bp->cp_nr_rings = 1;
- bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
bp->flags |= BNXT_FLAG_SHARED_RINGS;
bp->irq_tbl[0].vector = bp->pdev->irq;
return 0;
@@ -7531,7 +7524,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
int max_rx, max_tx, tx_sets = 1;
int tx_rings_needed;
int rx_rings = rx;
- int cp, rc;
+ int cp, vnics, rc;
if (tcs)
tx_sets = tcs;
@@ -7547,10 +7540,15 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
if (max_tx < tx_rings_needed)
return -ENOMEM;
+ vnics = 1;
+ if (bp->flags & BNXT_FLAG_RFS)
+ vnics += rx_rings;
+
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx_rings <<= 1;
cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx;
- return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp);
+ return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp,
+ vnics);
}
static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev)
@@ -8437,13 +8435,20 @@ int bnxt_restore_pf_fw_resources(struct bnxt *bp)
return 0;
bnxt_hwrm_func_qcaps(bp);
- __bnxt_close_nic(bp, true, false);
+
+ if (netif_running(bp->dev))
+ __bnxt_close_nic(bp, true, false);
+
bnxt_clear_int_mode(bp);
rc = bnxt_init_int_mode(bp);
- if (rc)
- dev_close(bp->dev);
- else
- rc = bnxt_open_nic(bp, true, false);
+
+ if (netif_running(bp->dev)) {
+ if (rc)
+ dev_close(bp->dev);
+ else
+ rc = bnxt_open_nic(bp, true, false);
+ }
+
return rc;
}
@@ -8664,6 +8669,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto init_err_pci_clean;
+ /* No TC has been set yet and rings may have been trimmed due to
+ * limited MSIX, so we re-initialize the TX rings per TC.
+ */
+ bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+
bnxt_get_wol_settings(bp);
if (bp->flags & BNXT_FLAG_WOL_CAP)
device_set_wakeup_enable(&pdev->dev, bp->wol);
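
The new vnics argument makes the ring-check path account VNICs the same way the reservation path already did: one default VNIC plus, with RFS enabled, one per RX ring. Worked example for 8 RX rings:

	vnics = 1;			/* default VNIC */
	if (bp->flags & BNXT_FLAG_RFS)
		vnics += rx_rings;	/* 1 + 8 = 9 with RFS, else 1 */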
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 1989c470172c..960162c9386c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -189,6 +189,7 @@ struct rx_cmp_ext {
#define RX_CMP_FLAGS2_T_L4_CS_CALC (0x1 << 3)
#define RX_CMP_FLAGS2_META_FORMAT_VLAN (0x1 << 4)
__le32 rx_cmp_meta_data;
+ #define RX_CMP_FLAGS2_METADATA_TCI_MASK 0xffff
#define RX_CMP_FLAGS2_METADATA_VID_MASK 0xfff
#define RX_CMP_FLAGS2_METADATA_TPID_MASK 0xffff0000
#define RX_CMP_FLAGS2_METADATA_TPID_SFT 16
@@ -1401,6 +1402,15 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
}
+/* For TX and RX ring doorbells with no ordering guarantee */
+static inline void bnxt_db_write_relaxed(struct bnxt *bp, void __iomem *db,
+ u32 val)
+{
+ writel_relaxed(val, db);
+ if (bp->flags & BNXT_FLAG_DOUBLE_DB)
+ writel_relaxed(val, db);
+}
+
/* For TX and RX ring doorbells */
static inline void bnxt_db_write(struct bnxt *bp, void __iomem *db, u32 val)
{
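
bnxt_db_write_relaxed() deliberately omits the barrier that writel() would imply, so callers must order descriptor stores themselves; the bnxt_poll_work() hunk above shows the intended pairing:

	/* Sync BD data before updating doorbell */
	wmb();
	bnxt_db_write_relaxed(bp, db, DB_KEY_TX | prod);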
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index fbe6e208e17b..65c2cee35766 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -349,6 +349,9 @@ static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle)
if (rc)
netdev_info(bp->dev, "Error: %s: flow_handle=0x%x rc=%d",
__func__, flow_handle, rc);
+
+ if (rc)
+ rc = -EIO;
return rc;
}
@@ -484,13 +487,15 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
req.action_flags = cpu_to_le16(action_flags);
mutex_lock(&bp->hwrm_cmd_lock);
-
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (!rc)
*flow_handle = resp->flow_handle;
-
mutex_unlock(&bp->hwrm_cmd_lock);
+ if (rc == HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR)
+ rc = -ENOSPC;
+ else if (rc)
+ rc = -EIO;
return rc;
}
@@ -561,6 +566,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
mutex_unlock(&bp->hwrm_cmd_lock);
+ if (rc)
+ rc = -EIO;
return rc;
}
@@ -576,6 +583,9 @@ static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+
+ if (rc)
+ rc = -EIO;
return rc;
}
@@ -624,6 +634,8 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
mutex_unlock(&bp->hwrm_cmd_lock);
+ if (rc)
+ rc = -EIO;
return rc;
}
@@ -639,6 +651,9 @@ static int hwrm_cfa_encap_record_free(struct bnxt *bp,
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+
+ if (rc)
+ rc = -EIO;
return rc;
}
@@ -1269,11 +1284,8 @@ static int bnxt_tc_del_flow(struct bnxt *bp,
flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
&tc_flow_cmd->cookie,
tc_info->flow_ht_params);
- if (!flow_node) {
- netdev_info(bp->dev, "ERROR: no flow_node for cookie %lx",
- tc_flow_cmd->cookie);
+ if (!flow_node)
return -EINVAL;
- }
return __bnxt_tc_del_flow(bp, flow_node);
}
@@ -1290,11 +1302,8 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp,
flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
&tc_flow_cmd->cookie,
tc_info->flow_ht_params);
- if (!flow_node) {
- netdev_info(bp->dev, "Error: no flow_node for cookie %lx",
- tc_flow_cmd->cookie);
+ if (!flow_node)
return -1;
- }
flow = &flow_node->flow;
curr_stats = &flow->stats;
@@ -1344,8 +1353,10 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
} else {
netdev_info(bp->dev, "error rc=%d", rc);
}
-
mutex_unlock(&bp->hwrm_cmd_lock);
+
+ if (rc)
+ rc = -EIO;
return rc;
}
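
The repeated "if (rc) rc = -EIO;" tails added in bnxt_tc.c translate raw HWRM firmware status codes into errnos before they reach the TC core. A hypothetical consolidation of the pattern, shown only to make the mapping explicit:

	static int bnxt_tc_hwrm_to_errno(int rc)
	{
		if (rc == HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR)
			return -ENOSPC;		/* flow table exhausted */
		return rc ? -EIO : 0;		/* any other firmware failure */
	}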
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index b1e35a9accf1..7db8edc643ec 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -603,6 +603,8 @@ static int bcmgenet_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct bcmgenet_rx_ring *ring;
+ unsigned int i;
ec->tx_max_coalesced_frames =
bcmgenet_tdma_ring_readl(priv, DESC_INDEX,
@@ -613,15 +615,37 @@ static int bcmgenet_get_coalesce(struct net_device *dev,
ec->rx_coalesce_usecs =
bcmgenet_rdma_readl(priv, DMA_RING16_TIMEOUT) * 8192 / 1000;
+ for (i = 0; i < priv->hw_params->rx_queues; i++) {
+ ring = &priv->rx_rings[i];
+ ec->use_adaptive_rx_coalesce |= ring->dim.use_dim;
+ }
+ ring = &priv->rx_rings[DESC_INDEX];
+ ec->use_adaptive_rx_coalesce |= ring->dim.use_dim;
+
return 0;
}
+static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring)
+{
+ struct bcmgenet_priv *priv = ring->priv;
+ unsigned int i = ring->index;
+ u32 reg;
+
+ bcmgenet_rdma_ring_writel(priv, i, ring->dim.coal_pkts,
+ DMA_MBUF_DONE_THRESH);
+
+ reg = bcmgenet_rdma_readl(priv, DMA_RING0_TIMEOUT + i);
+ reg &= ~DMA_TIMEOUT_MASK;
+ reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192);
+ bcmgenet_rdma_writel(priv, reg, DMA_RING0_TIMEOUT + i);
+}
+
static int bcmgenet_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct bcmgenet_rx_ring *ring;
unsigned int i;
- u32 reg;
/* Base system clock is 125 MHz, DMA timeout is this reference clock
* divided by 1024, which yields roughly 8.192 us, our maximum value
@@ -641,7 +665,8 @@ static int bcmgenet_set_coalesce(struct net_device *dev,
* transmitted, or when the ring is empty.
*/
if (ec->tx_coalesce_usecs || ec->tx_coalesce_usecs_high ||
- ec->tx_coalesce_usecs_irq || ec->tx_coalesce_usecs_low)
+ ec->tx_coalesce_usecs_irq || ec->tx_coalesce_usecs_low ||
+ ec->use_adaptive_tx_coalesce)
return -EOPNOTSUPP;
/* Program all TX queues with the same values, as there is no
@@ -656,24 +681,26 @@ static int bcmgenet_set_coalesce(struct net_device *dev,
DMA_MBUF_DONE_THRESH);
for (i = 0; i < priv->hw_params->rx_queues; i++) {
- bcmgenet_rdma_ring_writel(priv, i,
- ec->rx_max_coalesced_frames,
- DMA_MBUF_DONE_THRESH);
-
- reg = bcmgenet_rdma_readl(priv, DMA_RING0_TIMEOUT + i);
- reg &= ~DMA_TIMEOUT_MASK;
- reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192);
- bcmgenet_rdma_writel(priv, reg, DMA_RING0_TIMEOUT + i);
+ ring = &priv->rx_rings[i];
+ ring->dim.coal_usecs = ec->rx_coalesce_usecs;
+ ring->dim.coal_pkts = ec->rx_max_coalesced_frames;
+ if (!ec->use_adaptive_rx_coalesce && ring->dim.use_dim) {
+ ring->dim.coal_pkts = 1;
+ ring->dim.coal_usecs = 0;
+ }
+ ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
+ bcmgenet_set_rx_coalesce(ring);
}
- bcmgenet_rdma_ring_writel(priv, DESC_INDEX,
- ec->rx_max_coalesced_frames,
- DMA_MBUF_DONE_THRESH);
-
- reg = bcmgenet_rdma_readl(priv, DMA_RING16_TIMEOUT);
- reg &= ~DMA_TIMEOUT_MASK;
- reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192);
- bcmgenet_rdma_writel(priv, reg, DMA_RING16_TIMEOUT);
+ ring = &priv->rx_rings[DESC_INDEX];
+ ring->dim.coal_usecs = ec->rx_coalesce_usecs;
+ ring->dim.coal_pkts = ec->rx_max_coalesced_frames;
+ if (!ec->use_adaptive_rx_coalesce && ring->dim.use_dim) {
+ ring->dim.coal_pkts = 1;
+ ring->dim.coal_usecs = 0;
+ }
+ ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
+ bcmgenet_set_rx_coalesce(ring);
return 0;
}
@@ -1713,6 +1740,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
unsigned long dma_flag;
int len;
unsigned int rxpktprocessed = 0, rxpkttoprocess;
+ unsigned int bytes_processed = 0;
unsigned int p_index, mask;
unsigned int discards;
unsigned int chksum_ok = 0;
@@ -1832,6 +1860,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
len -= ETH_FCS_LEN;
}
+ bytes_processed += len;
+
/* Finish setting up the received SKB and send it to the kernel */
skb->protocol = eth_type_trans(skb, priv->dev);
ring->packets++;
@@ -1854,6 +1884,9 @@ next:
bcmgenet_rdma_ring_writel(priv, ring->index, ring->c_index, RDMA_CONS_INDEX);
}
+ ring->dim.bytes = bytes_processed;
+ ring->dim.packets = rxpktprocessed;
+
return rxpktprocessed;
}
@@ -1862,6 +1895,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
{
struct bcmgenet_rx_ring *ring = container_of(napi,
struct bcmgenet_rx_ring, napi);
+ struct net_dim_sample dim_sample;
unsigned int work_done;
work_done = bcmgenet_desc_rx(ring, budget);
@@ -1871,9 +1905,32 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
ring->int_enable(ring);
}
+ if (ring->dim.use_dim) {
+ net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
+ ring->dim.bytes, &dim_sample);
+ net_dim(&ring->dim.dim, dim_sample);
+ }
+
return work_done;
}
+static void bcmgenet_dim_work(struct work_struct *work)
+{
+ struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct bcmgenet_net_dim *ndim =
+ container_of(dim, struct bcmgenet_net_dim, dim);
+ struct bcmgenet_rx_ring *ring =
+ container_of(ndim, struct bcmgenet_rx_ring, dim);
+ struct net_dim_cq_moder cur_profile =
+ net_dim_get_profile(dim->mode, dim->profile_ix);
+
+ ring->dim.coal_usecs = cur_profile.usec;
+ ring->dim.coal_pkts = cur_profile.pkts;
+
+ bcmgenet_set_rx_coalesce(ring);
+ dim->state = NET_DIM_START_MEASURE;
+}
+
/* Assign skb to RX DMA descriptor. */
static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
struct bcmgenet_rx_ring *ring)
@@ -2022,6 +2079,16 @@ static void init_umac(struct bcmgenet_priv *priv)
dev_dbg(kdev, "done init umac\n");
}
+static void bcmgenet_init_dim(struct bcmgenet_net_dim *dim,
+ void (*cb)(struct work_struct *work))
+{
+ INIT_WORK(&dim->dim.work, cb);
+ dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ dim->event_ctr = 0;
+ dim->packets = 0;
+ dim->bytes = 0;
+}
+
/* Initialize a Tx ring along with corresponding hardware registers */
static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
unsigned int index, unsigned int size,
@@ -2111,6 +2178,8 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
if (ret)
return ret;
+ bcmgenet_init_dim(&ring->dim, bcmgenet_dim_work);
+
/* Initialize Rx NAPI */
netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll,
NAPI_POLL_WEIGHT);
@@ -2276,10 +2345,12 @@ static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv)
for (i = 0; i < priv->hw_params->rx_queues; ++i) {
ring = &priv->rx_rings[i];
napi_disable(&ring->napi);
+ cancel_work_sync(&ring->dim.dim.work);
}
ring = &priv->rx_rings[DESC_INDEX];
napi_disable(&ring->napi);
+ cancel_work_sync(&ring->dim.dim.work);
}
static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv)
@@ -2557,6 +2628,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
continue;
rx_ring = &priv->rx_rings[index];
+ rx_ring->dim.event_ctr++;
if (likely(napi_schedule_prep(&rx_ring->napi))) {
rx_ring->int_disable(rx_ring);
@@ -2601,6 +2673,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
if (status & UMAC_IRQ_RXDMA_DONE) {
rx_ring = &priv->rx_rings[DESC_INDEX];
+ rx_ring->dim.event_ctr++;
if (likely(napi_schedule_prep(&rx_ring->napi))) {
rx_ring->int_disable(rx_ring);
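
GENET and SYSTEMPORT convert microseconds to the same hardware unit: the DMA timeout counter ticks at the 125 MHz system clock divided by 1024, i.e. one tick per 8.192 us. The conversion used throughout, with a worked example:

	reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192);
	/* e.g. 50 us: DIV_ROUND_UP(50000, 8192) = 7 ticks ~= 57.3 us */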
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 3c50431ccd2a..22c41e0430fb 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -16,6 +16,7 @@
#include <linux/mii.h>
#include <linux/if_vlan.h>
#include <linux/phy.h>
+#include <linux/net_dim.h>
/* total number of Buffer Descriptors, same for Rx/Tx */
#define TOTAL_DESC 256
@@ -572,6 +573,16 @@ struct bcmgenet_tx_ring {
struct bcmgenet_priv *priv;
};
+struct bcmgenet_net_dim {
+ u16 use_dim;
+ u16 event_ctr;
+ unsigned long packets;
+ unsigned long bytes;
+ u32 coal_usecs;
+ u32 coal_pkts;
+ struct net_dim dim;
+};
+
struct bcmgenet_rx_ring {
struct napi_struct napi; /* Rx NAPI struct */
unsigned long bytes;
@@ -586,6 +597,7 @@ struct bcmgenet_rx_ring {
unsigned int cb_ptr; /* Rx ring initial CB ptr */
unsigned int end_ptr; /* Rx ring end CB ptr */
unsigned int old_discards;
+ struct bcmgenet_net_dim dim;
void (*int_enable)(struct bcmgenet_rx_ring *);
void (*int_disable)(struct bcmgenet_rx_ring *);
struct bcmgenet_priv *priv;
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index ecdef42f0ae6..ef4a0c326736 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -63,24 +63,24 @@ MODULE_DESCRIPTION("Broadcom SiByte SOC GB Ethernet driver");
/* Debug level: 0 quiet, 1 normal messages .. 7 verbose. */
static int debug = 1;
-module_param(debug, int, S_IRUGO);
+module_param(debug, int, 0444);
MODULE_PARM_DESC(debug, "Debug messages");
#ifdef CONFIG_SBMAC_COALESCE
static int int_pktcnt_tx = 255;
-module_param(int_pktcnt_tx, int, S_IRUGO);
+module_param(int_pktcnt_tx, int, 0444);
MODULE_PARM_DESC(int_pktcnt_tx, "TX packet count");
static int int_timeout_tx = 255;
-module_param(int_timeout_tx, int, S_IRUGO);
+module_param(int_timeout_tx, int, 0444);
MODULE_PARM_DESC(int_timeout_tx, "TX timeout value");
static int int_pktcnt_rx = 64;
-module_param(int_pktcnt_rx, int, S_IRUGO);
+module_param(int_pktcnt_rx, int, 0444);
MODULE_PARM_DESC(int_pktcnt_rx, "RX packet count");
static int int_timeout_rx = 64;
-module_param(int_timeout_rx, int, S_IRUGO);
+module_param(int_timeout_rx, int, 0444);
MODULE_PARM_DESC(int_timeout_rx, "RX timeout value");
#endif
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a77ee2f8fb8d..08bbb639be1a 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -922,8 +922,8 @@ static int tg3_ape_send_event(struct tg3 *tp, u32 event)
if (!(apedata & APE_FW_STATUS_READY))
return -EAGAIN;
- /* Wait for up to 1 millisecond for APE to service previous event. */
- err = tg3_ape_event_lock(tp, 1000);
+ /* Wait for up to 20 milliseconds for APE to service previous event. */
+ err = tg3_ape_event_lock(tp, 20000);
if (err)
return err;
@@ -946,6 +946,7 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
switch (kind) {
case RESET_KIND_INIT:
+ tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++);
tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG,
APE_HOST_SEG_SIG_MAGIC);
tg3_ape_write32(tp, TG3_APE_HOST_SEG_LEN,
@@ -962,13 +963,6 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
event = APE_EVENT_STATUS_STATE_START;
break;
case RESET_KIND_SHUTDOWN:
- /* With the interface we are currently using,
- * APE does not track driver state. Wiping
- * out the HOST SEGMENT SIGNATURE forces
- * the APE to assume OS absent status.
- */
- tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG, 0x0);
-
if (device_may_wakeup(&tp->pdev->dev) &&
tg3_flag(tp, WOL_ENABLE)) {
tg3_ape_write32(tp, TG3_APE_HOST_WOL_SPEED,
@@ -990,6 +984,18 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
tg3_ape_send_event(tp, event);
}
+static void tg3_send_ape_heartbeat(struct tg3 *tp,
+ unsigned long interval)
+{
+ /* Check if the heartbeat interval has elapsed */
+ if (!tg3_flag(tp, ENABLE_APE) ||
+ time_before(jiffies, tp->ape_hb_jiffies + interval))
+ return;
+
+ tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++);
+ tp->ape_hb_jiffies = jiffies;
+}
+
static void tg3_disable_ints(struct tg3 *tp)
{
int i;
@@ -7262,6 +7268,7 @@ static int tg3_poll_msix(struct napi_struct *napi, int budget)
}
}
+ tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1);
return work_done;
tx_recovery:
@@ -7344,6 +7351,7 @@ static int tg3_poll(struct napi_struct *napi, int budget)
}
}
+ tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1);
return work_done;
tx_recovery:
@@ -10732,7 +10740,7 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
if (tg3_flag(tp, ENABLE_APE))
/* Write our heartbeat update interval to APE. */
tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_INT_MS,
- APE_HOST_HEARTBEAT_INT_DISABLE);
+ APE_HOST_HEARTBEAT_INT_5SEC);
tg3_write_sig_post_reset(tp, RESET_KIND_INIT);
@@ -10791,11 +10799,11 @@ static ssize_t tg3_show_temp(struct device *dev,
}
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, tg3_show_temp, NULL,
+static SENSOR_DEVICE_ATTR(temp1_input, 0444, tg3_show_temp, NULL,
TG3_TEMP_SENSOR_OFFSET);
-static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, tg3_show_temp, NULL,
+static SENSOR_DEVICE_ATTR(temp1_crit, 0444, tg3_show_temp, NULL,
TG3_TEMP_CAUTION_OFFSET);
-static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, tg3_show_temp, NULL,
+static SENSOR_DEVICE_ATTR(temp1_max, 0444, tg3_show_temp, NULL,
TG3_TEMP_MAX_OFFSET);
static struct attribute *tg3_attrs[] = {
@@ -11077,6 +11085,9 @@ static void tg3_timer(struct timer_list *t)
tp->asf_counter = tp->asf_multiplier;
}
+ /* Update the APE heartbeat every 5 seconds. */
+ tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL);
+
spin_unlock(&tp->lock);
restart_timer:
@@ -16653,6 +16664,8 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent)
pci_state_reg);
tg3_ape_lock_init(tp);
+ tp->ape_hb_interval =
+ msecs_to_jiffies(APE_HOST_HEARTBEAT_INT_5SEC);
}
/* Set up tp->grc_local_ctrl before calling
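
tg3_send_ape_heartbeat() is called from NAPI poll with twice the interval and from the slow timer with the base interval, so the poll path only tops the counter up if the timer has fallen behind. The generic shape of the jiffies rate limit, as a standalone helper (illustrative, not part of the patch):

	static bool interval_elapsed(unsigned long *last, unsigned long interval)
	{
		if (time_before(jiffies, *last + interval))
			return false;	/* still inside the window: skip */
		*last = jiffies;	/* record this firing */
		return true;
	}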
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 47f51cc0566d..1d61aa3efda1 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2508,6 +2508,7 @@
#define TG3_APE_LOCK_PHY3 5
#define TG3_APE_LOCK_GPIO 7
+#define TG3_APE_HB_INTERVAL (tp->ape_hb_interval)
#define TG3_EEPROM_SB_F1R2_MBA_OFF 0x10
@@ -3423,6 +3424,10 @@ struct tg3 {
struct device *hwmon_dev;
bool link_up;
bool pcierr_recovery;
+
+ u32 ape_hb;
+ unsigned long ape_hb_interval;
+ unsigned long ape_hb_jiffies;
};
/* Accessor macros for chip and asic attributes
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index a843076597ec..69cc3e0119d6 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -46,7 +46,7 @@ module_param(bnad_ioc_auto_recover, uint, 0444);
MODULE_PARM_DESC(bnad_ioc_auto_recover, "Enable / Disable auto recovery");
static uint bna_debugfs_enable = 1;
-module_param(bna_debugfs_enable, uint, S_IRUGO | S_IWUSR);
+module_param(bna_debugfs_enable, uint, 0644);
MODULE_PARM_DESC(bna_debugfs_enable, "Enables debugfs feature, default=1,"
" Range[false:0|true:1]");
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index cebfe3bd086e..933799be0471 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -486,11 +486,11 @@ struct bnad_debugfs_entry {
};
static const struct bnad_debugfs_entry bnad_debugfs_files[] = {
- { "fwtrc", S_IFREG|S_IRUGO, &bnad_debugfs_op_fwtrc, },
- { "fwsave", S_IFREG|S_IRUGO, &bnad_debugfs_op_fwsave, },
- { "regrd", S_IFREG|S_IRUGO|S_IWUSR, &bnad_debugfs_op_regrd, },
- { "regwr", S_IFREG|S_IWUSR, &bnad_debugfs_op_regwr, },
- { "drvinfo", S_IFREG|S_IRUGO, &bnad_debugfs_op_drvinfo, },
+ { "fwtrc", S_IFREG | 0444, &bnad_debugfs_op_fwtrc, },
+ { "fwsave", S_IFREG | 0444, &bnad_debugfs_op_fwsave, },
+ { "regrd", S_IFREG | 0644, &bnad_debugfs_op_regrd, },
+ { "regwr", S_IFREG | 0200, &bnad_debugfs_op_regwr, },
+ { "drvinfo", S_IFREG | 0444, &bnad_debugfs_op_drvinfo, },
};
static struct dentry *bna_debugfs_root;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index e84afcf1ecb5..d09bd43680b3 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -472,8 +472,44 @@ static int macb_mii_probe(struct net_device *dev)
struct macb *bp = netdev_priv(dev);
struct macb_platform_data *pdata;
struct phy_device *phydev;
- int phy_irq;
- int ret;
+ struct device_node *np;
+ int phy_irq, ret, i;
+
+ pdata = dev_get_platdata(&bp->pdev->dev);
+ np = bp->pdev->dev.of_node;
+ ret = 0;
+
+ if (np) {
+ if (of_phy_is_fixed_link(np)) {
+ if (of_phy_register_fixed_link(np) < 0) {
+ dev_err(&bp->pdev->dev,
+ "broken fixed-link specification\n");
+ return -ENODEV;
+ }
+ bp->phy_node = of_node_get(np);
+ } else {
+ bp->phy_node = of_parse_phandle(np, "phy-handle", 0);
+ /* fall back to standard phy registration if no
+ * phy-handle was found and no phy was found
+ * during dt phy registration
+ */
+ if (!bp->phy_node && !phy_find_first(bp->mii_bus)) {
+ for (i = 0; i < PHY_MAX_ADDR; i++) {
+ struct phy_device *phydev;
+
+ phydev = mdiobus_scan(bp->mii_bus, i);
+ if (IS_ERR(phydev) &&
+ PTR_ERR(phydev) != -ENODEV) {
+ ret = PTR_ERR(phydev);
+ break;
+ }
+ }
+
+ if (ret)
+ return -ENODEV;
+ }
+ }
+ }
if (bp->phy_node) {
phydev = of_phy_connect(dev, bp->phy_node,
@@ -488,7 +524,6 @@ static int macb_mii_probe(struct net_device *dev)
return -ENXIO;
}
- pdata = dev_get_platdata(&bp->pdev->dev);
if (pdata) {
if (gpio_is_valid(pdata->phy_irq_pin)) {
ret = devm_gpio_request(&bp->pdev->dev,
@@ -533,7 +568,7 @@ static int macb_mii_init(struct macb *bp)
{
struct macb_platform_data *pdata;
struct device_node *np;
- int err = -ENXIO, i;
+ int err;
/* Enable management port */
macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -556,43 +591,10 @@ static int macb_mii_init(struct macb *bp)
dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
np = bp->pdev->dev.of_node;
- if (np) {
- if (of_phy_is_fixed_link(np)) {
- if (of_phy_register_fixed_link(np) < 0) {
- dev_err(&bp->pdev->dev,
- "broken fixed-link specification\n");
- goto err_out_unregister_bus;
- }
- bp->phy_node = of_node_get(np);
-
- err = mdiobus_register(bp->mii_bus);
- } else {
- /* try dt phy registration */
- err = of_mdiobus_register(bp->mii_bus, np);
-
- /* fallback to standard phy registration if no phy were
- * found during dt phy registration
- */
- if (!err && !phy_find_first(bp->mii_bus)) {
- for (i = 0; i < PHY_MAX_ADDR; i++) {
- struct phy_device *phydev;
-
- phydev = mdiobus_scan(bp->mii_bus, i);
- if (IS_ERR(phydev) &&
- PTR_ERR(phydev) != -ENODEV) {
- err = PTR_ERR(phydev);
- break;
- }
- }
- if (err)
- goto err_out_unregister_bus;
- }
- }
+ if (np) {
+ err = of_mdiobus_register(bp->mii_bus, np);
} else {
- for (i = 0; i < PHY_MAX_ADDR; i++)
- bp->mii_bus->irq[i] = PHY_POLL;
-
if (pdata)
bp->mii_bus->phy_mask = pdata->phy_mask;
@@ -610,10 +612,10 @@ static int macb_mii_init(struct macb *bp)
err_out_unregister_bus:
mdiobus_unregister(bp->mii_bus);
-err_out_free_mdiobus:
- of_node_put(bp->phy_node);
if (np && of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
+err_out_free_mdiobus:
+ of_node_put(bp->phy_node);
mdiobus_free(bp->mii_bus);
err_out:
return err;
diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c
index c87c9c684a33..d59497a7bdce 100644
--- a/drivers/net/ethernet/cavium/common/cavium_ptp.c
+++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c
@@ -75,6 +75,8 @@ EXPORT_SYMBOL(cavium_ptp_get);
void cavium_ptp_put(struct cavium_ptp *ptp)
{
+ if (!ptp)
+ return;
pci_dev_put(ptp->pdev);
}
EXPORT_SYMBOL(cavium_ptp_put);
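The cavium_ptp_put() hunk above makes the release path NULL-tolerant, mirroring the kfree(NULL) convention so callers can drop a reference unconditionally. A minimal sketch of the idiom, assuming a simple kref-counted object (my_obj and friends are illustrative, not part of the driver):

#include <linux/kref.h>
#include <linux/slab.h>

struct my_obj {
	struct kref ref;
};

static void my_obj_release(struct kref *ref)
{
	kfree(container_of(ref, struct my_obj, ref));
}

/* NULL-tolerant put: callers may pass NULL unconditionally,
 * exactly as with kfree().
 */
static void my_obj_put(struct my_obj *obj)
{
	if (!obj)
		return;
	kref_put(&obj->ref, my_obj_release);
}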
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 32ae63b6f20e..73e70e076e61 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -164,15 +164,6 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
}
break;
- case OCTNET_CMD_CHANGE_MTU:
- /* If command is successful, change the MTU. */
- netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
- netdev->mtu, nctrl->ncmd.s.param1);
- netdev->mtu = nctrl->ncmd.s.param1;
- queue_delayed_work(lio->link_status_wq.wq,
- &lio->link_status_wq.wk.work, 0);
- break;
-
case OCTNET_CMD_GPIO_ACCESS:
netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
@@ -386,20 +377,12 @@ static void lio_update_txq_status(struct octeon_device *oct, int iq_num)
return;
lio = GET_LIO(netdev);
- if (netif_is_multiqueue(netdev)) {
- if (__netif_subqueue_stopped(netdev, iq->q_index) &&
- lio->linfo.link.s.link_up &&
- (!octnet_iq_is_full(oct, iq_num))) {
- netif_wake_subqueue(netdev, iq->q_index);
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
- tx_restart, 1);
- }
- } else if (netif_queue_stopped(netdev) &&
- lio->linfo.link.s.link_up &&
- (!octnet_iq_is_full(oct, lio->txq))) {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
+ if (__netif_subqueue_stopped(netdev, iq->q_index) &&
+ lio->linfo.link.s.link_up &&
+ (!octnet_iq_is_full(oct, iq_num))) {
+ netif_wake_subqueue(netdev, iq->q_index);
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
tx_restart, 1);
- netif_wake_queue(netdev);
}
}
@@ -571,7 +554,8 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)),
napi_gro_receive(napi, skb);
- droq->stats.rx_bytes_received += len;
+ droq->stats.rx_bytes_received += len -
+ rh->r_dh.len * BYTES_PER_DHLEN_UNIT;
droq->stats.rx_pkts_received++;
} else {
recv_buffer_free(skb);
@@ -635,9 +619,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
iq_no = droq->q_no;
/* Handle Droq descriptors */
- work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
- POLL_EVENT_PROCESS_PKTS,
- budget);
+ work_done = octeon_droq_process_poll_pkts(oct, droq, budget);
/* Flush the instruction queue */
iq = oct->instr_queue[iq_no];
@@ -668,8 +650,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
tx_done = 1;
napi_complete_done(napi, work_done);
- octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
- POLL_EVENT_ENABLE_INTR, 0);
+ octeon_enable_irq(droq->oct_dev, droq->q_no);
return 0;
}
@@ -1080,3 +1061,88 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
}
return 0;
}
+
+static void liquidio_change_mtu_completion(struct octeon_device *oct,
+ u32 status, void *buf)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+ struct liquidio_if_cfg_context *ctx;
+
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+ if (status) {
+ dev_err(&oct->pci_dev->dev, "MTU change failed. Status: %llx\n",
+ CVM_CAST64(status));
+ WRITE_ONCE(ctx->cond, LIO_CHANGE_MTU_FAIL);
+ } else {
+ WRITE_ONCE(ctx->cond, LIO_CHANGE_MTU_SUCCESS);
+ }
+
+ /* This barrier is required to ensure that the response has been
+ * fully written before waking up the handler
+ */
+ wmb();
+
+ wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ */
+int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct liquidio_if_cfg_context *ctx;
+ struct octeon_soft_command *sc;
+ union octnet_cmd *ncmd;
+ int ctx_size;
+ int ret = 0;
+
+ ctx_size = sizeof(struct liquidio_if_cfg_context);
+ sc = (struct octeon_soft_command *)
+ octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, ctx_size);
+
+ ncmd = (union octnet_cmd *)sc->virtdptr;
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+ WRITE_ONCE(ctx->cond, 0);
+ ctx->octeon_id = lio_get_device_id(oct);
+ init_waitqueue_head(&ctx->wc);
+
+ ncmd->u64 = 0;
+ ncmd->s.cmd = OCTNET_CMD_CHANGE_MTU;
+ ncmd->s.param1 = new_mtu;
+
+ octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+ octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+ OPCODE_NIC_CMD, 0, 0, 0);
+
+ sc->callback = liquidio_change_mtu_completion;
+ sc->callback_arg = sc;
+ sc->wait_time = 100;
+
+ ret = octeon_send_soft_command(oct, sc);
+ if (ret == IQ_SEND_FAILED) {
+ netif_info(lio, rx_err, lio->netdev, "Failed to change MTU\n");
+ return -EINVAL;
+ }
+ /* Sleep on a wait queue until the cond flag indicates that the
+ * response has arrived or the request has timed out.
+ */
+ if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR ||
+ ctx->cond == LIO_CHANGE_MTU_FAIL) {
+ octeon_free_soft_command(oct, sc);
+ return -EINVAL;
+ }
+
+ netdev->mtu = new_mtu;
+ lio->mtu = new_mtu;
+
+ octeon_free_soft_command(oct, sc);
+ return 0;
+}
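The new liquidio_change_mtu() pairs the driver-private sleep_cond() helper with a WRITE_ONCE()/wmb()/wake_up_interruptible() handshake in the completion callback. A generic sketch of the same pattern built only from stock kernel primitives, with wait_event_interruptible_timeout() standing in for sleep_cond() (all names illustrative):

#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/wait.h>

struct cmd_ctx {
	wait_queue_head_t wc;
	int cond;		/* 0 = pending, 1 = success, 2 = failure */
};

static void cmd_ctx_init(struct cmd_ctx *ctx)
{
	init_waitqueue_head(&ctx->wc);
	WRITE_ONCE(ctx->cond, 0);
}

/* Completion side: publish the status, then wake any waiter. */
static void cmd_complete(struct cmd_ctx *ctx, bool ok)
{
	WRITE_ONCE(ctx->cond, ok ? 1 : 2);
	wmb();	/* make the status visible before the wakeup */
	wake_up_interruptible(&ctx->wc);
}

/* Submit side: sleep until the callback fires or we give up. */
static int cmd_wait(struct cmd_ctx *ctx)
{
	long rc;

	rc = wait_event_interruptible_timeout(ctx->wc,
					      READ_ONCE(ctx->cond) != 0,
					      msecs_to_jiffies(2000));
	if (rc < 0)		/* interrupted by a signal */
		return rc;
	if (!rc)		/* timed out */
		return -ETIMEDOUT;
	return READ_ONCE(ctx->cond) == 1 ? 0 : -EIO;
}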
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index a63ddf07f168..550ac29682a5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -232,10 +232,16 @@ static int lio_get_link_ksettings(struct net_device *netdev,
linfo = &lio->linfo;
- if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
- linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
- linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
- linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
+ switch (linfo->link.s.phy_type) {
+ case LIO_PHY_PORT_TP:
+ ecmd->base.port = PORT_TP;
+ supported = (SUPPORTED_10000baseT_Full |
+ SUPPORTED_TP | SUPPORTED_Pause);
+ advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Pause);
+ ecmd->base.autoneg = AUTONEG_DISABLE;
+ break;
+
+ case LIO_PHY_PORT_FIBRE:
ecmd->base.port = PORT_FIBRE;
if (linfo->link.s.speed == SPEED_10000) {
@@ -245,12 +251,18 @@ static int lio_get_link_ksettings(struct net_device *netdev,
supported |= SUPPORTED_FIBRE | SUPPORTED_Pause;
advertising |= ADVERTISED_Pause;
+ ecmd->base.autoneg = AUTONEG_DISABLE;
+ break;
+ }
+
+ if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
+ linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
+ linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
+ linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
ethtool_convert_legacy_u32_to_link_mode(
ecmd->link_modes.supported, supported);
ethtool_convert_legacy_u32_to_link_mode(
ecmd->link_modes.advertising, advertising);
- ecmd->base.autoneg = AUTONEG_DISABLE;
-
} else {
dev_err(&oct->pci_dev->dev, "Unknown link interface reported %d\n",
linfo->link.s.if_mode);
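For reference, a condensed sketch of what the new PORT_TP branch does with the legacy SUPPORTED_*/ADVERTISED_* masks before handing them to the ksettings API (the helper name is an assumption of this sketch):

#include <linux/ethtool.h>

static void fill_tp_link_modes(struct ethtool_link_ksettings *ecmd)
{
	u32 supported = SUPPORTED_10000baseT_Full | SUPPORTED_TP |
			SUPPORTED_Pause;
	u32 advertising = ADVERTISED_10000baseT_Full | ADVERTISED_Pause;

	ecmd->base.port = PORT_TP;
	ecmd->base.autoneg = AUTONEG_DISABLE;

	/* legacy u32 masks -> the long bitmaps used by ksettings */
	ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.supported,
						supported);
	ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.advertising,
						advertising);
}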
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index a5eecd895a82..43c5ba0af12b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -91,18 +91,9 @@ static int octeon_console_debug_enabled(u32 console)
*/
#define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000
-struct liquidio_if_cfg_context {
- int octeon_id;
-
- wait_queue_head_t wc;
-
- int cond;
-};
-
-struct liquidio_if_cfg_resp {
- u64 rh;
- struct liquidio_if_cfg_info cfg_info;
- u64 status;
+struct lio_trusted_vf_ctx {
+ struct completion complete;
+ int status;
};
struct liquidio_rx_ctl_context {
@@ -523,148 +514,30 @@ static void liquidio_deinit_pci(void)
}
/**
- * \brief Stop Tx queues
- * @param netdev network device
- */
-static inline void txqs_stop(struct net_device *netdev)
-{
- if (netif_is_multiqueue(netdev)) {
- int i;
-
- for (i = 0; i < netdev->num_tx_queues; i++)
- netif_stop_subqueue(netdev, i);
- } else {
- netif_stop_queue(netdev);
- }
-}
-
-/**
- * \brief Start Tx queues
- * @param netdev network device
- */
-static inline void txqs_start(struct net_device *netdev)
-{
- if (netif_is_multiqueue(netdev)) {
- int i;
-
- for (i = 0; i < netdev->num_tx_queues; i++)
- netif_start_subqueue(netdev, i);
- } else {
- netif_start_queue(netdev);
- }
-}
-
-/**
- * \brief Wake Tx queues
- * @param netdev network device
- */
-static inline void txqs_wake(struct net_device *netdev)
-{
- struct lio *lio = GET_LIO(netdev);
-
- if (netif_is_multiqueue(netdev)) {
- int i;
-
- for (i = 0; i < netdev->num_tx_queues; i++) {
- int qno = lio->linfo.txpciq[i %
- lio->oct_dev->num_iqs].s.q_no;
-
- if (__netif_subqueue_stopped(netdev, i)) {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
- tx_restart, 1);
- netif_wake_subqueue(netdev, i);
- }
- }
- } else {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
- tx_restart, 1);
- netif_wake_queue(netdev);
- }
-}
-
-/**
- * \brief Stop Tx queue
- * @param netdev network device
- */
-static void stop_txq(struct net_device *netdev)
-{
- txqs_stop(netdev);
-}
-
-/**
- * \brief Start Tx queue
- * @param netdev network device
- */
-static void start_txq(struct net_device *netdev)
-{
- struct lio *lio = GET_LIO(netdev);
-
- if (lio->linfo.link.s.link_up) {
- txqs_start(netdev);
- return;
- }
-}
-
-/**
- * \brief Wake a queue
- * @param netdev network device
- * @param q which queue to wake
- */
-static inline void wake_q(struct net_device *netdev, int q)
-{
- if (netif_is_multiqueue(netdev))
- netif_wake_subqueue(netdev, q);
- else
- netif_wake_queue(netdev);
-}
-
-/**
- * \brief Stop a queue
- * @param netdev network device
- * @param q which queue to stop
- */
-static inline void stop_q(struct net_device *netdev, int q)
-{
- if (netif_is_multiqueue(netdev))
- netif_stop_subqueue(netdev, q);
- else
- netif_stop_queue(netdev);
-}
-
-/**
* \brief Check Tx queue status, and take appropriate action
* @param lio per-network private data
* @returns 0 if full, number of queues woken up otherwise
*/
static inline int check_txq_status(struct lio *lio)
{
+ int numqs = lio->netdev->num_tx_queues;
int ret_val = 0;
+ int q, iq;
- if (netif_is_multiqueue(lio->netdev)) {
- int numqs = lio->netdev->num_tx_queues;
- int q, iq = 0;
-
- /* check each sub-queue state */
- for (q = 0; q < numqs; q++) {
- iq = lio->linfo.txpciq[q %
- lio->oct_dev->num_iqs].s.q_no;
- if (octnet_iq_is_full(lio->oct_dev, iq))
- continue;
- if (__netif_subqueue_stopped(lio->netdev, q)) {
- wake_q(lio->netdev, q);
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq,
- tx_restart, 1);
- ret_val++;
- }
+ /* check each sub-queue state */
+ for (q = 0; q < numqs; q++) {
+ iq = lio->linfo.txpciq[q %
+ lio->oct_dev->num_iqs].s.q_no;
+ if (octnet_iq_is_full(lio->oct_dev, iq))
+ continue;
+ if (__netif_subqueue_stopped(lio->netdev, q)) {
+ netif_wake_subqueue(lio->netdev, q);
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq,
+ tx_restart, 1);
+ ret_val++;
}
- } else {
- if (octnet_iq_is_full(lio->oct_dev, lio->txq))
- return 0;
- wake_q(lio->netdev, lio->txq);
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
- tx_restart, 1);
- ret_val = 1;
}
+
return ret_val;
}
@@ -841,8 +714,12 @@ static void octnet_link_status_change(struct work_struct *work)
struct cavium_wk *wk = (struct cavium_wk *)work;
struct lio *lio = (struct lio *)wk->ctxptr;
+ /* lio->linfo.link.s.mtu always contains the max MTU of the lio
+ * interface. This function is invoked only when the new max MTU of
+ * the interface is less than the current MTU.
+ */
rtnl_lock();
- call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+ dev_set_mtu(lio->netdev, lio->linfo.link.s.mtu);
rtnl_unlock();
}
@@ -891,7 +768,11 @@ static inline void update_link_status(struct net_device *netdev,
{
struct lio *lio = GET_LIO(netdev);
int changed = (lio->linfo.link.u64 != ls->u64);
+ int current_max_mtu = lio->linfo.link.s.mtu;
+ struct octeon_device *oct = lio->oct_dev;
+ dev_dbg(&oct->pci_dev->dev, "%s: lio->linfo.link.u64=%llx, ls->u64=%llx\n",
+ __func__, lio->linfo.link.u64, ls->u64);
lio->linfo.link.u64 = ls->u64;
if ((lio->intf_open) && (changed)) {
@@ -899,11 +780,25 @@ static inline void update_link_status(struct net_device *netdev,
lio->link_changes++;
if (lio->linfo.link.s.link_up) {
+ dev_dbg(&oct->pci_dev->dev, "%s: link_up", __func__);
netif_carrier_on(netdev);
- txqs_wake(netdev);
+ wake_txqs(netdev);
} else {
+ dev_dbg(&oct->pci_dev->dev, "%s: link_off", __func__);
netif_carrier_off(netdev);
- stop_txq(netdev);
+ stop_txqs(netdev);
+ }
+ if (lio->linfo.link.s.mtu != current_max_mtu) {
+ netif_info(lio, probe, lio->netdev, "Max MTU changed from %d to %d\n",
+ current_max_mtu, lio->linfo.link.s.mtu);
+ netdev->max_mtu = lio->linfo.link.s.mtu;
+ }
+ if (lio->linfo.link.s.mtu < netdev->mtu) {
+ dev_warn(&oct->pci_dev->dev,
+ "Current MTU is higher than new max MTU; reducing current MTU from %d to %d\n",
+ netdev->mtu, lio->linfo.link.s.mtu);
+ queue_delayed_work(lio->link_status_wq.wq,
+ &lio->link_status_wq.wk.work, 0);
}
}
}
@@ -1739,43 +1634,6 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
return 0;
}
-static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
-{
- int q = 0;
-
- if (netif_is_multiqueue(lio->netdev))
- q = skb->queue_mapping % lio->linfo.num_txpciq;
-
- return q;
-}
-
-/**
- * \brief Check Tx queue state for a given network buffer
- * @param lio per-network private data
- * @param skb network buffer
- */
-static inline int check_txq_state(struct lio *lio, struct sk_buff *skb)
-{
- int q = 0, iq = 0;
-
- if (netif_is_multiqueue(lio->netdev)) {
- q = skb->queue_mapping;
- iq = lio->linfo.txpciq[(q % lio->oct_dev->num_iqs)].s.q_no;
- } else {
- iq = lio->txq;
- q = iq;
- }
-
- if (octnet_iq_is_full(lio->oct_dev, iq))
- return 0;
-
- if (__netif_subqueue_stopped(lio->netdev, q)) {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
- wake_q(lio->netdev, q);
- }
- return 1;
-}
-
/**
* \brief Unmap and free network buffer
* @param buf buffer
@@ -1793,8 +1651,6 @@ static void free_netbuf(void *buf)
dma_unmap_single(&lio->oct_dev->pci_dev->dev, finfo->dptr, skb->len,
DMA_TO_DEVICE);
- check_txq_state(lio, skb);
-
tx_buffer_free(skb);
}
@@ -1835,8 +1691,6 @@ static void free_netsgbuf(void *buf)
list_add_tail(&g->list, &lio->glist[iq]);
spin_unlock(&lio->glist_lock[iq]);
- check_txq_state(lio, skb); /* mq support: sub-queue state check */
-
tx_buffer_free(skb);
}
@@ -1882,8 +1736,6 @@ static void free_netsgbuf_with_resp(void *buf)
spin_unlock(&lio->glist_lock[iq]);
/* Don't free the skb yet */
-
- check_txq_state(lio, skb);
}
/**
@@ -2211,7 +2063,7 @@ static int liquidio_open(struct net_device *netdev)
return -1;
}
- start_txq(netdev);
+ start_txqs(netdev);
/* tell Octeon to start forwarding packets to host */
send_rx_ctrl_cmd(lio, 1);
@@ -2449,38 +2301,6 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
}
/**
- * \brief Net device change_mtu
- * @param netdev network device
- */
-static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
-{
- struct lio *lio = GET_LIO(netdev);
- struct octeon_device *oct = lio->oct_dev;
- struct octnic_ctrl_pkt nctrl;
- int ret = 0;
-
- memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
- nctrl.ncmd.u64 = 0;
- nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
- nctrl.ncmd.s.param1 = new_mtu;
- nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
- nctrl.wait_time = 100;
- nctrl.netpndev = (u64)netdev;
- nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
- if (ret < 0) {
- dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
- return -1;
- }
-
- lio->mtu = new_mtu;
-
- return 0;
-}
-
-/**
* \brief Handler for SIOCSHWTSTAMP ioctl
* @param netdev network device
* @param ifr interface request
@@ -2685,14 +2505,9 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
lio = GET_LIO(netdev);
oct = lio->oct_dev;
- if (netif_is_multiqueue(netdev)) {
- q_idx = skb->queue_mapping;
- q_idx = (q_idx % (lio->linfo.num_txpciq));
- tag = q_idx;
- iq_no = lio->linfo.txpciq[q_idx].s.q_no;
- } else {
- iq_no = lio->txq;
- }
+ q_idx = skb_iq(lio, skb);
+ tag = q_idx;
+ iq_no = lio->linfo.txpciq[q_idx].s.q_no;
stats = &oct->instr_queue[iq_no]->stats;
@@ -2723,23 +2538,14 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
ndata.q_no = iq_no;
- if (netif_is_multiqueue(netdev)) {
- if (octnet_iq_is_full(oct, ndata.q_no)) {
- /* defer sending if queue is full */
- netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
- ndata.q_no);
- stats->tx_iq_busy++;
- return NETDEV_TX_BUSY;
- }
- } else {
- if (octnet_iq_is_full(oct, lio->txq)) {
- /* defer sending if queue is full */
- stats->tx_iq_busy++;
- netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
- lio->txq);
- return NETDEV_TX_BUSY;
- }
+ if (octnet_iq_is_full(oct, ndata.q_no)) {
+ /* defer sending if queue is full */
+ netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+ ndata.q_no);
+ stats->tx_iq_busy++;
+ return NETDEV_TX_BUSY;
}
+
/* pr_info(" XMIT - valid Qs: %d, 1st Q no: %d, cpu: %d, q_no:%d\n",
* lio->linfo.num_txpciq, lio->txq, cpu, ndata.q_no);
*/
@@ -2895,7 +2701,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
if (status == IQ_SEND_STOP)
- stop_q(netdev, q_idx);
+ netif_stop_subqueue(netdev, q_idx);
netif_trans_update(netdev);
@@ -2934,7 +2740,7 @@ static void liquidio_tx_timeout(struct net_device *netdev)
"Transmit timeout tx_dropped:%ld, waking up queues now!!\n",
netdev->stats.tx_dropped);
netif_trans_update(netdev);
- txqs_wake(netdev);
+ wake_txqs(netdev);
}
static int liquidio_vlan_rx_add_vid(struct net_device *netdev,
@@ -3289,10 +3095,120 @@ static int liquidio_get_vf_config(struct net_device *netdev, int vfidx,
ether_addr_copy(&ivi->mac[0], macaddr);
ivi->vlan = oct->sriov_info.vf_vlantci[vfidx] & VLAN_VID_MASK;
ivi->qos = oct->sriov_info.vf_vlantci[vfidx] >> VLAN_PRIO_SHIFT;
+ if (oct->sriov_info.trusted_vf.active &&
+ oct->sriov_info.trusted_vf.id == vfidx)
+ ivi->trusted = true;
+ else
+ ivi->trusted = false;
ivi->linkstate = oct->sriov_info.vf_linkstate[vfidx];
return 0;
}
+static void trusted_vf_callback(struct octeon_device *oct_dev,
+ u32 status, void *ptr)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+ struct lio_trusted_vf_ctx *ctx;
+
+ ctx = (struct lio_trusted_vf_ctx *)sc->ctxptr;
+ ctx->status = status;
+
+ complete(&ctx->complete);
+}
+
+static int liquidio_send_vf_trust_cmd(struct lio *lio, int vfidx, bool trusted)
+{
+ struct octeon_device *oct = lio->oct_dev;
+ struct lio_trusted_vf_ctx *ctx;
+ struct octeon_soft_command *sc;
+ int ctx_size, retval;
+
+ ctx_size = sizeof(struct lio_trusted_vf_ctx);
+ sc = octeon_alloc_soft_command(oct, 0, 0, ctx_size);
+
+ ctx = (struct lio_trusted_vf_ctx *)sc->ctxptr;
+ init_completion(&ctx->complete);
+
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+ /* vfidx is 0-based, but vf_num (param1) is 1-based */
+ octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+ OPCODE_NIC_SET_TRUSTED_VF, 0, vfidx + 1,
+ trusted);
+
+ sc->callback = trusted_vf_callback;
+ sc->callback_arg = sc;
+ sc->wait_time = 1000;
+
+ retval = octeon_send_soft_command(oct, sc);
+ if (retval == IQ_SEND_FAILED) {
+ retval = -1;
+ } else {
+ /* Wait for response or timeout */
+ if (wait_for_completion_timeout(&ctx->complete,
+ msecs_to_jiffies(2000)))
+ retval = ctx->status;
+ else
+ retval = -1;
+ }
+
+ octeon_free_soft_command(oct, sc);
+
+ return retval;
+}
+
+static int liquidio_set_vf_trust(struct net_device *netdev, int vfidx,
+ bool setting)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+
+ if (strcmp(oct->fw_info.liquidio_firmware_version, "1.7.1") < 0) {
+ /* trusted vf is not supported by firmware older than 1.7.1 */
+ return -EOPNOTSUPP;
+ }
+
+ if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced) {
+ netif_info(lio, drv, lio->netdev, "Invalid vfidx %d\n", vfidx);
+ return -EINVAL;
+ }
+
+ if (setting) {
+ /* Set */
+
+ if (oct->sriov_info.trusted_vf.active &&
+ oct->sriov_info.trusted_vf.id == vfidx)
+ return 0;
+
+ if (oct->sriov_info.trusted_vf.active) {
+ netif_info(lio, drv, lio->netdev, "Only one trusted VF is allowed\n");
+ return -EPERM;
+ }
+ } else {
+ /* Clear */
+
+ if (!oct->sriov_info.trusted_vf.active)
+ return 0;
+ }
+
+ if (!liquidio_send_vf_trust_cmd(lio, vfidx, setting)) {
+ if (setting) {
+ oct->sriov_info.trusted_vf.id = vfidx;
+ oct->sriov_info.trusted_vf.active = true;
+ } else {
+ oct->sriov_info.trusted_vf.active = false;
+ }
+
+ netif_info(lio, drv, lio->netdev, "VF %u is %strusted\n", vfidx,
+ setting ? "" : "not ");
+ } else {
+ netif_info(lio, drv, lio->netdev, "Failed to update VF trust state\n");
+ return -1;
+ }
+
+ return 0;
+}
+
static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
int linkstate)
{
@@ -3423,6 +3339,7 @@ static const struct net_device_ops lionetdevops = {
.ndo_set_vf_mac = liquidio_set_vf_mac,
.ndo_set_vf_vlan = liquidio_set_vf_vlan,
.ndo_get_vf_config = liquidio_get_vf_config,
+ .ndo_set_vf_trust = liquidio_set_vf_trust,
.ndo_set_vf_link_state = liquidio_set_vf_link_state,
};
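update_link_status() above cannot call dev_set_mtu() directly, since dev_set_mtu() must run under rtnl_lock; the reduction is therefore queued onto the link_status workqueue, whose handler (octnet_link_status_change()) takes the lock. A stripped-down sketch of that deferral pattern, with illustrative names:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct mtu_work {
	struct work_struct work;
	struct net_device *netdev;
	int new_mtu;
};

static void mtu_work_fn(struct work_struct *work)
{
	struct mtu_work *mw = container_of(work, struct mtu_work, work);

	/* dev_set_mtu() requires rtnl_lock, which cannot be taken
	 * from the context that noticed the MTU change.
	 */
	rtnl_lock();
	dev_set_mtu(mw->netdev, mw->new_mtu);
	rtnl_unlock();
	kfree(mw);
}

static int defer_mtu_change(struct net_device *netdev, int new_mtu)
{
	struct mtu_work *mw = kzalloc(sizeof(*mw), GFP_ATOMIC);

	if (!mw)
		return -ENOMEM;
	mw->netdev = netdev;
	mw->new_mtu = new_mtu;
	INIT_WORK(&mw->work, mtu_work_fn);
	schedule_work(&mw->work);
	return 0;
}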
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index fd70a4844e2d..dc62698bdaf7 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -40,20 +40,6 @@ MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
-struct liquidio_if_cfg_context {
- int octeon_id;
-
- wait_queue_head_t wc;
-
- int cond;
-};
-
-struct liquidio_if_cfg_resp {
- u64 rh;
- struct liquidio_if_cfg_info cfg_info;
- u64 status;
-};
-
struct liquidio_rx_ctl_context {
int octeon_id;
@@ -299,105 +285,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
};
/**
- * \brief Stop Tx queues
- * @param netdev network device
- */
-static void txqs_stop(struct net_device *netdev)
-{
- if (netif_is_multiqueue(netdev)) {
- int i;
-
- for (i = 0; i < netdev->num_tx_queues; i++)
- netif_stop_subqueue(netdev, i);
- } else {
- netif_stop_queue(netdev);
- }
-}
-
-/**
- * \brief Start Tx queues
- * @param netdev network device
- */
-static void txqs_start(struct net_device *netdev)
-{
- if (netif_is_multiqueue(netdev)) {
- int i;
-
- for (i = 0; i < netdev->num_tx_queues; i++)
- netif_start_subqueue(netdev, i);
- } else {
- netif_start_queue(netdev);
- }
-}
-
-/**
- * \brief Wake Tx queues
- * @param netdev network device
- */
-static void txqs_wake(struct net_device *netdev)
-{
- struct lio *lio = GET_LIO(netdev);
-
- if (netif_is_multiqueue(netdev)) {
- int i;
-
- for (i = 0; i < netdev->num_tx_queues; i++) {
- int qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs]
- .s.q_no;
- if (__netif_subqueue_stopped(netdev, i)) {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
- tx_restart, 1);
- netif_wake_subqueue(netdev, i);
- }
- }
- } else {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
- tx_restart, 1);
- netif_wake_queue(netdev);
- }
-}
-
-/**
- * \brief Start Tx queue
- * @param netdev network device
- */
-static void start_txq(struct net_device *netdev)
-{
- struct lio *lio = GET_LIO(netdev);
-
- if (lio->linfo.link.s.link_up) {
- txqs_start(netdev);
- return;
- }
-}
-
-/**
- * \brief Wake a queue
- * @param netdev network device
- * @param q which queue to wake
- */
-static void wake_q(struct net_device *netdev, int q)
-{
- if (netif_is_multiqueue(netdev))
- netif_wake_subqueue(netdev, q);
- else
- netif_wake_queue(netdev);
-}
-
-/**
- * \brief Stop a queue
- * @param netdev network device
- * @param q which queue to stop
- */
-static void stop_q(struct net_device *netdev, int q)
-{
- if (netif_is_multiqueue(netdev))
- netif_stop_subqueue(netdev, q);
- else
- netif_stop_queue(netdev);
-}
-
-/**
* Remove the node at the head of the list. The list would be empty at
* the end of this call if there are no more nodes in the list.
*/
@@ -564,8 +451,12 @@ static void octnet_link_status_change(struct work_struct *work)
struct cavium_wk *wk = (struct cavium_wk *)work;
struct lio *lio = (struct lio *)wk->ctxptr;
+ /* lio->linfo.link.s.mtu always contains the max MTU of the lio
+ * interface. This function is invoked only when the new max MTU of
+ * the interface is less than the current MTU.
+ */
rtnl_lock();
- call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+ dev_set_mtu(lio->netdev, lio->linfo.link.s.mtu);
rtnl_unlock();
}
@@ -613,6 +504,7 @@ static void update_link_status(struct net_device *netdev,
union oct_link_status *ls)
{
struct lio *lio = GET_LIO(netdev);
+ int current_max_mtu = lio->linfo.link.s.mtu;
struct octeon_device *oct = lio->oct_dev;
if ((lio->intf_open) && (lio->linfo.link.u64 != ls->u64)) {
@@ -623,24 +515,23 @@ static void update_link_status(struct net_device *netdev,
if (lio->linfo.link.s.link_up) {
netif_carrier_on(netdev);
- txqs_wake(netdev);
+ wake_txqs(netdev);
} else {
netif_carrier_off(netdev);
- txqs_stop(netdev);
+ stop_txqs(netdev);
}
- if (lio->linfo.link.s.mtu != netdev->max_mtu) {
- dev_info(&oct->pci_dev->dev, "Max MTU Changed from %d to %d\n",
- netdev->max_mtu, lio->linfo.link.s.mtu);
+ if (lio->linfo.link.s.mtu != current_max_mtu) {
+ dev_info(&oct->pci_dev->dev,
+ "Max MTU Changed from %d to %d\n",
+ current_max_mtu, lio->linfo.link.s.mtu);
netdev->max_mtu = lio->linfo.link.s.mtu;
}
if (lio->linfo.link.s.mtu < netdev->mtu) {
dev_warn(&oct->pci_dev->dev,
- "PF has changed the MTU for gmx port. Reducing the mtu from %d to %d\n",
+ "Current MTU is higher than new max MTU; reducing current MTU from %d to %d\n",
netdev->mtu, lio->linfo.link.s.mtu);
- lio->mtu = lio->linfo.link.s.mtu;
- netdev->mtu = lio->linfo.link.s.mtu;
queue_delayed_work(lio->link_status_wq.wq,
&lio->link_status_wq.wk.work, 0);
}
@@ -1062,44 +953,6 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
return 0;
}
-static int skb_iq(struct lio *lio, struct sk_buff *skb)
-{
- int q = 0;
-
- if (netif_is_multiqueue(lio->netdev))
- q = skb->queue_mapping % lio->linfo.num_txpciq;
-
- return q;
-}
-
-/**
- * \brief Check Tx queue state for a given network buffer
- * @param lio per-network private data
- * @param skb network buffer
- */
-static int check_txq_state(struct lio *lio, struct sk_buff *skb)
-{
- int q = 0, iq = 0;
-
- if (netif_is_multiqueue(lio->netdev)) {
- q = skb->queue_mapping;
- iq = lio->linfo.txpciq[q % lio->oct_dev->num_iqs].s.q_no;
- } else {
- iq = lio->txq;
- q = iq;
- }
-
- if (octnet_iq_is_full(lio->oct_dev, iq))
- return 0;
-
- if (__netif_subqueue_stopped(lio->netdev, q)) {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
- wake_q(lio->netdev, q);
- }
-
- return 1;
-}
-
/**
* \brief Unmap and free network buffer
* @param buf buffer
@@ -1117,8 +970,6 @@ static void free_netbuf(void *buf)
dma_unmap_single(&lio->oct_dev->pci_dev->dev, finfo->dptr, skb->len,
DMA_TO_DEVICE);
- check_txq_state(lio, skb);
-
tx_buffer_free(skb);
}
@@ -1160,8 +1011,6 @@ static void free_netsgbuf(void *buf)
list_add_tail(&g->list, &lio->glist[iq]);
spin_unlock(&lio->glist_lock[iq]);
- check_txq_state(lio, skb); /* mq support: sub-queue state check */
-
tx_buffer_free(skb);
}
@@ -1207,8 +1056,6 @@ static void free_netsgbuf_with_resp(void *buf)
spin_unlock(&lio->glist_lock[iq]);
/* Don't free the skb yet */
-
- check_txq_state(lio, skb);
}
/**
@@ -1268,7 +1115,7 @@ static int liquidio_open(struct net_device *netdev)
lio->intf_open = 1;
netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
- start_txq(netdev);
+ start_txqs(netdev);
/* tell Octeon to start forwarding packets to host */
send_rx_ctrl_cmd(lio, 1);
@@ -1310,7 +1157,7 @@ static int liquidio_stop(struct net_device *netdev)
ifstate_reset(lio, LIO_IFSTATE_RUNNING);
- txqs_stop(netdev);
+ stop_txqs(netdev);
dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
@@ -1538,41 +1385,6 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
}
/**
- * \brief Net device change_mtu
- * @param netdev network device
- */
-static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
-{
- struct octnic_ctrl_pkt nctrl;
- struct octeon_device *oct;
- struct lio *lio;
- int ret = 0;
-
- lio = GET_LIO(netdev);
- oct = lio->oct_dev;
-
- memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
- nctrl.ncmd.u64 = 0;
- nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
- nctrl.ncmd.s.param1 = new_mtu;
- nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
- nctrl.wait_time = LIO_CMD_WAIT_TM;
- nctrl.netpndev = (u64)netdev;
- nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
- if (ret < 0) {
- dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
- return -EIO;
- }
-
- lio->mtu = new_mtu;
-
- return 0;
-}
-
-/**
* \brief Handler for SIOCSHWTSTAMP ioctl
* @param netdev network device
* @param ifr interface request
@@ -1763,14 +1575,9 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
lio = GET_LIO(netdev);
oct = lio->oct_dev;
- if (netif_is_multiqueue(netdev)) {
- q_idx = skb->queue_mapping;
- q_idx = (q_idx % (lio->linfo.num_txpciq));
- tag = q_idx;
- iq_no = lio->linfo.txpciq[q_idx].s.q_no;
- } else {
- iq_no = lio->txq;
- }
+ q_idx = skb_iq(lio, skb);
+ tag = q_idx;
+ iq_no = lio->linfo.txpciq[q_idx].s.q_no;
stats = &oct->instr_queue[iq_no]->stats;
@@ -1799,22 +1606,12 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
ndata.q_no = iq_no;
- if (netif_is_multiqueue(netdev)) {
- if (octnet_iq_is_full(oct, ndata.q_no)) {
- /* defer sending if queue is full */
- netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
- ndata.q_no);
- stats->tx_iq_busy++;
- return NETDEV_TX_BUSY;
- }
- } else {
- if (octnet_iq_is_full(oct, lio->txq)) {
- /* defer sending if queue is full */
- stats->tx_iq_busy++;
- netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
- ndata.q_no);
- return NETDEV_TX_BUSY;
- }
+ if (octnet_iq_is_full(oct, ndata.q_no)) {
+ /* defer sending if queue is full */
+ netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+ ndata.q_no);
+ stats->tx_iq_busy++;
+ return NETDEV_TX_BUSY;
}
ndata.datasize = skb->len;
@@ -1956,7 +1753,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
if (status == IQ_SEND_STOP) {
dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n",
iq_no);
- stop_q(netdev, q_idx);
+ netif_stop_subqueue(netdev, q_idx);
}
netif_trans_update(netdev);
@@ -1996,7 +1793,7 @@ static void liquidio_tx_timeout(struct net_device *netdev)
"Transmit timeout tx_dropped:%ld, waking up queues now!!\n",
netdev->stats.tx_dropped);
netif_trans_update(netdev);
- txqs_wake(netdev);
+ wake_txqs(netdev);
}
static int
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 522dcc4dcff7..82a783db5baf 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -84,6 +84,7 @@ enum octeon_tag_type {
#define OPCODE_NIC_IF_CFG 0x09
#define OPCODE_NIC_VF_DRV_NOTICE 0x0A
#define OPCODE_NIC_INTRMOD_PARAMS 0x0B
+#define OPCODE_NIC_SET_TRUSTED_VF 0x13
#define OPCODE_NIC_SYNC_OCTEON_TIME 0x14
#define VF_DRV_LOADED 1
#define VF_DRV_REMOVED -1
@@ -192,7 +193,8 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
#define OCTNET_MAX_FRM_SIZE (16000 + OCTNET_FRM_HEADER_SIZE)
-#define OCTNET_DEFAULT_FRM_SIZE (1500 + OCTNET_FRM_HEADER_SIZE)
+#define OCTNET_DEFAULT_MTU (1500)
+#define OCTNET_DEFAULT_FRM_SIZE (OCTNET_DEFAULT_MTU + OCTNET_FRM_HEADER_SIZE)
/** NIC Commands are sent using this Octeon Input Queue */
#define OCTNET_CMD_Q 0
@@ -675,9 +677,11 @@ union oct_link_status {
u64 if_mode:5;
u64 pause:1;
u64 flashing:1;
- u64 reserved:15;
+ u64 phy_type:5;
+ u64 reserved:10;
#else
- u64 reserved:15;
+ u64 reserved:10;
+ u64 phy_type:5;
u64 flashing:1;
u64 pause:1;
u64 if_mode:5;
@@ -690,6 +694,12 @@ union oct_link_status {
} s;
};
+enum lio_phy_type {
+ LIO_PHY_PORT_TP = 0x0,
+ LIO_PHY_PORT_FIBRE = 0x1,
+ LIO_PHY_PORT_UNKNOWN,
+};
+
/** The txpciq info passed to host from the firmware */
union oct_txpciq {
@@ -909,6 +919,12 @@ union oct_nic_if_cfg {
} s;
};
+struct lio_trusted_vf {
+ uint64_t active: 1;
+ uint64_t id : 8;
+ uint64_t reserved: 55;
+};
+
struct lio_time {
s64 sec; /* seconds */
s64 nsec; /* nanoseconds */
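The new phy_type field is carved out of the reserved bits of oct_link_status, and the declaration order is mirrored under the two bitfield-endianness guards so the u64 image matches the firmware layout on either kind of host. A minimal sketch of the pattern (illustrative union):

#include <asm/byteorder.h>
#include <linux/types.h>

union example_status {
	u64 u64;
	struct {
#ifdef __BIG_ENDIAN_BITFIELD
		u64 flag:1;
		u64 value:5;
		u64 reserved:58;
#else		/* __LITTLE_ENDIAN_BITFIELD */
		u64 reserved:58;
		u64 value:5;
		u64 flag:1;
#endif
	} s;
};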
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 63b0c758a0a6..91937cc5c1d7 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -370,6 +370,8 @@ struct octeon_sriov_info {
u32 sriov_enabled;
+ struct lio_trusted_vf trusted_vf;
+
/*lookup table that maps DPI ring number to VF pci_dev struct pointer*/
struct pci_dev *dpiring_to_vfpcidev_lut[MAX_POSSIBLE_VFS];
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 3461d65ff4eb..f044718cea52 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -788,7 +788,7 @@ octeon_droq_process_packets(struct octeon_device *oct,
* called before calling this routine.
*/
-static int
+int
octeon_droq_process_poll_pkts(struct octeon_device *oct,
struct octeon_droq *droq, u32 budget)
{
@@ -835,71 +835,46 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
return total_pkts_processed;
}
+/* Enable Pkt Interrupt */
int
-octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
- u32 arg)
+octeon_enable_irq(struct octeon_device *oct, u32 q_no)
{
- struct octeon_droq *droq;
-
- droq = oct->droq[q_no];
+ switch (oct->chip_id) {
+ case OCTEON_CN66XX:
+ case OCTEON_CN68XX: {
+ struct octeon_cn6xxx *cn6xxx =
+ (struct octeon_cn6xxx *)oct->chip;
+ unsigned long flags;
+ u32 value;
- if (cmd == POLL_EVENT_PROCESS_PKTS)
- return octeon_droq_process_poll_pkts(oct, droq, arg);
+ spin_lock_irqsave
+ (&cn6xxx->lock_for_droq_int_enb_reg, flags);
+ value = octeon_read_csr(oct, CN6XXX_SLI_PKT_TIME_INT_ENB);
+ value |= (1 << q_no);
+ octeon_write_csr(oct, CN6XXX_SLI_PKT_TIME_INT_ENB, value);
+ value = octeon_read_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB);
+ value |= (1 << q_no);
+ octeon_write_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB, value);
- if (cmd == POLL_EVENT_PENDING_PKTS) {
- u32 pkt_cnt = atomic_read(&droq->pkts_pending);
+ /* don't bother flushing the enables */
- return octeon_droq_process_packets(oct, droq, pkt_cnt);
+ spin_unlock_irqrestore
+ (&cn6xxx->lock_for_droq_int_enb_reg, flags);
}
-
- if (cmd == POLL_EVENT_ENABLE_INTR) {
- u32 value;
- unsigned long flags;
-
- /* Enable Pkt Interrupt */
- switch (oct->chip_id) {
- case OCTEON_CN66XX:
- case OCTEON_CN68XX: {
- struct octeon_cn6xxx *cn6xxx =
- (struct octeon_cn6xxx *)oct->chip;
- spin_lock_irqsave
- (&cn6xxx->lock_for_droq_int_enb_reg, flags);
- value =
- octeon_read_csr(oct,
- CN6XXX_SLI_PKT_TIME_INT_ENB);
- value |= (1 << q_no);
- octeon_write_csr(oct,
- CN6XXX_SLI_PKT_TIME_INT_ENB,
- value);
- value =
- octeon_read_csr(oct,
- CN6XXX_SLI_PKT_CNT_INT_ENB);
- value |= (1 << q_no);
- octeon_write_csr(oct,
- CN6XXX_SLI_PKT_CNT_INT_ENB,
- value);
-
- /* don't bother flushing the enables */
-
- spin_unlock_irqrestore
- (&cn6xxx->lock_for_droq_int_enb_reg, flags);
- return 0;
- }
break;
- case OCTEON_CN23XX_PF_VID: {
- lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
- }
+ case OCTEON_CN23XX_PF_VID:
+ lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
break;
- case OCTEON_CN23XX_VF_VID:
- lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+ case OCTEON_CN23XX_VF_VID:
+ lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
break;
- }
- return 0;
+ default:
+ dev_err(&oct->pci_dev->dev, "%s Unknown Chip\n", __func__);
+ return 1;
}
- dev_err(&oct->pci_dev->dev, "%s Unknown command: %d\n", __func__, cmd);
- return -EINVAL;
+ return 0;
}
int octeon_register_droq_ops(struct octeon_device *oct, u32 q_no,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 815a9f56fd59..f28f262d4ab6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -123,11 +123,6 @@ struct oct_droq_stats {
};
-#define POLL_EVENT_INTR_ARRIVED 1
-#define POLL_EVENT_PROCESS_PKTS 2
-#define POLL_EVENT_PENDING_PKTS 3
-#define POLL_EVENT_ENABLE_INTR 4
-
/* The maximum number of buffers that can be dispatched from the
* output/dma queue. Set to 64 assuming 1K buffers in DROQ and the fact that
* max packet size from DROQ is 64K.
@@ -414,8 +409,10 @@ int octeon_droq_process_packets(struct octeon_device *oct,
struct octeon_droq *droq,
u32 budget);
-int octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no,
- int cmd, u32 arg);
+int octeon_droq_process_poll_pkts(struct octeon_device *oct,
+ struct octeon_droq *droq, u32 budget);
+
+int octeon_enable_irq(struct octeon_device *oct, u32 q_no);
void octeon_droq_check_oom(struct octeon_droq *droq);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
index 57af7df74ced..28e74ee23ff8 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
@@ -87,7 +87,7 @@ int octeon_mbox_read(struct octeon_mbox *mbox)
}
if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVING) {
- if (mbox->mbox_req.recv_len < msg.s.len) {
+ if (mbox->mbox_req.recv_len < mbox->mbox_req.msg.s.len) {
ret = 0;
} else {
mbox->state &= ~OCTEON_MBOX_STATE_REQUEST_RECEIVING;
@@ -96,7 +96,8 @@ int octeon_mbox_read(struct octeon_mbox *mbox)
}
} else {
if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVING) {
- if (mbox->mbox_resp.recv_len < msg.s.len) {
+ if (mbox->mbox_resp.recv_len <
+ mbox->mbox_resp.msg.s.len) {
ret = 0;
} else {
mbox->state &=
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index f2d1a076a038..8782206271b6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -35,6 +35,18 @@
#define LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
#define LIO_IFSTATE_RESETTING 0x10
+struct liquidio_if_cfg_context {
+ u32 octeon_id;
+ wait_queue_head_t wc;
+ int cond;
+};
+
+struct liquidio_if_cfg_resp {
+ u64 rh;
+ struct liquidio_if_cfg_info cfg_info;
+ u64 status;
+};
+
struct oct_nic_stats_resp {
u64 rh;
struct oct_link_stats stats;
@@ -184,6 +196,14 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
*/
void liquidio_set_ethtool_ops(struct net_device *netdev);
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ */
+int liquidio_change_mtu(struct net_device *netdev, int new_mtu);
+#define LIO_CHANGE_MTU_SUCCESS 1
+#define LIO_CHANGE_MTU_FAIL 2
+
#define SKB_ADJ_MASK 0x3F
#define SKB_ADJ (SKB_ADJ_MASK + 1)
@@ -486,4 +506,56 @@ static inline int wait_for_pending_requests(struct octeon_device *oct)
return 0;
}
+/**
+ * \brief Stop Tx queues
+ * @param netdev network device
+ */
+static inline void stop_txqs(struct net_device *netdev)
+{
+ int i;
+
+ for (i = 0; i < netdev->num_tx_queues; i++)
+ netif_stop_subqueue(netdev, i);
+}
+
+/**
+ * \brief Wake Tx queues
+ * @param netdev network device
+ */
+static inline void wake_txqs(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+ int i, qno;
+
+ for (i = 0; i < netdev->num_tx_queues; i++) {
+ qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs].s.q_no;
+
+ if (__netif_subqueue_stopped(netdev, i)) {
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
+ tx_restart, 1);
+ netif_wake_subqueue(netdev, i);
+ }
+ }
+}
+
+/**
+ * \brief Start Tx queues
+ * @param netdev network device
+ */
+static inline void start_txqs(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+ int i;
+
+ if (lio->linfo.link.s.link_up) {
+ for (i = 0; i < netdev->num_tx_queues; i++)
+ netif_start_subqueue(netdev, i);
+ }
+}
+
+static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
+{
+ return skb->queue_mapping % lio->linfo.num_txpciq;
+}
+
#endif
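A condensed usage sketch for the consolidated helpers above, mirroring how liquidio_xmit() now picks its instruction queue; the driver types are assumed to come from octeon_network.h, and error paths are trimmed:

static netdev_tx_t example_xmit(struct sk_buff *skb,
				struct net_device *netdev)
{
	struct lio *lio = GET_LIO(netdev);
	int q_idx = skb_iq(lio, skb);	/* skb -> tx queue index */
	int iq_no = lio->linfo.txpciq[q_idx].s.q_no;

	if (octnet_iq_is_full(lio->oct_dev, iq_no))
		return NETDEV_TX_BUSY;	/* defer while the IQ is full */

	/* ... build and post the command on iq_no ... */
	return NETDEV_TX_OK;
}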
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index e07d2093b971..2766af05b89e 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -366,6 +366,7 @@ int
lio_process_iq_request_list(struct octeon_device *oct,
struct octeon_instr_queue *iq, u32 napi_budget)
{
+ struct cavium_wq *cwq = &oct->dma_comp_wq;
int reqtype;
void *buf;
u32 old = iq->flush_index;
@@ -450,6 +451,10 @@ lio_process_iq_request_list(struct octeon_device *oct,
bytes_compl);
iq->flush_index = old;
+ if (atomic_read(&oct->response_list
+ [OCTEON_ORDERED_SC_LIST].pending_req_count))
+ queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
+
return inst_count;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index 3d691c69f74d..fe5b53700576 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -49,7 +49,6 @@ int octeon_setup_response_list(struct octeon_device *oct)
INIT_DELAYED_WORK(&cwq->wk.work, oct_poll_req_completion);
cwq->wk.ctxptr = oct;
oct->cmd_resp_state = OCT_DRV_ONLINE;
- queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
return ret;
}
@@ -164,5 +163,8 @@ static void oct_poll_req_completion(struct work_struct *work)
struct cavium_wq *cwq = &oct->dma_comp_wq;
lio_process_ordered_list(oct, 0);
- queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
+
+ if (atomic_read(&oct->response_list
+ [OCTEON_ORDERED_SC_LIST].pending_req_count))
+ queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
}
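The response poller above no longer free-runs every 50 ms: both the submit path (see the request_manager.c hunk) and the poller itself re-arm the work only while ordered-list requests are outstanding. A generic sketch of the idiom (names illustrative):

#include <linux/atomic.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct my_dev {
	struct delayed_work poll_work;
	atomic_t pending;
};

static void my_poll_fn(struct work_struct *work)
{
	struct my_dev *dev = container_of(to_delayed_work(work),
					  struct my_dev, poll_work);

	/* ... process completed requests, decrementing dev->pending ... */

	/* re-arm only while there is still work outstanding */
	if (atomic_read(&dev->pending))
		schedule_delayed_work(&dev->poll_work,
				      msecs_to_jiffies(1));
}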
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index b68cde9f17d2..73fe3881414b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -63,15 +63,10 @@ module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Debug message level bitmap");
static int cpi_alg = CPI_ALG_NONE;
-module_param(cpi_alg, int, S_IRUGO);
+module_param(cpi_alg, int, 0444);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
-struct nicvf_xdp_tx {
- u64 dma_addr;
- u8 qidx;
-};
-
static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
if (nic->sqs_mode)
@@ -507,29 +502,14 @@ static int nicvf_init_resources(struct nicvf *nic)
return 0;
}
-static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
-{
- /* Check if it's a recycled page, if not unmap the DMA mapping.
- * Recycled page holds an extra reference.
- */
- if (page_ref_count(page) == 1) {
- dma_addr &= PAGE_MASK;
- dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
- RCV_FRAG_LEN + XDP_HEADROOM,
- DMA_FROM_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC);
- }
-}
-
static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
struct rcv_queue *rq, struct sk_buff **skb)
{
struct xdp_buff xdp;
struct page *page;
- struct nicvf_xdp_tx *xdp_tx = NULL;
u32 action;
- u16 len, err, offset = 0;
+ u16 len, offset = 0;
u64 dma_addr, cpu_addr;
void *orig_data;
@@ -543,7 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
cpu_addr = (u64)phys_to_virt(cpu_addr);
page = virt_to_page((void *)cpu_addr);
- xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM;
+ xdp.data_hard_start = page_address(page);
xdp.data = (void *)cpu_addr;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
@@ -563,7 +543,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
switch (action) {
case XDP_PASS:
- nicvf_unmap_page(nic, page, dma_addr);
+ /* Check if it's a recycled page; if not,
+ * unmap the DMA mapping.
+ *
+ * A recycled page holds an extra reference.
+ */
+ if (page_ref_count(page) == 1) {
+ dma_addr &= PAGE_MASK;
+ dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+ RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
/* Build SKB and pass on packet to network stack */
*skb = build_skb(xdp.data,
@@ -576,20 +567,6 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
case XDP_TX:
nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
return true;
- case XDP_REDIRECT:
- /* Save DMA address for use while transmitting */
- xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
- xdp_tx->dma_addr = dma_addr;
- xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
-
- err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog);
- if (!err)
- return true;
-
- /* Free the page on error */
- nicvf_unmap_page(nic, page, dma_addr);
- put_page(page);
- break;
default:
bpf_warn_invalid_xdp_action(action);
/* fall through */
@@ -597,7 +574,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
trace_xdp_exception(nic->netdev, prog, action);
/* fall through */
case XDP_DROP:
- nicvf_unmap_page(nic, page, dma_addr);
+ /* Check if it's a recycled page; if not,
+ * unmap the DMA mapping.
+ *
+ * A recycled page holds an extra reference.
+ */
+ if (page_ref_count(page) == 1) {
+ dma_addr &= PAGE_MASK;
+ dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+ RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
put_page(page);
return true;
}
@@ -1864,50 +1852,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
}
}
-static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp)
-{
- struct nicvf *nic = netdev_priv(netdev);
- struct nicvf *snic = nic;
- struct nicvf_xdp_tx *xdp_tx;
- struct snd_queue *sq;
- struct page *page;
- int err, qidx;
-
- if (!netif_running(netdev) || !nic->xdp_prog)
- return -EINVAL;
-
- page = virt_to_page(xdp->data);
- xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
- qidx = xdp_tx->qidx;
-
- if (xdp_tx->qidx >= nic->xdp_tx_queues)
- return -EINVAL;
-
- /* Get secondary Qset's info */
- if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) {
- qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS;
- snic = (struct nicvf *)nic->snicvf[qidx - 1];
- if (!snic)
- return -EINVAL;
- qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS;
- }
-
- sq = &snic->qs->sq[qidx];
- err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data,
- xdp_tx->dma_addr,
- xdp->data_end - xdp->data);
- if (err)
- return -ENOMEM;
-
- nicvf_xdp_sq_doorbell(snic, sq, qidx);
- return 0;
-}
-
-static void nicvf_xdp_flush(struct net_device *dev)
-{
- return;
-}
-
static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
{
struct hwtstamp_config config;
@@ -1986,8 +1930,6 @@ static const struct net_device_ops nicvf_netdev_ops = {
.ndo_fix_features = nicvf_fix_features,
.ndo_set_features = nicvf_set_features,
.ndo_bpf = nicvf_xdp,
- .ndo_xdp_xmit = nicvf_xdp_xmit,
- .ndo_xdp_flush = nicvf_xdp_flush,
.ndo_do_ioctl = nicvf_ioctl,
};
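Both the XDP_PASS and XDP_DROP paths above open-code the test that the removed nicvf_unmap_page() used to perform: a recycled receive page holds an extra reference, so only a page whose refcount is back to one still owns its DMA mapping. A sketch of that check (the helper name is an assumption of this sketch):

#include <linux/dma-mapping.h>
#include <linux/mm.h>

static void maybe_unmap_rx_page(struct device *dev, struct page *page,
				dma_addr_t dma_addr, u32 buf_len)
{
	/* refcount == 1 means the page was not recycled, so the
	 * DMA mapping is still ours to tear down.
	 */
	if (page_ref_count(page) != 1)
		return;

	dma_unmap_page_attrs(dev, dma_addr & PAGE_MASK, buf_len,
			     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
}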
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 3eae9ff9b53a..d42704d07484 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -204,7 +204,7 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
/* Reserve space for header modifications by BPF program */
if (rbdr->is_xdp)
- buf_len += XDP_HEADROOM;
+ buf_len += XDP_PACKET_HEADROOM;
/* Check if it's recycled */
if (pgcache)
@@ -224,9 +224,8 @@ ret:
nic->rb_page = NULL;
return -ENOMEM;
}
-
if (pgcache)
- pgcache->dma_addr = *rbuf + XDP_HEADROOM;
+ pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
nic->rb_page_offset += buf_len;
}
@@ -1244,7 +1243,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
int qentry;
if (subdesc_cnt > sq->xdp_free_cnt)
- return -1;
+ return 0;
qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
@@ -1255,7 +1254,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
sq->xdp_desc_cnt += subdesc_cnt;
- return 0;
+ return 1;
}
/* Calculate no of SQ subdescriptors needed to transmit all
@@ -1656,7 +1655,7 @@ static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
if (page_ref_count(page) != 1)
return;
- len += XDP_HEADROOM;
+ len += XDP_PACKET_HEADROOM;
/* Receive buffers in XDP mode are mapped from page start */
dma_addr &= PAGE_MASK;
}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index ce1eed7a6d63..5e9a03cf1b4d 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -11,7 +11,6 @@
#include <linux/netdevice.h>
#include <linux/iommu.h>
-#include <linux/bpf.h>
#include <net/xdp.h>
#include "q_struct.h"
@@ -94,9 +93,6 @@
#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
-#define RCV_BUF_HEADROOM 128 /* To store dma address for XDP redirect */
-#define XDP_HEADROOM (XDP_PACKET_HEADROOM + RCV_BUF_HEADROOM)
-
#define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
MAX_CQE_PER_PKT_XMIT)
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 185fe8df7628..2edfdbdaae48 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -776,11 +776,11 @@ static ssize_t store_nservers(struct device *d, struct device_attribute *attr,
#define CXGB3_ATTR_R(name, val_expr) \
CXGB3_SHOW(name, val_expr) \
-static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
#define CXGB3_ATTR_RW(name, val_expr, store_method) \
CXGB3_SHOW(name, val_expr) \
-static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_method)
+static DEVICE_ATTR(name, 0644, show_##name, store_method)
CXGB3_ATTR_R(cam_size, t3_mc5_size(&adap->mc5));
CXGB3_ATTR_RW(nfilters, adap->params.mc5.nfilters, store_nfilters);
@@ -859,7 +859,7 @@ static ssize_t store_##name(struct device *d, struct device_attribute *attr, \
{ \
return tm_attr_store(d, buf, len, sched); \
} \
-static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_##name)
+static DEVICE_ATTR(name, 0644, show_##name, store_##name)
TM_ATTR(sched0, 0);
TM_ATTR(sched1, 1);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
index a89721fad633..080918af773c 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
@@ -681,18 +681,18 @@ int t3_seeprom_wp(struct adapter *adapter, int enable)
return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0);
}
-static int vpdstrtouint(char *s, int len, unsigned int base, unsigned int *val)
+static int vpdstrtouint(char *s, u8 len, unsigned int base, unsigned int *val)
{
- char tok[len + 1];
+ char tok[256];
memcpy(tok, s, len);
tok[len] = 0;
return kstrtouint(strim(tok), base, val);
}
-static int vpdstrtou16(char *s, int len, unsigned int base, u16 *val)
+static int vpdstrtou16(char *s, u8 len, unsigned int base, u16 *val)
{
- char tok[len + 1];
+ char tok[256];
memcpy(tok, s, len);
tok[len] = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 53b6a02c778e..bea6a059a8f1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -6,7 +6,7 @@
obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
- cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
+ cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
cudbg_common.o cudbg_lib.o cudbg_zlib.o
cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 557fd8bfd54e..9da6f57901a9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -472,7 +472,7 @@ int cudbg_collect_cim_la(struct cudbg_init *pdbg_init,
if (is_t6(padap->params.chip)) {
size = padap->params.cim_la_size / 10 + 1;
- size *= 11 * sizeof(u32);
+ size *= 10 * sizeof(u32);
} else {
size = padap->params.cim_la_size / 8;
size *= 8 * sizeof(u32);
@@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
&payload->start, &payload->end);
}
+static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
+ int mtype, u32 addr, u32 len, void *hbuf)
+{
+ u32 win_pf, memoffset, mem_aperture, mem_base;
+ struct adapter *adap = pdbg_init->adap;
+ u32 pos, offset, resid;
+ u32 *res_buf;
+ u64 *buf;
+ int ret;
+
+ /* Argument sanity checks ...
+ */
+ if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
+ return -EINVAL;
+
+ buf = (u64 *)hbuf;
+
+ /* Try to do 64-bit reads. Residual will be handled later. */
+ resid = len & 0x7;
+ len -= resid;
+
+ ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+ &mem_aperture);
+ if (ret)
+ return ret;
+
+ addr = addr + memoffset;
+ win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
+
+ pos = addr & ~(mem_aperture - 1);
+ offset = addr - pos;
+
+ /* Set up initial PCI-E Memory Window to cover the start of our
+ * transfer.
+ */
+ t4_memory_update_win(adap, win, pos | win_pf);
+
+ /* Transfer data from the adapter */
+ while (len > 0) {
+ *buf++ = le64_to_cpu((__force __le64)
+ t4_read_reg64(adap, mem_base + offset));
+ offset += sizeof(u64);
+ len -= sizeof(u64);
+
+ /* If we've reached the end of our current window aperture,
+ * move the PCI-E Memory Window on to the next.
+ */
+ if (offset == mem_aperture) {
+ pos += mem_aperture;
+ offset = 0;
+ t4_memory_update_win(adap, win, pos | win_pf);
+ }
+ }
+
+ res_buf = (u32 *)buf;
+ /* Read residual in 32-bit multiples */
+ while (resid > sizeof(u32)) {
+ *res_buf++ = le32_to_cpu((__force __le32)
+ t4_read_reg(adap, mem_base + offset));
+ offset += sizeof(u32);
+ resid -= sizeof(u32);
+
+ /* If we've reached the end of our current window aperture,
+ * move the PCI-E Memory Window on to the next.
+ */
+ if (offset == mem_aperture) {
+ pos += mem_aperture;
+ offset = 0;
+ t4_memory_update_win(adap, win, pos | win_pf);
+ }
+ }
+
+ /* Transfer residual < 32-bits */
+ if (resid)
+ t4_memory_rw_residual(adap, resid, mem_base + offset,
+ (u8 *)res_buf, T4_MEMORY_READ);
+
+ return 0;
+}
+
#define CUDBG_YIELD_ITERATION 256
static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
goto skip_read;
spin_lock(&padap->win0_lock);
- rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
- bytes_read, bytes,
- (__be32 *)temp_buff.data,
- 1);
+ rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
+ bytes_read, bytes, temp_buff.data);
spin_unlock(&padap->win0_lock);
if (rc) {
cudbg_err->sys_err = rc;
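cudbg_memory_read() above replaces the general-purpose t4_memory_rw() with a read-only fast path: 64-bit reads through a sliding PCI-E memory window, then a 32-bit pass, then a byte-wise tail. A self-contained sketch of the window-walking loop, where move_window() and read_u64() stand in for t4_memory_update_win() and t4_read_reg64() (both stand-ins are assumptions of this sketch, and the residual passes are elided):

#include <linux/types.h>

/* Stand-ins for the adapter accessors; assumptions of this sketch. */
void move_window(u32 pos);
u64 read_u64(u32 offset);

static void window_read(u64 *buf, u32 addr, u32 len, u32 aperture)
{
	u32 pos = addr & ~(aperture - 1);	/* align to the window */
	u32 offset = addr - pos;

	move_window(pos);
	while (len >= sizeof(u64)) {
		*buf++ = read_u64(offset);
		offset += sizeof(u64);
		len -= sizeof(u64);

		/* slide the window once the aperture is exhausted */
		if (offset == aperture) {
			pos += aperture;
			offset = 0;
			move_window(pos);
		}
	}
	/* residual bytes handled separately, as in the driver */
}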
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9040e13ce4b7..688f95440af2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -390,6 +390,8 @@ struct adapter_params {
* used by the Port
*/
u8 mps_bg_map[MAX_NPORTS]; /* MPS Buffer Group Map */
+ bool write_w_imm_support; /* FW supports WRITE_WITH_IMMEDIATE */
+ bool write_cmpl_support; /* FW supports WRITE_CMPL */
};
/* State needed to monitor the forward progress of SGE Ingress DMA activities
@@ -831,6 +833,16 @@ struct vf_info {
u16 vlan;
};
+enum {
+ HMA_DMA_MAPPED_FLAG = 1
+};
+
+struct hma_data {
+ unsigned char flags;
+ struct sg_table *sgt;
+ dma_addr_t *phy_addr; /* physical address of the page */
+};
+
struct mbox_list {
struct list_head list;
};
@@ -907,6 +919,7 @@ struct adapter {
struct work_struct tid_release_task;
struct work_struct db_full_task;
struct work_struct db_drop_task;
+ struct work_struct fatal_err_notify_task;
bool tid_release_task_busy;
/* lock for mailbox cmd list */
@@ -946,6 +959,11 @@ struct adapter {
/* Ethtool Dump */
struct ethtool_dump eth_dump;
+
+ /* HMA */
+ struct hma_data hma;
+
+ struct srq_data *srq;
};
/* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1488,6 +1506,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
u32 t4_get_util_window(struct adapter *adap);
void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+ u32 *mem_base, u32 *mem_aperture);
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+ int dir);
#define T4_MEMORY_WRITE 0
#define T4_MEMORY_READ 1
int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 30485f9a598f..143686c60234 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -102,7 +102,7 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
case CUDBG_CIM_LA:
if (is_t6(adap->params.chip)) {
len = adap->params.cim_la_size / 10 + 1;
- len *= 11 * sizeof(u32);
+ len *= 10 * sizeof(u32);
} else {
len = adap->params.cim_la_size / 8;
len *= 8 * sizeof(u32);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 2822bbff73e8..251d5bdc972f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2617,7 +2617,7 @@ int mem_open(struct inode *inode, struct file *file)
file->private_data = inode->i_private;
- mem = (uintptr_t)file->private_data & 0x3;
+ mem = (uintptr_t)file->private_data & 0x7;
adap = file->private_data - mem;
(void)t4_fwcache(adap, FW_PARAM_DEV_FWCACHE_FLUSH);
@@ -2630,7 +2630,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
{
loff_t pos = *ppos;
loff_t avail = file_inode(file)->i_size;
- unsigned int mem = (uintptr_t)file->private_data & 3;
+ unsigned int mem = (uintptr_t)file->private_data & 0x7;
struct adapter *adap = file->private_data - mem;
__be32 *data;
int ret;
@@ -2752,7 +2752,7 @@ DEFINE_SIMPLE_DEBUGFS_FILE(tid_info);
static void add_debugfs_mem(struct adapter *adap, const char *name,
unsigned int idx, unsigned int size_mb)
{
- debugfs_create_file_size(name, S_IRUSR, adap->debugfs_root,
+ debugfs_create_file_size(name, 0400, adap->debugfs_root,
(void *)adap + idx, &mem_debugfs_fops,
size_mb << 20);
}
@@ -2947,65 +2947,65 @@ int t4_setup_debugfs(struct adapter *adap)
struct dentry *de;
static struct t4_debugfs_entry t4_debugfs_files[] = {
- { "cim_la", &cim_la_fops, S_IRUSR, 0 },
- { "cim_pif_la", &cim_pif_la_fops, S_IRUSR, 0 },
- { "cim_ma_la", &cim_ma_la_fops, S_IRUSR, 0 },
- { "cim_qcfg", &cim_qcfg_fops, S_IRUSR, 0 },
- { "clk", &clk_debugfs_fops, S_IRUSR, 0 },
- { "devlog", &devlog_fops, S_IRUSR, 0 },
- { "mboxlog", &mboxlog_fops, S_IRUSR, 0 },
- { "mbox0", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 0 },
- { "mbox1", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 1 },
- { "mbox2", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 2 },
- { "mbox3", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 3 },
- { "mbox4", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 4 },
- { "mbox5", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 5 },
- { "mbox6", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 6 },
- { "mbox7", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 7 },
- { "trace0", &mps_trc_debugfs_fops, S_IRUSR | S_IWUSR, 0 },
- { "trace1", &mps_trc_debugfs_fops, S_IRUSR | S_IWUSR, 1 },
- { "trace2", &mps_trc_debugfs_fops, S_IRUSR | S_IWUSR, 2 },
- { "trace3", &mps_trc_debugfs_fops, S_IRUSR | S_IWUSR, 3 },
- { "l2t", &t4_l2t_fops, S_IRUSR, 0},
- { "mps_tcam", &mps_tcam_debugfs_fops, S_IRUSR, 0 },
- { "rss", &rss_debugfs_fops, S_IRUSR, 0 },
- { "rss_config", &rss_config_debugfs_fops, S_IRUSR, 0 },
- { "rss_key", &rss_key_debugfs_fops, S_IRUSR, 0 },
- { "rss_pf_config", &rss_pf_config_debugfs_fops, S_IRUSR, 0 },
- { "rss_vf_config", &rss_vf_config_debugfs_fops, S_IRUSR, 0 },
- { "sge_qinfo", &sge_qinfo_debugfs_fops, S_IRUSR, 0 },
- { "ibq_tp0", &cim_ibq_fops, S_IRUSR, 0 },
- { "ibq_tp1", &cim_ibq_fops, S_IRUSR, 1 },
- { "ibq_ulp", &cim_ibq_fops, S_IRUSR, 2 },
- { "ibq_sge0", &cim_ibq_fops, S_IRUSR, 3 },
- { "ibq_sge1", &cim_ibq_fops, S_IRUSR, 4 },
- { "ibq_ncsi", &cim_ibq_fops, S_IRUSR, 5 },
- { "obq_ulp0", &cim_obq_fops, S_IRUSR, 0 },
- { "obq_ulp1", &cim_obq_fops, S_IRUSR, 1 },
- { "obq_ulp2", &cim_obq_fops, S_IRUSR, 2 },
- { "obq_ulp3", &cim_obq_fops, S_IRUSR, 3 },
- { "obq_sge", &cim_obq_fops, S_IRUSR, 4 },
- { "obq_ncsi", &cim_obq_fops, S_IRUSR, 5 },
- { "tp_la", &tp_la_fops, S_IRUSR, 0 },
- { "ulprx_la", &ulprx_la_fops, S_IRUSR, 0 },
- { "sensors", &sensors_debugfs_fops, S_IRUSR, 0 },
- { "pm_stats", &pm_stats_debugfs_fops, S_IRUSR, 0 },
- { "tx_rate", &tx_rate_debugfs_fops, S_IRUSR, 0 },
- { "cctrl", &cctrl_tbl_debugfs_fops, S_IRUSR, 0 },
+ { "cim_la", &cim_la_fops, 0400, 0 },
+ { "cim_pif_la", &cim_pif_la_fops, 0400, 0 },
+ { "cim_ma_la", &cim_ma_la_fops, 0400, 0 },
+ { "cim_qcfg", &cim_qcfg_fops, 0400, 0 },
+ { "clk", &clk_debugfs_fops, 0400, 0 },
+ { "devlog", &devlog_fops, 0400, 0 },
+ { "mboxlog", &mboxlog_fops, 0400, 0 },
+ { "mbox0", &mbox_debugfs_fops, 0600, 0 },
+ { "mbox1", &mbox_debugfs_fops, 0600, 1 },
+ { "mbox2", &mbox_debugfs_fops, 0600, 2 },
+ { "mbox3", &mbox_debugfs_fops, 0600, 3 },
+ { "mbox4", &mbox_debugfs_fops, 0600, 4 },
+ { "mbox5", &mbox_debugfs_fops, 0600, 5 },
+ { "mbox6", &mbox_debugfs_fops, 0600, 6 },
+ { "mbox7", &mbox_debugfs_fops, 0600, 7 },
+ { "trace0", &mps_trc_debugfs_fops, 0600, 0 },
+ { "trace1", &mps_trc_debugfs_fops, 0600, 1 },
+ { "trace2", &mps_trc_debugfs_fops, 0600, 2 },
+ { "trace3", &mps_trc_debugfs_fops, 0600, 3 },
+ { "l2t", &t4_l2t_fops, 0400, 0},
+ { "mps_tcam", &mps_tcam_debugfs_fops, 0400, 0 },
+ { "rss", &rss_debugfs_fops, 0400, 0 },
+ { "rss_config", &rss_config_debugfs_fops, 0400, 0 },
+ { "rss_key", &rss_key_debugfs_fops, 0400, 0 },
+ { "rss_pf_config", &rss_pf_config_debugfs_fops, 0400, 0 },
+ { "rss_vf_config", &rss_vf_config_debugfs_fops, 0400, 0 },
+ { "sge_qinfo", &sge_qinfo_debugfs_fops, 0400, 0 },
+ { "ibq_tp0", &cim_ibq_fops, 0400, 0 },
+ { "ibq_tp1", &cim_ibq_fops, 0400, 1 },
+ { "ibq_ulp", &cim_ibq_fops, 0400, 2 },
+ { "ibq_sge0", &cim_ibq_fops, 0400, 3 },
+ { "ibq_sge1", &cim_ibq_fops, 0400, 4 },
+ { "ibq_ncsi", &cim_ibq_fops, 0400, 5 },
+ { "obq_ulp0", &cim_obq_fops, 0400, 0 },
+ { "obq_ulp1", &cim_obq_fops, 0400, 1 },
+ { "obq_ulp2", &cim_obq_fops, 0400, 2 },
+ { "obq_ulp3", &cim_obq_fops, 0400, 3 },
+ { "obq_sge", &cim_obq_fops, 0400, 4 },
+ { "obq_ncsi", &cim_obq_fops, 0400, 5 },
+ { "tp_la", &tp_la_fops, 0400, 0 },
+ { "ulprx_la", &ulprx_la_fops, 0400, 0 },
+ { "sensors", &sensors_debugfs_fops, 0400, 0 },
+ { "pm_stats", &pm_stats_debugfs_fops, 0400, 0 },
+ { "tx_rate", &tx_rate_debugfs_fops, 0400, 0 },
+ { "cctrl", &cctrl_tbl_debugfs_fops, 0400, 0 },
#if IS_ENABLED(CONFIG_IPV6)
- { "clip_tbl", &clip_tbl_debugfs_fops, S_IRUSR, 0 },
+ { "clip_tbl", &clip_tbl_debugfs_fops, 0400, 0 },
#endif
- { "tids", &tid_info_debugfs_fops, S_IRUSR, 0},
- { "blocked_fl", &blocked_fl_fops, S_IRUSR | S_IWUSR, 0 },
- { "meminfo", &meminfo_fops, S_IRUSR, 0 },
- { "crypto", &chcr_stats_debugfs_fops, S_IRUSR, 0 },
+ { "tids", &tid_info_debugfs_fops, 0400, 0},
+ { "blocked_fl", &blocked_fl_fops, 0600, 0 },
+ { "meminfo", &meminfo_fops, 0400, 0 },
+ { "crypto", &chcr_stats_debugfs_fops, 0400, 0 },
};
/* Debug FS nodes common to all T5 and later adapters.
*/
static struct t4_debugfs_entry t5_debugfs_files[] = {
- { "obq_sge_rx_q0", &cim_obq_fops, S_IRUSR, 6 },
- { "obq_sge_rx_q1", &cim_obq_fops, S_IRUSR, 7 },
+ { "obq_sge_rx_q0", &cim_obq_fops, 0400, 6 },
+ { "obq_sge_rx_q1", &cim_obq_fops, 0400, 7 },
};
add_debugfs_files(adap,
@@ -3042,13 +3042,19 @@ int t4_setup_debugfs(struct adapter *adap)
add_debugfs_mem(adap, "mc", MEM_MC,
EXT_MEM_SIZE_G(size));
}
+
+ if (i & HMA_MUX_F) {
+ size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+ add_debugfs_mem(adap, "hma", MEM_HMA,
+ EXT_MEM1_SIZE_G(size));
+ }
}
- de = debugfs_create_file_size("flash", S_IRUSR, adap->debugfs_root, adap,
+ de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
&flash_debugfs_fops, adap->params.sf_size);
- debugfs_create_bool("use_backdoor", S_IWUSR | S_IRUSR,
+ debugfs_create_bool("use_backdoor", 0600,
adap->debugfs_root, &adap->use_bd);
- debugfs_create_bool("trace_rss", S_IWUSR | S_IRUSR,
+ debugfs_create_bool("trace_rss", 0600,
adap->debugfs_root, &adap->trace_rss);
return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 7852d98bad75..59d04d73c672 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -597,22 +597,22 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_KR:
SET_LMM(Backplane);
- SET_LMM(10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
break;
case FW_PORT_TYPE_BP_AP:
SET_LMM(Backplane);
- SET_LMM(10000baseR_FEC);
- SET_LMM(10000baseKR_Full);
- SET_LMM(1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
break;
case FW_PORT_TYPE_BP4_AP:
SET_LMM(Backplane);
- SET_LMM(10000baseR_FEC);
- SET_LMM(10000baseKR_Full);
- SET_LMM(1000baseKX_Full);
- SET_LMM(10000baseKX4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
break;
case FW_PORT_TYPE_FIBER_XFI:
@@ -628,7 +628,9 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_BP40_BA:
case FW_PORT_TYPE_QSFP:
SET_LMM(FIBRE);
- SET_LMM(40000baseSR4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
break;
case FW_PORT_TYPE_CR_QSFP:
@@ -655,12 +657,14 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_CR2_QSFP:
SET_LMM(FIBRE);
- SET_LMM(50000baseSR2_Full);
+ FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
break;
case FW_PORT_TYPE_KR4_100G:
case FW_PORT_TYPE_CR4_QSFP:
SET_LMM(FIBRE);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
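
For reference, SET_LMM() marks an ethtool link mode as supported unconditionally, while FW_CAPS_TO_LMM() does so only when the firmware port capabilities advertise the matching speed -- which is why the hunks above stop hard-coding modes a given transceiver may not actually support. A sketch of the two helpers (the real definitions live earlier in this file and may differ in detail):

#define SET_LMM(__lmm_name) \
	__set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
		  link_mode_mask)

#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
	do { \
		if (fw_caps & FW_PORT_CAP_ ## __fw_name) \
			SET_LMM(__lmm_name); \
	} while (0)
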
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 3177b0c9bd2d..db92f1858060 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
return ret;
}
- /* Clear out any old resources being used by the filter before
- * we start constructing the new filter.
- */
- if (f->valid)
- clear_filter(adapter, f);
-
if (is_t6(adapter->params.chip) && fs->type &&
ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
IPV6_ADDR_ANY) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 56bc626ef006..57d38f8ed455 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -75,6 +75,7 @@
#include "t4fw_api.h"
#include "t4fw_version.h"
#include "cxgb4_dcb.h"
+#include "srq.h"
#include "cxgb4_debugfs.h"
#include "clip_tbl.h"
#include "l2t.h"
@@ -210,6 +211,9 @@ static void link_report(struct net_device *dev)
case 40000:
s = "40Gbps";
break;
+ case 50000:
+ s = "50Gbps";
+ break;
case 100000:
s = "100Gbps";
break;
@@ -583,6 +587,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
const struct cpl_abort_rpl_rss *p = (void *)rsp;
hash_del_filter_rpl(q->adap, p);
+ } else if (opcode == CPL_SRQ_TABLE_RPL) {
+ const struct cpl_srq_table_rpl *p = (void *)rsp;
+
+ do_srq_table_rpl(q->adap, p);
} else
dev_err(q->adap->pdev_dev,
"unexpected CPL %#x on FW event queue\n", opcode);
@@ -833,8 +841,6 @@ static int setup_fw_sge_queues(struct adapter *adap)
err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
- if (err)
- t4_free_sge_resources(adap);
return err;
}
@@ -1733,10 +1739,11 @@ EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
{
- struct adapter *adap;
- u32 offset, memtype, memaddr;
u32 edc0_size, edc1_size, mc0_size, mc1_size, size;
u32 edc0_end, edc1_end, mc0_end, mc1_end;
+ u32 offset, memtype, memaddr;
+ struct adapter *adap;
+ u32 hma_size = 0;
int ret;
adap = netdev2adap(dev);
@@ -1756,6 +1763,10 @@ int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
size = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
mc0_size = EXT_MEM0_SIZE_G(size) << 20;
+ if (t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A) & HMA_MUX_F) {
+ size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+ hma_size = EXT_MEM1_SIZE_G(size) << 20;
+ }
edc0_end = edc0_size;
edc1_end = edc0_end + edc1_size;
mc0_end = edc1_end + mc0_size;
@@ -1767,7 +1778,10 @@ int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
memtype = MEM_EDC1;
memaddr = offset - edc0_end;
} else {
- if (offset < mc0_end) {
+ if (hma_size && (offset < (edc1_end + hma_size))) {
+ memtype = MEM_HMA;
+ memaddr = offset - edc1_end;
+ } else if (offset < mc0_end) {
memtype = MEM_MC0;
memaddr = offset - edc1_end;
} else if (is_t5(adap->params.chip)) {
@@ -2681,13 +2695,17 @@ static int cxgb4_mgmt_get_vf_config(struct net_device *dev,
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adap = pi->adapter;
+ struct vf_info *vfinfo;
if (vf >= adap->num_vfs)
return -EINVAL;
+ vfinfo = &adap->vfinfo[vf];
+
ivi->vf = vf;
- ivi->max_tx_rate = adap->vfinfo[vf].tx_rate;
+ ivi->max_tx_rate = vfinfo->tx_rate;
ivi->min_tx_rate = 0;
- ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
+ ether_addr_copy(ivi->mac, vfinfo->vf_mac_addr);
+ ivi->vlan = vfinfo->vlan;
return 0;
}
@@ -2870,11 +2888,11 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
/* Convert from Mbps to Kbps */
req_rate = rate << 10;
- /* Max rate is 10 Gbps */
+ /* Max rate is 100 Gbps */
if (req_rate >= SCHED_MAX_RATE_KBPS) {
dev_err(adap->pdev_dev,
- "Invalid rate %u Mbps, Max rate is %u Gbps\n",
- rate, SCHED_MAX_RATE_KBPS);
+ "Invalid rate %u Mbps, Max rate is %u Mbps\n",
+ rate, SCHED_MAX_RATE_KBPS >> 10);
return -ERANGE;
}
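
A worked check of the arithmetic above: req_rate = rate << 10 multiplies by 1024 rather than 1000, so the bound is tested in binary kilobits. A request of 100000 Mbps becomes 102400000 Kbps, which fails the >= SCHED_MAX_RATE_KBPS (100000000) test; the effective ceiling is therefore 97656 Mbps.
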
@@ -3244,6 +3262,14 @@ static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
.get_drvinfo = cxgb4_mgmt_get_drvinfo,
};
+static void notify_fatal_err(struct work_struct *work)
+{
+ struct adapter *adap;
+
+ adap = container_of(work, struct adapter, fatal_err_notify_task);
+ notify_ulds(adap, CXGB4_STATE_FATAL_ERROR);
+}
+
void t4_fatal_err(struct adapter *adap)
{
int port;
@@ -3268,6 +3294,7 @@ void t4_fatal_err(struct adapter *adap)
netif_carrier_off(dev);
}
dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
+ queue_work(adap->workq, &adap->fatal_err_notify_task);
}
static void setup_memwin(struct adapter *adap)
@@ -3298,6 +3325,206 @@ static void setup_memwin_rdma(struct adapter *adap)
}
}
+/* HMA Definitions */
+
+/* The maximum number of addresses that can be sent in a single FW cmd */
+#define HMA_MAX_ADDR_IN_CMD 5
+
+#define HMA_PAGE_SIZE PAGE_SIZE
+
+#define HMA_MAX_NO_FW_ADDRESS (16 << 10) /* FW supports 16K addresses */
+
+#define HMA_PAGE_ORDER \
+ ((HMA_PAGE_SIZE < HMA_MAX_NO_FW_ADDRESS) ? \
+ ilog2(HMA_MAX_NO_FW_ADDRESS / HMA_PAGE_SIZE) : 0)
+
+/* The minimum and maximum possible HMA sizes that can be specified in the FW
+ * configuration (in units of MB).
+ */
+#define HMA_MIN_TOTAL_SIZE 1
+#define HMA_MAX_TOTAL_SIZE \
+ (((HMA_PAGE_SIZE << HMA_PAGE_ORDER) * \
+ HMA_MAX_NO_FW_ADDRESS) >> 20)
+
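
To make the sizing concrete (a worked example for 4 KiB pages, not part of the patch): HMA_PAGE_ORDER = ilog2(16384 / 4096) = 2, so each allocation chunk is 4 KiB << 2 = 16 KiB, and HMA_MAX_TOTAL_SIZE = (16 KiB * 16384) >> 20 = 256 MB -- the firmware's 16K-address ceiling bounds how much host memory can be handed to a memory-free adapter.
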
+static void adap_free_hma_mem(struct adapter *adapter)
+{
+ struct scatterlist *iter;
+ struct page *page;
+ int i;
+
+ if (!adapter->hma.sgt)
+ return;
+
+ if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) {
+ dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl,
+ adapter->hma.sgt->nents, PCI_DMA_BIDIRECTIONAL);
+ adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG;
+ }
+
+ for_each_sg(adapter->hma.sgt->sgl, iter,
+ adapter->hma.sgt->orig_nents, i) {
+ page = sg_page(iter);
+ if (page)
+ __free_pages(page, HMA_PAGE_ORDER);
+ }
+
+ kfree(adapter->hma.phy_addr);
+ sg_free_table(adapter->hma.sgt);
+ kfree(adapter->hma.sgt);
+ adapter->hma.sgt = NULL;
+}
+
+static int adap_config_hma(struct adapter *adapter)
+{
+ struct scatterlist *sgl, *iter;
+ struct sg_table *sgt;
+ struct page *newpage;
+ unsigned int i, j, k;
+ u32 param, hma_size;
+ unsigned int ncmds;
+ size_t page_size;
+ u32 page_order;
+ int node, ret;
+
+ /* HMA is supported only for T6+ cards.
+ * Avoid initializing HMA in kdump kernels.
+ */
+ if (is_kdump_kernel() ||
+ CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6)
+ return 0;
+
+ /* Get the HMA region size required by fw */
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HMA_SIZE));
+ ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+ 1, &param, &hma_size);
+ /* An error means the card has its own memory or HMA is not supported
+ * by the firmware. Return without an error.
+ */
+ if (ret || !hma_size)
+ return 0;
+
+ if (hma_size < HMA_MIN_TOTAL_SIZE ||
+ hma_size > HMA_MAX_TOTAL_SIZE) {
+ dev_err(adapter->pdev_dev,
+ "HMA size %uMB beyond bounds(%u-%lu)MB\n",
+ hma_size, HMA_MIN_TOTAL_SIZE, HMA_MAX_TOTAL_SIZE);
+ return -EINVAL;
+ }
+
+ page_size = HMA_PAGE_SIZE;
+ page_order = HMA_PAGE_ORDER;
+ adapter->hma.sgt = kzalloc(sizeof(*adapter->hma.sgt), GFP_KERNEL);
+ if (unlikely(!adapter->hma.sgt)) {
+ dev_err(adapter->pdev_dev, "HMA SG table allocation failed\n");
+ return -ENOMEM;
+ }
+ sgt = adapter->hma.sgt;
+ /* FW returned value will be in MB */
+ sgt->orig_nents = (hma_size << 20) / (page_size << page_order);
+ if (sg_alloc_table(sgt, sgt->orig_nents, GFP_KERNEL)) {
+ dev_err(adapter->pdev_dev, "HMA SGL allocation failed\n");
+ kfree(adapter->hma.sgt);
+ adapter->hma.sgt = NULL;
+ return -ENOMEM;
+ }
+
+ sgl = adapter->hma.sgt->sgl;
+ node = dev_to_node(adapter->pdev_dev);
+ for_each_sg(sgl, iter, sgt->orig_nents, i) {
+ newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL,
+ page_order);
+ if (!newpage) {
+ dev_err(adapter->pdev_dev,
+ "Not enough memory for HMA page allocation\n");
+ ret = -ENOMEM;
+ goto free_hma;
+ }
+ sg_set_page(iter, newpage, page_size << page_order, 0);
+ }
+
+ sgt->nents = dma_map_sg(adapter->pdev_dev, sgl, sgt->orig_nents,
+ DMA_BIDIRECTIONAL);
+ if (!sgt->nents) {
+ dev_err(adapter->pdev_dev,
+ "Not enough memory for HMA DMA mapping");
+ ret = -ENOMEM;
+ goto free_hma;
+ }
+ adapter->hma.flags |= HMA_DMA_MAPPED_FLAG;
+
+ adapter->hma.phy_addr = kcalloc(sgt->nents, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (unlikely(!adapter->hma.phy_addr))
+ goto free_hma;
+
+ for_each_sg(sgl, iter, sgt->nents, i) {
+ newpage = sg_page(iter);
+ adapter->hma.phy_addr[i] = sg_dma_address(iter);
+ }
+
+ ncmds = DIV_ROUND_UP(sgt->nents, HMA_MAX_ADDR_IN_CMD);
+ /* Pass on the addresses to firmware */
+ for (i = 0, k = 0; i < ncmds; i++, k += HMA_MAX_ADDR_IN_CMD) {
+ struct fw_hma_cmd hma_cmd;
+ u8 naddr = HMA_MAX_ADDR_IN_CMD;
+ u8 soc = 0, eoc = 0;
+ u8 hma_mode = 1; /* Presently we support only Page table mode */
+
+ soc = (i == 0) ? 1 : 0;
+ eoc = (i == ncmds - 1) ? 1 : 0;
+
+ /* For last cmd, set naddr corresponding to remaining
+ * addresses
+ */
+ if (i == ncmds - 1) {
+ naddr = sgt->nents % HMA_MAX_ADDR_IN_CMD;
+ naddr = naddr ? naddr : HMA_MAX_ADDR_IN_CMD;
+ }
+ memset(&hma_cmd, 0, sizeof(hma_cmd));
+ hma_cmd.op_pkd = htonl(FW_CMD_OP_V(FW_HMA_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
+ hma_cmd.retval_len16 = htonl(FW_LEN16(hma_cmd));
+
+ hma_cmd.mode_to_pcie_params =
+ htonl(FW_HMA_CMD_MODE_V(hma_mode) |
+ FW_HMA_CMD_SOC_V(soc) | FW_HMA_CMD_EOC_V(eoc));
+
+ /* HMA cmd size specified in MB */
+ hma_cmd.naddr_size =
+ htonl(FW_HMA_CMD_SIZE_V(hma_size) |
+ FW_HMA_CMD_NADDR_V(naddr));
+
+ /* Total Page size specified in units of 4K */
+ hma_cmd.addr_size_pkd =
+ htonl(FW_HMA_CMD_ADDR_SIZE_V
+ ((page_size << page_order) >> 12));
+
+ /* Fill the 5 addresses */
+ for (j = 0; j < naddr; j++) {
+ hma_cmd.phy_address[j] =
+ cpu_to_be64(adapter->hma.phy_addr[j + k]);
+ }
+ ret = t4_wr_mbox(adapter, adapter->mbox, &hma_cmd,
+ sizeof(hma_cmd), &hma_cmd);
+ if (ret) {
+ dev_err(adapter->pdev_dev,
+ "HMA FW command failed with err %d\n", ret);
+ goto free_hma;
+ }
+ }
+
+ if (!ret)
+ dev_info(adapter->pdev_dev,
+ "Reserved %uMB host memory for HMA\n", hma_size);
+ return ret;
+
+free_hma:
+ adap_free_hma_mem(adapter);
+ return ret;
+}
+
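
The mailbox loop above hands the DMA addresses to firmware in batches of HMA_MAX_ADDR_IN_CMD, flagging the first command with SOC and the last with EOC. A small stand-alone model of just the batching arithmetic (illustrative only; nents = 17 is an arbitrary example):

#include <stdio.h>

#define HMA_MAX_ADDR_IN_CMD 5

int main(void)
{
	unsigned int nents = 17;	/* example: 17 mapped DMA chunks */
	unsigned int ncmds = (nents + HMA_MAX_ADDR_IN_CMD - 1) /
			     HMA_MAX_ADDR_IN_CMD;
	unsigned int i;

	for (i = 0; i < ncmds; i++) {
		unsigned int soc = (i == 0);		/* start of cmds */
		unsigned int eoc = (i == ncmds - 1);	/* end of cmds */
		unsigned int naddr = HMA_MAX_ADDR_IN_CMD;

		/* The last command carries only the leftover addresses. */
		if (eoc) {
			naddr = nents % HMA_MAX_ADDR_IN_CMD;
			if (!naddr)
				naddr = HMA_MAX_ADDR_IN_CMD;
		}
		printf("cmd %u: soc=%u eoc=%u naddr=%u\n", i, soc, eoc, naddr);
	}
	return 0;
}
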
static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
{
u32 v;
@@ -3751,6 +3978,12 @@ static int adap_init0_config(struct adapter *adapter, int reset)
if (ret < 0)
goto bye;
+ /* We will proceed even if HMA init fails. */
+ ret = adap_config_hma(adapter);
+ if (ret)
+ dev_err(adapter->pdev_dev,
+ "HMA configuration failed with error %d\n", ret);
+
/*
* And finally tell the firmware to initialize itself using the
* parameters from the Configuration File.
@@ -3957,6 +4190,11 @@ static int adap_init0(struct adapter *adap)
* effect. Otherwise, it's time to try initializing the adapter.
*/
if (state == DEV_STATE_INIT) {
+ ret = adap_config_hma(adap);
+ if (ret)
+ dev_err(adap->pdev_dev,
+ "HMA configuration failed with error %d\n",
+ ret);
dev_info(adap->pdev_dev, "Coming up as %s: "\
"Adapter already initialized\n",
adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
@@ -4236,6 +4474,20 @@ static int adap_init0(struct adapter *adap)
adap->vres.pbl.start = val[4];
adap->vres.pbl.size = val[5] - val[4] + 1;
+ params[0] = FW_PARAM_PFVF(SRQ_START);
+ params[1] = FW_PARAM_PFVF(SRQ_END);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+ params, val);
+ if (!ret) {
+ adap->vres.srq.start = val[0];
+ adap->vres.srq.size = val[1] - val[0] + 1;
+ }
+ if (adap->vres.srq.size) {
+ adap->srq = t4_init_srq(adap->vres.srq.size);
+ if (!adap->srq)
+ dev_warn(&adap->pdev->dev, "could not allocate SRQ, continuing\n");
+ }
+
params[0] = FW_PARAM_PFVF(SQRQ_START);
params[1] = FW_PARAM_PFVF(SQRQ_END);
params[2] = FW_PARAM_PFVF(CQ_START);
@@ -4269,6 +4521,18 @@ static int adap_init0(struct adapter *adap)
"max_ordird_qp %d max_ird_adapter %d\n",
adap->params.max_ordird_qp,
adap->params.max_ird_adapter);
+
+ /* Enable write_with_immediate if FW supports it */
+ params[0] = FW_PARAM_DEV(RDMA_WRITE_WITH_IMM);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+ val);
+ adap->params.write_w_imm_support = (ret == 0 && val[0] != 0);
+
+ /* Enable write_cmpl if FW supports it */
+ params[0] = FW_PARAM_DEV(RI_WRITE_CMPL_WR);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+ val);
+ adap->params.write_cmpl_support = (ret == 0 && val[0] != 0);
adap->num_ofld_uld += 2;
}
if (caps_cmd.iscsicaps) {
@@ -4346,6 +4610,7 @@ static int adap_init0(struct adapter *adap)
* happened to HW/FW, stop issuing commands.
*/
bye:
+ adap_free_hma_mem(adap);
kfree(adap->sge.egr_map);
kfree(adap->sge.ingr_map);
kfree(adap->sge.starving_fl);
@@ -4903,6 +5168,7 @@ static void free_some_resources(struct adapter *adapter)
kvfree(adapter->smt);
kvfree(adapter->l2t);
+ kvfree(adapter->srq);
t4_cleanup_sched(adapter);
kvfree(adapter->tids.tid_tab);
cxgb4_cleanup_tc_flower(adapter);
@@ -4970,7 +5236,6 @@ static void cxgb4_mgmt_setup(struct net_device *dev)
/* Initialize the device structure. */
dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
- dev->needs_free_netdev = true;
}
static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
@@ -4982,9 +5247,10 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
pcie_fw = readl(adap->regs + PCIE_FW_A);
/* Check if cxgb4 is the MASTER and fw is initialized */
- if (!(pcie_fw & PCIE_FW_INIT_F) ||
+ if (num_vfs &&
+ (!(pcie_fw & PCIE_FW_INIT_F) ||
!(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
- PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF) {
+ PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF)) {
dev_warn(&pdev->dev,
"cxgb4 driver needs to be MASTER to support SRIOV\n");
return -EOPNOTSUPP;
@@ -5180,6 +5446,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
adapter->name = pci_name(pdev);
adapter->mbox = func;
adapter->pf = func;
+ adapter->params.chip = chip;
+ adapter->adap_idx = adap_idx;
adapter->msg_enable = DFLT_MSG_ENABLE;
adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
(sizeof(struct mbox_cmd) *
@@ -5255,6 +5523,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
INIT_WORK(&adapter->db_full_task, process_db_full);
INIT_WORK(&adapter->db_drop_task, process_db_drop);
+ INIT_WORK(&adapter->fatal_err_notify_task, notify_fatal_err);
err = t4_prep_adapter(adapter);
if (err)
@@ -5472,6 +5741,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto out_free_dev;
+ err = setup_fw_sge_queues(adapter);
+ if (err) {
+ dev_err(adapter->pdev_dev,
+ "FW sge queue allocation failed, err %d", err);
+ goto out_free_dev;
+ }
+
/*
* The card is now ready to go. If any errors occur during device
* registration we do not fail the whole card but rather proceed only
@@ -5520,10 +5796,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
cxgb4_ptp_init(adapter);
print_adapter_info(adapter);
- setup_fw_sge_queues(adapter);
return 0;
out_free_dev:
+ t4_free_sge_resources(adapter);
free_some_resources(adapter);
if (adapter->flags & USING_MSIX)
free_msix_info(adapter);
@@ -5572,6 +5848,8 @@ static void remove_one(struct pci_dev *pdev)
t4_uld_clean_up(adapter);
}
+ adap_free_hma_mem(adapter);
+
disable_interrupts(adapter);
for_each_port(adapter, i)
@@ -5599,24 +5877,24 @@ static void remove_one(struct pci_dev *pdev)
#if IS_ENABLED(CONFIG_IPV6)
t4_cleanup_clip_tbl(adapter);
#endif
- iounmap(adapter->regs);
if (!is_t4(adapter->params.chip))
iounmap(adapter->bar2);
- pci_disable_pcie_error_reporting(pdev);
- if ((adapter->flags & DEV_ENABLED)) {
- pci_disable_device(pdev);
- adapter->flags &= ~DEV_ENABLED;
- }
- pci_release_regions(pdev);
- kfree(adapter->mbox_log);
- synchronize_rcu();
- kfree(adapter);
}
#ifdef CONFIG_PCI_IOV
else {
cxgb4_iov_configure(adapter->pdev, 0);
}
#endif
+ iounmap(adapter->regs);
+ pci_disable_pcie_error_reporting(pdev);
+ if ((adapter->flags & DEV_ENABLED)) {
+ pci_disable_device(pdev);
+ adapter->flags &= ~DEV_ENABLED;
+ }
+ pci_release_regions(pdev);
+ kfree(adapter->mbox_log);
+ synchronize_rcu();
+ kfree(adapter);
}
/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 6b5fea4532f3..a95cde0fadf7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -342,6 +342,7 @@ static void free_queues_uld(struct adapter *adap, unsigned int uld_type)
{
struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+ adap->sge.uld_rxq_info[uld_type] = NULL;
kfree(rxq_info->rspq_id);
kfree(rxq_info->uldrxq);
kfree(rxq_info);
@@ -665,6 +666,8 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
lld->nodeid = dev_to_node(adap->pdev_dev);
lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support;
+ lld->write_w_imm_support = adap->params.write_w_imm_support;
+ lld->write_cmpl_support = adap->params.write_cmpl_support;
}
static void uld_attach(struct adapter *adap, unsigned int uld)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index a14e8db51cdc..b0ca06edaa7c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -257,7 +257,8 @@ enum cxgb4_state {
CXGB4_STATE_UP,
CXGB4_STATE_START_RECOVERY,
CXGB4_STATE_DOWN,
- CXGB4_STATE_DETACH
+ CXGB4_STATE_DETACH,
+ CXGB4_STATE_FATAL_ERROR
};
enum cxgb4_control {
@@ -283,6 +284,7 @@ struct cxgb4_virt_res { /* virtualized HW resources */
struct cxgb4_range iscsi;
struct cxgb4_range stag;
struct cxgb4_range rq;
+ struct cxgb4_range srq;
struct cxgb4_range pbl;
struct cxgb4_range qp;
struct cxgb4_range cq;
@@ -352,6 +354,8 @@ struct cxgb4_lld_info {
void **iscsi_ppm; /* iscsi page pod manager */
int nodeid; /* device numa node id */
bool fr_nsmr_tpte_wr_support; /* FW supports FR_NSMR_TPTE_WR */
+ bool write_w_imm_support; /* FW supports WRITE_WITH_IMMEDIATE */
+ bool write_cmpl_support; /* FW supports WRITE_CMPL WR */
};
struct cxgb4_uld_info {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h
index 77b2b3fd9021..3a49e00a38a1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h
@@ -42,8 +42,8 @@
#define FW_SCHED_CLS_NONE 0xffffffff
-/* Max rate that can be set to a scheduling class is 10 Gbps */
-#define SCHED_MAX_RATE_KBPS 10000000U
+/* Max rate that can be set to a scheduling class is 100 Gbps */
+#define SCHED_MAX_RATE_KBPS 100000000U
enum {
SCHED_STATE_ACTIVE,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.c b/drivers/net/ethernet/chelsio/cxgb4/srq.c
new file mode 100644
index 000000000000..6228a5708307
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/srq.c
@@ -0,0 +1,138 @@
+/*
+ * This file is part of the Chelsio T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017-2018 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "t4_msg.h"
+#include "srq.h"
+
+struct srq_data *t4_init_srq(int srq_size)
+{
+ struct srq_data *s;
+
+ s = kvzalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return NULL;
+
+ s->srq_size = srq_size;
+ init_completion(&s->comp);
+ mutex_init(&s->lock);
+
+ return s;
+}
+
+/* cxgb4_get_srq_entry: read the SRQ table entry
+ * @dev: Pointer to the net_device
+ * @srq_idx: Index to the srq
+ * @entryp: pointer to the srq entry
+ *
+ * Sends a CPL_SRQ_TABLE_REQ message for the given index.
+ * The contents are returned in a CPL_SRQ_TABLE_RPL message.
+ *
+ * Returns zero if the read is successful, else an error
+ * number will be returned. Caller should not use the srq
+ * entry if the return value is non-zero.
+ */
+int cxgb4_get_srq_entry(struct net_device *dev,
+ int srq_idx, struct srq_entry *entryp)
+{
+ struct cpl_srq_table_req *req;
+ struct adapter *adap;
+ struct sk_buff *skb;
+ struct srq_data *s;
+ int rc = -ENODEV;
+
+ adap = netdev2adap(dev);
+ s = adap->srq;
+
+ if (!(adap->flags & FULL_INIT_DONE) || !s)
+ goto out;
+
+ skb = alloc_skb(sizeof(*req), GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ req = (struct cpl_srq_table_req *)
+ __skb_put(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
+ INIT_TP_WR(req, 0);
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SRQ_TABLE_REQ,
+ TID_TID_V(srq_idx) |
+ TID_QID_V(adap->sge.fw_evtq.abs_id)));
+ req->idx = srq_idx;
+
+ mutex_lock(&s->lock);
+
+ s->entryp = entryp;
+ t4_mgmt_tx(adap, skb);
+
+ rc = wait_for_completion_timeout(&s->comp, SRQ_WAIT_TO);
+ if (rc)
+ rc = 0;
+ else /* !rc means we timed out */
+ rc = -ETIMEDOUT;
+
+ WARN_ON_ONCE(entryp->idx != srq_idx);
+ mutex_unlock(&s->lock);
+out:
+ return rc;
+}
+EXPORT_SYMBOL(cxgb4_get_srq_entry);
+
+void do_srq_table_rpl(struct adapter *adap,
+ const struct cpl_srq_table_rpl *rpl)
+{
+ unsigned int idx = TID_TID_G(GET_TID(rpl));
+ struct srq_data *s = adap->srq;
+ struct srq_entry *e;
+
+ if (unlikely(rpl->status != CPL_CONTAINS_READ_RPL)) {
+ dev_err(adap->pdev_dev,
+ "Unexpected SRQ_TABLE_RPL status %u for entry %u\n",
+ rpl->status, idx);
+ goto out;
+ }
+
+ /* Store the read entry */
+ e = s->entryp;
+ e->valid = 1;
+ e->idx = idx;
+ e->pdid = SRQT_PDID_G(be64_to_cpu(rpl->rsvd_pdid));
+ e->qlen = SRQT_QLEN_G(be32_to_cpu(rpl->qlen_qbase));
+ e->qbase = SRQT_QBASE_G(be32_to_cpu(rpl->qlen_qbase));
+ e->cur_msn = be16_to_cpu(rpl->cur_msn);
+ e->max_msn = be16_to_cpu(rpl->max_msn);
+out:
+ complete(&s->comp);
+}
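
A caller -- e.g. an RDMA upper-layer driver -- would use the new API roughly as below. Since cxgb4_get_srq_entry() allocates with GFP_KERNEL and sleeps on a completion, it must run in process context (an illustrative sketch, assuming a valid cxgb4 net_device; dump_srq_entry is a hypothetical helper):

static int dump_srq_entry(struct net_device *dev, int srq_idx)
{
	struct srq_entry entry = { 0 };
	int ret;

	ret = cxgb4_get_srq_entry(dev, srq_idx, &entry);
	if (ret)
		return ret;	/* -ENOMEM, -ENODEV or -ETIMEDOUT */

	pr_info("srq[%u]: pdid %u qbase %u qlen %u msn %u/%u\n",
		entry.idx, entry.pdid, entry.qbase, entry.qlen,
		entry.cur_msn, entry.max_msn);
	return 0;
}
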
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.h b/drivers/net/ethernet/chelsio/cxgb4/srq.h
new file mode 100644
index 000000000000..ec85cf93865a
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/srq.h
@@ -0,0 +1,65 @@
+/*
+ * This file is part of the Chelsio T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017-2018 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_SRQ_H
+#define __CXGB4_SRQ_H
+
+struct adapter;
+struct cpl_srq_table_rpl;
+
+#define SRQ_WAIT_TO (HZ * 5)
+
+struct srq_entry {
+ u8 valid;
+ u8 idx;
+ u8 qlen;
+ u16 pdid;
+ u16 cur_msn;
+ u16 max_msn;
+ u32 qbase;
+};
+
+struct srq_data {
+ unsigned int srq_size;
+ struct srq_entry *entryp;
+ struct completion comp;
+ struct mutex lock; /* generic mutex for srq data */
+};
+
+struct srq_data *t4_init_srq(int srq_size);
+int cxgb4_get_srq_entry(struct net_device *dev,
+ int srq_idx, struct srq_entry *entryp);
+void do_srq_table_rpl(struct adapter *adap,
+ const struct cpl_srq_table_rpl *rpl);
+#endif /* __CXGB4_SRQ_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 047609ef0515..7cb3ef466cc7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -484,6 +484,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
}
/**
+ * t4_memory_rw_init - Get memory window relative offset, base, and size.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1, MEM_HMA or MEM_MC
+ * @mem_off: memory relative offset with respect to @mtype.
+ * @mem_base: configured memory base address.
+ * @mem_aperture: configured memory window aperture.
+ *
+ * Get the configured memory window's relative offset, base, and size.
+ */
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+ u32 *mem_base, u32 *mem_aperture)
+{
+ u32 edc_size, mc_size, mem_reg;
+
+ /* Offset into the region of memory which is being accessed
+ * MEM_EDC0 = 0
+ * MEM_EDC1 = 1
+ * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
+ * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
+ * MEM_HMA = 4
+ */
+ edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
+ if (mtype == MEM_HMA) {
+ *mem_off = 2 * (edc_size * 1024 * 1024);
+ } else if (mtype != MEM_MC1) {
+ *mem_off = (mtype * (edc_size * 1024 * 1024));
+ } else {
+ mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
+ MA_EXT_MEMORY0_BAR_A));
+ *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
+ }
+
+ /* Each PCI-E Memory Window is programmed with a window size -- or
+ * "aperture" -- which controls the granularity of its mapping onto
+ * adapter memory. We need to grab that aperture in order to know
+ * how to use the specified window. The window is also programmed
+ * with the base address of the Memory Window in BAR0's address
+ * space. For T4 this is an absolute PCI-E Bus Address. For T5
+ * the address is relative to BAR0.
+ */
+ mem_reg = t4_read_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
+ win));
+ /* a dead adapter will return 0xffffffff for PIO reads */
+ if (mem_reg == 0xffffffff)
+ return -ENXIO;
+
+ *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
+ *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
+ if (is_t4(adap->params.chip))
+ *mem_base -= adap->t4_bar0;
+
+ return 0;
+}
+
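
As a worked example of the offset arithmetic (hypothetical sizes, not from the patch): with edc_size = 1024 MB and mc0_size = 2048 MB, MEM_EDC0 starts at offset 0, MEM_EDC1 at 1024 MB, MEM_MC at 2 * 1024 = 2048 MB -- the same base the code uses for MEM_HMA, which takes MC0's place in the target map when the HMA mux is active -- and MEM_MC1 at 2 * 1024 + 2048 = 4096 MB.
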
+/**
+ * t4_memory_update_win - Move memory window to specified address.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @addr: location to move.
+ *
+ * Move memory window to specified address.
+ */
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
+{
+ t4_write_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
+ addr);
+ /* Read it back to ensure that changes propagate before we
+ * attempt to use the new value.
+ */
+ t4_read_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+}
+
+/**
+ * t4_memory_rw_residual - Read/Write residual data.
+ * @adap: the adapter
+ * @off: relative offset within residual to start read/write.
+ * @addr: address within indicated memory type.
+ * @buf: host memory buffer
+ * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ * Read/Write residual data of less than 32 bits.
+ */
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+ int dir)
+{
+ union {
+ u32 word;
+ char byte[4];
+ } last;
+ unsigned char *bp;
+ int i;
+
+ if (dir == T4_MEMORY_READ) {
+ last.word = le32_to_cpu((__force __le32)
+ t4_read_reg(adap, addr));
+ for (bp = (unsigned char *)buf, i = off; i < 4; i++)
+ bp[i] = last.byte[i];
+ } else {
+ last.word = *buf;
+ for (i = off; i < 4; i++)
+ last.byte[i] = 0;
+ t4_write_reg(adap, addr,
+ (__force u32)cpu_to_le32(last.word));
+ }
+}
+
+/**
* t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
* @adap: the adapter
* @win: PCI-E Memory Window to use
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
u32 len, void *hbuf, int dir)
{
u32 pos, offset, resid, memoffset;
- u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
+ u32 win_pf, mem_aperture, mem_base;
u32 *buf;
+ int ret;
/* Argument sanity checks ...
*/
@@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
resid = len & 0x3;
len -= resid;
- /* Offset into the region of memory which is being accessed
- * MEM_EDC0 = 0
- * MEM_EDC1 = 1
- * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
- * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
- * MEM_HMA = 4
- */
- edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
- if (mtype == MEM_HMA) {
- memoffset = 2 * (edc_size * 1024 * 1024);
- } else if (mtype != MEM_MC1) {
- memoffset = (mtype * (edc_size * 1024 * 1024));
- } else {
- mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
- MA_EXT_MEMORY0_BAR_A));
- memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
- }
+ ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+ &mem_aperture);
+ if (ret)
+ return ret;
/* Determine the PCIE_MEM_ACCESS_OFFSET */
addr = addr + memoffset;
- /* Each PCI-E Memory Window is programmed with a window size -- or
- * "aperture" -- which controls the granularity of its mapping onto
- * adapter memory. We need to grab that aperture in order to know
- * how to use the specified window. The window is also programmed
- * with the base address of the Memory Window in BAR0's address
- * space. For T4 this is an absolute PCI-E Bus Address. For T5
- * the address is relative to BAR0.
- */
- mem_reg = t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
- win));
- mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
- mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
- if (is_t4(adap->params.chip))
- mem_base -= adap->t4_bar0;
win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
/* Calculate our initial PCI-E Memory Window Position and Offset into
* that Window.
*/
- pos = addr & ~(mem_aperture-1);
+ pos = addr & ~(mem_aperture - 1);
offset = addr - pos;
/* Set up initial PCI-E Memory Window to cover the start of our
- * transfer. (Read it back to ensure that changes propagate before we
- * attempt to use the new value.)
+ * transfer.
*/
- t4_write_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
- pos | win_pf);
- t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+ t4_memory_update_win(adap, win, pos | win_pf);
/* Transfer data to/from the adapter as long as there's an integral
* number of 32-bit transfers to complete.
@@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
if (offset == mem_aperture) {
pos += mem_aperture;
offset = 0;
- t4_write_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
- win), pos | win_pf);
- t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
- win));
+ t4_memory_update_win(adap, win, pos | win_pf);
}
}
@@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
* residual amount. The PCI-E Memory Window has already been moved
* above (if necessary) to cover this final transfer.
*/
- if (resid) {
- union {
- u32 word;
- char byte[4];
- } last;
- unsigned char *bp;
- int i;
-
- if (dir == T4_MEMORY_READ) {
- last.word = le32_to_cpu(
- (__force __le32)t4_read_reg(adap,
- mem_base + offset));
- for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
- bp[i] = last.byte[i];
- } else {
- last.word = *buf;
- for (i = resid; i < 4; i++)
- last.byte[i] = 0;
- t4_write_reg(adap, mem_base + offset,
- (__force u32)cpu_to_le32(last.word));
- }
- }
+ if (resid)
+ t4_memory_rw_residual(adap, resid, mem_base + offset,
+ (u8 *)buf, dir);
return 0;
}
@@ -2637,7 +2692,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
}
#define EEPROM_STAT_ADDR 0x7bfc
-#define VPD_SIZE 0x800
#define VPD_BASE 0x400
#define VPD_BASE_OLD 0
#define VPD_LEN 1024
@@ -2704,15 +2758,6 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
if (!vpd)
return -ENOMEM;
- /* We have two VPD data structures stored in the adapter VPD area.
- * By default, Linux calculates the size of the VPD area by traversing
- * the first VPD area at offset 0x0, so we need to tell the OS what
- * our real VPD size is.
- */
- ret = pci_set_vpd_size(adapter->pdev, VPD_SIZE);
- if (ret < 0)
- goto out;
-
/* Card information normally starts at VPD_BASE but early cards had
* it at 0.
*/
@@ -4021,8 +4066,6 @@ int t4_link_l1cfg(struct adapter *adapter, unsigned int mbox,
unsigned int fw_mdi = FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO);
fw_port_cap32_t fw_fc, cc_fec, fw_fec, rcap;
- lc->link_ok = 0;
-
/* Convert driver coding of Pause Frame Flow Control settings into the
* Firmware's API.
*/
@@ -6046,6 +6089,7 @@ unsigned int t4_get_tp_ch_map(struct adapter *adap, int pidx)
case CHELSIO_T6:
switch (nports) {
+ case 1:
case 2: return 1 << pidx;
}
break;
@@ -8560,6 +8604,25 @@ static int t4_get_flash_params(struct adapter *adap)
}
break;
}
+ case 0x9d: { /* ISSI -- Integrated Silicon Solution, Inc. */
+ /* This Density -> Size decoding table is taken from ISSI
+ * Data Sheets.
+ */
+ density = (flashid >> 16) & 0xff;
+ switch (density) {
+ case 0x16: /* 32 MB */
+ size = 1 << 25;
+ break;
+ case 0x17: /* 64 MB */
+ size = 1 << 26;
+ break;
+ default:
+ dev_err(adap->pdev_dev, "ISSI Flash Part has bad size, ID = %#x, Density code = %#x\n",
+ flashid, density);
+ return -EINVAL;
+ }
+ break;
+ }
case 0xc2: { /* Macronix */
/* This Density -> Size decoding table is taken from Macronix
* Data Sheets.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index d0db4427b77e..5e8f5ca8e3ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -52,6 +52,7 @@ enum {
CPL_L2T_WRITE_REQ = 0x12,
CPL_SMT_WRITE_REQ = 0x14,
CPL_TID_RELEASE = 0x1A,
+ CPL_SRQ_TABLE_REQ = 0x1C,
CPL_TX_DATA_ISO = 0x1F,
CPL_CLOSE_LISTSRV_RPL = 0x20,
@@ -102,6 +103,7 @@ enum {
CPL_FW4_MSG = 0xC0,
CPL_FW4_PLD = 0xC1,
CPL_FW4_ACK = 0xC3,
+ CPL_SRQ_TABLE_RPL = 0xCC,
CPL_RX_PHYS_DSGL = 0xD0,
@@ -136,6 +138,8 @@ enum CPL_error {
CPL_ERR_KEEPALV_NEG_ADVICE = 37,
CPL_ERR_ABORT_FAILED = 42,
CPL_ERR_IWARP_FLM = 50,
+ CPL_CONTAINS_READ_RPL = 60,
+ CPL_CONTAINS_WRITE_RPL = 61,
};
enum {
@@ -198,6 +202,7 @@ union opcode_tid {
/* partitioning of TID fields that also carry a queue id */
#define TID_TID_S 0
#define TID_TID_M 0x3fff
+#define TID_TID_V(x) ((x) << TID_TID_S)
#define TID_TID_G(x) (((x) >> TID_TID_S) & TID_TID_M)
#define TID_QID_S 14
@@ -743,6 +748,22 @@ struct cpl_abort_req_rss {
u8 status;
};
+struct cpl_abort_req_rss6 {
+ WR_HDR;
+ union opcode_tid ot;
+ __u32 srqidx_status;
+};
+
+#define ABORT_RSS_STATUS_S 0
+#define ABORT_RSS_STATUS_M 0xff
+#define ABORT_RSS_STATUS_V(x) ((x) << ABORT_RSS_STATUS_S)
+#define ABORT_RSS_STATUS_G(x) (((x) >> ABORT_RSS_STATUS_S) & ABORT_RSS_STATUS_M)
+
+#define ABORT_RSS_SRQIDX_S 8
+#define ABORT_RSS_SRQIDX_M 0xffffff
+#define ABORT_RSS_SRQIDX_V(x) ((x) << ABORT_RSS_SRQIDX_S)
+#define ABORT_RSS_SRQIDX_G(x) (((x) >> ABORT_RSS_SRQIDX_S) & ABORT_RSS_SRQIDX_M)
+
struct cpl_abort_req {
WR_HDR;
union opcode_tid ot;
@@ -758,6 +779,11 @@ struct cpl_abort_rpl_rss {
u8 status;
};
+struct cpl_abort_rpl_rss6 {
+ union opcode_tid ot;
+ __u32 srqidx_status;
+};
+
struct cpl_abort_rpl {
WR_HDR;
union opcode_tid ot;
@@ -2112,4 +2138,49 @@ enum {
X_CPL_RX_MPS_PKT_TYPE_QFC = 1 << 2,
X_CPL_RX_MPS_PKT_TYPE_PTP = 1 << 3
};
+
+struct cpl_srq_table_req {
+ WR_HDR;
+ union opcode_tid ot;
+ __u8 status;
+ __u8 rsvd[2];
+ __u8 idx;
+ __be64 rsvd_pdid;
+ __be32 qlen_qbase;
+ __be16 cur_msn;
+ __be16 max_msn;
+};
+
+struct cpl_srq_table_rpl {
+ union opcode_tid ot;
+ __u8 status;
+ __u8 rsvd[2];
+ __u8 idx;
+ __be64 rsvd_pdid;
+ __be32 qlen_qbase;
+ __be16 cur_msn;
+ __be16 max_msn;
+};
+
+/* cpl_srq_table_{req,rpl} field decoding macros */
+#define SRQT_QLEN_S 28
+#define SRQT_QLEN_M 0xF
+#define SRQT_QLEN_V(x) ((x) << SRQT_QLEN_S)
+#define SRQT_QLEN_G(x) (((x) >> SRQT_QLEN_S) & SRQT_QLEN_M)
+
+#define SRQT_QBASE_S 0
+#define SRQT_QBASE_M 0x3FFFFFF
+#define SRQT_QBASE_V(x) ((x) << SRQT_QBASE_S)
+#define SRQT_QBASE_G(x) (((x) >> SRQT_QBASE_S) & SRQT_QBASE_M)
+
+#define SRQT_PDID_S 0
+#define SRQT_PDID_M 0xFF
+#define SRQT_PDID_V(x) ((x) << SRQT_PDID_S)
+#define SRQT_PDID_G(x) (((x) >> SRQT_PDID_S) & SRQT_PDID_M)
+
+#define SRQT_IDX_S 0
+#define SRQT_IDX_M 0xF
+#define SRQT_IDX_V(x) ((x) << SRQT_IDX_S)
+#define SRQT_IDX_G(x) (((x) >> SRQT_IDX_S) & SRQT_IDX_M)
+
#endif /* __T4_MSG_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 0d83b4064a78..544757f6ab3a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -101,6 +101,7 @@ enum fw_wr_opcodes {
FW_RI_BIND_MW_WR = 0x18,
FW_RI_FR_NSMR_WR = 0x19,
FW_RI_FR_NSMR_TPTE_WR = 0x20,
+ FW_RI_RDMA_WRITE_CMPL_WR = 0x21,
FW_RI_INV_LSTAG_WR = 0x1a,
FW_ISCSI_TX_DATA_WR = 0x45,
FW_PTP_TX_PKT_WR = 0x46,
@@ -766,6 +767,7 @@ enum fw_cmd_opcodes {
FW_DEVLOG_CMD = 0x25,
FW_CLIP_CMD = 0x28,
FW_PTP_CMD = 0x3e,
+ FW_HMA_CMD = 0x3f,
FW_LASTC2E_CMD = 0x40,
FW_ERROR_CMD = 0x80,
FW_DEBUG_CMD = 0x81,
@@ -1132,6 +1134,7 @@ enum fw_memtype_cf {
FW_MEMTYPE_CF_FLASH = 0x4,
FW_MEMTYPE_CF_INTERNAL = 0x5,
FW_MEMTYPE_CF_EXTMEM1 = 0x6,
+ FW_MEMTYPE_CF_HMA = 0x7,
};
struct fw_caps_config_cmd {
@@ -1210,6 +1213,9 @@ enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C,
FW_PARAMS_PARAM_DEV_FILTER2_WR = 0x1D,
FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E,
+ FW_PARAMS_PARAM_DEV_HMA_SIZE = 0x20,
+ FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
+ FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24,
};
/*
@@ -1241,6 +1247,8 @@ enum fw_params_param_pfvf {
FW_PARAMS_PARAM_PFVF_SQRQ_END = 0x16,
FW_PARAMS_PARAM_PFVF_CQ_START = 0x17,
FW_PARAMS_PARAM_PFVF_CQ_END = 0x18,
+ FW_PARAMS_PARAM_PFVF_SRQ_START = 0x19,
+ FW_PARAMS_PARAM_PFVF_SRQ_END = 0x1A,
FW_PARAMS_PARAM_PFVF_SCHEDCLASS_ETH = 0x20,
FW_PARAMS_PARAM_PFVF_VIID = 0x24,
FW_PARAMS_PARAM_PFVF_CPMASK = 0x25,
@@ -3435,6 +3443,59 @@ struct fw_debug_cmd {
#define FW_DEBUG_CMD_TYPE_G(x) \
(((x) >> FW_DEBUG_CMD_TYPE_S) & FW_DEBUG_CMD_TYPE_M)
+struct fw_hma_cmd {
+ __be32 op_pkd;
+ __be32 retval_len16;
+ __be32 mode_to_pcie_params;
+ __be32 naddr_size;
+ __be32 addr_size_pkd;
+ __be32 r6;
+ __be64 phy_address[5];
+};
+
+#define FW_HMA_CMD_MODE_S 31
+#define FW_HMA_CMD_MODE_M 0x1
+#define FW_HMA_CMD_MODE_V(x) ((x) << FW_HMA_CMD_MODE_S)
+#define FW_HMA_CMD_MODE_G(x) \
+ (((x) >> FW_HMA_CMD_MODE_S) & FW_HMA_CMD_MODE_M)
+#define FW_HMA_CMD_MODE_F FW_HMA_CMD_MODE_V(1U)
+
+#define FW_HMA_CMD_SOC_S 30
+#define FW_HMA_CMD_SOC_M 0x1
+#define FW_HMA_CMD_SOC_V(x) ((x) << FW_HMA_CMD_SOC_S)
+#define FW_HMA_CMD_SOC_G(x) (((x) >> FW_HMA_CMD_SOC_S) & FW_HMA_CMD_SOC_M)
+#define FW_HMA_CMD_SOC_F FW_HMA_CMD_SOC_V(1U)
+
+#define FW_HMA_CMD_EOC_S 29
+#define FW_HMA_CMD_EOC_M 0x1
+#define FW_HMA_CMD_EOC_V(x) ((x) << FW_HMA_CMD_EOC_S)
+#define FW_HMA_CMD_EOC_G(x) (((x) >> FW_HMA_CMD_EOC_S) & FW_HMA_CMD_EOC_M)
+#define FW_HMA_CMD_EOC_F FW_HMA_CMD_EOC_V(1U)
+
+#define FW_HMA_CMD_PCIE_PARAMS_S 0
+#define FW_HMA_CMD_PCIE_PARAMS_M 0x7ffffff
+#define FW_HMA_CMD_PCIE_PARAMS_V(x) ((x) << FW_HMA_CMD_PCIE_PARAMS_S)
+#define FW_HMA_CMD_PCIE_PARAMS_G(x) \
+ (((x) >> FW_HMA_CMD_PCIE_PARAMS_S) & FW_HMA_CMD_PCIE_PARAMS_M)
+
+#define FW_HMA_CMD_NADDR_S 12
+#define FW_HMA_CMD_NADDR_M 0x3f
+#define FW_HMA_CMD_NADDR_V(x) ((x) << FW_HMA_CMD_NADDR_S)
+#define FW_HMA_CMD_NADDR_G(x) \
+ (((x) >> FW_HMA_CMD_NADDR_S) & FW_HMA_CMD_NADDR_M)
+
+#define FW_HMA_CMD_SIZE_S 0
+#define FW_HMA_CMD_SIZE_M 0xfff
+#define FW_HMA_CMD_SIZE_V(x) ((x) << FW_HMA_CMD_SIZE_S)
+#define FW_HMA_CMD_SIZE_G(x) \
+ (((x) >> FW_HMA_CMD_SIZE_S) & FW_HMA_CMD_SIZE_M)
+
+#define FW_HMA_CMD_ADDR_SIZE_S 11
+#define FW_HMA_CMD_ADDR_SIZE_M 0x1fffff
+#define FW_HMA_CMD_ADDR_SIZE_V(x) ((x) << FW_HMA_CMD_ADDR_SIZE_S)
+#define FW_HMA_CMD_ADDR_SIZE_G(x) \
+ (((x) >> FW_HMA_CMD_ADDR_SIZE_S) & FW_HMA_CMD_ADDR_SIZE_M)
+
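
All of these follow the driver's _S (shift), _M (mask), _V (insert) and _G (extract) naming convention. For instance, SIZE lands in bits 11:0 of naddr_size and NADDR in bits 17:12, so packing and unpacking reduce to (an illustrative fragment; hma_size_mb and naddr are assumed caller-supplied values):

static inline u32 pack_naddr_size(u32 hma_size_mb, u32 naddr)
{
	/* SIZE occupies bits 11:0, NADDR bits 17:12. */
	return FW_HMA_CMD_SIZE_V(hma_size_mb) | FW_HMA_CMD_NADDR_V(naddr);
}

static inline u32 unpack_naddr(u32 naddr_size)
{
	/* Shift down and mask off the 6-bit field. */
	return FW_HMA_CMD_NADDR_G(naddr_size);
}
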
enum pcie_fw_eval {
PCIE_FW_EVAL_CRASH = 0,
};
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index b7e79e64d2ed..9a81b52307a9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -155,8 +155,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
const char *fc;
const struct port_info *pi = netdev_priv(dev);
- netif_carrier_on(dev);
-
switch (pi->link_cfg.speed) {
case 100:
s = "100Mbps";
@@ -202,7 +200,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
} else {
- netif_carrier_off(dev);
netdev_info(dev, "link down\n");
}
}
@@ -278,6 +275,17 @@ static int link_start(struct net_device *dev)
*/
if (ret == 0)
ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
+
+ /* The Virtual Interfaces are connected to an internal switch on the
+ * chip which allows VIs attached to the same port to talk to each
+ * other even when the port link is down. As a result, we generally
+ * want to always report a VI's link as being "up", provided there are
+ * no errors in enabling the VI.
+ */
+
+ if (ret == 0)
+ netif_carrier_on(dev);
+
return ret;
}
@@ -1281,22 +1289,22 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_KR:
SET_LMM(Backplane);
- SET_LMM(10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
break;
case FW_PORT_TYPE_BP_AP:
SET_LMM(Backplane);
- SET_LMM(10000baseR_FEC);
- SET_LMM(10000baseKR_Full);
- SET_LMM(1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
break;
case FW_PORT_TYPE_BP4_AP:
SET_LMM(Backplane);
- SET_LMM(10000baseR_FEC);
- SET_LMM(10000baseKR_Full);
- SET_LMM(1000baseKX_Full);
- SET_LMM(10000baseKX4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
break;
case FW_PORT_TYPE_FIBER_XFI:
@@ -1312,18 +1320,24 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_BP40_BA:
case FW_PORT_TYPE_QSFP:
SET_LMM(FIBRE);
- SET_LMM(40000baseSR4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
break;
case FW_PORT_TYPE_CR_QSFP:
case FW_PORT_TYPE_SFP28:
SET_LMM(FIBRE);
- SET_LMM(25000baseCR_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
break;
case FW_PORT_TYPE_KR_SFP28:
SET_LMM(Backplane);
- SET_LMM(25000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
break;
case FW_PORT_TYPE_KR_XLAUI:
@@ -1335,13 +1349,18 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_CR2_QSFP:
SET_LMM(FIBRE);
- SET_LMM(50000baseSR2_Full);
+ FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
break;
case FW_PORT_TYPE_KR4_100G:
case FW_PORT_TYPE_CR4_QSFP:
SET_LMM(FIBRE);
- SET_LMM(100000baseCR4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
+ FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
+ FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
+ FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
+ FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
break;
default:
@@ -2383,11 +2402,11 @@ struct cxgb4vf_debugfs_entry {
};
static struct cxgb4vf_debugfs_entry debugfs_files[] = {
- { "mboxlog", S_IRUGO, &mboxlog_fops },
- { "sge_qinfo", S_IRUGO, &sge_qinfo_debugfs_fops },
- { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
- { "resources", S_IRUGO, &resources_proc_fops },
- { "interfaces", S_IRUGO, &interfaces_proc_fops },
+ { "mboxlog", 0444, &mboxlog_fops },
+ { "sge_qinfo", 0444, &sge_qinfo_debugfs_fops },
+ { "sge_qstats", 0444, &sge_qstats_proc_fops },
+ { "resources", 0444, &resources_proc_fops },
+ { "interfaces", 0444, &interfaces_proc_fops },
};
/*
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 977d4c2c759d..3f8fe8fd79cc 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -56,21 +56,11 @@
local_irq_{dis,en}able()
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
static const char version[] =
"cs89x0.c:v1.02 11/26/96 Russell Nelson <[email protected]>\n";
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
- or override something. */
#include <linux/module.h>
/*
@@ -93,6 +83,7 @@ static const char version[] =
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/netdevice.h>
+#include <linux/platform_device.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/delay.h>
@@ -105,24 +96,22 @@ static const char version[] =
#include "cs89x0.h"
-static unsigned int net_debug = NET_DEBUG;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message level");
/* Information that need to be kept for each board. */
struct net_local {
+ int msg_enable;
int chip_type; /* one of: CS8900, CS8920, CS8920M */
char chip_revision; /* revision letter of the chip ('A'...) */
	int send_cmd;		/* the proper command used to send a packet. */
int rx_mode;
int curr_rx_cfg;
int send_underrun; /* keep track of how many underruns in a row we get */
- struct sk_buff *skb;
};
/* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
static int net_open(struct net_device *dev);
static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t net_interrupt(int irq, void *dev_id);
@@ -132,10 +121,6 @@ static int net_close(struct net_device *dev);
static struct net_device_stats *net_get_stats(struct net_device *dev);
static int set_mac_address(struct net_device *dev, void *addr);
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
/* For reading/writing registers ISA-style */
static inline int
readreg_io(struct net_device *dev, int portno)
@@ -176,12 +161,10 @@ static const struct net_device_ops mac89x0_netdev_ops = {
/* Probe for the CS8900 card in slot E. We won't bother looking
anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
{
struct net_device *dev;
- static int once_is_enough;
struct net_local *lp;
- static unsigned version_printed;
int i, slot;
unsigned rev_type = 0;
unsigned long ioaddr;
@@ -189,21 +172,9 @@ struct net_device * __init mac89x0_probe(int unit)
int err = -ENODEV;
struct nubus_rsrc *fres;
- if (!MACH_IS_MAC)
- return ERR_PTR(-ENODEV);
-
dev = alloc_etherdev(sizeof(struct net_local));
if (!dev)
- return ERR_PTR(-ENOMEM);
-
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
-
- if (once_is_enough)
- goto out;
- once_is_enough = 1;
+ return -ENOMEM;
/* We might have to parameterize this later */
slot = 0xE;
@@ -230,9 +201,13 @@ struct net_device * __init mac89x0_probe(int unit)
if (sig != swab16(CHIP_EISA_ID_SIG))
goto out;
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
/* Initialize the net_device structure. */
lp = netdev_priv(dev);
+ lp->msg_enable = netif_msg_init(debug, 0);
+
/* Fill in the 'dev' fields. */
dev->base_addr = ioaddr;
dev->mem_start = (unsigned long)
@@ -255,19 +230,16 @@ struct net_device * __init mac89x0_probe(int unit)
if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
lp->send_cmd = TX_NOW;
- if (net_debug && version_printed++ == 0)
- printk(version);
+ netif_dbg(lp, drv, dev, "%s", version);
- printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
- dev->name,
- lp->chip_type==CS8900?'0':'2',
- lp->chip_type==CS8920M?"M":"",
- lp->chip_revision,
- dev->base_addr);
+ pr_info("cs89%c0%s rev %c found at %#8lx\n",
+ lp->chip_type == CS8900 ? '0' : '2',
+ lp->chip_type == CS8920M ? "M" : "",
+ lp->chip_revision, dev->base_addr);
/* Try to read the MAC address */
if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
- printk("\nmac89x0: No EEPROM, giving up now.\n");
+ pr_info("No EEPROM, giving up now.\n");
goto out1;
} else {
for (i = 0; i < ETH_ALEN; i += 2) {
@@ -282,39 +254,23 @@ struct net_device * __init mac89x0_probe(int unit)
/* print the IRQ and ethernet address. */
- printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+ pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
dev->netdev_ops = &mac89x0_netdev_ops;
err = register_netdev(dev);
if (err)
goto out1;
- return NULL;
+
+ platform_set_drvdata(pdev, dev);
+ return 0;
out1:
nubus_writew(0, dev->base_addr + ADD_PORT);
out:
free_netdev(dev);
- return ERR_PTR(err);
+ return err;
}
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
- int reset_start_time;
-
- writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
- /* wait 30 ms */
- msleep_interruptible(30);
-
- /* Wait until the chip is reset */
- reset_start_time = jiffies;
- while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
- ;
-}
-#endif
-
/* Open/initialize the board. This is called (in the current kernel)
sometime after booting when the 'ifconfig' program is run.
@@ -374,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev)
struct net_local *lp = netdev_priv(dev);
unsigned long flags;
- if (net_debug > 3)
- printk("%s: sent %d byte packet of type %x\n",
- dev->name, skb->len,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+ skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
/* keep the upload from being interrupted, since we
ask the chip to start transmitting before the
@@ -416,11 +370,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
struct net_local *lp;
int ioaddr, status;
- if (dev == NULL) {
- printk ("net_interrupt(): irq %d for unknown device.\n", irq);
- return IRQ_NONE;
- }
-
ioaddr = dev->base_addr;
lp = netdev_priv(dev);
@@ -432,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
faster than you can read them off, you're screwed. Hasta la
vista, baby! */
while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
- if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+ netif_dbg(lp, intr, dev, "status=%04x\n", status);
switch(status & ISQ_EVENT_MASK) {
case ISQ_RECEIVER_EVENT:
/* Got a packet(s). */
@@ -462,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
netif_wake_queue(dev);
}
if (status & TX_UNDERRUN) {
- if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+ netif_dbg(lp, tx_err, dev, "transmit underrun\n");
lp->send_underrun++;
if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
@@ -483,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
static void
net_rx(struct net_device *dev)
{
+ struct net_local *lp = netdev_priv(dev);
struct sk_buff *skb;
int status, length;
@@ -506,7 +456,6 @@ net_rx(struct net_device *dev)
/* Malloc up new buffer. */
skb = alloc_skb(length, GFP_ATOMIC);
if (skb == NULL) {
- printk("%s: Memory squeeze, dropping packet.\n", dev->name);
dev->stats.rx_dropped++;
return;
}
@@ -515,10 +464,9 @@ net_rx(struct net_device *dev)
skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
length);
- if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
- dev->name, length,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+ length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
skb->protocol=eth_type_trans(skb,dev);
netif_rx(skb);
@@ -594,7 +542,7 @@ static int set_mac_address(struct net_device *dev, void *addr)
return -EADDRNOTAVAIL;
memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
- printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+ netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
/* set the Ethernet address */
for (i=0; i < ETH_ALEN/2; i++)
@@ -603,32 +551,24 @@ static int set_mac_address(struct net_device *dev, void *addr)
return 0;
}
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
MODULE_LICENSE("GPL");
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
{
- net_debug = debug;
- dev_cs89x0 = mac89x0_probe(-1);
- if (IS_ERR(dev_cs89x0)) {
- printk(KERN_WARNING "mac89x0.c: No card found\n");
- return PTR_ERR(dev_cs89x0);
- }
+ struct net_device *dev = platform_get_drvdata(pdev);
+
+ unregister_netdev(dev);
+ nubus_writew(0, dev->base_addr + ADD_PORT);
+ free_netdev(dev);
return 0;
}
-void
-cleanup_module(void)
-{
- unregister_netdev(dev_cs89x0);
- nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
- free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+ .probe = mac89x0_device_probe,
+ .remove = mac89x0_device_remove,
+ .driver = {
+ .name = "mac89x0",
+ },
+};
+
+module_platform_driver(mac89x0_platform_driver);
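The conversion above moves mac89x0 from the legacy mac89x0_probe()/init_module() pairing to a platform driver, so binding, unbinding, and module lifetime are handled by the driver core. A minimal sketch of that shape, with hypothetical names and the device-specific setup elided:

        #include <linux/module.h>
        #include <linux/platform_device.h>

        struct example_priv { int dummy; };     /* stand-in for real state */

        static int example_probe(struct platform_device *pdev)
        {
                struct example_priv *priv;

                priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
                if (!priv)
                        return -ENOMEM;

                platform_set_drvdata(pdev, priv);       /* retrieved in remove */
                return 0;
        }

        static int example_remove(struct platform_device *pdev)
        {
                struct example_priv *priv = platform_get_drvdata(pdev);

                /* tear down in reverse order of probe */
                (void)priv;
                return 0;
        }

        static struct platform_driver example_driver = {
                .probe  = example_probe,
                .remove = example_remove,
                .driver = { .name = "example" },
        };
        module_platform_driver(example_driver);
        MODULE_LICENSE("GPL");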
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 9b218f0e5a4c..0dd64acd2a3f 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -33,7 +33,7 @@
#define DRV_NAME "enic"
#define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION "2.3.0.45"
+#define DRV_VERSION "2.3.0.53"
#define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc"
#define ENIC_BARS_MAX 6
@@ -140,6 +140,7 @@ struct enic_rfs_flw_tbl {
struct vxlan_offload {
u16 vxlan_udp_port_number;
u8 patch_level;
+ u8 flags;
};
/* Per-instance private data structure */
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index efb9333c7cf8..869006c2002d 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -474,6 +474,39 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
return 0;
}
+static int enic_get_rx_flow_hash(struct enic *enic, struct ethtool_rxnfc *cmd)
+{
+ cmd->data = 0;
+
+ switch (cmd->flow_type) {
+ case TCP_V6_FLOW:
+ case TCP_V4_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* Fall through */
+ case UDP_V6_FLOW:
+ case UDP_V4_FLOW:
+ if (vnic_dev_capable_udp_rss(enic->vdev))
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* Fall through */
+ case SCTP_V4_FLOW:
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
u32 *rule_locs)
{
@@ -500,6 +533,9 @@ static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
ret = enic_grxclsrule(enic, cmd);
spin_unlock_bh(&enic->rfs_h.lock);
break;
+ case ETHTOOL_GRXFH:
+ ret = enic_get_rx_flow_hash(enic, cmd);
+ break;
default:
ret = -EOPNOTSUPP;
break;
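enic_get_rx_flow_hash() serves the ETHTOOL_GRXFH query, reporting which header fields feed the RSS hash per flow type; note the deliberate case fall-throughs: TCP adds the L4 port bits, UDP adds them only when the adapter reports UDP-RSS capability, and every listed flow type gets the IP src/dst bits. A userspace sketch of the same query (the interface name and flow type are illustrative only):

        #include <stdio.h>
        #include <string.h>
        #include <net/if.h>
        #include <sys/ioctl.h>
        #include <sys/socket.h>
        #include <linux/ethtool.h>
        #include <linux/sockios.h>

        int main(void)
        {
                struct ethtool_rxnfc nfc = { .cmd = ETHTOOL_GRXFH,
                                             .flow_type = UDP_V4_FLOW };
                struct ifreq ifr = { 0 };
                int fd = socket(AF_INET, SOCK_DGRAM, 0);

                if (fd < 0)
                        return 1;
                strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
                ifr.ifr_data = (void *)&nfc;
                if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
                        printf("UDP4 hash fields:%s%s%s%s\n",
                               nfc.data & RXH_IP_SRC ? " src-ip" : "",
                               nfc.data & RXH_IP_DST ? " dst-ip" : "",
                               nfc.data & RXH_L4_B_0_1 ? " src-port" : "",
                               nfc.data & RXH_L4_B_2_3 ? " dst-port" : "");
                return 0;
        }

The same information is available from the command line via "ethtool -n <dev> rx-flow-hash udp4".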
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index f202ba72a811..81684acf52af 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -191,8 +191,16 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
goto error;
}
- if (ti->sa_family != AF_INET) {
- netdev_info(netdev, "vxlan: only IPv4 offload supported");
+ switch (ti->sa_family) {
+ case AF_INET6:
+ if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)) {
+ netdev_info(netdev, "vxlan: only IPv4 offload supported");
+ goto error;
+ }
+ /* Fall through */
+ case AF_INET:
+ break;
+ default:
goto error;
}
@@ -204,6 +212,11 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
goto error;
}
+ if ((vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) != 1) &&
+ !(enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ)) {
+ netdev_info(netdev, "vxlan: vxlan offload with multi wq not supported on this adapter");
+ goto error;
+ }
err = vnic_dev_overlay_offload_cfg(enic->vdev,
OVERLAY_CFG_VXLAN_PORT_UPDATE,
@@ -238,9 +251,8 @@ static void enic_udp_tunnel_del(struct net_device *netdev,
spin_lock_bh(&enic->devcmd_lock);
- if ((ti->sa_family != AF_INET) ||
- ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number)) ||
- (ti->type != UDP_TUNNEL_TYPE_VXLAN)) {
+ if ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number) ||
+ ti->type != UDP_TUNNEL_TYPE_VXLAN) {
netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded",
ntohs(ti->port), ti->sa_family, ti->type);
goto unlock;
@@ -271,22 +283,37 @@ static netdev_features_t enic_features_check(struct sk_buff *skb,
struct enic *enic = netdev_priv(dev);
struct udphdr *udph;
u16 port = 0;
- u16 proto;
+ u8 proto;
if (!skb->encapsulation)
return features;
features = vxlan_features_check(skb, features);
- /* hardware only supports IPv4 vxlan tunnel */
- if (vlan_get_protocol(skb) != htons(ETH_P_IP))
+ switch (vlan_get_protocol(skb)) {
+ case htons(ETH_P_IPV6):
+ if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
+ goto out;
+ proto = ipv6_hdr(skb)->nexthdr;
+ break;
+ case htons(ETH_P_IP):
+ proto = ip_hdr(skb)->protocol;
+ break;
+ default:
goto out;
+ }
- /* hardware does not support offload of ipv6 inner pkt */
- if (eth->h_proto != ntohs(ETH_P_IP))
+ switch (eth->h_proto) {
+ case ntohs(ETH_P_IPV6):
+ if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
+ goto out;
+ /* Fall through */
+ case ntohs(ETH_P_IP):
+ break;
+ default:
goto out;
+ }
- proto = ip_hdr(skb)->protocol;
if (proto == IPPROTO_UDP) {
udph = udp_hdr(skb);
@@ -635,12 +662,25 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
{
- if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+ const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+ switch (eth->h_proto) {
+ case ntohs(ETH_P_IP):
inner_ip_hdr(skb)->check = 0;
inner_tcp_hdr(skb)->check =
~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
inner_ip_hdr(skb)->daddr, 0,
IPPROTO_TCP, 0);
+ break;
+ case ntohs(ETH_P_IPV6):
+ inner_tcp_hdr(skb)->check =
+ ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+ &inner_ipv6_hdr(skb)->daddr, 0,
+ IPPROTO_TCP, 0);
+ break;
+ default:
+ WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
+ break;
}
}
@@ -1898,6 +1938,8 @@ static int enic_open(struct net_device *netdev)
}
for (i = 0; i < enic->rq_count; i++) {
+ /* enable rq before updating rq desc */
+ vnic_rq_enable(&enic->rq[i]);
vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
/* Need at least one buffer on ring to get going */
if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
@@ -1909,8 +1951,6 @@ static int enic_open(struct net_device *netdev)
for (i = 0; i < enic->wq_count; i++)
vnic_wq_enable(&enic->wq[i]);
- for (i = 0; i < enic->rq_count; i++)
- vnic_rq_enable(&enic->rq[i]);
if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
enic_dev_add_station_addr(enic);
@@ -1936,8 +1976,12 @@ static int enic_open(struct net_device *netdev)
return 0;
err_out_free_rq:
- for (i = 0; i < enic->rq_count; i++)
+ for (i = 0; i < enic->rq_count; i++) {
+ err = vnic_rq_disable(&enic->rq[i]);
+ if (err)
+ return err;
vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+ }
enic_dev_notify_unset(enic);
err_out_free_intr:
enic_unset_affinity_hint(enic);
@@ -2151,9 +2195,10 @@ static int enic_dev_wait(struct vnic_dev *vdev,
static int enic_dev_open(struct enic *enic)
{
int err;
+ u32 flags = CMD_OPENF_IG_DESCCACHE;
err = enic_dev_wait(enic->vdev, vnic_dev_open,
- vnic_dev_open_done, 0);
+ vnic_dev_open_done, flags);
if (err)
dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
err);
@@ -2275,7 +2320,7 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
{
struct device *dev = enic_get_dev(enic);
const u8 rss_default_cpu = 0;
- const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
+ u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
NIC_CFG_RSS_HASH_TYPE_IPV6 |
NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
@@ -2283,6 +2328,8 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
const u8 rss_base_cpu = 0;
u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
+ if (vnic_dev_capable_udp_rss(enic->vdev))
+ rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP;
if (rss_enable) {
if (!enic_set_rsskey(enic)) {
if (enic_set_rsscpu(enic, rss_hash_bits)) {
@@ -2901,9 +2948,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->hw_features |= NETIF_F_RXCSUM;
if (ENIC_SETTING(enic, VXLAN)) {
u64 patch_level;
+ u64 a1 = 0;
netdev->hw_enc_features |= NETIF_F_RXCSUM |
NETIF_F_TSO |
+ NETIF_F_TSO6 |
NETIF_F_TSO_ECN |
NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_HW_CSUM |
@@ -2922,9 +2971,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
err = vnic_dev_get_supported_feature_ver(enic->vdev,
VIC_FEATURE_VXLAN,
- &patch_level);
+ &patch_level, &a1);
if (err)
patch_level = 0;
+ enic->vxlan.flags = (u8)a1;
/* mask bits that are supported by driver
*/
patch_level &= BIT_ULL(0) | BIT_ULL(2);
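The common theme of these enic changes is capability gating: firmware returns feature bits in a1 alongside the VXLAN patch level, the driver caches them in enic->vxlan.flags, and each IPv6 or multi-WQ path tests a bit before proceeding. Condensed into one hedged sketch (the helper name is hypothetical; the flag constants are the ones defined in vnic_devcmd.h below):

        static bool enic_vxlan_outer_ok(u8 flags, int sa_family)
        {
                switch (sa_family) {
                case AF_INET:
                        return true;    /* IPv4 outer headers always offloadable */
                case AF_INET6:
                        return flags & ENIC_VXLAN_OUTER_IPV6;
                default:
                        return false;
                }
        }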
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 39bad67422dd..76cdd4c9d11f 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -1269,16 +1269,32 @@ int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
}
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
- u64 *supported_versions)
+ u64 *supported_versions, u64 *a1)
{
u64 a0 = feature;
int wait = 1000;
- u64 a1 = 0;
int ret;
- ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait);
+ ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, a1, wait);
if (!ret)
*supported_versions = a0;
return ret;
}
+
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev)
+{
+ u64 a0 = CMD_NIC_CFG, a1 = 0;
+ u64 rss_hash_type;
+ int wait = 1000;
+ int err;
+
+ err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait);
+ if (err || !a0)
+ return false;
+
+ rss_hash_type = (a1 >> NIC_CFG_RSS_HASH_TYPE_SHIFT) &
+ NIC_CFG_RSS_HASH_TYPE_MASK_FIELD;
+
+ return (rss_hash_type & NIC_CFG_RSS_HASH_TYPE_UDP);
+}
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.h b/drivers/net/ethernet/cisco/enic/vnic_dev.h
index 9d43d6bb9907..59d4cc8fbb85 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.h
@@ -183,6 +183,7 @@ int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config);
int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
u16 vxlan_udp_port_number);
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
- u64 *supported_versions);
+ u64 *supported_versions, u64 *a1);
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev);
#endif /* _VNIC_DEV_H_ */
diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
index d83880b0d468..41de4ba622a1 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
@@ -439,6 +439,7 @@ enum vnic_devcmd_cmd {
/* flags for CMD_OPEN */
#define CMD_OPENF_OPROM 0x1 /* open coming from option rom */
+#define CMD_OPENF_IG_DESCCACHE 0x2 /* Do not flush IG DESC cache */
/* flags for CMD_INIT */
#define CMD_INITF_DEFAULT_MAC 0x1 /* init with default mac addr */
@@ -697,6 +698,10 @@ enum overlay_ofld_cmd {
#define OVERLAY_CFG_VXLAN_PORT_UPDATE 0
+#define ENIC_VXLAN_INNER_IPV6 BIT(0)
+#define ENIC_VXLAN_OUTER_IPV6 BIT(1)
+#define ENIC_VXLAN_MULTI_WQ BIT(2)
+
/* Use this enum to get the supported versions for each of these features
* If you need to use the devcmd_get_supported_feature_version(), add
* the new feature into this enum and install function handler in devcmd.c
diff --git a/drivers/net/ethernet/cisco/enic/vnic_nic.h b/drivers/net/ethernet/cisco/enic/vnic_nic.h
index 995a50dd4c99..5a93db0d7afc 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_nic.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_nic.h
@@ -47,6 +47,7 @@
#define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6 (1 << 4)
#define NIC_CFG_RSS_HASH_TYPE_IPV6_EX (1 << 5)
#define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX (1 << 6)
+#define NIC_CFG_RSS_HASH_TYPE_UDP (1 << 7)
static inline void vnic_set_nic_cfg(u32 *nic_cfg,
u8 rss_default_cpu, u8 rss_hash_type,
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 5eb999af2c40..bd3f6e4d1341 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -540,6 +540,7 @@ static int gmac_setup_txqs(struct net_device *netdev)
if (port->txq_dma_base & ~DMA_Q_BASE_MASK) {
		dev_warn(geth->dev, "TX queue base is not aligned\n");
+ kfree(skb_tab);
return -ENOMEM;
}
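The added kfree(skb_tab) plugs a leak on the alignment-failure path: skb_tab was allocated earlier in gmac_setup_txqs() and every early return must release it. A sketch of the equivalent goto-unwind idiom often used to keep such error paths consistent (names hypothetical; the alignment test is reduced to a parameter):

        static int example_setup(size_t n, bool aligned)
        {
                void **tab;
                int ret;

                tab = kcalloc(n, sizeof(*tab), GFP_KERNEL);
                if (!tab)
                        return -ENOMEM;

                if (!aligned) {         /* the failing check in gmac_setup_txqs() */
                        ret = -ENOMEM;
                        goto err_free_tab;
                }

                /* success: ownership of tab moves into the device state */
                return 0;

        err_free_tab:
                kfree(tab);
                return ret;
        }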
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 1b79a6defd56..d71cba0842c5 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -602,7 +602,7 @@ static struct pci_driver pci_driver = {
};
module_pci_driver(pci_driver);
-module_param(polling_frequency, long, S_IRUGO);
+module_param(polling_frequency, long, 0444);
MODULE_PARM_DESC(polling_frequency, "Polling timer frequency in ns");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 1a49297224ed..ff92ab1daeb8 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -19,7 +19,7 @@
#include "be.h"
#include "be_cmds.h"
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
"Physical Link is functional",
"Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
"Optics of two types installed – Remove one optic or install matching pair of optics.",
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 09da2d82c2f0..e8b43cf44b6f 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -201,7 +201,7 @@ enum {
phy_state == BE_PHY_UNQUALIFIED || \
phy_state == BE_PHY_UNCERTIFIED)
-extern char *be_misconfig_evt_port_state[];
+extern const char * const be_misconfig_evt_port_state[];
/* async event indicating misconfigured port */
struct be_async_event_misconfig_port {
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 5774fb6f8aa0..c697e79e491e 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -34,11 +34,11 @@ MODULE_LICENSE("GPL");
* Use sysfs method to enable/disable VFs.
*/
static unsigned int num_vfs;
-module_param(num_vfs, uint, S_IRUGO);
+module_param(num_vfs, uint, 0444);
MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
static ushort rx_frag_size = 2048;
-module_param(rx_frag_size, ushort, S_IRUGO);
+module_param(rx_frag_size, ushort, 0444);
MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
/* Per-module error detection/recovery workq shared across all functions.
@@ -5788,7 +5788,7 @@ static ssize_t be_hwmon_show_temp(struct device *dev,
adapter->hwmon_info.be_on_die_temp * 1000);
}
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+static SENSOR_DEVICE_ATTR(temp1_input, 0444,
be_hwmon_show_temp, NULL, 1);
static struct attribute *be_hwmon_attrs[] = {
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 7caa8da48421..fd43f98ddbe7 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev)
err);
}
+ if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+ priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+ err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+ priv->mac_dev->allmulti);
+ if (err < 0)
+ netif_err(priv, drv, net_dev,
+ "mac_dev->set_allmulti() = %d\n",
+ err);
+ }
+
err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
if (err < 0)
netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
@@ -1916,8 +1926,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
goto csum_failed;
}
+ /* SGT[0] is used by the linear part */
sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
- qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+ frag_len = skb_headlen(skb);
+ qm_sg_entry_set_len(&sgt[0], frag_len);
sgt[0].bpid = FSL_DPAA_BPID_INV;
sgt[0].offset = 0;
addr = dma_map_single(dev, skb->data,
@@ -1930,9 +1942,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
qm_sg_entry_set64(&sgt[0], addr);
/* populate the rest of SGT entries */
- frag = &skb_shinfo(skb)->frags[0];
- frag_len = frag->size;
- for (i = 1; i <= nr_frags; i++, frag++) {
+ for (i = 0; i < nr_frags; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ frag_len = frag->size;
WARN_ON(!skb_frag_page(frag));
addr = skb_frag_dma_map(dev, frag, 0,
frag_len, dma_dir);
@@ -1942,15 +1954,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
goto sg_map_failed;
}
- qm_sg_entry_set_len(&sgt[i], frag_len);
- sgt[i].bpid = FSL_DPAA_BPID_INV;
- sgt[i].offset = 0;
+ qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+ sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+ sgt[i + 1].offset = 0;
/* keep the offset in the address */
- qm_sg_entry_set64(&sgt[i], addr);
- frag_len = frag->size;
+ qm_sg_entry_set64(&sgt[i + 1], addr);
}
- qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+ /* Set the final bit in the last used entry of the SGT */
+ qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
@@ -2008,7 +2021,6 @@ static inline int dpaa_xmit(struct dpaa_priv *priv,
}
if (unlikely(err < 0)) {
- percpu_stats->tx_errors++;
percpu_stats->tx_fifo_errors++;
return err;
}
@@ -2052,19 +2064,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
	/* MAX_SKB_FRAGS is equal to or larger than our DPAA_SGT_MAX_ENTRIES;
* make sure we don't feed FMan with more fragments than it supports.
*/
- if (nonlinear &&
- likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
- /* Just create a S/G fd based on the skb */
- err = skb_to_sg_fd(priv, skb, &fd);
- percpu_priv->tx_frag_skbuffs++;
- } else {
+ if (unlikely(nonlinear &&
+ (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
/* If the egress skb contains more fragments than we support
* we have no choice but to linearize it ourselves.
*/
- if (unlikely(nonlinear) && __skb_linearize(skb))
+ if (__skb_linearize(skb))
goto enomem;
- /* Finally, create a contig FD from this skb */
+ nonlinear = skb_is_nonlinear(skb);
+ }
+
+ if (nonlinear) {
+ /* Just create a S/G fd based on the skb */
+ err = skb_to_sg_fd(priv, skb, &fd);
+ percpu_priv->tx_frag_skbuffs++;
+ } else {
+ /* Create a contig FD from this skb */
err = skb_to_contig_fd(priv, skb, &fd, &offset);
}
if (unlikely(err < 0))
@@ -2201,14 +2217,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
if (dpaa_eth_napi_schedule(percpu_priv, portal))
return qman_cb_dqrr_stop;
- if (dpaa_eth_refill_bpools(priv))
- /* Unable to refill the buffer pool due to insufficient
- * system memory. Just release the frame back into the pool,
- * otherwise we'll soon end up with an empty buffer pool.
- */
- dpaa_fd_release(net_dev, &dq->fd);
- else
- dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+ dpaa_eth_refill_bpools(priv);
+ dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
return qman_cb_dqrr_consume;
}
@@ -2278,7 +2288,6 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
vaddr = phys_to_virt(addr);
prefetch(vaddr + qm_fd_get_offset(fd));
- fd_format = qm_fd_get_format(fd);
/* The only FD types that we may receive are contig and S/G */
WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg));
@@ -2311,8 +2320,10 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
skb_len = skb->len;
- if (unlikely(netif_receive_skb(skb) == NET_RX_DROP))
+ if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) {
+ percpu_stats->rx_dropped++;
return qman_cb_dqrr_consume;
+ }
percpu_stats->rx_packets++;
percpu_stats->rx_bytes += skb_len;
@@ -2766,7 +2777,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
priv->channel = (u16)channel;
- /* Start a thread that will walk the CPUs with affine portals
+ /* Walk the CPUs with affine portals
	 * and add this pool channel to each CPU's dequeue mask.
*/
dpaa_eth_add_channel(priv->channel);
@@ -2860,7 +2871,7 @@ static int dpaa_remove(struct platform_device *pdev)
struct device *dev;
int err;
- dev = &pdev->dev;
+ dev = pdev->dev.parent;
net_dev = dev_get_drvdata(dev);
priv = netdev_priv(net_dev);
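The skb_to_sg_fd() rework gives the scatter/gather table a fixed layout: entry 0 carries the linear part, fragment i lands in entry i + 1, and the Final bit goes on the last used entry. For an skb with a linear head and three page fragments the indexing works out as below:

        /*
         *   sgt[0]  <- skb_headlen(skb)              (linear part)
         *   sgt[1]  <- skb_shinfo(skb)->frags[0]
         *   sgt[2]  <- skb_shinfo(skb)->frags[1]
         *   sgt[3]  <- skb_shinfo(skb)->frags[2]     (Final bit set here,
         *                                             i.e. sgt[nr_frags])
         */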
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index faea674094b9..2f933b6b2f4e 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -211,7 +211,7 @@ static int dpaa_set_pauseparam(struct net_device *net_dev,
if (epause->rx_pause)
newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
if (epause->tx_pause)
- newadv |= ADVERTISED_Asym_Pause;
+ newadv ^= ADVERTISED_Asym_Pause;
oldadv = phydev->advertising &
(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
@@ -344,7 +344,7 @@ static void dpaa_get_ethtool_stats(struct net_device *net_dev,
/* gather congestion related counters */
cg_num = 0;
- cg_status = 0;
+ cg_status = false;
cg_time = jiffies_to_msecs(priv->cgr_data.congested_jiffies);
if (qman_query_cgr_congested(&priv->cgr_data.cgr, &cg_status) == 0) {
cg_num = priv->cgr_data.cgr_congested_count;
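The |= to ^= change in dpaa_set_pauseparam() matters because IEEE 802.3 pause advertisement is not one bit per direction; rx-only, tx-only, and symmetric pause map to distinct Pause/Asym_Pause combinations. With rx_pause setting both bits and tx_pause toggling Asym_Pause, the four requests resolve as:

        /*
         *   rx_pause  tx_pause   advertised bits
         *      0         0       (none)
         *      0         1       Asym_Pause
         *      1         0       Pause | Asym_Pause
         *      1         1       Pause
         *
         * The old |= wrongly advertised Pause | Asym_Pause for rx+tx.
         */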
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 7a7f3a42b2aa..d4604bc8eb5b 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3600,6 +3600,8 @@ fec_drv_remove(struct platform_device *pdev)
fec_enet_mii_remove(fep);
if (fep->reg_phy)
regulator_disable(fep->reg_phy);
+ pm_runtime_put(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
if (of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
of_node_put(fep->phy_node);
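The two added calls balance runtime-PM state taken during probe; a reference or enable that is never released keeps the device pinned after the driver unbinds. A minimal sketch of the pairing, assuming probe takes the usual enable + get (as fec's does):

        static int example_probe(struct platform_device *pdev)
        {
                pm_runtime_enable(&pdev->dev);
                pm_runtime_get_sync(&pdev->dev);
                /* ... device setup ... */
                return 0;
        }

        static int example_remove(struct platform_device *pdev)
        {
                /* mirror probe in reverse order */
                pm_runtime_put(&pdev->dev);
                pm_runtime_disable(&pdev->dev);
                return 0;
        }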
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index 8870a9a798ca..dc0850b3b517 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -2,7 +2,6 @@ config FSL_FMAN
tristate "FMan support"
depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
select GENERIC_ALLOCATOR
- depends on HAS_DMA
select PHYLIB
default n
help
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index ea43b4974149..57b1e2b47c0a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1100,7 +1100,7 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
set_bucket(dtsec->regs, bucket, true);
/* Create element to be added to the driver hash table */
- hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL);
+ hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC);
if (!hash_entry)
return -ENOMEM;
hash_entry->addr = addr;
@@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
return 0;
}
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+ u32 tmp;
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+
+ if (!is_init_done(dtsec->dtsec_drv_param))
+ return -EINVAL;
+
+ tmp = ioread32be(&regs->rctrl);
+ if (enable)
+ tmp |= RCTRL_MPROM;
+ else
+ tmp &= ~RCTRL_MPROM;
+
+ iowrite32be(tmp, &regs->rctrl);
+
+ return 0;
+}
+
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
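Besides adding dtsec_set_allmulti(), the hunk above switches the hash-entry allocation to GFP_ATOMIC, presumably because this path runs from ndo_set_rx_mode() with the address-list lock held, where sleeping allocations are forbidden. The rule, in miniature:

        /* Under a spinlock (or any atomic context): GFP_ATOMIC, never
         * GFP_KERNEL, and always handle allocation failure. */
        entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
        if (!entry)
                return -ENOMEM;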
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
index c4467c072058..1a689adf5a22 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
@@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec,
int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
#endif /* __DTSEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index c0296880feba..446a97b792e3 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -350,6 +350,7 @@ struct fman_mac {
struct fman_rev_info fm_rev_info;
bool basex_if;
struct phy_device *pcsphy;
+ bool allmulti_enabled;
};
static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
@@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
return 0;
}
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+ u32 entry;
+ struct memac_regs __iomem *regs = memac->regs;
+
+ if (!is_init_done(memac->memac_drv_param))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
+
+ memac->allmulti_enabled = enable;
+
+ return 0;
+}
+
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
{
struct memac_regs __iomem *regs = memac->regs;
@@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
break;
}
}
- if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
- iowrite32be(hash & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);
+
+ if (!memac->allmulti_enabled) {
+ if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+ iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
return 0;
}
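The deletion path above now respects an invariant shared by all three FMan MACs: while allmulti is enabled, every hash-table bucket keeps its multicast-enable bit set, so removing the last address in a bucket must not touch hardware; otherwise a single address delete could silently punch a hole in IFF_ALLMULTI reception. Condensed, with variable names abbreviated for illustration:

        /* Only drop a bucket's enable bit when allmulti is off AND the
         * bucket's software list is empty. */
        if (!mac->allmulti_enabled && list_empty(&hash->lsts[bucket]))
                iowrite32be(bucket & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);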
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h
index c4a66469a907..b5a50338ed9a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.h
@@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac,
enum fman_mac_exceptions exception, bool enable);
int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
#endif /* __MEMAC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 4b0f3a50b293..284735d4ebe9 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -217,6 +217,7 @@ struct fman_mac {
struct tgec_cfg *cfg;
void *fm;
struct fman_rev_info fm_rev_info;
+ bool allmulti_enabled;
};
static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
@@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
return 0;
}
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+ u32 entry;
+ struct tgec_regs __iomem *regs = tgec->regs;
+
+ if (!is_init_done(tgec->cfg))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | TGEC_HASH_MCAST_EN,
+ &regs->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
+
+ tgec->allmulti_enabled = enable;
+
+ return 0;
+}
+
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
{
struct tgec_regs __iomem *regs = tgec->regs;
@@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
break;
}
}
- if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
- iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
- &regs->hashtable_ctrl);
+
+ if (!tgec->allmulti_enabled) {
+ if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+ iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+ &regs->hashtable_ctrl);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h
index 514bba9f47ce..cbbd3b422a98 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h
@@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec,
int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
#endif /* __TGEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 88c0a0636b44..7b5b95f52c09 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
mac_dev->set_tx_pause = dtsec_set_tx_pause_frames;
mac_dev->set_rx_pause = dtsec_accept_rx_pause_frames;
mac_dev->set_exception = dtsec_set_exception;
+ mac_dev->set_allmulti = dtsec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev)
mac_dev->set_tx_pause = tgec_set_tx_pause_frames;
mac_dev->set_rx_pause = tgec_accept_rx_pause_frames;
mac_dev->set_exception = tgec_set_exception;
+ mac_dev->set_allmulti = tgec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev)
mac_dev->set_tx_pause = memac_set_tx_pause_frames;
mac_dev->set_rx_pause = memac_accept_rx_pause_frames;
mac_dev->set_exception = memac_set_exception;
+ mac_dev->set_allmulti = memac_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -564,7 +567,6 @@ static struct platform_device *dpaa_eth_add_device(int fman_id,
}
pdev->dev.parent = priv->dev;
- set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
ret = platform_device_add_data(pdev, &data, sizeof(data));
if (ret)
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index eefb3357e304..b520cec120ee 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -59,6 +59,7 @@ struct mac_device {
bool rx_pause_active;
bool tx_pause_active;
bool promisc;
+ bool allmulti;
int (*init)(struct mac_device *mac_dev);
int (*start)(struct mac_device *mac_dev);
@@ -66,6 +67,7 @@ struct mac_device {
void (*adjust_link)(struct mac_device *mac_dev);
int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+ int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
int (*set_multi)(struct net_device *net_dev,
struct mac_device *mac_dev);
int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 3bdeb295514b..f27f9bae1a4a 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2934,29 +2934,17 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
{
int size = lstatus & BD_LENGTH_MASK;
struct page *page = rxb->page;
- bool last = !!(lstatus & BD_LFLAG(RXBD_LAST));
-
- /* Remove the FCS from the packet length */
- if (last)
- size -= ETH_FCS_LEN;
if (likely(first)) {
skb_put(skb, size);
} else {
		/* the last fragment's length contains the full frame length */
- if (last)
+ if (lstatus & BD_LFLAG(RXBD_LAST))
size -= skb->len;
- /* Add the last fragment if it contains something other than
- * the FCS, otherwise drop it and trim off any part of the FCS
- * that was already received.
- */
- if (size > 0)
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- rxb->page_offset + RXBUF_ALIGNMENT,
- size, GFAR_RXB_TRUESIZE);
- else if (size < 0)
- pskb_trim(skb, skb->len + size);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rxb->page_offset + RXBUF_ALIGNMENT,
+ size, GFAR_RXB_TRUESIZE);
}
/* try reuse page */
@@ -3069,12 +3057,12 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb)
if (priv->padding)
skb_pull(skb, priv->padding);
+ /* Trim off the FCS */
+ pskb_trim(skb, skb->len - ETH_FCS_LEN);
+
if (ndev->features & NETIF_F_RXCSUM)
gfar_rx_checksum(skb, fcb);
- /* Tell the skb what kind of packet this is */
- skb->protocol = eth_type_trans(skb, ndev);
-
	/* We need to check for NETIF_F_HW_VLAN_CTAG_RX here.
	 * Even if vlan rx accel is disabled, on some chips
	 * RXFCB_VLN is pseudo-randomly set.
@@ -3145,13 +3133,15 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
continue;
}
+ gfar_process_frame(ndev, skb);
+
/* Increment the number of packets */
total_pkts++;
total_bytes += skb->len;
skb_record_rx_queue(skb, rx_queue->qindex);
- gfar_process_frame(ndev, skb);
+ skb->protocol = eth_type_trans(skb, ndev);
/* Send the packet up the stack */
napi_gro_receive(&rx_queue->grp->napi_rx, skb);
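The gianfar change moves FCS stripping out of the per-fragment path: gfar_add_rx_frag() now appends every received byte, and gfar_process_frame() trims the 4-byte FCS exactly once. A worked example for a 64-byte wire frame:

        /* 64 bytes on the wire = 60 bytes of frame + ETH_FCS_LEN (4):
         *   after the fragment loop:                    skb->len == 64
         *   after pskb_trim(skb, skb->len - ETH_FCS_LEN): skb->len == 60
         * independent of how many buffers carried the frame. */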
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 86944bc3b273..74bd260ca02a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -666,7 +666,7 @@ static void hns_gmac_get_strings(u32 stringset, u8 *data)
static int hns_gmac_get_sset_count(int stringset)
{
- if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS)
+ if (stringset == ETH_SS_STATS)
return ARRAY_SIZE(g_gmac_stats_string);
return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index b62816c1574e..93e71e27401b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -422,7 +422,7 @@ void hns_ppe_update_stats(struct hns_ppe_cb *ppe_cb)
int hns_ppe_get_sset_count(int stringset)
{
- if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS)
+ if (stringset == ETH_SS_STATS)
return ETH_PPE_STATIC_NUM;
return 0;
}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 6f3570cfb501..e2e28532e4dc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -876,7 +876,7 @@ void hns_rcb_get_stats(struct hnae_queue *queue, u64 *data)
*/
int hns_rcb_get_ring_sset_count(int stringset)
{
- if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS)
+ if (stringset == ETH_SS_STATS)
return HNS_RING_STATIC_REG_NUM;
return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 7ea7f8a4aa2a..2e14a3ae1d8b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -993,8 +993,10 @@ int hns_get_sset_count(struct net_device *netdev, int stringset)
cnt--;
return cnt;
- } else {
+ } else if (stringset == ETH_SS_STATS) {
return (HNS_NET_STATS_CNT + ops->get_sset_count(h, stringset));
+ } else {
+ return -EOPNOTSUPP;
}
}
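The hns fix narrows get_sset_count() to the string sets the driver really implements; advertising a count for ETH_SS_PRIV_FLAGS without a matching get_strings() confuses the ethtool core. The contract, sketched (the count macro is hypothetical):

        static int example_get_sset_count(struct net_device *ndev, int stringset)
        {
                switch (stringset) {
                case ETH_SS_STATS:
                        return EXAMPLE_NUM_STATS;   /* must match get_strings() */
                default:
                        return -EOPNOTSUPP;
                }
        }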
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 3e9203ea42a6..519e2bd6aa60 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -11,6 +11,7 @@
enum HCLGE_MBX_OPCODE {
HCLGE_MBX_RESET = 0x01, /* (VF -> PF) assert reset */
+	HCLGE_MBX_ASSERTING_RESET,	/* (PF -> VF) PF is asserting reset */
HCLGE_MBX_SET_UNICAST, /* (VF -> PF) set UC addr */
HCLGE_MBX_SET_MULTICAST, /* (VF -> PF) set MC addr */
HCLGE_MBX_SET_VLAN, /* (VF -> PF) set VLAN */
@@ -57,6 +58,8 @@ enum hclge_mbx_vlan_cfg_subcode {
#define HCLGE_MBX_MAX_MSG_SIZE 16
#define HCLGE_MBX_MAX_RESP_DATA_SIZE 8
+#define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM 3
+#define HCLGE_MBX_RING_NODE_VARIABLE_NUM 3
struct hclgevf_mbx_resp_status {
struct mutex mbx_mutex; /* protects against contending sync cmd resp */
@@ -83,6 +86,21 @@ struct hclge_mbx_pf_to_vf_cmd {
u16 msg[8];
};
+/* used by VF to store the received Async responses from PF */
+struct hclgevf_mbx_arq_ring {
+#define HCLGE_MBX_MAX_ARQ_MSG_SIZE 8
+#define HCLGE_MBX_MAX_ARQ_MSG_NUM 1024
+ struct hclgevf_dev *hdev;
+ u32 head;
+ u32 tail;
+ u32 count;
+ u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
+};
+
#define hclge_mbx_ring_ptr_move_crq(crq) \
(crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
+#define hclge_mbx_tail_ptr_move_arq(arq) \
+	(arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM)
+#define hclge_mbx_head_ptr_move_arq(arq) \
+	(arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM)
#endif
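The new ARQ is a plain head/tail circular buffer: the IRQ-side producer advances tail, the consumer advances head, both modulo the message count (HCLGE_MBX_MAX_ARQ_MSG_NUM, not the per-message word size), and count tracks occupancy. In outline:

        /* produce: memcpy(arq.msg_q[arq.tail], msg, msg_len);
         *          arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM;
         *          arq.count++;
         * consume: handle(arq.msg_q[arq.head]);
         *          arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM;
         *          arq.count--;
         * Full when count == HCLGE_MBX_MAX_ARQ_MSG_NUM, empty when count == 0. */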
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index fd06bc78c58e..37ec1b3286c6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -118,6 +118,8 @@ enum hnae3_reset_notify_type {
};
enum hnae3_reset_type {
+ HNAE3_VF_RESET,
+ HNAE3_VF_FULL_RESET,
HNAE3_FUNC_RESET,
HNAE3_CORE_RESET,
HNAE3_GLOBAL_RESET,
@@ -265,6 +267,8 @@ struct hnae3_ae_dev {
* Get tc size of handle
* get_vector()
* Get vector number and vector information
+ * put_vector()
+ * Put the vector in hdev
* map_ring_to_vector()
* Map rings to vector
* unmap_ring_from_vector()
@@ -336,7 +340,8 @@ struct hnae3_ae_ops {
u32 *tx_usecs_high, u32 *rx_usecs_high);
void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p);
- int (*set_mac_addr)(struct hnae3_handle *handle, void *p);
+ int (*set_mac_addr)(struct hnae3_handle *handle, void *p,
+ bool is_first);
int (*add_uc_addr)(struct hnae3_handle *handle,
const unsigned char *addr);
int (*rm_uc_addr)(struct hnae3_handle *handle,
@@ -375,6 +380,7 @@ struct hnae3_ae_ops {
int (*get_vector)(struct hnae3_handle *handle, u16 vector_num,
struct hnae3_vector_info *vector_info);
+ int (*put_vector)(struct hnae3_handle *handle, int vector_num);
int (*map_ring_to_vector)(struct hnae3_handle *handle,
int vector_num,
struct hnae3_ring_chain_node *vr_chain);
@@ -396,8 +402,7 @@ struct hnae3_ae_ops {
int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
u16 vlan, u8 qos, __be16 proto);
int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
- void (*reset_event)(struct hnae3_handle *handle,
- enum hnae3_reset_type reset);
+ void (*reset_event)(struct hnae3_handle *handle);
void (*get_channels)(struct hnae3_handle *handle,
struct ethtool_channels *ch);
void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
@@ -407,6 +412,10 @@ struct hnae3_ae_ops {
u32 *flowctrl_adv);
int (*set_led_id)(struct hnae3_handle *handle,
enum ethtool_phys_id_state status);
+ void (*get_link_mode)(struct hnae3_handle *handle,
+ unsigned long *supported,
+ unsigned long *advertising);
+ void (*get_port_type)(struct hnae3_handle *handle, u8 *port_type);
};
struct hnae3_dcb_ops {
@@ -487,6 +496,9 @@ struct hnae3_handle {
struct hnae3_ae_algo *ae_algo; /* the class who provides this handle */
	u64 flags; /* Indicate the capabilities for this handle */
+ unsigned long last_reset_time;
+ enum hnae3_reset_type reset_level;
+
union {
struct net_device *netdev; /* first member */
struct hnae3_knic_private_info kinfo;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 601b6295d3f8..a31b4adf6e6a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -168,8 +168,8 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
	 * GL and RL (Rate Limiter) are two ways to achieve interrupt coalescing
*/
- if (rl_reg > 0 && !tqp_vector->tx_group.gl_adapt_enable &&
- !tqp_vector->rx_group.gl_adapt_enable)
+ if (rl_reg > 0 && !tqp_vector->tx_group.coal.gl_adapt_enable &&
+ !tqp_vector->rx_group.coal.gl_adapt_enable)
/* According to the hardware, the range of rl_reg is
* 0-59 and the unit is 4.
*/
@@ -205,23 +205,30 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector,
*/
/* Default: enable interrupt coalescing self-adaptive and GL */
- tqp_vector->tx_group.gl_adapt_enable = 1;
- tqp_vector->rx_group.gl_adapt_enable = 1;
+ tqp_vector->tx_group.coal.gl_adapt_enable = 1;
+ tqp_vector->rx_group.coal.gl_adapt_enable = 1;
- tqp_vector->tx_group.int_gl = HNS3_INT_GL_50K;
- tqp_vector->rx_group.int_gl = HNS3_INT_GL_50K;
-
- hns3_set_vector_coalesce_tx_gl(tqp_vector,
- tqp_vector->tx_group.int_gl);
- hns3_set_vector_coalesce_rx_gl(tqp_vector,
- tqp_vector->rx_group.int_gl);
+ tqp_vector->tx_group.coal.int_gl = HNS3_INT_GL_50K;
+ tqp_vector->rx_group.coal.int_gl = HNS3_INT_GL_50K;
/* Default: disable RL */
h->kinfo.int_rl_setting = 0;
- hns3_set_vector_coalesce_rl(tqp_vector, h->kinfo.int_rl_setting);
- tqp_vector->rx_group.flow_level = HNS3_FLOW_LOW;
- tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
+ tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
+ tqp_vector->rx_group.coal.flow_level = HNS3_FLOW_LOW;
+ tqp_vector->tx_group.coal.flow_level = HNS3_FLOW_LOW;
+}
+
+static void hns3_vector_gl_rl_init_hw(struct hns3_enet_tqp_vector *tqp_vector,
+ struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+
+ hns3_set_vector_coalesce_tx_gl(tqp_vector,
+ tqp_vector->tx_group.coal.int_gl);
+ hns3_set_vector_coalesce_rx_gl(tqp_vector,
+ tqp_vector->rx_group.coal.int_gl);
+ hns3_set_vector_coalesce_rl(tqp_vector, h->kinfo.int_rl_setting);
}
static int hns3_nic_set_real_num_queue(struct net_device *netdev)
@@ -249,6 +256,16 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
return 0;
}
+static u16 hns3_get_max_available_channels(struct hnae3_handle *h)
+{
+ u16 free_tqps, max_rss_size, max_tqps;
+
+ h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
+ max_tqps = h->kinfo.num_tc * max_rss_size;
+
+ return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
+}
+
static int hns3_nic_net_up(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -303,7 +320,7 @@ static int hns3_nic_net_open(struct net_device *netdev)
return ret;
}
- priv->last_reset_time = jiffies;
+ priv->ae_handle->last_reset_time = jiffies;
return 0;
}
@@ -747,7 +764,7 @@ static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
{
/* Config bd buffer end */
hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_BDTYPE_M,
- HNS3_TXD_BDTYPE_M, 0);
+ HNS3_TXD_BDTYPE_S, 0);
hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0);
@@ -1104,7 +1121,7 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
if (!mac_addr || !is_valid_ether_addr((const u8 *)mac_addr->sa_data))
return -EADDRNOTAVAIL;
- ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data);
+ ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data, false);
if (ret) {
netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret);
return ret;
@@ -1388,11 +1405,15 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev,
__be16 proto, u16 vid)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vlan_filter)
ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false);
+ if (!ret)
+ set_bit(vid, priv->active_vlans);
+
return ret;
}
@@ -1400,14 +1421,32 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
__be16 proto, u16 vid)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vlan_filter)
ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true);
+ if (!ret)
+ clear_bit(vid, priv->active_vlans);
+
return ret;
}
+static void hns3_restore_vlan(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ u16 vid;
+ int ret;
+
+ for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
+ ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
+ if (ret)
+ netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n",
+ vid, ret);
+ }
+}
+
static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
u8 qos, __be16 vlan_proto)
{
@@ -1504,7 +1543,6 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
static void hns3_nic_net_timeout(struct net_device *ndev)
{
struct hns3_nic_priv *priv = netdev_priv(ndev);
- unsigned long last_reset_time = priv->last_reset_time;
struct hnae3_handle *h = priv->ae_handle;
if (!hns3_get_tx_timeo_queue_info(ndev))
@@ -1512,24 +1550,12 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
priv->tx_timeout_count++;
- /* This timeout is far away enough from last timeout,
- * if timeout again,set the reset type to PF reset
- */
- if (time_after(jiffies, (last_reset_time + 20 * HZ)))
- priv->reset_level = HNAE3_FUNC_RESET;
-
- /* Don't do any new action before the next timeout */
- else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo)))
+ if (time_before(jiffies, (h->last_reset_time + ndev->watchdog_timeo)))
return;
- priv->last_reset_time = jiffies;
-
+ /* request the reset */
if (h->ae_algo->ops->reset_event)
- h->ae_algo->ops->reset_event(h, priv->reset_level);
-
- priv->reset_level++;
- if (priv->reset_level > HNAE3_GLOBAL_RESET)
- priv->reset_level = HNAE3_GLOBAL_RESET;
+ h->ae_algo->ops->reset_event(h);
}
static const struct net_device_ops hns3_nic_netdev_ops = {
@@ -2064,15 +2090,13 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
desc = &ring->desc[ring->next_to_clean];
size = le16_to_cpu(desc->rx.size);
- if (twobufs) {
- truesize = hnae_buf_size(ring);
- } else {
- truesize = ALIGN(size, L1_CACHE_BYTES);
+ truesize = hnae_buf_size(ring);
+
+ if (!twobufs)
last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
- }
skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
- size - pull_len, truesize - pull_len);
+ size - pull_len, truesize);
	/* Avoid re-using remote pages; flag them unreusable by default */
if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()))
@@ -2369,20 +2393,20 @@ out:
static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
{
-#define HNS3_RX_ULTRA_PACKET_RATE 40000
+ struct hns3_enet_tqp_vector *tqp_vector =
+ ring_group->ring->tqp_vector;
enum hns3_flow_level_range new_flow_level;
- struct hns3_enet_tqp_vector *tqp_vector;
- int packets_per_secs;
- int bytes_per_usecs;
+ int packets_per_msecs;
+ int bytes_per_msecs;
+ u32 time_passed_ms;
u16 new_int_gl;
- int usecs;
- if (!ring_group->int_gl)
+ if (!ring_group->coal.int_gl || !tqp_vector->last_jiffies)
return false;
if (ring_group->total_packets == 0) {
- ring_group->int_gl = HNS3_INT_GL_50K;
- ring_group->flow_level = HNS3_FLOW_LOW;
+ ring_group->coal.int_gl = HNS3_INT_GL_50K;
+ ring_group->coal.flow_level = HNS3_FLOW_LOW;
return true;
}
@@ -2392,35 +2416,46 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
* 20-1249MB/s high (18000 ints/s)
* > 40000pps ultra (8000 ints/s)
*/
- new_flow_level = ring_group->flow_level;
- new_int_gl = ring_group->int_gl;
- tqp_vector = ring_group->ring->tqp_vector;
- usecs = (ring_group->int_gl << 1);
- bytes_per_usecs = ring_group->total_bytes / usecs;
- /* 1000000 microseconds */
- packets_per_secs = ring_group->total_packets * 1000000 / usecs;
+ new_flow_level = ring_group->coal.flow_level;
+ new_int_gl = ring_group->coal.int_gl;
+ time_passed_ms =
+ jiffies_to_msecs(jiffies - tqp_vector->last_jiffies);
+
+ if (!time_passed_ms)
+ return false;
+
+ do_div(ring_group->total_packets, time_passed_ms);
+ packets_per_msecs = ring_group->total_packets;
+
+ do_div(ring_group->total_bytes, time_passed_ms);
+ bytes_per_msecs = ring_group->total_bytes;
+
+#define HNS3_RX_LOW_BYTE_RATE 10000
+#define HNS3_RX_MID_BYTE_RATE 20000
switch (new_flow_level) {
case HNS3_FLOW_LOW:
- if (bytes_per_usecs > 10)
+ if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE)
new_flow_level = HNS3_FLOW_MID;
break;
case HNS3_FLOW_MID:
- if (bytes_per_usecs > 20)
+ if (bytes_per_msecs > HNS3_RX_MID_BYTE_RATE)
new_flow_level = HNS3_FLOW_HIGH;
- else if (bytes_per_usecs <= 10)
+ else if (bytes_per_msecs <= HNS3_RX_LOW_BYTE_RATE)
new_flow_level = HNS3_FLOW_LOW;
break;
case HNS3_FLOW_HIGH:
case HNS3_FLOW_ULTRA:
default:
- if (bytes_per_usecs <= 20)
+ if (bytes_per_msecs <= HNS3_RX_MID_BYTE_RATE)
new_flow_level = HNS3_FLOW_MID;
break;
}
- if ((packets_per_secs > HNS3_RX_ULTRA_PACKET_RATE) &&
- (&tqp_vector->rx_group == ring_group))
+#define HNS3_RX_ULTRA_PACKET_RATE 40
+
+ if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE &&
+ &tqp_vector->rx_group == ring_group)
new_flow_level = HNS3_FLOW_ULTRA;
switch (new_flow_level) {
@@ -2442,9 +2477,9 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
ring_group->total_bytes = 0;
ring_group->total_packets = 0;
- ring_group->flow_level = new_flow_level;
- if (new_int_gl != ring_group->int_gl) {
- ring_group->int_gl = new_int_gl;
+ ring_group->coal.flow_level = new_flow_level;
+ if (new_int_gl != ring_group->coal.int_gl) {
+ ring_group->coal.int_gl = new_int_gl;
return true;
}
return false;
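
/* Standalone model of the per-millisecond rate computation the adaptive
 * GL code switched to above: totals are divided by elapsed milliseconds
 * rather than by the GL interval, so the thresholds (10000/20000 bytes
 * per ms, 40 packets per ms) are rates, not raw counters. Thresholds are
 * copied from the hunk; the enum and helper are simplified stand-ins. */
#include <stdint.h>

enum flow_level { FLOW_LOW, FLOW_MID, FLOW_HIGH, FLOW_ULTRA };

static enum flow_level next_flow_level(enum flow_level cur,
				       uint64_t bytes, uint64_t packets,
				       uint32_t time_passed_ms, int is_rx)
{
	uint64_t bytes_per_ms, packets_per_ms;

	if (!time_passed_ms)
		return cur;		/* too soon to judge */

	bytes_per_ms = bytes / time_passed_ms;
	packets_per_ms = packets / time_passed_ms;

	switch (cur) {
	case FLOW_LOW:
		if (bytes_per_ms > 10000)
			cur = FLOW_MID;
		break;
	case FLOW_MID:
		if (bytes_per_ms > 20000)
			cur = FLOW_HIGH;
		else if (bytes_per_ms <= 10000)
			cur = FLOW_LOW;
		break;
	default:
		if (bytes_per_ms <= 20000)
			cur = FLOW_MID;
		break;
	}

	/* only the rx group can be promoted to ULTRA, as in the hunk */
	if (is_rx && packets_per_ms > 40)
		cur = FLOW_ULTRA;

	return cur;
}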
@@ -2456,19 +2491,27 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
bool rx_update, tx_update;
- if (rx_group->gl_adapt_enable) {
+ if (tqp_vector->int_adapt_down > 0) {
+ tqp_vector->int_adapt_down--;
+ return;
+ }
+
+ if (rx_group->coal.gl_adapt_enable) {
rx_update = hns3_get_new_int_gl(rx_group);
if (rx_update)
hns3_set_vector_coalesce_rx_gl(tqp_vector,
- rx_group->int_gl);
+ rx_group->coal.int_gl);
}
- if (tx_group->gl_adapt_enable) {
+ if (tx_group->coal.gl_adapt_enable) {
tx_update = hns3_get_new_int_gl(&tqp_vector->tx_group);
if (tx_update)
hns3_set_vector_coalesce_tx_gl(tqp_vector,
- tx_group->int_gl);
+ tx_group->coal.int_gl);
}
+
+ tqp_vector->last_jiffies = jiffies;
+ tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
}
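
/* Sketch of the hold-off counter added above: after each re-tune the
 * vector waits HNS3_INT_ADAPT_DOWN_START poll rounds (100, per the header
 * hunk further down) before adapting again, so short bursts do not thrash
 * the coalesce settings. Names here are stand-ins. */
struct adapt_state {
	unsigned int adapt_down;	/* polls left before the next re-tune */
};

static int may_adapt(struct adapt_state *s)
{
	if (s->adapt_down > 0) {
		s->adapt_down--;
		return 0;		/* still in hold-off */
	}
	s->adapt_down = 100;		/* HNS3_INT_ADAPT_DOWN_START */
	return 1;
}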
static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
@@ -2615,32 +2658,18 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
struct hnae3_ring_chain_node vector_ring_chain;
struct hnae3_handle *h = priv->ae_handle;
struct hns3_enet_tqp_vector *tqp_vector;
- struct hnae3_vector_info *vector;
- struct pci_dev *pdev = h->pdev;
- u16 tqp_num = h->kinfo.num_tqps;
- u16 vector_num;
int ret = 0;
u16 i;
- /* RSS size, cpu online and vector_num should be the same */
- /* Should consider 2p/4p later */
- vector_num = min_t(u16, num_online_cpus(), tqp_num);
- vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
- GFP_KERNEL);
- if (!vector)
- return -ENOMEM;
-
- vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
-
- priv->vector_num = vector_num;
- priv->tqp_vector = (struct hns3_enet_tqp_vector *)
- devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
- GFP_KERNEL);
- if (!priv->tqp_vector)
- return -ENOMEM;
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vector = &priv->tqp_vector[i];
+ hns3_vector_gl_rl_init_hw(tqp_vector, priv);
+ tqp_vector->num_tqps = 0;
+ }
- for (i = 0; i < tqp_num; i++) {
- u16 vector_i = i % vector_num;
+ for (i = 0; i < h->kinfo.num_tqps; i++) {
+ u16 vector_i = i % priv->vector_num;
+ u16 tqp_num = h->kinfo.num_tqps;
tqp_vector = &priv->tqp_vector[vector_i];
@@ -2650,52 +2679,94 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
hns3_add_ring_to_group(&tqp_vector->rx_group,
priv->ring_data[i + tqp_num].ring);
- tqp_vector->idx = vector_i;
- tqp_vector->mask_addr = vector[vector_i].io_addr;
- tqp_vector->vector_irq = vector[vector_i].vector;
- tqp_vector->num_tqps++;
-
priv->ring_data[i].ring->tqp_vector = tqp_vector;
priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+ tqp_vector->num_tqps++;
}
- for (i = 0; i < vector_num; i++) {
+ for (i = 0; i < priv->vector_num; i++) {
tqp_vector = &priv->tqp_vector[i];
tqp_vector->rx_group.total_bytes = 0;
tqp_vector->rx_group.total_packets = 0;
tqp_vector->tx_group.total_bytes = 0;
tqp_vector->tx_group.total_packets = 0;
- hns3_vector_gl_rl_init(tqp_vector, priv);
tqp_vector->handle = h;
ret = hns3_get_vector_ring_chain(tqp_vector,
&vector_ring_chain);
if (ret)
- goto out;
+ return ret;
ret = h->ae_algo->ops->map_ring_to_vector(h,
tqp_vector->vector_irq, &vector_ring_chain);
- if (ret)
- goto out;
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
+ if (ret)
+ return ret;
+
netif_napi_add(priv->netdev, &tqp_vector->napi,
hns3_nic_common_poll, NAPI_POLL_WEIGHT);
}
+ return 0;
+}
+
+static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ struct hns3_enet_tqp_vector *tqp_vector;
+ struct hnae3_vector_info *vector;
+ struct pci_dev *pdev = h->pdev;
+ u16 tqp_num = h->kinfo.num_tqps;
+ u16 vector_num;
+ int ret = 0;
+ u16 i;
+
+ /* RSS size, cpu online and vector_num should be the same */
+ /* Should consider 2p/4p later */
+ vector_num = min_t(u16, num_online_cpus(), tqp_num);
+ vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
+ GFP_KERNEL);
+ if (!vector)
+ return -ENOMEM;
+
+ vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
+
+ priv->vector_num = vector_num;
+ priv->tqp_vector = (struct hns3_enet_tqp_vector *)
+ devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
+ GFP_KERNEL);
+ if (!priv->tqp_vector) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vector = &priv->tqp_vector[i];
+ tqp_vector->idx = i;
+ tqp_vector->mask_addr = vector[i].io_addr;
+ tqp_vector->vector_irq = vector[i].vector;
+ hns3_vector_gl_rl_init(tqp_vector, priv);
+ }
+
out:
devm_kfree(&pdev->dev, vector);
return ret;
}
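
/* The split above separates vector resource allocation (this function)
 * from ring mapping (hns3_nic_init_vector_data), so hns3_set_channels can
 * redo only the parts a new queue count invalidates. The init phase then
 * spreads queues round-robin; a standalone sketch of that assignment: */
#include <stdint.h>

static uint16_t tqp_to_vector(uint16_t tqp_idx, uint16_t vector_num)
{
	/* queue i is handled by vector i % vector_num */
	return tqp_idx % vector_num;
}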
+static void hns3_clear_ring_group(struct hns3_enet_ring_group *group)
+{
+ group->ring = NULL;
+ group->count = 0;
+}
+
static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
{
struct hnae3_ring_chain_node vector_ring_chain;
struct hnae3_handle *h = priv->ae_handle;
struct hns3_enet_tqp_vector *tqp_vector;
- struct pci_dev *pdev = h->pdev;
int i, ret;
for (i = 0; i < priv->vector_num; i++) {
@@ -2711,6 +2782,10 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
if (ret)
return ret;
+ ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
+ if (ret)
+ return ret;
+
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
@@ -2722,12 +2797,30 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
}
priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
-
+ hns3_clear_ring_group(&tqp_vector->rx_group);
+ hns3_clear_ring_group(&tqp_vector->tx_group);
netif_napi_del(&priv->tqp_vector[i].napi);
}
- devm_kfree(&pdev->dev, priv->tqp_vector);
+ return 0;
+}
+static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ struct pci_dev *pdev = h->pdev;
+ int i, ret;
+
+ for (i = 0; i < priv->vector_num; i++) {
+ struct hns3_enet_tqp_vector *tqp_vector;
+
+ tqp_vector = &priv->tqp_vector[i];
+ ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
+ if (ret)
+ return ret;
+ }
+
+ devm_kfree(&pdev->dev, priv->tqp_vector);
return 0;
}
@@ -2957,13 +3050,8 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
h->ae_algo->ops->reset_queue(h, i);
hns3_fini_ring(priv->ring_data[i].ring);
- devm_kfree(priv->dev, priv->ring_data[i].ring);
hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
- devm_kfree(priv->dev,
- priv->ring_data[i + h->kinfo.num_tqps].ring);
}
- devm_kfree(priv->dev, priv->ring_data);
-
return 0;
}
@@ -2987,7 +3075,7 @@ static void hns3_init_mac_addr(struct net_device *netdev)
}
if (h->ae_algo->ops->set_mac_addr)
- h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+ h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
}
@@ -3013,7 +3101,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
int ret;
netdev = alloc_etherdev_mq(sizeof(struct hns3_nic_priv),
- handle->kinfo.num_tqps);
+ hns3_get_max_available_channels(handle));
if (!netdev)
return -ENOMEM;
@@ -3021,8 +3109,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
priv->dev = &pdev->dev;
priv->netdev = netdev;
priv->ae_handle = handle;
- priv->last_reset_time = jiffies;
- priv->reset_level = HNAE3_FUNC_RESET;
+ priv->ae_handle->reset_level = HNAE3_NONE_RESET;
+ priv->ae_handle->last_reset_time = jiffies;
priv->tx_timeout_count = 0;
handle->kinfo.netdev = netdev;
@@ -3048,6 +3136,12 @@ static int hns3_client_init(struct hnae3_handle *handle)
goto out_get_ring_cfg;
}
+ ret = hns3_nic_alloc_vector_data(priv);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_alloc_vector_data;
+ }
+
ret = hns3_nic_init_vector_data(priv);
if (ret) {
ret = -ENOMEM;
@@ -3076,8 +3170,10 @@ static int hns3_client_init(struct hnae3_handle *handle)
out_reg_netdev_fail:
out_init_ring_data:
(void)hns3_nic_uninit_vector_data(priv);
- priv->ring_data = NULL;
out_init_vector_data:
+ hns3_nic_dealloc_vector_data(priv);
+out_alloc_vector_data:
+ priv->ring_data = NULL;
out_get_ring_cfg:
priv->ae_handle = NULL;
free_netdev(netdev);
@@ -3097,10 +3193,16 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
if (ret)
netdev_err(netdev, "uninit vector error\n");
+ ret = hns3_nic_dealloc_vector_data(priv);
+ if (ret)
+ netdev_err(netdev, "dealloc vector error\n");
+
ret = hns3_uninit_all_ring(priv);
if (ret)
netdev_err(netdev, "uninit ring error\n");
+ hns3_put_ring_config(priv);
+
priv->ring_data = NULL;
free_netdev(netdev);
@@ -3240,7 +3342,6 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
{
struct hnae3_knic_private_info *kinfo = &handle->kinfo;
- struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
int ret = 0;
if (netif_running(kinfo->netdev)) {
@@ -3250,8 +3351,7 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
"hns net up fail, ret=%d!\n", ret);
return ret;
}
-
- priv->last_reset_time = jiffies;
+ handle->last_reset_time = jiffies;
}
return ret;
@@ -3263,11 +3363,14 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret;
- priv->reset_level = 1;
hns3_init_mac_addr(netdev);
hns3_nic_set_rx_mode(netdev);
hns3_recover_hw_addr(netdev);
+ /* Hardware table is only cleared when the PF resets */
+ if (!(handle->flags & HNAE3_SUPPORT_VF))
+ hns3_restore_vlan(netdev);
+
/* Carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
@@ -3306,6 +3409,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
if (ret)
netdev_err(netdev, "uninit ring error\n");
+ hns3_put_ring_config(priv);
+
priv->ring_data = NULL;
return ret;
@@ -3336,18 +3441,24 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
return ret;
}
-static u16 hns3_get_max_available_channels(struct net_device *netdev)
+static void hns3_restore_coal(struct hns3_nic_priv *priv,
+ struct hns3_enet_coalesce *tx,
+ struct hns3_enet_coalesce *rx)
{
- struct hnae3_handle *h = hns3_get_handle(netdev);
- u16 free_tqps, max_rss_size, max_tqps;
-
- h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
- max_tqps = h->kinfo.num_tc * max_rss_size;
+ u16 vector_num = priv->vector_num;
+ int i;
- return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
+ for (i = 0; i < vector_num; i++) {
+ memcpy(&priv->tqp_vector[i].tx_group.coal, tx,
+ sizeof(struct hns3_enet_coalesce));
+ memcpy(&priv->tqp_vector[i].rx_group.coal, rx,
+ sizeof(struct hns3_enet_coalesce));
+ }
}
-static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
+static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num,
+ struct hns3_enet_coalesce *tx,
+ struct hns3_enet_coalesce *rx)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -3361,6 +3472,12 @@ static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
if (ret)
return ret;
+ ret = hns3_nic_alloc_vector_data(priv);
+ if (ret)
+ goto err_alloc_vector;
+
+ hns3_restore_coal(priv, tx, rx);
+
ret = hns3_nic_init_vector_data(priv);
if (ret)
goto err_uninit_vector;
@@ -3375,6 +3492,8 @@ err_put_ring:
hns3_put_ring_config(priv);
err_uninit_vector:
hns3_nic_uninit_vector_data(priv);
+err_alloc_vector:
+ hns3_nic_dealloc_vector_data(priv);
return ret;
}
@@ -3389,6 +3508,7 @@ int hns3_set_channels(struct net_device *netdev,
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hnae3_handle *h = hns3_get_handle(netdev);
struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ struct hns3_enet_coalesce tx_coal, rx_coal;
bool if_running = netif_running(netdev);
u32 new_tqp_num = ch->combined_count;
u16 org_tqp_num;
@@ -3397,12 +3517,12 @@ int hns3_set_channels(struct net_device *netdev,
if (ch->rx_count || ch->tx_count)
return -EINVAL;
- if (new_tqp_num > hns3_get_max_available_channels(netdev) ||
+ if (new_tqp_num > hns3_get_max_available_channels(h) ||
new_tqp_num < kinfo->num_tc) {
dev_err(&netdev->dev,
"Change tqps fail, the tqp range is from %d to %d",
kinfo->num_tc,
- hns3_get_max_available_channels(netdev));
+ hns3_get_max_available_channels(h));
return -EINVAL;
}
@@ -3411,7 +3531,7 @@ int hns3_set_channels(struct net_device *netdev,
return 0;
if (if_running)
- dev_close(netdev);
+ hns3_nic_net_stop(netdev);
hns3_clear_all_ring(h);
@@ -3422,12 +3542,26 @@ int hns3_set_channels(struct net_device *netdev,
goto open_netdev;
}
+ /* Changing the tqp num may also change the vector num;
+ * ethtool only supports setting and querying one coalesce
+ * configuration for now, so save vector 0's coalesce
+ * configuration here in order to restore it.
+ */
+ memcpy(&tx_coal, &priv->tqp_vector[0].tx_group.coal,
+ sizeof(struct hns3_enet_coalesce));
+ memcpy(&rx_coal, &priv->tqp_vector[0].rx_group.coal,
+ sizeof(struct hns3_enet_coalesce));
+
+ hns3_nic_dealloc_vector_data(priv);
+
hns3_uninit_all_ring(priv);
+ hns3_put_ring_config(priv);
org_tqp_num = h->kinfo.num_tqps;
- ret = hns3_modify_tqp_num(netdev, new_tqp_num);
+ ret = hns3_modify_tqp_num(netdev, new_tqp_num, &tx_coal, &rx_coal);
if (ret) {
- ret = hns3_modify_tqp_num(netdev, org_tqp_num);
+ ret = hns3_modify_tqp_num(netdev, org_tqp_num,
+ &tx_coal, &rx_coal);
if (ret) {
/* If revert to old tqp failed, fatal error occurred */
dev_err(&netdev->dev,
@@ -3440,7 +3574,7 @@ int hns3_set_channels(struct net_device *netdev,
open_netdev:
if (if_running)
- dev_open(netdev);
+ hns3_nic_net_open(netdev);
return ret;
}
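
/* Standalone sketch of the snapshot/restore pattern hns3_set_channels now
 * uses: save vector 0's coalesce settings, rebuild the vectors for the new
 * queue count, then copy the snapshot onto every new vector. struct coal
 * is a stand-in for hns3_enet_coalesce. */
#include <string.h>

struct coal {
	unsigned short int_gl;
	unsigned char gl_adapt_enable;
};

static void restore_coal(struct coal *tx, struct coal *rx, int vector_num,
			 const struct coal *tx_saved,
			 const struct coal *rx_saved)
{
	for (int i = 0; i < vector_num; i++) {
		memcpy(&tx[i], tx_saved, sizeof(*tx_saved));
		memcpy(&rx[i], rx_saved, sizeof(*rx_saved));
	}
}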
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 213f501b30bb..9e4cfbbf8dcd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -10,6 +10,8 @@
#ifndef __HNS3_ENET_H
#define __HNS3_ENET_H
+#include <linux/if_vlan.h>
+
#include "hnae3.h"
extern const char hns3_driver_version[];
@@ -460,15 +462,21 @@ enum hns3_link_mode_bits {
#define HNS3_INT_RL_MAX 0x00EC
#define HNS3_INT_RL_ENABLE_MASK 0x40
+#define HNS3_INT_ADAPT_DOWN_START 100
+
+struct hns3_enet_coalesce {
+ u16 int_gl;
+ u8 gl_adapt_enable;
+ enum hns3_flow_level_range flow_level;
+};
+
struct hns3_enet_ring_group {
/* array of pointers to rings */
struct hns3_enet_ring *ring;
u64 total_bytes; /* total bytes processed this group */
u64 total_packets; /* total packets processed this group */
u16 count;
- enum hns3_flow_level_range flow_level;
- u16 int_gl;
- u8 gl_adapt_enable;
+ struct hns3_enet_coalesce coal;
};
struct hns3_enet_tqp_vector {
@@ -491,6 +499,7 @@ struct hns3_enet_tqp_vector {
/* when it reaches 0, the interrupt coalesce parameters may be adjusted */
u8 int_adapt_down;
+ unsigned long last_jiffies;
} ____cacheline_internodealigned_in_smp;
enum hns3_udp_tnl_type {
@@ -523,8 +532,6 @@ struct hns3_nic_priv {
/* The most recently read link state */
int link;
u64 tx_timeout_count;
- enum hnae3_reset_type reset_level;
- unsigned long last_reset_time;
unsigned long state;
@@ -535,6 +542,7 @@ struct hns3_nic_priv {
struct notifier_block notifier_block;
/* Vxlan/Geneve information */
struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX];
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
};
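
/* The active_vlans bitmap added above records configured VLAN IDs so
 * hns3_restore_vlan (earlier hunk) can replay them after a PF reset,
 * which clears the hardware table. A standalone equivalent of the
 * BITS_TO_LONGS sizing and a set operation (sketch names only): */
#include <limits.h>

#define SKETCH_VLAN_N_VID 4096
#define SKETCH_BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

static unsigned long
sketch_active_vlans[(SKETCH_VLAN_N_VID + SKETCH_BITS_PER_LONG - 1) /
		    SKETCH_BITS_PER_LONG];

static void sketch_vlan_set(unsigned int vid)
{
	sketch_active_vlans[vid / SKETCH_BITS_PER_LONG] |=
		1UL << (vid % SKETCH_BITS_PER_LONG);
}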
union l3_hdr_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index b034c7f24eda..eb3c34f3cf87 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -74,19 +74,6 @@ struct hns3_link_mode_mapping {
u32 ethtool_link_mode;
};
-static const struct hns3_link_mode_mapping hns3_lm_map[] = {
- {HNS3_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
- {HNS3_LM_AUTONEG_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
- {HNS3_LM_TP_BIT, ETHTOOL_LINK_MODE_TP_BIT},
- {HNS3_LM_PAUSE_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
- {HNS3_LM_BACKPLANE_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
- {HNS3_LM_10BASET_HALF_BIT, ETHTOOL_LINK_MODE_10baseT_Half_BIT},
- {HNS3_LM_10BASET_FULL_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT},
- {HNS3_LM_100BASET_HALF_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT},
- {HNS3_LM_100BASET_FULL_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT},
- {HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
-};
-
static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
{
struct hnae3_handle *h = hns3_get_handle(ndev);
@@ -309,6 +296,9 @@ static void hns3_self_test(struct net_device *ndev,
struct hnae3_handle *h = priv->ae_handle;
int st_param[HNS3_SELF_TEST_TPYE_NUM][2];
bool if_running = netif_running(ndev);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ bool dis_vlan_filter;
+#endif
int test_index = 0;
u32 i;
@@ -323,6 +313,14 @@ static void hns3_self_test(struct net_device *ndev,
if (if_running)
dev_close(ndev);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ /* Disable the vlan filter, as the selftest does not support it */
+ dis_vlan_filter = (ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+ h->ae_algo->ops->enable_vlan_filter;
+ if (dis_vlan_filter)
+ h->ae_algo->ops->enable_vlan_filter(h, false);
+#endif
+
set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
for (i = 0; i < HNS3_SELF_TEST_TPYE_NUM; i++) {
@@ -345,28 +343,15 @@ static void hns3_self_test(struct net_device *ndev,
clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ if (dis_vlan_filter)
+ h->ae_algo->ops->enable_vlan_filter(h, true);
+#endif
+
if (if_running)
dev_open(ndev);
}
-static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
- bool is_advertised)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(hns3_lm_map); i++) {
- if (!(caps & hns3_lm_map[i].hns3_link_mode))
- continue;
-
- if (is_advertised)
- __set_bit(hns3_lm_map[i].ethtool_link_mode,
- cmd->link_modes.advertising);
- else
- __set_bit(hns3_lm_map[i].ethtool_link_mode,
- cmd->link_modes.supported);
- }
-}
-
static int hns3_get_sset_count(struct net_device *netdev, int stringset)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -578,18 +563,19 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
{
struct hnae3_handle *h = hns3_get_handle(netdev);
u32 flowctrl_adv = 0;
- u32 supported_caps;
- u32 advertised_caps;
- u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
u8 link_stat;
if (!h->ae_algo || !h->ae_algo->ops)
return -EOPNOTSUPP;
/* 1.auto_neg & speed & duplex from cmd */
- if (netdev->phydev)
+ if (netdev->phydev) {
phy_ethtool_ksettings_get(netdev->phydev, cmd);
- else if (h->ae_algo->ops->get_ksettings_an_result)
+
+ return 0;
+ }
+
+ if (h->ae_algo->ops->get_ksettings_an_result)
h->ae_algo->ops->get_ksettings_an_result(h,
&cmd->base.autoneg,
&cmd->base.speed,
@@ -603,62 +589,16 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
cmd->base.duplex = DUPLEX_UNKNOWN;
}
- /* 2.media_type get from bios parameter block */
- if (h->ae_algo->ops->get_media_type) {
- h->ae_algo->ops->get_media_type(h, &media_type);
-
- switch (media_type) {
- case HNAE3_MEDIA_TYPE_FIBER:
- cmd->base.port = PORT_FIBRE;
- supported_caps = HNS3_LM_FIBRE_BIT |
- HNS3_LM_AUTONEG_BIT |
- HNS3_LM_PAUSE_BIT |
- HNS3_LM_1000BASET_FULL_BIT;
-
- advertised_caps = supported_caps;
- break;
- case HNAE3_MEDIA_TYPE_COPPER:
- cmd->base.port = PORT_TP;
- supported_caps = HNS3_LM_TP_BIT |
- HNS3_LM_AUTONEG_BIT |
- HNS3_LM_PAUSE_BIT |
- HNS3_LM_1000BASET_FULL_BIT |
- HNS3_LM_100BASET_FULL_BIT |
- HNS3_LM_100BASET_HALF_BIT |
- HNS3_LM_10BASET_FULL_BIT |
- HNS3_LM_10BASET_HALF_BIT;
- advertised_caps = supported_caps;
- break;
- case HNAE3_MEDIA_TYPE_BACKPLANE:
- cmd->base.port = PORT_NONE;
- supported_caps = HNS3_LM_BACKPLANE_BIT |
- HNS3_LM_PAUSE_BIT |
- HNS3_LM_AUTONEG_BIT |
- HNS3_LM_1000BASET_FULL_BIT |
- HNS3_LM_100BASET_FULL_BIT |
- HNS3_LM_100BASET_HALF_BIT |
- HNS3_LM_10BASET_FULL_BIT |
- HNS3_LM_10BASET_HALF_BIT;
-
- advertised_caps = supported_caps;
- break;
- case HNAE3_MEDIA_TYPE_UNKNOWN:
- default:
- cmd->base.port = PORT_OTHER;
- supported_caps = 0;
- advertised_caps = 0;
- break;
- }
-
- if (!cmd->base.autoneg)
- advertised_caps &= ~HNS3_LM_AUTONEG_BIT;
-
- advertised_caps &= ~HNS3_LM_PAUSE_BIT;
+ /* 2.get link mode and port type */
+ if (h->ae_algo->ops->get_link_mode)
+ h->ae_algo->ops->get_link_mode(h,
+ cmd->link_modes.supported,
+ cmd->link_modes.advertising);
- /* now, map driver link modes to ethtool link modes */
- hns3_driv_to_eth_caps(supported_caps, cmd, false);
- hns3_driv_to_eth_caps(advertised_caps, cmd, true);
- }
+ cmd->base.port = PORT_NONE;
+ if (h->ae_algo->ops->get_port_type)
+ h->ae_algo->ops->get_port_type(h,
+ &cmd->base.port);
/* 3.mdix_ctrl&mdix get from phy reg */
if (h->ae_algo->ops->get_mdix_mode)
@@ -698,7 +638,7 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev)
if (!h->ae_algo || !h->ae_algo->ops ||
!h->ae_algo->ops->get_rss_key_size)
- return -EOPNOTSUPP;
+ return 0;
return h->ae_algo->ops->get_rss_key_size(h);
}
@@ -709,7 +649,7 @@ static u32 hns3_get_rss_indir_size(struct net_device *netdev)
if (!h->ae_algo || !h->ae_algo->ops ||
!h->ae_algo->ops->get_rss_indir_size)
- return -EOPNOTSUPP;
+ return 0;
return h->ae_algo->ops->get_rss_indir_size(h);
}
@@ -905,11 +845,13 @@ static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue,
tx_vector = priv->ring_data[queue].ring->tqp_vector;
rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
- cmd->use_adaptive_tx_coalesce = tx_vector->tx_group.gl_adapt_enable;
- cmd->use_adaptive_rx_coalesce = rx_vector->rx_group.gl_adapt_enable;
+ cmd->use_adaptive_tx_coalesce =
+ tx_vector->tx_group.coal.gl_adapt_enable;
+ cmd->use_adaptive_rx_coalesce =
+ rx_vector->rx_group.coal.gl_adapt_enable;
- cmd->tx_coalesce_usecs = tx_vector->tx_group.int_gl;
- cmd->rx_coalesce_usecs = rx_vector->rx_group.int_gl;
+ cmd->tx_coalesce_usecs = tx_vector->tx_group.coal.int_gl;
+ cmd->rx_coalesce_usecs = rx_vector->rx_group.coal.int_gl;
cmd->tx_coalesce_usecs_high = h->kinfo.int_rl_setting;
cmd->rx_coalesce_usecs_high = h->kinfo.int_rl_setting;
@@ -1029,14 +971,18 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev,
tx_vector = priv->ring_data[queue].ring->tqp_vector;
rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
- tx_vector->tx_group.gl_adapt_enable = cmd->use_adaptive_tx_coalesce;
- rx_vector->rx_group.gl_adapt_enable = cmd->use_adaptive_rx_coalesce;
+ tx_vector->tx_group.coal.gl_adapt_enable =
+ cmd->use_adaptive_tx_coalesce;
+ rx_vector->rx_group.coal.gl_adapt_enable =
+ cmd->use_adaptive_rx_coalesce;
- tx_vector->tx_group.int_gl = cmd->tx_coalesce_usecs;
- rx_vector->rx_group.int_gl = cmd->rx_coalesce_usecs;
+ tx_vector->tx_group.coal.int_gl = cmd->tx_coalesce_usecs;
+ rx_vector->rx_group.coal.int_gl = cmd->rx_coalesce_usecs;
- hns3_set_vector_coalesce_tx_gl(tx_vector, tx_vector->tx_group.int_gl);
- hns3_set_vector_coalesce_rx_gl(rx_vector, rx_vector->rx_group.int_gl);
+ hns3_set_vector_coalesce_tx_gl(tx_vector,
+ tx_vector->tx_group.coal.int_gl);
+ hns3_set_vector_coalesce_rx_gl(rx_vector,
+ rx_vector->rx_group.coal.int_gl);
hns3_set_vector_coalesce_rl(tx_vector, h->kinfo.int_rl_setting);
hns3_set_vector_coalesce_rl(rx_vector, h->kinfo.int_rl_setting);
@@ -1111,6 +1057,7 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
.get_channels = hns3_get_channels,
.get_coalesce = hns3_get_coalesce,
.set_coalesce = hns3_set_coalesce,
+ .get_link = hns3_get_link,
};
static const struct ethtool_ops hns3_ethtool_ops = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 3fd10a6bec53..ee3cbac6dfaa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -12,7 +12,7 @@
#include <linux/types.h>
#include <linux/io.h>
-#define HCLGE_CMDQ_TX_TIMEOUT 1000
+#define HCLGE_CMDQ_TX_TIMEOUT 30000
struct hclge_dev;
struct hclge_desc {
@@ -414,6 +414,8 @@ struct hclge_pf_res_cmd {
#define HCLGE_CFG_DEFAULT_SPEED_M GENMASK(23, 16)
#define HCLGE_CFG_RSS_SIZE_S 24
#define HCLGE_CFG_RSS_SIZE_M GENMASK(31, 24)
+#define HCLGE_CFG_SPEED_ABILITY_S 0
+#define HCLGE_CFG_SPEED_ABILITY_M GENMASK(7, 0)
struct hclge_cfg_param_cmd {
__le32 offset;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index 5018d6633133..955f0e3d5c95 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -144,6 +144,8 @@ static int hclge_map_update(struct hnae3_handle *h)
if (ret)
return ret;
+ hclge_rss_indir_init_cfg(hdev);
+
return hclge_rss_init_hw(hdev);
}
@@ -203,9 +205,11 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
{
+ u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC];
struct hclge_vport *vport = hclge_get_vport(h);
struct hclge_dev *hdev = vport->back;
u8 i, j, pfc_map, *prio_tc;
+ int ret;
memset(pfc, 0, sizeof(*pfc));
pfc->pfc_cap = hdev->pfc_max;
@@ -220,6 +224,18 @@ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
}
}
+ ret = hclge_pfc_tx_stats_get(hdev, requests);
+ if (ret)
+ return ret;
+
+ ret = hclge_pfc_rx_stats_get(hdev, indications);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ pfc->requests[i] = requests[i];
+ pfc->indications[i] = indications[i];
+ }
return 0;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 32bc6f68e297..2066dd734444 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -55,6 +55,8 @@ static const struct pci_device_id ae_algo_pci_tbl[] = {
{0, }
};
+MODULE_DEVICE_TABLE(pci, ae_algo_pci_tbl);
+
static const char hns3_nic_test_strs[][ETH_GSTRING_LEN] = {
"Mac Loopback test",
"Serdes Loopback test",
@@ -1024,6 +1026,45 @@ static int hclge_parse_speed(int speed_cmd, int *speed)
return 0;
}
+static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev,
+ u8 speed_ability)
+{
+ unsigned long *supported = hdev->hw.mac.supported;
+
+ if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+ supported);
+
+ if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+ supported);
+
+ if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+ supported);
+
+ if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+ supported);
+
+ if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+ supported);
+
+ set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
+ set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+}
+
+static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability)
+{
+ u8 media_type = hdev->hw.mac.media_type;
+
+ if (media_type != HNAE3_MEDIA_TYPE_FIBER)
+ return;
+
+ hclge_parse_fiber_link_mode(hdev, speed_ability);
+}
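
/* Sketch of the mapping above: each HCLGE_SUPPORT_*_BIT in the firmware's
 * speed_ability byte enables one ethtool link-mode bit. A table-driven
 * standalone equivalent (set_mode and the mode ids are placeholders): */
struct speed_map {
	unsigned char ability_bit;	/* e.g. HCLGE_SUPPORT_10G_BIT */
	int mode_bit;			/* e.g. an ETHTOOL_LINK_MODE_* id */
};

static void parse_ability(unsigned char speed_ability,
			  const struct speed_map *map, int n,
			  void (*set_mode)(int mode_bit))
{
	for (int i = 0; i < n; i++)
		if (speed_ability & map[i].ability_bit)
			set_mode(map[i].mode_bit);
}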
+
static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
{
struct hclge_cfg_param_cmd *req;
@@ -1072,6 +1113,10 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
req = (struct hclge_cfg_param_cmd *)desc[1].data;
cfg->numa_node_map = __le32_to_cpu(req->param[0]);
+
+ cfg->speed_ability = hnae_get_field(__le32_to_cpu(req->param[1]),
+ HCLGE_CFG_SPEED_ABILITY_M,
+ HCLGE_CFG_SPEED_ABILITY_S);
}
/* hclge_get_cfg: query the static parameter from flash
@@ -1160,6 +1205,8 @@ static int hclge_configure(struct hclge_dev *hdev)
return ret;
}
+ hclge_parse_link_mode(hdev, cfg.speed_ability);
+
if ((hdev->tc_max > HNAE3_MAX_TC) ||
(hdev->tc_max < 1)) {
dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
@@ -2702,7 +2749,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
return 0;
}
-static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
{
struct hclge_desc desc;
struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data;
@@ -2798,27 +2845,31 @@ static void hclge_reset(struct hclge_dev *hdev)
hclge_notify_client(hdev, HNAE3_UP_CLIENT);
}
-static void hclge_reset_event(struct hnae3_handle *handle,
- enum hnae3_reset_type reset)
+static void hclge_reset_event(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- dev_info(&hdev->pdev->dev,
- "Receive reset event , reset_type is %d", reset);
+ /* Check if this is a new reset request or if we are here just because
+ * the last reset attempt did not succeed and the watchdog hit us again.
+ * We know it is a new request if the last one did not occur very
+ * recently (watchdog timer = 5*HZ, so check after a sufficiently large
+ * time, say 4*5*HZ). For a new request, reset the "reset level" to PF
+ * reset.
+ */
+ if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
+ handle->reset_level = HNAE3_FUNC_RESET;
- switch (reset) {
- case HNAE3_FUNC_RESET:
- case HNAE3_CORE_RESET:
- case HNAE3_GLOBAL_RESET:
- /* request reset & schedule reset task */
- set_bit(reset, &hdev->reset_request);
- hclge_reset_task_schedule(hdev);
- break;
- default:
- dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset);
- break;
- }
+ dev_info(&hdev->pdev->dev, "received reset event, reset type is %d",
+ handle->reset_level);
+
+ /* request reset & schedule reset task */
+ set_bit(handle->reset_level, &hdev->reset_request);
+ hclge_reset_task_schedule(hdev);
+
+ if (handle->reset_level < HNAE3_GLOBAL_RESET)
+ handle->reset_level++;
+
+ handle->last_reset_time = jiffies;
}
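
/* Standalone model of the escalation policy above, assuming the
 * HNAE3_*_RESET enum orders FUNC < CORE < GLOBAL as the increment in the
 * hunk implies: a stale request (no reset in ~4 watchdog periods)
 * restarts at FUNC level; each repeated request escalates one level,
 * capped at GLOBAL. */
enum sketch_reset_level { SK_FUNC_RESET = 1, SK_CORE_RESET, SK_GLOBAL_RESET };

static enum sketch_reset_level
next_reset_level(enum sketch_reset_level cur, int request_is_stale)
{
	if (request_is_stale)
		return SK_FUNC_RESET;
	return cur < SK_GLOBAL_RESET ? cur + 1 : SK_GLOBAL_RESET;
}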
static void hclge_reset_subtask(struct hclge_dev *hdev)
@@ -2969,6 +3020,24 @@ static int hclge_get_vector_index(struct hclge_dev *hdev, int vector)
return -EINVAL;
}
+static int hclge_put_vector(struct hnae3_handle *handle, int vector)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int vector_id;
+
+ vector_id = hclge_get_vector_index(hdev, vector);
+ if (vector_id < 0) {
+ dev_err(&hdev->pdev->dev,
+ "Get vector index fail. vector_id =%d\n", vector_id);
+ return vector_id;
+ }
+
+ hclge_free_vector(hdev, vector_id);
+
+ return 0;
+}
+
static u32 hclge_get_rss_key_size(struct hnae3_handle *handle)
{
return HCLGE_RSS_KEY_SIZE;
@@ -2979,31 +3048,6 @@ static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
return HCLGE_RSS_IND_TBL_SIZE;
}
-static int hclge_get_rss_algo(struct hclge_dev *hdev)
-{
- struct hclge_rss_config_cmd *req;
- struct hclge_desc desc;
- int rss_hash_algo;
- int ret;
-
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, true);
-
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Get link status error, status =%d\n", ret);
- return ret;
- }
-
- req = (struct hclge_rss_config_cmd *)desc.data;
- rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK);
-
- if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ)
- return ETH_RSS_HASH_TOP;
-
- return -EINVAL;
-}
-
static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
const u8 hfunc, const u8 *key)
{
@@ -3042,7 +3086,7 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
return 0;
}
-static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
+static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u8 *indir)
{
struct hclge_rss_indirection_table_cmd *req;
struct hclge_desc desc;
@@ -3116,14 +3160,16 @@ static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
req = (struct hclge_rss_input_tuple_cmd *)desc.data;
- req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
- req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv6_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
- req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+ /* Get the tuple cfg from pf */
+ req->ipv4_tcp_en = hdev->vport[0].rss_tuple_sets.ipv4_tcp_en;
+ req->ipv4_udp_en = hdev->vport[0].rss_tuple_sets.ipv4_udp_en;
+ req->ipv4_sctp_en = hdev->vport[0].rss_tuple_sets.ipv4_sctp_en;
+ req->ipv4_fragment_en = hdev->vport[0].rss_tuple_sets.ipv4_fragment_en;
+ req->ipv6_tcp_en = hdev->vport[0].rss_tuple_sets.ipv6_tcp_en;
+ req->ipv6_udp_en = hdev->vport[0].rss_tuple_sets.ipv6_udp_en;
+ req->ipv6_sctp_en = hdev->vport[0].rss_tuple_sets.ipv6_sctp_en;
+ req->ipv6_fragment_en = hdev->vport[0].rss_tuple_sets.ipv6_fragment_en;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -3138,12 +3184,11 @@ static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
u8 *key, u8 *hfunc)
{
struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
int i;
/* Get hash algorithm */
if (hfunc)
- *hfunc = hclge_get_rss_algo(hdev);
+ *hfunc = vport->rss_algo;
/* Get the RSS Key required by the user */
if (key)
@@ -3167,8 +3212,6 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
/* Set the RSS Hash Key if specified by the user */
if (key) {
- /* Update the shadow RSS key with user specified qids */
- memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
if (hfunc == ETH_RSS_HASH_TOP ||
hfunc == ETH_RSS_HASH_NO_CHANGE)
@@ -3178,6 +3221,10 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
ret = hclge_set_rss_algo_key(hdev, hash_algo, key);
if (ret)
return ret;
+
+ /* Update the shadow RSS key with user specified qids */
+ memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
+ vport->rss_algo = hash_algo;
}
/* Update the shadow RSS table with user specified qids */
@@ -3185,8 +3232,7 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
vport->rss_indirection_tbl[i] = indir[i];
/* Update the hardware */
- ret = hclge_set_rss_indir_table(hdev, indir);
- return ret;
+ return hclge_set_rss_indir_table(hdev, vport->rss_indirection_tbl);
}
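
/* The RSS rework above consistently writes hardware first and updates the
 * per-vport shadow copy only on success, so the shadow never gets ahead
 * of what the device actually holds. Minimal shape of that pattern (the
 * hw_write callback is a placeholder): */
#include <string.h>

static int set_with_shadow(int (*hw_write)(const void *cfg, size_t len),
			   void *shadow, const void *cfg, size_t len)
{
	int ret = hw_write(cfg, len);

	if (ret)
		return ret;		/* hardware rejected it: keep old shadow */
	memcpy(shadow, cfg, len);	/* commit to shadow only on success */
	return 0;
}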
static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc)
@@ -3229,15 +3275,16 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
return -EINVAL;
req = (struct hclge_rss_input_tuple_cmd *)desc.data;
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Read rss tuple fail, status = %d\n", ret);
- return ret;
- }
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
- hclge_cmd_reuse_desc(&desc, false);
+ req->ipv4_tcp_en = vport->rss_tuple_sets.ipv4_tcp_en;
+ req->ipv4_udp_en = vport->rss_tuple_sets.ipv4_udp_en;
+ req->ipv4_sctp_en = vport->rss_tuple_sets.ipv4_sctp_en;
+ req->ipv4_fragment_en = vport->rss_tuple_sets.ipv4_fragment_en;
+ req->ipv6_tcp_en = vport->rss_tuple_sets.ipv6_tcp_en;
+ req->ipv6_udp_en = vport->rss_tuple_sets.ipv6_udp_en;
+ req->ipv6_sctp_en = vport->rss_tuple_sets.ipv6_sctp_en;
+ req->ipv6_fragment_en = vport->rss_tuple_sets.ipv6_fragment_en;
tuple_sets = hclge_get_rss_hash_bits(nfc);
switch (nfc->flow_type) {
@@ -3274,52 +3321,49 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
}
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret)
+ if (ret) {
dev_err(&hdev->pdev->dev,
"Set rss tuple fail, status = %d\n", ret);
+ return ret;
+ }
- return ret;
+ vport->rss_tuple_sets.ipv4_tcp_en = req->ipv4_tcp_en;
+ vport->rss_tuple_sets.ipv4_udp_en = req->ipv4_udp_en;
+ vport->rss_tuple_sets.ipv4_sctp_en = req->ipv4_sctp_en;
+ vport->rss_tuple_sets.ipv4_fragment_en = req->ipv4_fragment_en;
+ vport->rss_tuple_sets.ipv6_tcp_en = req->ipv6_tcp_en;
+ vport->rss_tuple_sets.ipv6_udp_en = req->ipv6_udp_en;
+ vport->rss_tuple_sets.ipv6_sctp_en = req->ipv6_sctp_en;
+ vport->rss_tuple_sets.ipv6_fragment_en = req->ipv6_fragment_en;
+ return 0;
}
static int hclge_get_rss_tuple(struct hnae3_handle *handle,
struct ethtool_rxnfc *nfc)
{
struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
- struct hclge_rss_input_tuple_cmd *req;
- struct hclge_desc desc;
u8 tuple_sets;
- int ret;
nfc->data = 0;
- req = (struct hclge_rss_input_tuple_cmd *)desc.data;
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Read rss tuple fail, status = %d\n", ret);
- return ret;
- }
-
switch (nfc->flow_type) {
case TCP_V4_FLOW:
- tuple_sets = req->ipv4_tcp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv4_tcp_en;
break;
case UDP_V4_FLOW:
- tuple_sets = req->ipv4_udp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv4_udp_en;
break;
case TCP_V6_FLOW:
- tuple_sets = req->ipv6_tcp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv6_tcp_en;
break;
case UDP_V6_FLOW:
- tuple_sets = req->ipv6_udp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv6_udp_en;
break;
case SCTP_V4_FLOW:
- tuple_sets = req->ipv4_sctp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv4_sctp_en;
break;
case SCTP_V6_FLOW:
- tuple_sets = req->ipv6_sctp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv6_sctp_en;
break;
case IPV4_FLOW:
case IPV6_FLOW:
@@ -3354,50 +3398,28 @@ static int hclge_get_tc_size(struct hnae3_handle *handle)
int hclge_rss_init_hw(struct hclge_dev *hdev)
{
- const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
struct hclge_vport *vport = hdev->vport;
+ u8 *rss_indir = vport[0].rss_indirection_tbl;
+ u16 rss_size = vport[0].alloc_rss_size;
+ u8 *key = vport[0].rss_hash_key;
+ u8 hfunc = vport[0].rss_algo;
u16 tc_offset[HCLGE_MAX_TC_NUM];
- u8 rss_key[HCLGE_RSS_KEY_SIZE];
u16 tc_valid[HCLGE_MAX_TC_NUM];
u16 tc_size[HCLGE_MAX_TC_NUM];
- u32 *rss_indir = NULL;
- u16 rss_size = 0, roundup_size;
- const u8 *key;
- int i, ret, j;
-
- rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
- if (!rss_indir)
- return -ENOMEM;
-
- /* Get default RSS key */
- netdev_rss_key_fill(rss_key, HCLGE_RSS_KEY_SIZE);
-
- /* Initialize RSS indirect table for each vport */
- for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
- for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
- vport[j].rss_indirection_tbl[i] =
- i % vport[j].alloc_rss_size;
-
- /* vport 0 is for PF */
- if (j != 0)
- continue;
+ u16 roundup_size;
+ int i, ret;
- rss_size = vport[j].alloc_rss_size;
- rss_indir[i] = vport[j].rss_indirection_tbl[i];
- }
- }
ret = hclge_set_rss_indir_table(hdev, rss_indir);
if (ret)
- goto err;
+ return ret;
- key = rss_key;
ret = hclge_set_rss_algo_key(hdev, hfunc, key);
if (ret)
- goto err;
+ return ret;
ret = hclge_set_rss_input_tuple(hdev);
if (ret)
- goto err;
+ return ret;
/* Each TC has the same queue size, and tc_size set to hardware is
* the log2 of the roundup power of two of rss_size; the actual queue
@@ -3407,8 +3429,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
dev_err(&hdev->pdev->dev,
"Configure rss tc size failed, invalid TC_SIZE = %d\n",
rss_size);
- ret = -EINVAL;
- goto err;
+ return -EINVAL;
}
roundup_size = roundup_pow_of_two(rss_size);
@@ -3425,12 +3446,50 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
tc_offset[i] = rss_size * i;
}
- ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+ return hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+}
-err:
- kfree(rss_indir);
+void hclge_rss_indir_init_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int i, j;
- return ret;
+ for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
+ for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+ vport[j].rss_indirection_tbl[i] =
+ i % vport[j].alloc_rss_size;
+ }
+}
+
+static void hclge_rss_init_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int i;
+
+ for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ vport[i].rss_tuple_sets.ipv4_tcp_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv4_udp_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv4_sctp_en =
+ HCLGE_RSS_INPUT_TUPLE_SCTP;
+ vport[i].rss_tuple_sets.ipv4_fragment_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv6_tcp_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv6_udp_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv6_sctp_en =
+ HCLGE_RSS_INPUT_TUPLE_SCTP;
+ vport[i].rss_tuple_sets.ipv6_fragment_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+ vport[i].rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+
+ netdev_rss_key_fill(vport[i].rss_hash_key, HCLGE_RSS_KEY_SIZE);
+ }
+
+ hclge_rss_indir_init_cfg(hdev);
}
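
/* The defaults above spread indirection-table entries evenly over the
 * allocated RSS queues: entry i maps to queue i % rss_size. Standalone
 * illustration: */
static void fill_indir_defaults(unsigned char *tbl, int tbl_size,
				int rss_size)
{
	for (int i = 0; i < tbl_size; i++)
		tbl[i] = i % rss_size;
}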
int hclge_bind_ring_with_vector(struct hclge_vport *vport,
@@ -3525,6 +3584,9 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle,
struct hclge_dev *hdev = vport->back;
int vector_id, ret;
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ return 0;
+
vector_id = hclge_get_vector_index(hdev, vector);
if (vector_id < 0) {
dev_err(&handle->pdev->dev,
@@ -3533,18 +3595,13 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle,
}
ret = hclge_bind_ring_with_vector(vport, vector_id, false, ring_chain);
- if (ret) {
+ if (ret)
dev_err(&handle->pdev->dev,
"Unmap ring from vector fail. vectorid=%d, ret =%d\n",
vector_id,
ret);
- return ret;
- }
- /* Free this MSIX or MSI vector */
- hclge_free_vector(hdev, vector_id);
-
- return 0;
+ return ret;
}
int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
@@ -3717,32 +3774,26 @@ static int hclge_ae_start(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- int i, queue_id, ret;
+ int i, ret;
- for (i = 0; i < vport->alloc_tqps; i++) {
- /* todo clear interrupt */
- /* ring enable */
- queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
- if (queue_id < 0) {
- dev_warn(&hdev->pdev->dev,
- "Get invalid queue id, ignore it\n");
- continue;
- }
+ for (i = 0; i < vport->alloc_tqps; i++)
+ hclge_tqp_enable(hdev, i, 0, true);
- hclge_tqp_enable(hdev, queue_id, 0, true);
- }
/* mac enable */
hclge_cfg_mac_mode(hdev, true);
clear_bit(HCLGE_STATE_DOWN, &hdev->state);
mod_timer(&hdev->service_timer, jiffies + HZ);
+ /* reset tqp stats */
+ hclge_reset_tqp_stats(handle);
+
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ return 0;
+
ret = hclge_mac_start_phy(hdev);
if (ret)
return ret;
- /* reset tqp stats */
- hclge_reset_tqp_stats(handle);
-
return 0;
}
@@ -3750,19 +3801,17 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- int i, queue_id;
+ int i;
- for (i = 0; i < vport->alloc_tqps; i++) {
- /* Ring disable */
- queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
- if (queue_id < 0) {
- dev_warn(&hdev->pdev->dev,
- "Get invalid queue id, ignore it\n");
- continue;
- }
+ del_timer_sync(&hdev->service_timer);
+ cancel_work_sync(&hdev->service_task);
+
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ return;
+
+ for (i = 0; i < vport->alloc_tqps; i++)
+ hclge_tqp_enable(hdev, i, 0, false);
- hclge_tqp_enable(hdev, queue_id, 0, false);
- }
/* Mac disable */
hclge_cfg_mac_mode(hdev, false);
@@ -3770,6 +3819,7 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
/* reset tqp stats */
hclge_reset_tqp_stats(handle);
+ hclge_update_link_status(hdev);
}
static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
@@ -3790,11 +3840,11 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
if ((!resp_code) || (resp_code == 1)) {
return_status = 0;
} else if (resp_code == 2) {
- return_status = -EIO;
+ return_status = -ENOSPC;
dev_err(&hdev->pdev->dev,
"add mac addr failed for uc_overflow.\n");
} else if (resp_code == 3) {
- return_status = -EIO;
+ return_status = -ENOSPC;
dev_err(&hdev->pdev->dev,
"add mac addr failed for mc_overflow.\n");
} else {
@@ -3806,7 +3856,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
if (!resp_code) {
return_status = 0;
} else if (resp_code == 1) {
- return_status = -EIO;
+ return_status = -ENOENT;
dev_dbg(&hdev->pdev->dev,
"remove mac addr failed for miss.\n");
} else {
@@ -3818,7 +3868,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
if (!resp_code) {
return_status = 0;
} else if (resp_code == 1) {
- return_status = -EIO;
+ return_status = -ENOENT;
dev_dbg(&hdev->pdev->dev,
"lookup mac addr failed for miss.\n");
} else {
@@ -3827,7 +3877,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
resp_code);
}
} else {
- return_status = -EIO;
+ return_status = -EINVAL;
dev_err(&hdev->pdev->dev,
"unknown opcode for get_mac_vlan_cmd_status,opcode=%d.\n",
op);
@@ -4118,8 +4168,9 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
{
struct hclge_dev *hdev = vport->back;
struct hclge_mac_vlan_tbl_entry_cmd req;
- enum hclge_cmd_status status;
+ struct hclge_desc desc;
u16 egress_port = 0;
+ int ret;
/* mac addr check */
if (is_zero_ether_addr(addr) ||
@@ -4151,9 +4202,23 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
hclge_prepare_mac_addr(&req, addr);
- status = hclge_add_mac_vlan_tbl(vport, &req, NULL);
+ /* Look up the mac address in the mac_vlan table, and add
+ * it if the entry does not exist. Duplicate unicast entries
+ * are not allowed in the mac_vlan table.
+ */
+ ret = hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false);
+ if (ret == -ENOENT)
+ return hclge_add_mac_vlan_tbl(vport, &req, NULL);
+
+ /* check if we just hit the duplicate */
+ if (!ret)
+ ret = -EINVAL;
- return status;
+ dev_err(&hdev->pdev->dev,
+ "PF failed to add unicast entry(%pM) in the MAC table\n",
+ addr);
+
+ return ret;
}
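
/* Sketch of the duplicate guard above: look the address up first and add
 * only on a definite miss (-ENOENT); a successful lookup means the
 * unicast entry already exists, which the table forbids. The callbacks
 * stand in for the real mac_vlan table ops. */
#include <errno.h>

static int add_unique(int (*lookup)(const unsigned char *mac),
		      int (*add)(const unsigned char *mac),
		      const unsigned char *mac)
{
	int ret = lookup(mac);

	if (ret == -ENOENT)
		return add(mac);	/* genuine miss: safe to add */
	return ret ? ret : -EINVAL;	/* found == duplicate */
}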
static int hclge_rm_uc_addr(struct hnae3_handle *handle,
@@ -4169,7 +4234,7 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
{
struct hclge_dev *hdev = vport->back;
struct hclge_mac_vlan_tbl_entry_cmd req;
- enum hclge_cmd_status status;
+ int ret;
/* mac addr check */
if (is_zero_ether_addr(addr) ||
@@ -4185,9 +4250,9 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
hclge_prepare_mac_addr(&req, addr);
- status = hclge_remove_mac_vlan_tbl(vport, &req);
+ ret = hclge_remove_mac_vlan_tbl(vport, &req);
- return status;
+ return ret;
}
static int hclge_add_mc_addr(struct hnae3_handle *handle,
@@ -4392,7 +4457,8 @@ static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p)
ether_addr_copy(p, hdev->hw.mac.mac_addr);
}
-static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
+static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
+ bool is_first)
{
const unsigned char *new_addr = (const unsigned char *)p;
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -4409,11 +4475,9 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
return -EINVAL;
}
- ret = hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr);
- if (ret)
+ if (!is_first && hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr))
dev_warn(&hdev->pdev->dev,
- "remove old uc mac address fail, ret =%d.\n",
- ret);
+ "remove old uc mac address fail.\n");
ret = hclge_add_uc_addr(handle, new_addr);
if (ret) {
@@ -4421,17 +4485,15 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
"add uc mac address fail, ret =%d.\n",
ret);
- ret = hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr);
- if (ret) {
+ if (!is_first &&
+ hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr))
dev_err(&hdev->pdev->dev,
- "restore uc mac address fail, ret =%d.\n",
- ret);
- }
+ "restore uc mac address fail.\n");
return -EIO;
}
- ret = hclge_mac_pause_addr_cfg(hdev, new_addr);
+ ret = hclge_pause_addr_cfg(hdev, new_addr);
if (ret) {
dev_err(&hdev->pdev->dev,
"configure mac pause address fail, ret =%d.\n",
@@ -4771,11 +4833,9 @@ static int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
return hclge_set_vlan_rx_offload_cfg(vport);
}
-static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mtu)
{
- struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_config_max_frm_size_cmd *req;
- struct hclge_dev *hdev = vport->back;
struct hclge_desc desc;
int max_frm_size;
int ret;
@@ -4804,6 +4864,27 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
return 0;
}
+static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ ret = hclge_set_mac_mtu(hdev, new_mtu);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Change mtu fail, ret =%d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_buffer_alloc(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "Allocate buffer fail, ret =%d\n", ret);
+
+ return ret;
+}
+
static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
bool enable)
{
@@ -4848,21 +4929,39 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
return hnae_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
}
+static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
+ u16 queue_id)
+{
+ struct hnae3_queue *queue;
+ struct hclge_tqp *tqp;
+
+ queue = handle->kinfo.tqp[queue_id];
+ tqp = container_of(queue, struct hclge_tqp, q);
+
+ return tqp->index;
+}
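
/* The helper above maps a handle-local queue id to the device-global tqp
 * index by recovering the enclosing hclge_tqp from its embedded queue,
 * i.e. the container_of idiom. Spelled out with stand-in types: */
#include <stddef.h>

struct sk_queue { int dummy; };
struct sk_tqp { unsigned short index; struct sk_queue q; };

static unsigned short sketch_global_qid(struct sk_queue *queue)
{
	struct sk_tqp *tqp = (struct sk_tqp *)((char *)queue -
					       offsetof(struct sk_tqp, q));
	return tqp->index;
}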
+
void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
int reset_try_times = 0;
int reset_status;
+ u16 queue_gid;
int ret;
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ return;
+
+ queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
+
ret = hclge_tqp_enable(hdev, queue_id, 0, false);
if (ret) {
dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
return;
}
- ret = hclge_send_reset_tqp_cmd(hdev, queue_id, true);
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
if (ret) {
dev_warn(&hdev->pdev->dev,
"Send reset tqp cmd fail, ret = %d\n", ret);
@@ -4873,7 +4972,7 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
/* Wait for tqp hw reset */
msleep(20);
- reset_status = hclge_get_reset_status(hdev, queue_id);
+ reset_status = hclge_get_reset_status(hdev, queue_gid);
if (reset_status)
break;
}
@@ -4883,7 +4982,7 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
return;
}
- ret = hclge_send_reset_tqp_cmd(hdev, queue_id, false);
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
if (ret) {
dev_warn(&hdev->pdev->dev,
"Deassert the soft reset fail, ret = %d\n", ret);
@@ -4891,6 +4990,43 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
}
}
+void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
+{
+ struct hclge_dev *hdev = vport->back;
+ int reset_try_times = 0;
+ int reset_status;
+ u16 queue_gid;
+ int ret;
+
+ queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
+
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
+ if (ret) {
+ dev_warn(&hdev->pdev->dev,
+ "Send reset tqp cmd fail, ret = %d\n", ret);
+ return;
+ }
+
+ reset_try_times = 0;
+ while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
+ /* Wait for tqp hw reset */
+ msleep(20);
+ reset_status = hclge_get_reset_status(hdev, queue_gid);
+ if (reset_status)
+ break;
+ }
+
+ if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
+ dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
+ return;
+ }
+
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
+ if (ret)
+ dev_warn(&hdev->pdev->dev,
+ "Deassert the soft reset fail, ret = %d\n", ret);
+}
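
/* Both tqp reset paths above share this poll shape: assert reset, poll a
 * ready flag up to HCLGE_TQP_RESET_TRY_TIMES with a 20 ms sleep between
 * attempts, then de-assert. Generic standalone form (the callbacks are
 * placeholders): */
static int poll_reset_done(int (*reset_status)(void),
			   void (*sleep_ms)(unsigned int),
			   int max_tries)
{
	for (int tries = 0; tries < max_tries; tries++) {
		sleep_ms(20);		/* wait for tqp hw reset */
		if (reset_status())
			return 0;	/* hardware reports done */
	}
	return -1;			/* timed out */
}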
+
static u32 hclge_get_fw_version(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -5376,11 +5512,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
return ret;
}
- ret = hclge_buffer_alloc(hdev);
- if (ret) {
- dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
- return ret;
- }
ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
if (ret) {
@@ -5400,6 +5531,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
+ hclge_rss_init_cfg(hdev);
ret = hclge_rss_init_hw(hdev);
if (ret) {
dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
@@ -5486,12 +5618,6 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
- ret = hclge_buffer_alloc(hdev);
- if (ret) {
- dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
- return ret;
- }
-
ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
if (ret) {
dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
@@ -5504,9 +5630,9 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
- ret = hclge_tm_schd_init(hdev);
+ ret = hclge_tm_init_hw(hdev);
if (ret) {
- dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
+ dev_err(&pdev->dev, "tm init hw fail, ret =%d\n", ret);
return ret;
}
@@ -5997,6 +6123,42 @@ static int hclge_update_led_status(struct hclge_dev *hdev)
HCLGE_LED_NO_CHANGE);
}
+static void hclge_get_link_mode(struct hnae3_handle *handle,
+ unsigned long *supported,
+ unsigned long *advertising)
+{
+ unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ unsigned int idx = 0;
+
+ for (; idx < size; idx++) {
+ supported[idx] = hdev->hw.mac.supported[idx];
+ advertising[idx] = hdev->hw.mac.advertising[idx];
+ }
+}
+
+static void hclge_get_port_type(struct hnae3_handle *handle,
+ u8 *port_type)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u8 media_type = hdev->hw.mac.media_type;
+
+ switch (media_type) {
+ case HNAE3_MEDIA_TYPE_FIBER:
+ *port_type = PORT_FIBRE;
+ break;
+ case HNAE3_MEDIA_TYPE_COPPER:
+ *port_type = PORT_TP;
+ break;
+ case HNAE3_MEDIA_TYPE_UNKNOWN:
+ default:
+ *port_type = PORT_OTHER;
+ break;
+ }
+}
+
static const struct hnae3_ae_ops hclge_ops = {
.init_ae_dev = hclge_init_ae_dev,
.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -6005,6 +6167,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.map_ring_to_vector = hclge_map_ring_to_vector,
.unmap_ring_from_vector = hclge_unmap_ring_frm_vector,
.get_vector = hclge_get_vector,
+ .put_vector = hclge_put_vector,
.set_promisc_mode = hclge_set_promisc_mode,
.set_loopback = hclge_set_loopback,
.start = hclge_ae_start,
@@ -6051,6 +6214,8 @@ static const struct hnae3_ae_ops hclge_ops = {
.get_regs_len = hclge_get_regs_len,
.get_regs = hclge_get_regs,
.set_led_id = hclge_set_led_id,
+ .get_link_mode = hclge_get_link_mode,
+ .get_port_type = hclge_get_port_type,
};
static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index d99a76a9557c..0f4157e71282 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -106,6 +106,12 @@
#define HCLGE_MAC_MIN_FRAME 64
#define HCLGE_MAC_MAX_FRAME 9728
+#define HCLGE_SUPPORT_1G_BIT BIT(0)
+#define HCLGE_SUPPORT_10G_BIT BIT(1)
+#define HCLGE_SUPPORT_25G_BIT BIT(2)
+#define HCLGE_SUPPORT_50G_BIT BIT(3)
+#define HCLGE_SUPPORT_100G_BIT BIT(4)
+
enum HCLGE_DEV_STATE {
HCLGE_STATE_REINITING,
HCLGE_STATE_DOWN,
@@ -170,6 +176,8 @@ struct hclge_mac {
struct phy_device *phydev;
struct mii_bus *mdio_bus;
phy_interface_t phy_if;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
};
struct hclge_hw {
@@ -236,6 +244,7 @@ struct hclge_cfg {
u8 mac_addr[ETH_ALEN];
u8 default_speed;
u32 numa_node_map;
+ u8 speed_ability;
};
struct hclge_tm_info {
@@ -573,12 +582,27 @@ struct hclge_rx_vtag_cfg {
bool vlan2_vlan_prionly;/* Outer VLAN Tag up to descriptor Enable */
};
+struct hclge_rss_tuple_cfg {
+ u8 ipv4_tcp_en;
+ u8 ipv4_udp_en;
+ u8 ipv4_sctp_en;
+ u8 ipv4_fragment_en;
+ u8 ipv6_tcp_en;
+ u8 ipv6_udp_en;
+ u8 ipv6_sctp_en;
+ u8 ipv6_fragment_en;
+};
+
struct hclge_vport {
u16 alloc_tqps; /* Allocated Tx/Rx queues */
u8 rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
/* User configured lookup table entries */
u8 rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+ int rss_algo; /* User configured hash algorithm */
+ /* User configured rss tuple sets */
+ struct hclge_rss_tuple_cfg rss_tuple_sets;
+
u16 alloc_rss_size;
u16 qs_offset;
@@ -627,8 +651,11 @@ int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
int hclge_buffer_alloc(struct hclge_dev *hdev);
int hclge_rss_init_hw(struct hclge_dev *hdev);
+void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
void hclge_mbx_handler(struct hclge_dev *hdev);
void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
int hclge_cfg_flowctrl(struct hclge_dev *hdev);
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index f38fc5ce9f51..a6f7ffa9c259 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -79,6 +79,18 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
return status;
}
+static int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+{
+ u8 msg_data[2];
+ u8 dest_vfid;
+
+ dest_vfid = (u8)vport->vport_id;
+
+ /* send reset assert message to VF */

+ return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+ HCLGE_MBX_ASSERTING_RESET, dest_vfid);
+}
+
static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
{
struct hnae3_ring_chain_node *chain_tmp, *chain;
@@ -105,14 +117,17 @@ static int hclge_get_ring_chain_from_mbx(
struct hnae3_ring_chain_node *ring_chain,
struct hclge_vport *vport)
{
-#define HCLGE_RING_NODE_VARIABLE_NUM 3
-#define HCLGE_RING_MAP_MBX_BASIC_MSG_NUM 3
struct hnae3_ring_chain_node *cur_chain, *new_chain;
int ring_num;
int i;
ring_num = req->msg[2];
+ if (ring_num > ((HCLGE_MBX_VF_MSG_DATA_NUM -
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) /
+ HCLGE_MBX_RING_NODE_VARIABLE_NUM))
+ return -ENOMEM;
+
hnae_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
ring_chain->tqp_index =
hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]);
@@ -128,18 +143,18 @@ static int hclge_get_ring_chain_from_mbx(
goto err;
hnae_set_bit(new_chain->flag, HNAE3_RING_TYPE_B,
- req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
- HCLGE_RING_MAP_MBX_BASIC_MSG_NUM]);
+ req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM]);
new_chain->tqp_index =
hclge_get_queue_id(vport->nic.kinfo.tqp
- [req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
- HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 1]]);
+ [req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 1]]);
hnae_set_field(new_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
HCLGE_INT_GL_IDX_S,
- req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
- HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 2]);
+ req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 2]);
cur_chain->next = new_chain;
cur_chain = new_chain;
@@ -196,6 +211,8 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
hclge_rm_uc_addr_common(vport, old_addr);
status = hclge_add_uc_addr_common(vport, mac_addr);
+ if (status)
+ hclge_add_uc_addr_common(vport, old_addr);
} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_ADD) {
status = hclge_add_uc_addr_common(vport, mac_addr);
} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_REMOVE) {
@@ -291,7 +308,7 @@ static int hclge_get_vf_queue_info(struct hclge_vport *vport,
/* get the queue related info */
memcpy(&resp_data[0], &vport->alloc_tqps, sizeof(u16));
- memcpy(&resp_data[2], &hdev->rss_size_max, sizeof(u16));
+ memcpy(&resp_data[2], &vport->nic.kinfo.rss_size, sizeof(u16));
memcpy(&resp_data[4], &hdev->num_desc, sizeof(u16));
memcpy(&resp_data[6], &hdev->rx_buf_len, sizeof(u16));
@@ -304,27 +321,61 @@ static int hclge_get_link_info(struct hclge_vport *vport,
{
struct hclge_dev *hdev = vport->back;
u16 link_status;
- u8 msg_data[2];
+ u8 msg_data[8];
u8 dest_vfid;
+ u16 duplex;
/* mac.link can only be 0 or 1 */
link_status = (u16)hdev->hw.mac.link;
+ duplex = hdev->hw.mac.duplex;
memcpy(&msg_data[0], &link_status, sizeof(u16));
+ memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32));
+ memcpy(&msg_data[6], &duplex, sizeof(u16));
dest_vfid = mbx_req->mbx_src_vfid;
/* send this requested info to VF */
- return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+ return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid);
}
-static void hclge_reset_vf_queue(struct hclge_vport *vport,
- struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
{
u16 queue_id;
memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id));
- hclge_reset_tqp(&vport->nic, queue_id);
+ hclge_reset_vf_queue(vport, queue_id);
+
+ /* send response msg to VF after queue reset complete */
+ hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0);
+}
+
+static void hclge_reset_vf(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
+ mbx_req->mbx_src_vfid);
+
+ /* Acknowledge VF that PF is now about to assert the reset for the VF.
+ * On receiving this message VF will get into pending state and will
+ * start polling for the hardware reset completion status.
+ */
+ ret = hclge_inform_reset_assert_to_vf(vport);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "PF fail(%d) to inform VF(%d)of reset, reset failed!\n",
+ ret, vport->vport_id);
+ return;
+ }
+
+ dev_warn(&hdev->pdev->dev, "PF is now resetting VF %d.\n",
+ mbx_req->mbx_src_vfid);
+ /* reset this virtual function */
+ hclge_func_reset_cmd(hdev, mbx_req->mbx_src_vfid);
}
void hclge_mbx_handler(struct hclge_dev *hdev)
@@ -333,11 +384,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
struct hclge_mbx_vf_to_pf_cmd *req;
struct hclge_vport *vport;
struct hclge_desc *desc;
- int ret;
+ int ret, flag;
+ flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
/* handle all the mailbox requests in the queue */
- while (hnae_get_bit(crq->desc[crq->next_to_use].flag,
- HCLGE_CMDQ_RX_OUTVLD_B)) {
+ while (hnae_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B)) {
desc = &crq->desc[crq->next_to_use];
req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
@@ -360,7 +411,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
ret);
break;
case HCLGE_MBX_SET_UNICAST:
- ret = hclge_set_vf_uc_mac_addr(vport, req, false);
+ ret = hclge_set_vf_uc_mac_addr(vport, req, true);
if (ret)
dev_err(&hdev->pdev->dev,
"PF fail(%d) to set VF UC MAC Addr\n",
@@ -402,7 +453,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
ret);
break;
case HCLGE_MBX_QUEUE_RESET:
- hclge_reset_vf_queue(vport, req);
+ hclge_mbx_reset_vf_queue(vport, req);
+ break;
+ case HCLGE_MBX_RESET:
+ hclge_reset_vf(vport, req);
break;
default:
dev_err(&hdev->pdev->dev,
@@ -410,7 +464,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
req->msg[0]);
break;
}
+ crq->desc[crq->next_to_use].flag = 0;
hclge_mbx_ring_ptr_move_crq(crq);
+ flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
}
/* Write back CMDQ_RQ header pointer, M7 need this pointer */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index c1dea3a47bdd..682c2d6618e7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -60,6 +60,9 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum,
struct hclge_desc desc;
int ret;
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ return 0;
+
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false);
mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
@@ -95,6 +98,9 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
struct hclge_desc desc;
int ret;
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ return 0;
+
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true);
mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 36bd79a77940..885f25cd7be4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -23,6 +23,9 @@ enum hclge_shaper_level {
HCLGE_SHAPER_LVL_PF = 1,
};
+#define HCLGE_TM_PFC_PKT_GET_CMD_NUM 3
+#define HCLGE_TM_PFC_NUM_GET_PER_CMD 3
+
#define HCLGE_SHAPER_BS_U_DEF 5
#define HCLGE_SHAPER_BS_S_DEF 20
@@ -112,6 +115,56 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
return 0;
}
+static int hclge_pfc_stats_get(struct hclge_dev *hdev,
+ enum hclge_opcode_type opcode, u64 *stats)
+{
+ struct hclge_desc desc[HCLGE_TM_PFC_PKT_GET_CMD_NUM];
+ int ret, i, j;
+
+ if (!(opcode == HCLGE_OPC_QUERY_PFC_RX_PKT_CNT ||
+ opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT))
+ return -EINVAL;
+
+ for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+ hclge_cmd_setup_basic_desc(&desc[i], opcode, true);
+ if (i != (HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1))
+ desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ else
+ desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ }
+
+ ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get pfc pause stats fail, ret = %d.\n", ret);
+ return ret;
+ }
+
+ for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+ struct hclge_pfc_stats_cmd *pfc_stats =
+ (struct hclge_pfc_stats_cmd *)desc[i].data;
+
+ for (j = 0; j < HCLGE_TM_PFC_NUM_GET_PER_CMD; j++) {
+ u32 index = i * HCLGE_TM_PFC_PKT_GET_CMD_NUM + j;
+
+ if (index < HCLGE_MAX_TC_NUM)
+ stats[index] =
+ le64_to_cpu(pfc_stats->pkt_num[j]);
+ }
+ }
+ return 0;
+}
+
+int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats)
+{
+ return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_RX_PKT_CNT, stats);
+}
+
+int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats)
+{
+ return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_TX_PKT_CNT, stats);
+}
+
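In hclge_pfc_stats_get() the per-TC index is computed as i * HCLGE_TM_PFC_PKT_GET_CMD_NUM + j; the stride that actually matters is counters-per-descriptor, and the two constants only coincide because both are 3. A small userspace sketch of the intended mapping, with illustrative data:

#include <stdint.h>
#include <stdio.h>

#define CMD_NUM		3	/* descriptors per query */
#define NUM_PER_CMD	3	/* counters carried per descriptor */
#define MAX_TC_NUM	8	/* TCs actually reported */

int main(void)
{
	/* stand-in for the pkt_num[] payload of the three descriptors */
	uint64_t pkt_num[CMD_NUM][NUM_PER_CMD] = {
		{ 10, 11, 12 }, { 13, 14, 15 }, { 16, 17, 18 }
	};
	uint64_t stats[MAX_TC_NUM] = { 0 };
	int i, j;

	for (i = 0; i < CMD_NUM; i++)
		for (j = 0; j < NUM_PER_CMD; j++) {
			unsigned int index = i * NUM_PER_CMD + j;

			/* the ninth counter falls outside the 8 TCs */
			if (index < MAX_TC_NUM)
				stats[index] = pkt_num[i][j];
		}

	for (i = 0; i < MAX_TC_NUM; i++)
		printf("TC%d: %llu\n", i, (unsigned long long)stats[i]);
	return 0;
}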
int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
{
struct hclge_desc desc;
@@ -138,8 +191,8 @@ static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_mac_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
- u8 pause_trans_gap, u16 pause_trans_time)
+static int hclge_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
+ u8 pause_trans_gap, u16 pause_trans_time)
{
struct hclge_cfg_pause_param_cmd *pause_param;
struct hclge_desc desc;
@@ -155,7 +208,7 @@ static int hclge_mac_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
+int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
{
struct hclge_cfg_pause_param_cmd *pause_param;
struct hclge_desc desc;
@@ -174,7 +227,7 @@ int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
trans_gap = pause_param->pause_trans_gap;
trans_time = le16_to_cpu(pause_param->pause_trans_time);
- return hclge_mac_pause_param_cfg(hdev, mac_addr, trans_gap,
+ return hclge_pause_param_cfg(hdev, mac_addr, trans_gap,
trans_time);
}
@@ -1096,11 +1149,11 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
return hclge_tm_schd_mode_hw(hdev);
}
-static int hclge_mac_pause_param_setup_hw(struct hclge_dev *hdev)
+static int hclge_pause_param_setup_hw(struct hclge_dev *hdev)
{
struct hclge_mac *mac = &hdev->hw.mac;
- return hclge_mac_pause_param_cfg(hdev, mac->mac_addr,
+ return hclge_pause_param_cfg(hdev, mac->mac_addr,
HCLGE_DEFAULT_PAUSE_TRANS_GAP,
HCLGE_DEFAULT_PAUSE_TRANS_TIME);
}
@@ -1151,13 +1204,12 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
int ret;
u8 i;
- if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
- ret = hclge_mac_pause_setup_hw(hdev);
- if (ret)
- return ret;
+ ret = hclge_pause_param_setup_hw(hdev);
+ if (ret)
+ return ret;
- return hclge_mac_pause_param_setup_hw(hdev);
- }
+ if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
+ return hclge_mac_pause_setup_hw(hdev);
/* Only DCB-supported dev supports qset back pressure and pfc cmd */
if (!hnae3_dev_dcb_supported(hdev))
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 5401e7559437..2dbe177581e9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -109,6 +109,10 @@ struct hclge_cfg_pause_param_cmd {
__le16 pause_trans_time;
};
+struct hclge_pfc_stats_cmd {
+ __le64 pkt_num[3];
+};
+
struct hclge_port_shapping_cmd {
__le32 port_shapping_para;
};
@@ -129,5 +133,7 @@ int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
int hclge_tm_map_cfg(struct hclge_dev *hdev);
int hclge_tm_init_hw(struct hclge_dev *hdev);
int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
-int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
+int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 85985e731311..1bbfe131b596 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -315,6 +315,12 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
goto err_csq;
}
+ /* initialize the pointers of async rx queue of mailbox */
+ hdev->arq.hdev = hdev;
+ hdev->arq.head = 0;
+ hdev->arq.tail = 0;
+ hdev->arq.count = 0;
+
/* get firmware version */
ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version);
if (ret) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index 2caca9317f8c..621c6cbacf76 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -7,7 +7,7 @@
#include <linux/types.h>
#include "hnae3.h"
-#define HCLGEVF_CMDQ_TX_TIMEOUT 200
+#define HCLGEVF_CMDQ_TX_TIMEOUT 30000
#define HCLGEVF_CMDQ_RX_INVLD_B 0
#define HCLGEVF_CMDQ_RX_OUTVLD_B 1
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 0d89965f7928..2b8426412cc9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2,6 +2,7 @@
// Copyright (c) 2016-2017 Hisilicon Limited.
#include <linux/etherdevice.h>
+#include <net/rtnetlink.h>
#include "hclgevf_cmd.h"
#include "hclgevf_main.h"
#include "hclge_mbx.h"
@@ -9,6 +10,8 @@
#define HCLGEVF_NAME "hclgevf"
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev);
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev);
static struct hnae3_ae_algo ae_algovf;
static const struct pci_device_id ae_algovf_pci_tbl[] = {
@@ -18,6 +21,8 @@ static const struct pci_device_id ae_algovf_pci_tbl[] = {
{0, }
};
+MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl);
+
static inline struct hclgevf_dev *hclgevf_ae_get_hdev(
struct hnae3_handle *handle)
{
@@ -206,6 +211,12 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
struct hclgevf_tqp *tqp;
int i;
+ /* if this is an ongoing reset then we need to re-allocate the TQPs
+ * since we cannot assume we would get the same number of TQPs back
+ * from the PF
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ devm_kfree(&hdev->pdev->dev, hdev->htqp);
+
hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
sizeof(struct hclgevf_tqp), GFP_KERNEL);
if (!hdev->htqp)
@@ -249,6 +260,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
new_tqps = kinfo->rss_size * kinfo->num_tc;
kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
+ /* if this is an ongoing reset then we need to re-allocate the hnae
+ * queues as well, since the number of TQPs from the PF might have
+ * changed.
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ devm_kfree(&hdev->pdev->dev, kinfo->tqp);
+
kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
sizeof(struct hnae3_queue *), GFP_KERNEL);
if (!kinfo->tqp)
@@ -533,13 +550,11 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
int vector,
struct hnae3_ring_chain_node *ring_chain)
{
-#define HCLGEVF_RING_NODE_VARIABLE_NUM 3
-#define HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM 3
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
struct hnae3_ring_chain_node *node;
struct hclge_mbx_vf_to_pf_cmd *req;
struct hclgevf_desc desc;
- int i, vector_id;
+ int i = 0, vector_id;
int status;
u8 type;
@@ -551,28 +566,33 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
return vector_id;
}
- hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
- type = en ?
- HCLGE_MBX_MAP_RING_TO_VECTOR : HCLGE_MBX_UNMAP_RING_TO_VECTOR;
- req->msg[0] = type;
- req->msg[1] = vector_id; /* vector_id should be id in VF */
-
- i = 0;
for (node = ring_chain; node; node = node->next) {
- i++;
- /* msg[2] is cause num */
- req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i] =
+ int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM +
+ HCLGE_MBX_RING_NODE_VARIABLE_NUM * i;
+
+ if (i == 0) {
+ hclgevf_cmd_setup_basic_desc(&desc,
+ HCLGEVF_OPC_MBX_VF_TO_PF,
+ false);
+ type = en ?
+ HCLGE_MBX_MAP_RING_TO_VECTOR :
+ HCLGE_MBX_UNMAP_RING_TO_VECTOR;
+ req->msg[0] = type;
+ req->msg[1] = vector_id;
+ }
+
+ req->msg[idx_offset] =
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
- req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 1] =
- node->tqp_index;
- req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 2] =
- hnae_get_field(node->int_gl_idx,
- HNAE3_RING_GL_IDX_M,
- HNAE3_RING_GL_IDX_S);
-
- if (i == (HCLGE_MBX_VF_MSG_DATA_NUM -
- HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM) /
- HCLGEVF_RING_NODE_VARIABLE_NUM) {
+ req->msg[idx_offset + 1] = node->tqp_index;
+ req->msg[idx_offset + 2] = hnae_get_field(node->int_gl_idx,
+ HNAE3_RING_GL_IDX_M,
+ HNAE3_RING_GL_IDX_S);
+
+ i++;
+ if ((i == (HCLGE_MBX_VF_MSG_DATA_NUM -
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) /
+ HCLGE_MBX_RING_NODE_VARIABLE_NUM) ||
+ !node->next) {
req->msg[2] = i;
status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
@@ -591,17 +611,6 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
}
}
- if (i > 0) {
- req->msg[2] = i;
-
- status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
- if (status) {
- dev_err(&hdev->pdev->dev,
- "Map TQP fail, status is %d.\n", status);
- return status;
- }
- }
-
return 0;
}
@@ -627,13 +636,18 @@ static int hclgevf_unmap_ring_from_vector(
}
ret = hclgevf_bind_ring_to_vector(handle, false, vector, ring_chain);
- if (ret) {
+ if (ret)
dev_err(&handle->pdev->dev,
"Unmap ring from vector fail. vector=%d, ret =%d\n",
vector_id,
ret);
- return ret;
- }
+
+ return ret;
+}
+
+static int hclgevf_put_vector(struct hnae3_handle *handle, int vector)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
hclgevf_free_vector(hdev, vector);
@@ -729,21 +743,25 @@ static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
ether_addr_copy(p, hdev->hw.mac.mac_addr);
}
-static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p)
+static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
+ bool is_first)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
u8 *old_mac_addr = (u8 *)hdev->hw.mac.mac_addr;
u8 *new_mac_addr = (u8 *)p;
u8 msg_data[ETH_ALEN * 2];
+ u16 subcode;
int status;
ether_addr_copy(msg_data, new_mac_addr);
ether_addr_copy(&msg_data[ETH_ALEN], old_mac_addr);
+ subcode = is_first ? HCLGE_MBX_MAC_VLAN_UC_ADD :
+ HCLGE_MBX_MAC_VLAN_UC_MODIFY;
+
status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
- HCLGE_MBX_MAC_VLAN_UC_MODIFY,
- msg_data, ETH_ALEN * 2,
- false, NULL, 0);
+ subcode, msg_data, ETH_ALEN * 2,
+ true, NULL, 0);
if (!status)
ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
@@ -816,11 +834,149 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
u8 msg_data[2];
+ int ret;
memcpy(&msg_data[0], &queue_id, sizeof(queue_id));
- hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, 2, false,
- NULL, 0);
+ /* disable VF queue before sending queue reset msg to PF */
+ ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
+ if (ret)
+ return;
+
+ hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
+ 2, true, NULL, 0);
+}
+
+static int hclgevf_notify_client(struct hclgevf_dev *hdev,
+ enum hnae3_reset_notify_type type)
+{
+ struct hnae3_client *client = hdev->nic_client;
+ struct hnae3_handle *handle = &hdev->nic;
+
+ if (!client->ops->reset_notify)
+ return -EOPNOTSUPP;
+
+ return client->ops->reset_notify(handle, type);
+}
+
+static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_RESET_WAIT_MS 500
+#define HCLGEVF_RESET_WAIT_CNT 20
+ u32 val, cnt = 0;
+
+ /* wait to check the hardware reset completion status */
+ val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+ while (hnae_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
+ (cnt < HCLGEVF_RESET_WAIT_CNT)) {
+ msleep(HCLGEVF_RESET_WAIT_MS);
+ val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+ cnt++;
+ }
+
+ /* hardware completion status should be available by this time */
+ if (cnt >= HCLGEVF_RESET_WAIT_CNT) {
+ dev_warn(&hdev->pdev->dev,
+ "could'nt get reset done status from h/w, timeout!\n");
+ return -EBUSY;
+ }
+
+ /* wait a bit longer for the stack reset to complete. This can happen
+ * when the reset assertion was made by the PF. It also means we may
+ * end up waiting a bit longer even for a VF reset.
+ */
+ msleep(5000);
+
+ return 0;
+}
+
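hclgevf_reset_wait() is a bounded-poll loop: re-read a hardware status bit at a fixed interval and give up after a fixed attempt count. A minimal userspace sketch of the same pattern, where hw_reset_done() is a stub standing in for reading HCLGEVF_FUN_RST_ING:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define WAIT_MS		500
#define WAIT_CNT	20

/* stub: pretend the hardware finishes on the fourth read */
static bool hw_reset_done(void)
{
	static int calls;

	return ++calls > 3;
}

int main(void)
{
	unsigned int cnt = 0;

	while (!hw_reset_done() && cnt < WAIT_CNT) {
		usleep(WAIT_MS * 1000);
		cnt++;
	}

	if (cnt >= WAIT_CNT) {
		fprintf(stderr, "timed out waiting for reset\n");
		return 1;
	}

	printf("reset done after %u polls\n", cnt);
	return 0;
}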
+static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
+{
+ int ret;
+
+ /* uninitialize the nic client */
+ hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+ /* re-initialize the hclge device */
+ ret = hclgevf_init_hdev(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "hclge device re-init failed, VF is disabled!\n");
+ return ret;
+ }
+
+ /* bring up the nic client again */
+ hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
+
+ return 0;
+}
+
+static int hclgevf_reset(struct hclgevf_dev *hdev)
+{
+ int ret;
+
+ rtnl_lock();
+
+ /* bring down the nic to stop any ongoing TX/RX */
+ hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+ /* check if VF could successfully fetch the hardware reset completion
+ * status from the hardware
+ */
+ ret = hclgevf_reset_wait(hdev);
+ if (ret) {
+ /* can't do much in this situation, will disable VF */
+ dev_err(&hdev->pdev->dev,
+ "VF failed(=%d) to fetch H/W reset completion status\n",
+ ret);
+
+ dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+ hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+ rtnl_unlock();
+ return ret;
+ }
+
+ /* now, re-initialize the nic client and ae device */
+ ret = hclgevf_reset_stack(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev, "failed to reset VF stack\n");
+
+ /* bring up the nic to enable TX/RX again */
+ hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int hclgevf_do_reset(struct hclgevf_dev *hdev)
+{
+ int status;
+ u8 respmsg;
+
+ status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
+ 0, false, &respmsg, sizeof(u8));
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "VF reset request to PF failed(=%d)\n", status);
+
+ return status;
+}
+
+static void hclgevf_reset_event(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
+
+ handle->reset_level = HNAE3_VF_RESET;
+
+ /* reset of this VF requested */
+ set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
+ hclgevf_reset_task_schedule(hdev);
+
+ handle->last_reset_time = jiffies;
}
static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
@@ -845,10 +1001,22 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
hdev->num_msi_used += 1;
}
-static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
+{
+ if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
+ !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+ set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+ schedule_work(&hdev->rst_service_task);
+ }
+}
+
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
{
- if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
+ if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) &&
+ !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) {
+ set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
schedule_work(&hdev->mbx_service_task);
+ }
}
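Both schedule helpers above use the same two-flag guard: queue the work only if it is neither already queued (the ..._SERVICE_SCHED bit) nor currently running (the ..._HANDLING bit); the SCHED bit is dropped when the task starts and the HANDLING bit when it finishes. A compact userspace sketch of the guard, using plain bools in place of the kernel's state bits and a printf in place of schedule_work():

#include <stdbool.h>
#include <stdio.h>

static bool service_sched;	/* work is queued */
static bool handling;		/* work is running */

static void task_schedule(void)
{
	if (!service_sched && !handling) {
		service_sched = true;
		printf("work queued\n");	/* schedule_work() stand-in */
	} else {
		printf("skipped: already queued or running\n");
	}
}

int main(void)
{
	task_schedule();		/* queued */
	task_schedule();		/* skipped: pending */

	service_sched = false;		/* task picked up... */
	handling = true;		/* ...and now running */
	task_schedule();		/* skipped: handling */

	handling = false;		/* task finished */
	task_schedule();		/* queued again */
	return 0;
}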
static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
@@ -858,6 +1026,16 @@ static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
schedule_work(&hdev->service_task);
}
+static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev)
+{
+ /* if we have any pending mailbox event then schedule the mbx task */
+ if (hdev->mbx_event_pending)
+ hclgevf_mbx_task_schedule(hdev);
+
+ if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state))
+ hclgevf_reset_task_schedule(hdev);
+}
+
static void hclgevf_service_timer(struct timer_list *t)
{
struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
@@ -867,6 +1045,75 @@ static void hclgevf_service_timer(struct timer_list *t)
hclgevf_task_schedule(hdev);
}
+static void hclgevf_reset_service_task(struct work_struct *work)
+{
+ struct hclgevf_dev *hdev =
+ container_of(work, struct hclgevf_dev, rst_service_task);
+ int ret;
+
+ if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+ return;
+
+ clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+
+ if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
+ &hdev->reset_state)) {
+ /* PF has intimated that it is about to reset the hardware.
+ * We now have to poll & check if hardware has actually completed
+ * the reset sequence. On hardware reset completion, VF needs to
+ * reset the client and ae device.
+ */
+ hdev->reset_attempts = 0;
+
+ ret = hclgevf_reset(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev, "VF stack reset failed.\n");
+ } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
+ &hdev->reset_state)) {
+ /* we could be here when either of the below happens:
+ * 1. reset was initiated by a watchdog timeout caused by
+ * a. the IMP being reset earlier, which choked our TX and
+ * made the watchdog react and induce a VF reset. This
+ * also means our cmdq would be unreliable.
+ * b. a TX problem in some other lower layer (for example,
+ * the link layer not functioning properly).
+ * 2. a VF reset might have been initiated due to some config
+ * change.
+ *
+ * NOTE: there is no clearer way to detect the above cases than
+ * to react to the PF's response to this reset request. The PF
+ * will ack cases 1b and 2, but we will get no indication of 1a
+ * from the PF since the cmdq would be in an unreliable state,
+ * i.e. mailbox communication between PF and VF would be broken.
+ */
+
+ /* if we never get into the pending state it means either:
+ * 1. the PF is not receiving our request, which could be due to
+ * an IMP reset
+ * 2. the PF is in a bad state and cannot respond
+ * We cannot do much for case 2, but as a first check we can try
+ * resetting our PCIe + stack and see if it alleviates the
+ * problem.
+ */
+ if (hdev->reset_attempts > 3) {
+ /* prepare for full reset of stack + pcie interface */
+ hdev->nic.reset_level = HNAE3_VF_FULL_RESET;
+
+ /* "defer" schedule the reset task again */
+ set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+ } else {
+ hdev->reset_attempts++;
+
+ /* request PF for resetting this VF via mailbox */
+ ret = hclgevf_do_reset(hdev);
+ if (ret)
+ dev_warn(&hdev->pdev->dev,
+ "VF rst fail, stack will call\n");
+ }
+ }
+
+ clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+}
+
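hclgevf_reset_service_task() arbitrates between two reset sources: PENDING (the PF asserted a reset; poll hardware and rebuild the stack) takes priority over REQUESTED (this VF asked for one; escalate to a full reset after more than 3 failed attempts). A userspace sketch of that arbitration, with the handlers reduced to printfs and the bit names mirroring the driver's:

#include <stdio.h>

#define RESET_REQUESTED	(1u << 0)
#define RESET_PENDING	(1u << 1)

static unsigned int reset_state;
static unsigned int reset_attempts;

static void reset_service(void)
{
	if (reset_state & RESET_PENDING) {
		reset_state &= ~RESET_PENDING;
		reset_attempts = 0;
		printf("poll h/w, then reset stack\n");
	} else if (reset_state & RESET_REQUESTED) {
		reset_state &= ~RESET_REQUESTED;
		if (reset_attempts > 3) {
			/* escalate and "defer" via the pending bit */
			printf("escalate to full reset\n");
			reset_state |= RESET_PENDING;
		} else {
			reset_attempts++;
			printf("ask PF for VF reset (try %u)\n",
			       reset_attempts);
		}
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++) {
		reset_state |= RESET_REQUESTED;	/* watchdog keeps firing */
		reset_service();
	}
	return 0;
}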
static void hclgevf_mailbox_service_task(struct work_struct *work)
{
struct hclgevf_dev *hdev;
@@ -878,7 +1125,7 @@ static void hclgevf_mailbox_service_task(struct work_struct *work)
clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
- hclgevf_mbx_handler(hdev);
+ hclgevf_mbx_async_handler(hdev);
clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
}
@@ -894,6 +1141,8 @@ static void hclgevf_service_task(struct work_struct *work)
*/
hclgevf_request_link_info(hdev);
+ hclgevf_deferred_task_schedule(hdev);
+
clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
}
@@ -936,8 +1185,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
if (!hclgevf_check_event_cause(hdev, &clearval))
goto skip_sched;
- /* schedule the VF mailbox service task, if not already scheduled */
- hclgevf_mbx_task_schedule(hdev);
+ hclgevf_mbx_handler(hdev);
hclgevf_clear_event_cause(hdev, clearval);
@@ -959,6 +1207,22 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
return hclgevf_get_tc_info(hdev);
}
+static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev)
+{
+ struct pci_dev *pdev = ae_dev->pdev;
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+ if (!hdev)
+ return -ENOMEM;
+
+ hdev->pdev = pdev;
+ hdev->ae_dev = ae_dev;
+ ae_dev->priv = hdev;
+
+ return 0;
+}
+
static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
{
struct hnae3_handle *roce = &hdev->roce;
@@ -1057,10 +1321,17 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle)
/* reset tqp stats */
hclgevf_reset_tqp_stats(handle);
+ del_timer_sync(&hdev->service_timer);
+ cancel_work_sync(&hdev->service_task);
+ hclgevf_update_link_status(hdev, 0);
}
static void hclgevf_state_init(struct hclgevf_dev *hdev)
{
+ /* if this is an ongoing reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return;
+
/* setup tasks for the MBX */
INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
@@ -1072,6 +1343,8 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
INIT_WORK(&hdev->service_task, hclgevf_service_task);
clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+ INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task);
+
mutex_init(&hdev->mbx_resp.mbx_mutex);
/* bring the device down */
@@ -1088,6 +1361,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
cancel_work_sync(&hdev->service_task);
if (hdev->mbx_service_task.func)
cancel_work_sync(&hdev->mbx_service_task);
+ if (hdev->rst_service_task.func)
+ cancel_work_sync(&hdev->rst_service_task);
mutex_destroy(&hdev->mbx_resp.mbx_mutex);
}
@@ -1098,6 +1373,10 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
int vectors;
int i;
+ /* if this is an ongoing reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM;
vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
@@ -1148,6 +1427,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
{
int ret = 0;
+ /* if this is an ongoing reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
hclgevf_get_misc_vector(hdev);
ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle,
@@ -1258,6 +1541,14 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev)
struct hclgevf_hw *hw;
int ret;
+ /* check if we need to skip initialization of pci. This happens if
+ * the device is undergoing a VF reset. Otherwise, the pci interface
+ * has to be re-initialized, i.e. when the device is not going
+ * through *any* reset or is actually undergoing a full reset.
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
ret = pci_enable_device(pdev);
if (ret) {
dev_err(&pdev->dev, "failed to enable PCI device\n");
@@ -1309,19 +1600,16 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev)
pci_set_drvdata(pdev, NULL);
}
-static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
{
- struct pci_dev *pdev = ae_dev->pdev;
- struct hclgevf_dev *hdev;
+ struct pci_dev *pdev = hdev->pdev;
int ret;
- hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
- if (!hdev)
- return -ENOMEM;
-
- hdev->pdev = pdev;
- hdev->ae_dev = ae_dev;
- ae_dev->priv = hdev;
+ /* check if device is undergoing a full reset (i.e. pcie as well) */
+ if (hclgevf_dev_ongoing_full_reset(hdev)) {
+ dev_warn(&pdev->dev, "device is going full reset\n");
+ hclgevf_uninit_hdev(hdev);
+ }
ret = hclgevf_pci_init(hdev);
if (ret) {
@@ -1406,15 +1694,38 @@ err_irq_init:
return ret;
}
-static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
{
- struct hclgevf_dev *hdev = ae_dev->priv;
-
hclgevf_cmd_uninit(hdev);
hclgevf_misc_irq_uninit(hdev);
hclgevf_state_uninit(hdev);
hclgevf_uninit_msi(hdev);
hclgevf_pci_uninit(hdev);
+}
+
+static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct pci_dev *pdev = ae_dev->pdev;
+ int ret;
+
+ ret = hclgevf_alloc_hdev(ae_dev);
+ if (ret) {
+ dev_err(&pdev->dev, "hclge device allocation failed\n");
+ return ret;
+ }
+
+ ret = hclgevf_init_hdev(ae_dev->priv);
+ if (ret)
+ dev_err(&pdev->dev, "hclge device initialization failed\n");
+
+ return ret;
+}
+
+static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ hclgevf_uninit_hdev(hdev);
ae_dev->priv = NULL;
}
@@ -1447,6 +1758,43 @@ static void hclgevf_get_channels(struct hnae3_handle *handle,
ch->combined_count = hdev->num_tqps;
}
+static void hclgevf_get_tqps_and_rss_info(struct hnae3_handle *handle,
+ u16 *free_tqps, u16 *max_rss_size)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ *free_tqps = 0;
+ *max_rss_size = hdev->rss_size_max;
+}
+
+static int hclgevf_get_status(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ return hdev->hw.mac.link;
+}
+
+static void hclgevf_get_ksettings_an_result(struct hnae3_handle *handle,
+ u8 *auto_neg, u32 *speed,
+ u8 *duplex)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ if (speed)
+ *speed = hdev->hw.mac.speed;
+ if (duplex)
+ *duplex = hdev->hw.mac.duplex;
+ if (auto_neg)
+ *auto_neg = AUTONEG_DISABLE;
+}
+
+void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
+ u8 duplex)
+{
+ hdev->hw.mac.speed = speed;
+ hdev->hw.mac.duplex = duplex;
+}
+
static const struct hnae3_ae_ops hclgevf_ops = {
.init_ae_dev = hclgevf_init_ae_dev,
.uninit_ae_dev = hclgevf_uninit_ae_dev,
@@ -1457,6 +1805,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
.map_ring_to_vector = hclgevf_map_ring_to_vector,
.unmap_ring_from_vector = hclgevf_unmap_ring_from_vector,
.get_vector = hclgevf_get_vector,
+ .put_vector = hclgevf_put_vector,
.reset_queue = hclgevf_reset_tqp,
.set_promisc_mode = hclgevf_set_promisc_mode,
.get_mac_addr = hclgevf_get_mac_addr,
@@ -1476,7 +1825,11 @@ static const struct hnae3_ae_ops hclgevf_ops = {
.get_tc_size = hclgevf_get_tc_size,
.get_fw_version = hclgevf_get_fw_version,
.set_vlan_filter = hclgevf_set_vlan_filter,
+ .reset_event = hclgevf_reset_event,
.get_channels = hclgevf_get_channels,
+ .get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
+ .get_status = hclgevf_get_status,
+ .get_ksettings_an_result = hclgevf_get_ksettings_an_result,
};
static struct hnae3_ae_algo ae_algovf = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index a63bee4a3674..a477a7c36bbd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -34,6 +34,9 @@
#define HCLGEVF_VECTOR0_RX_CMDQ_INT_B 1
#define HCLGEVF_TQP_RESET_TRY_TIMES 10
+/* Reset related Registers */
+#define HCLGEVF_FUN_RST_ING 0x20C00
+#define HCLGEVF_FUN_RST_ING_B 0
#define HCLGEVF_RSS_IND_TBL_SIZE 512
#define HCLGEVF_RSS_SET_BITMAP_MSK 0xffff
@@ -52,6 +55,8 @@ enum hclgevf_states {
HCLGEVF_STATE_DISABLED,
/* task states */
HCLGEVF_STATE_SERVICE_SCHED,
+ HCLGEVF_STATE_RST_SERVICE_SCHED,
+ HCLGEVF_STATE_RST_HANDLING,
HCLGEVF_STATE_MBX_SERVICE_SCHED,
HCLGEVF_STATE_MBX_HANDLING,
};
@@ -61,6 +66,8 @@ enum hclgevf_states {
struct hclgevf_mac {
u8 mac_addr[ETH_ALEN];
int link;
+ u8 duplex;
+ u32 speed;
};
struct hclgevf_hw {
@@ -120,6 +127,11 @@ struct hclgevf_dev {
struct hclgevf_rss_cfg rss_cfg;
unsigned long state;
+#define HCLGEVF_RESET_REQUESTED 0
+#define HCLGEVF_RESET_PENDING 1
+ unsigned long reset_state; /* requested, pending */
+ u32 reset_attempts;
+
u32 fw_version;
u16 num_tqps; /* num task queue pairs of this PF */
@@ -140,10 +152,13 @@ struct hclgevf_dev {
int *vector_irq;
bool accept_mta_mc; /* whether to accept mta filter multicast */
+ bool mbx_event_pending;
struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
+ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
struct timer_list service_timer;
struct work_struct service_task;
+ struct work_struct rst_service_task;
struct work_struct mbx_service_task;
struct hclgevf_tqp *htqp;
@@ -156,9 +171,29 @@ struct hclgevf_dev {
u32 flag;
};
+static inline bool hclgevf_dev_ongoing_reset(struct hclgevf_dev *hdev)
+{
+ return (hdev &&
+ (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+ (hdev->nic.reset_level == HNAE3_VF_RESET));
+}
+
+static inline bool hclgevf_dev_ongoing_full_reset(struct hclgevf_dev *hdev)
+{
+ return (hdev &&
+ (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+ (hdev->nic.reset_level == HNAE3_VF_FULL_RESET));
+}
+
int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
const u8 *msg_data, u8 msg_len, bool need_resp,
u8 *resp_data, u16 resp_len);
void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev);
+
void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
+void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
+ u8 duplex);
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index e39cad285fa9..a28618428338 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -54,6 +54,10 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
mbx_resp = &hdev->mbx_resp;
r_code0 = (u16)(mbx_resp->origin_mbx_msg >> 16);
r_code1 = (u16)(mbx_resp->origin_mbx_msg & 0xff);
+
+ if (mbx_resp->resp_status)
+ return mbx_resp->resp_status;
+
if (resp_data)
memcpy(resp_data, &mbx_resp->additional_info[0], resp_len);
@@ -128,7 +132,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
struct hclge_mbx_pf_to_vf_cmd *req;
struct hclgevf_cmq_ring *crq;
struct hclgevf_desc *desc;
- u16 link_status, flag;
+ u16 *msg_q;
+ u16 flag;
u8 *temp;
int i;
@@ -140,6 +145,12 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
desc = &crq->desc[crq->next_to_use];
req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
+ /* synchronous messages are time-critical and need preferential
+ * treatment. Therefore, we need to acknowledge all the sync
+ * responses as quickly as possible so that waiting tasks do not
+ * time out, and simultaneously queue the async messages for later
+ * processing in the context of the mailbox task, i.e. the slow path.
+ */
switch (req->msg[0]) {
case HCLGE_MBX_PF_VF_RESP:
if (resp->received_resp)
@@ -159,10 +170,31 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
}
break;
case HCLGE_MBX_LINK_STAT_CHANGE:
- link_status = le16_to_cpu(req->msg[1]);
+ case HCLGE_MBX_ASSERTING_RESET:
+ /* set this mbx event as pending. This is required as we
+ * might lose the interrupt event while the mbx task is
+ * busy handling. It is cleared when the mbx task enters
+ * the handling state.
+ */
+ hdev->mbx_event_pending = true;
- /* update upper layer with new link link status */
- hclgevf_update_link_status(hdev, link_status);
+ /* drop the async msg if the ARQ is full and continue
+ * with the next message
+ */
+ if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+ dev_warn(&hdev->pdev->dev,
+ "Async Q full, dropping msg(%d)\n",
+ req->msg[1]);
+ break;
+ }
+
+ /* append the async message at the tail of the arq */
+ msg_q = hdev->arq.msg_q[hdev->arq.tail];
+ memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE);
+ hclge_mbx_tail_ptr_move_arq(hdev->arq);
+ hdev->arq.count++;
+
+ hclgevf_mbx_task_schedule(hdev);
break;
default:
@@ -171,6 +203,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
req->msg[0]);
break;
}
+ crq->desc[crq->next_to_use].flag = 0;
hclge_mbx_ring_ptr_move_crq(crq);
flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
}
@@ -179,3 +212,57 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CRQ_HEAD_REG,
crq->next_to_use);
}
+
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
+{
+ u16 link_status;
+ u16 *msg_q;
+ u8 duplex;
+ u32 speed;
+ u32 tail;
+
+ /* we can safely clear it now as we are at the start of the async
+ * message processing
+ */
+ hdev->mbx_event_pending = false;
+
+ tail = hdev->arq.tail;
+
+ /* process all the async queue messages */
+ while (tail != hdev->arq.head) {
+ msg_q = hdev->arq.msg_q[hdev->arq.head];
+
+ switch (msg_q[0]) {
+ case HCLGE_MBX_LINK_STAT_CHANGE:
+ link_status = le16_to_cpu(msg_q[1]);
+ memcpy(&speed, &msg_q[2], sizeof(speed));
+ duplex = (u8)le16_to_cpu(msg_q[4]);
+
+ /* update upper layer with new link status */
+ hclgevf_update_link_status(hdev, link_status);
+ hclgevf_update_speed_duplex(hdev, speed, duplex);
+
+ break;
+ case HCLGE_MBX_ASSERTING_RESET:
+ /* PF has asserted a reset, hence VF should go into the
+ * pending state and poll the hardware reset status till
+ * it has completely reset. After that the stack should
+ * eventually be re-initialized.
+ */
+ hdev->nic.reset_level = HNAE3_VF_RESET;
+ set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+ hclgevf_reset_task_schedule(hdev);
+
+ break;
+ default:
+ dev_err(&hdev->pdev->dev,
+ "fetched unsupported(%d) message from arq\n",
+ msg_q[0]);
+ break;
+ }
+
+ hclge_mbx_head_ptr_move_arq(hdev->arq);
+ hdev->arq.count--;
+ msg_q = hdev->arq.msg_q[hdev->arq.head];
+ }
+}
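The ARQ used above is a fixed-size ring: the interrupt path appends at tail (dropping when count hits the limit) and the mailbox task consumes from head; note the driver snapshots tail before draining, so messages queued mid-drain wait for the next run. A userspace sketch of the ring mechanics, with an illustrative size rather than the HCLGE_MBX_* limits:

#include <stdio.h>

#define ARQ_NUM 8

struct arq {
	int msg[ARQ_NUM];
	unsigned int head, tail, count;
};

/* interrupt-path side: append, or drop when full */
static int arq_push(struct arq *q, int msg)
{
	if (q->count >= ARQ_NUM)
		return -1;
	q->msg[q->tail] = msg;
	q->tail = (q->tail + 1) % ARQ_NUM;
	q->count++;
	return 0;
}

/* task side: drain up to a snapshot of tail */
static void arq_drain(struct arq *q)
{
	unsigned int tail = q->tail;

	while (q->head != tail) {
		printf("handling msg %d\n", q->msg[q->head]);
		q->head = (q->head + 1) % ARQ_NUM;
		q->count--;
	}
}

int main(void)
{
	struct arq q = { .head = 0, .tail = 0, .count = 0 };
	int i;

	for (i = 0; i < 10; i++)
		if (arq_push(&q, i))
			printf("dropped msg %d\n", i);
	arq_drain(&q);
	return 0;
}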
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 4878b7169e0f..ba580bfae512 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -2903,8 +2903,7 @@ static ssize_t ehea_show_port_id(struct device *dev,
return sprintf(buf, "%d", port->logical_port_id);
}
-static DEVICE_ATTR(log_port_id, S_IRUSR | S_IRGRP | S_IROTH, ehea_show_port_id,
- NULL);
+static DEVICE_ATTR(log_port_id, 0444, ehea_show_port_id, NULL);
static void logical_port_release(struct device *dev)
{
@@ -3235,8 +3234,8 @@ static ssize_t ehea_remove_port(struct device *dev,
return (ssize_t) count;
}
-static DEVICE_ATTR(probe_port, S_IWUSR, NULL, ehea_probe_port);
-static DEVICE_ATTR(remove_port, S_IWUSR, NULL, ehea_remove_port);
+static DEVICE_ATTR(probe_port, 0200, NULL, ehea_probe_port);
+static DEVICE_ATTR(remove_port, 0200, NULL, ehea_remove_port);
static int ehea_create_device_sysfs(struct platform_device *dev)
{
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index f210398200ec..c1b51edaaf62 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -82,7 +82,7 @@ module_param(rx_flush, uint, 0644);
MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");
static bool old_large_send __read_mostly;
-module_param(old_large_send, bool, S_IRUGO);
+module_param(old_large_send, bool, 0444);
MODULE_PARM_DESC(old_large_send,
"Use old large send method on firmware that supports the new method");
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 27447260215d..0389a7a52152 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -90,7 +90,7 @@ MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
static int ibmvnic_remove(struct vio_dev *);
-static void release_sub_crqs(struct ibmvnic_adapter *);
+static void release_sub_crqs(struct ibmvnic_adapter *, bool);
static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
@@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
static void send_map_query(struct ibmvnic_adapter *adapter);
static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
static void send_cap_queries(struct ibmvnic_adapter *adapter);
static int init_sub_crqs(struct ibmvnic_adapter *);
static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
@@ -361,14 +361,14 @@ static void release_stats_buffers(struct ibmvnic_adapter *adapter)
static int init_stats_buffers(struct ibmvnic_adapter *adapter)
{
adapter->tx_stats_buffers =
- kcalloc(adapter->req_tx_queues,
+ kcalloc(IBMVNIC_MAX_QUEUES,
sizeof(struct ibmvnic_tx_queue_stats),
GFP_KERNEL);
if (!adapter->tx_stats_buffers)
return -ENOMEM;
adapter->rx_stats_buffers =
- kcalloc(adapter->req_rx_queues,
+ kcalloc(IBMVNIC_MAX_QUEUES,
sizeof(struct ibmvnic_rx_queue_stats),
GFP_KERNEL);
if (!adapter->rx_stats_buffers)
@@ -509,7 +509,7 @@ static int init_rx_pools(struct net_device *netdev)
return -1;
}
- adapter->num_active_rx_pools = 0;
+ adapter->num_active_rx_pools = rxadd_subcrqs;
for (i = 0; i < rxadd_subcrqs; i++) {
rx_pool = &adapter->rx_pool[i];
@@ -554,41 +554,44 @@ static int init_rx_pools(struct net_device *netdev)
rx_pool->next_free = 0;
}
- adapter->num_active_rx_pools = rxadd_subcrqs;
+ return 0;
+}
+
+static int reset_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
+{
+ int rc, i;
+
+ rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+ if (rc)
+ return rc;
+
+ memset(tx_pool->tx_buff, 0,
+ tx_pool->num_buffers *
+ sizeof(struct ibmvnic_tx_buff));
+
+ for (i = 0; i < tx_pool->num_buffers; i++)
+ tx_pool->free_map[i] = i;
+
+ tx_pool->consumer_index = 0;
+ tx_pool->producer_index = 0;
return 0;
}
static int reset_tx_pools(struct ibmvnic_adapter *adapter)
{
- struct ibmvnic_tx_pool *tx_pool;
int tx_scrqs;
- int i, j, rc;
+ int i, rc;
tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
for (i = 0; i < tx_scrqs; i++) {
- netdev_dbg(adapter->netdev, "Re-setting tx_pool[%d]\n", i);
-
- tx_pool = &adapter->tx_pool[i];
-
- rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+ rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
if (rc)
return rc;
-
- rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb);
+ rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]);
if (rc)
return rc;
-
- memset(tx_pool->tx_buff, 0,
- adapter->req_tx_entries_per_subcrq *
- sizeof(struct ibmvnic_tx_buff));
-
- for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
- tx_pool->free_map[j] = j;
-
- tx_pool->consumer_index = 0;
- tx_pool->producer_index = 0;
- tx_pool->tso_index = 0;
}
return 0;
@@ -605,35 +608,70 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter)
adapter->vpd = NULL;
}
+static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
+{
+ kfree(tx_pool->tx_buff);
+ kfree(tx_pool->free_map);
+ free_long_term_buff(adapter, &tx_pool->long_term_buff);
+}
+
static void release_tx_pools(struct ibmvnic_adapter *adapter)
{
- struct ibmvnic_tx_pool *tx_pool;
int i;
if (!adapter->tx_pool)
return;
for (i = 0; i < adapter->num_active_tx_pools; i++) {
- netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
- tx_pool = &adapter->tx_pool[i];
- kfree(tx_pool->tx_buff);
- free_long_term_buff(adapter, &tx_pool->long_term_buff);
- free_long_term_buff(adapter, &tx_pool->tso_ltb);
- kfree(tx_pool->free_map);
+ release_one_tx_pool(adapter, &adapter->tx_pool[i]);
+ release_one_tx_pool(adapter, &adapter->tso_pool[i]);
}
kfree(adapter->tx_pool);
adapter->tx_pool = NULL;
+ kfree(adapter->tso_pool);
+ adapter->tso_pool = NULL;
adapter->num_active_tx_pools = 0;
}
+static int init_one_tx_pool(struct net_device *netdev,
+ struct ibmvnic_tx_pool *tx_pool,
+ int num_entries, int buf_size)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ tx_pool->tx_buff = kcalloc(num_entries,
+ sizeof(struct ibmvnic_tx_buff),
+ GFP_KERNEL);
+ if (!tx_pool->tx_buff)
+ return -1;
+
+ if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
+ num_entries * buf_size))
+ return -1;
+
+ tx_pool->free_map = kcalloc(num_entries, sizeof(int), GFP_KERNEL);
+ if (!tx_pool->free_map)
+ return -1;
+
+ for (i = 0; i < num_entries; i++)
+ tx_pool->free_map[i] = i;
+
+ tx_pool->consumer_index = 0;
+ tx_pool->producer_index = 0;
+ tx_pool->num_buffers = num_entries;
+ tx_pool->buf_size = buf_size;
+
+ return 0;
+}
+
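init_one_tx_pool() seeds free_map with the identity mapping; at runtime the transmit path consumes free slots at consumer_index and the completion path produces them back at producer_index. A minimal userspace sketch of that free-list discipline; the field names mirror the pool, but the get/put helpers are illustrative, not driver API:

#include <stdio.h>

#define NUM_ENTRIES 4

struct pool {
	int free_map[NUM_ENTRIES];
	unsigned int producer_index;
	unsigned int consumer_index;
};

/* transmit path: consume the next free buffer slot */
static int pool_get(struct pool *p)
{
	int slot = p->free_map[p->consumer_index];

	p->consumer_index = (p->consumer_index + 1) % NUM_ENTRIES;
	return slot;
}

/* completion path: produce the slot back into the free list */
static void pool_put(struct pool *p, int slot)
{
	p->free_map[p->producer_index] = slot;
	p->producer_index = (p->producer_index + 1) % NUM_ENTRIES;
}

int main(void)
{
	struct pool p = { .free_map = { 0, 1, 2, 3 } };
	int a = pool_get(&p);
	int b = pool_get(&p);

	printf("got slots %d and %d\n", a, b);
	pool_put(&p, a);
	pool_put(&p, b);
	return 0;
}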
static int init_tx_pools(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- struct device *dev = &adapter->vdev->dev;
- struct ibmvnic_tx_pool *tx_pool;
int tx_subcrqs;
- int i, j;
+ int i, rc;
tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
adapter->tx_pool = kcalloc(tx_subcrqs,
@@ -641,57 +679,31 @@ static int init_tx_pools(struct net_device *netdev)
if (!adapter->tx_pool)
return -1;
- adapter->num_active_tx_pools = 0;
-
- for (i = 0; i < tx_subcrqs; i++) {
- tx_pool = &adapter->tx_pool[i];
-
- netdev_dbg(adapter->netdev,
- "Initializing tx_pool[%d], %lld buffs\n",
- i, adapter->req_tx_entries_per_subcrq);
-
- tx_pool->tx_buff = kcalloc(adapter->req_tx_entries_per_subcrq,
- sizeof(struct ibmvnic_tx_buff),
- GFP_KERNEL);
- if (!tx_pool->tx_buff) {
- dev_err(dev, "tx pool buffer allocation failed\n");
- release_tx_pools(adapter);
- return -1;
- }
+ adapter->tso_pool = kcalloc(tx_subcrqs,
+ sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
+ if (!adapter->tso_pool)
+ return -1;
- if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
- adapter->req_tx_entries_per_subcrq *
- adapter->req_mtu)) {
- release_tx_pools(adapter);
- return -1;
- }
+ adapter->num_active_tx_pools = tx_subcrqs;
- /* alloc TSO ltb */
- if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb,
- IBMVNIC_TSO_BUFS *
- IBMVNIC_TSO_BUF_SZ)) {
+ for (i = 0; i < tx_subcrqs; i++) {
+ rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
+ adapter->req_tx_entries_per_subcrq,
+ adapter->req_mtu + VLAN_HLEN);
+ if (rc) {
release_tx_pools(adapter);
- return -1;
+ return rc;
}
- tx_pool->tso_index = 0;
-
- tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq,
- sizeof(int), GFP_KERNEL);
- if (!tx_pool->free_map) {
+ rc = init_one_tx_pool(netdev, &adapter->tso_pool[i],
+ IBMVNIC_TSO_BUFS,
+ IBMVNIC_TSO_BUF_SZ);
+ if (rc) {
release_tx_pools(adapter);
- return -1;
+ return rc;
}
-
- for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
- tx_pool->free_map[j] = j;
-
- tx_pool->consumer_index = 0;
- tx_pool->producer_index = 0;
}
- adapter->num_active_tx_pools = tx_subcrqs;
-
return 0;
}
@@ -740,6 +752,45 @@ static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
adapter->napi_enabled = false;
}
+static int init_napi(struct ibmvnic_adapter *adapter)
+{
+ int i;
+
+ adapter->napi = kcalloc(adapter->req_rx_queues,
+ sizeof(struct napi_struct), GFP_KERNEL);
+ if (!adapter->napi)
+ return -ENOMEM;
+
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
+ netif_napi_add(adapter->netdev, &adapter->napi[i],
+ ibmvnic_poll, NAPI_POLL_WEIGHT);
+ }
+
+ adapter->num_active_rx_napi = adapter->req_rx_queues;
+ return 0;
+}
+
+static void release_napi(struct ibmvnic_adapter *adapter)
+{
+ int i;
+
+ if (!adapter->napi)
+ return;
+
+ for (i = 0; i < adapter->num_active_rx_napi; i++) {
+ if (&adapter->napi[i]) {
+ netdev_dbg(adapter->netdev,
+ "Releasing napi[%d]\n", i);
+ netif_napi_del(&adapter->napi[i]);
+ }
+ }
+
+ kfree(adapter->napi);
+ adapter->napi = NULL;
+ adapter->num_active_rx_napi = 0;
+}
+
static int ibmvnic_login(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -750,7 +801,7 @@ static int ibmvnic_login(struct net_device *netdev)
do {
if (adapter->renegotiate) {
adapter->renegotiate = false;
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
reinit_completion(&adapter->init_done);
send_cap_queries(adapter);
@@ -774,8 +825,11 @@ static int ibmvnic_login(struct net_device *netdev)
}
reinit_completion(&adapter->init_done);
- send_login(adapter);
- if (!wait_for_completion_timeout(&adapter->init_done,
+ rc = send_login(adapter);
+ if (rc) {
+ dev_err(dev, "Unable to attempt device login\n");
+ return rc;
+ } else if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
dev_err(dev, "Login timeout\n");
return -1;
@@ -791,28 +845,28 @@ static int ibmvnic_login(struct net_device *netdev)
return 0;
}
-static void release_resources(struct ibmvnic_adapter *adapter)
+static void release_login_buffer(struct ibmvnic_adapter *adapter)
{
- int i;
+ kfree(adapter->login_buf);
+ adapter->login_buf = NULL;
+}
+static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
+{
+ kfree(adapter->login_rsp_buf);
+ adapter->login_rsp_buf = NULL;
+}
+
+static void release_resources(struct ibmvnic_adapter *adapter)
+{
release_vpd_data(adapter);
release_tx_pools(adapter);
release_rx_pools(adapter);
- release_stats_token(adapter);
- release_stats_buffers(adapter);
release_error_buffers(adapter);
-
- if (adapter->napi) {
- for (i = 0; i < adapter->req_rx_queues; i++) {
- if (&adapter->napi[i]) {
- netdev_dbg(adapter->netdev,
- "Releasing napi[%d]\n", i);
- netif_napi_del(&adapter->napi[i]);
- }
- }
- }
+ release_napi(adapter);
+ release_login_rsp_buffer(adapter);
}
static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
@@ -931,20 +985,12 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
static int init_resources(struct ibmvnic_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
- int i, rc;
+ int rc;
rc = set_real_num_queues(netdev);
if (rc)
return rc;
- rc = init_stats_buffers(adapter);
- if (rc)
- return rc;
-
- rc = init_stats_token(adapter);
- if (rc)
- return rc;
-
adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
if (!adapter->vpd)
return -ENOMEM;
@@ -957,16 +1003,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
}
adapter->map_id = 1;
- adapter->napi = kcalloc(adapter->req_rx_queues,
- sizeof(struct napi_struct), GFP_KERNEL);
- if (!adapter->napi)
- return -ENOMEM;
- for (i = 0; i < adapter->req_rx_queues; i++) {
- netdev_dbg(netdev, "Adding napi[%d]\n", i);
- netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
- NAPI_POLL_WEIGHT);
- }
+ rc = init_napi(adapter);
+ if (rc)
+ return rc;
send_map_query(adapter);
@@ -1057,43 +1097,81 @@ static int ibmvnic_open(struct net_device *netdev)
return rc;
}
-static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+static void clean_rx_pools(struct ibmvnic_adapter *adapter)
{
- struct ibmvnic_tx_pool *tx_pool;
+ struct ibmvnic_rx_pool *rx_pool;
+ struct ibmvnic_rx_buff *rx_buff;
+ u64 rx_entries;
+ int rx_scrqs;
+ int i, j;
+
+ if (!adapter->rx_pool)
+ return;
+
+ rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
+ rx_entries = adapter->req_rx_add_entries_per_subcrq;
+
+ /* Free any remaining skbs in the rx buffer pools */
+ for (i = 0; i < rx_scrqs; i++) {
+ rx_pool = &adapter->rx_pool[i];
+ if (!rx_pool || !rx_pool->rx_buff)
+ continue;
+
+ netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
+ for (j = 0; j < rx_entries; j++) {
+ rx_buff = &rx_pool->rx_buff[j];
+ if (rx_buff && rx_buff->skb) {
+ dev_kfree_skb_any(rx_buff->skb);
+ rx_buff->skb = NULL;
+ }
+ }
+ }
+}
+
+static void clean_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
+{
+ struct ibmvnic_tx_buff *tx_buff;
u64 tx_entries;
+ int i;
+
+ if (!tx_pool || !tx_pool->tx_buff)
+ return;
+
+ tx_entries = tx_pool->num_buffers;
+
+ for (i = 0; i < tx_entries; i++) {
+ tx_buff = &tx_pool->tx_buff[i];
+ if (tx_buff && tx_buff->skb) {
+ dev_kfree_skb_any(tx_buff->skb);
+ tx_buff->skb = NULL;
+ }
+ }
+}
+
+static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+{
int tx_scrqs;
- int i, j;
+ int i;
- if (!adapter->tx_pool)
+ if (!adapter->tx_pool || !adapter->tso_pool)
return;
tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
- tx_entries = adapter->req_tx_entries_per_subcrq;
/* Free any remaining skbs in the tx buffer pools */
for (i = 0; i < tx_scrqs; i++) {
- tx_pool = &adapter->tx_pool[i];
- if (!tx_pool)
- continue;
-
netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
- for (j = 0; j < tx_entries; j++) {
- if (tx_pool->tx_buff[j].skb) {
- dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
- tx_pool->tx_buff[j].skb = NULL;
- }
- }
+ clean_one_tx_pool(adapter, &adapter->tx_pool[i]);
+ clean_one_tx_pool(adapter, &adapter->tso_pool[i]);
}
}
-static int __ibmvnic_close(struct net_device *netdev)
+static void ibmvnic_cleanup(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int rc = 0;
int i;
- adapter->state = VNIC_CLOSING;
-
/* ensure that transmissions are stopped if called by do_reset */
if (adapter->resetting)
netif_tx_disable(netdev);
@@ -1105,39 +1183,37 @@ static int __ibmvnic_close(struct net_device *netdev)
if (adapter->tx_scrq) {
for (i = 0; i < adapter->req_tx_queues; i++)
if (adapter->tx_scrq[i]->irq) {
- netdev_dbg(adapter->netdev,
+ netdev_dbg(netdev,
"Disabling tx_scrq[%d] irq\n", i);
disable_irq(adapter->tx_scrq[i]->irq);
}
}
- rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
- if (rc)
- return rc;
-
if (adapter->rx_scrq) {
for (i = 0; i < adapter->req_rx_queues; i++) {
- int retries = 10;
-
- while (pending_scrq(adapter, adapter->rx_scrq[i])) {
- retries--;
- mdelay(100);
-
- if (retries == 0)
- break;
- }
-
if (adapter->rx_scrq[i]->irq) {
- netdev_dbg(adapter->netdev,
+ netdev_dbg(netdev,
"Disabling rx_scrq[%d] irq\n", i);
disable_irq(adapter->rx_scrq[i]->irq);
}
}
}
-
+ clean_rx_pools(adapter);
clean_tx_pools(adapter);
+}
+
+static int __ibmvnic_close(struct net_device *netdev)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ int rc = 0;
+
+ adapter->state = VNIC_CLOSING;
+ rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+ if (rc)
+ return rc;
+ ibmvnic_cleanup(netdev);
adapter->state = VNIC_CLOSED;
- return rc;
+ return 0;
}
static int ibmvnic_close(struct net_device *netdev)
@@ -1169,7 +1245,10 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb,
int len = 0;
u8 *hdr;
- hdr_len[0] = sizeof(struct ethhdr);
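+	/* An in-band VLAN tag (one not carried in skb metadata) enlarges the L2 header */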
+ if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb))
+ hdr_len[0] = sizeof(struct vlan_ethhdr);
+ else
+ hdr_len[0] = sizeof(struct ethhdr);
if (skb->protocol == htons(ETH_P_IP)) {
hdr_len[1] = ip_hdr(skb)->ihl * 4;
@@ -1285,6 +1364,21 @@ static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff,
txbuff->indir_arr + 1);
}
+static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ /* For some backing devices, mishandling of small packets
+ * can result in a loss of connection or TX stall. Device
+ * architects recommend that no packet should be smaller
+ * than the minimum MTU value provided to the driver, so
+	 * pad any packets to that length.
+ */
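+	/* skb_put_padto() zero-pads the frame and frees the skb itself on failure */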
+ if (skb->len < netdev->min_mtu)
+ return skb_put_padto(skb, netdev->min_mtu);
+
+ return 0;
+}
+
static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -1322,7 +1416,17 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
goto out;
}
- tx_pool = &adapter->tx_pool[queue_num];
+ if (ibmvnic_xmit_workarounds(skb, netdev)) {
+ tx_dropped++;
+ tx_send_failed++;
+ ret = NETDEV_TX_OK;
+ goto out;
+ }
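+	/* GSO frames are staged from the dedicated TSO buffer pool for this queue */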
+ if (skb_is_gso(skb))
+ tx_pool = &adapter->tso_pool[queue_num];
+ else
+ tx_pool = &adapter->tx_pool[queue_num];
+
tx_scrq = adapter->tx_scrq[queue_num];
txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
@@ -1330,21 +1434,21 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
index = tx_pool->free_map[tx_pool->consumer_index];
- if (skb_is_gso(skb)) {
- offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ;
- dst = tx_pool->tso_ltb.buff + offset;
- memset(dst, 0, IBMVNIC_TSO_BUF_SZ);
- data_dma_addr = tx_pool->tso_ltb.addr + offset;
- tx_pool->tso_index++;
- if (tx_pool->tso_index == IBMVNIC_TSO_BUFS)
- tx_pool->tso_index = 0;
- } else {
- offset = index * adapter->req_mtu;
- dst = tx_pool->long_term_buff.buff + offset;
- memset(dst, 0, adapter->req_mtu);
- data_dma_addr = tx_pool->long_term_buff.addr + offset;
+ if (index == IBMVNIC_INVALID_MAP) {
+ dev_kfree_skb_any(skb);
+ tx_send_failed++;
+ tx_dropped++;
+ ret = NETDEV_TX_OK;
+ goto out;
}
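+	/* Claim the map slot; the completion path returns it at the producer index */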
+ tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
+
+ offset = index * tx_pool->buf_size;
+ dst = tx_pool->long_term_buff.buff + offset;
+ memset(dst, 0, tx_pool->buf_size);
+ data_dma_addr = tx_pool->long_term_buff.addr + offset;
+
if (skb_shinfo(skb)->nr_frags) {
int cur, i;
@@ -1366,8 +1470,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
}
tx_pool->consumer_index =
- (tx_pool->consumer_index + 1) %
- adapter->req_tx_entries_per_subcrq;
+ (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[index];
tx_buff->skb = skb;
@@ -1383,11 +1486,13 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.n_crq_elem = 1;
tx_crq.v1.n_sge = 1;
tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
- tx_crq.v1.correlator = cpu_to_be32(index);
+
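+	/* The TSO mask in the correlator tells the completion handler which pool owns the buffer */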
if (skb_is_gso(skb))
- tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id);
+ tx_crq.v1.correlator =
+ cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK);
else
- tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
+ tx_crq.v1.correlator = cpu_to_be32(index);
+ tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
tx_crq.v1.sge_len = cpu_to_be32(skb->len);
tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
@@ -1422,6 +1527,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if ((*hdrs >> 7) & 1) {
build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
tx_crq.v1.n_crq_elem = num_entries;
+ tx_buff->num_entries = num_entries;
tx_buff->indir_arr[0] = tx_crq;
tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
sizeof(tx_buff->indir_arr),
@@ -1434,24 +1540,18 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_map_failed++;
tx_dropped++;
ret = NETDEV_TX_OK;
- goto out;
+ goto tx_err_out;
}
lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
(u64)tx_buff->indir_dma,
(u64)num_entries);
} else {
+ tx_buff->num_entries = num_entries;
lpar_rc = send_subcrq(adapter, handle_array[queue_num],
&tx_crq);
}
if (lpar_rc != H_SUCCESS) {
dev_err(dev, "tx failed with code %ld\n", lpar_rc);
-
- if (tx_pool->consumer_index == 0)
- tx_pool->consumer_index =
- adapter->req_tx_entries_per_subcrq - 1;
- else
- tx_pool->consumer_index--;
-
dev_kfree_skb_any(skb);
tx_buff->skb = NULL;
@@ -1467,12 +1567,12 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_send_failed++;
tx_dropped++;
ret = NETDEV_TX_OK;
- goto out;
+ goto tx_err_out;
}
- if (atomic_inc_return(&tx_scrq->used)
+ if (atomic_add_return(num_entries, &tx_scrq->used)
>= adapter->req_tx_entries_per_subcrq) {
- netdev_info(netdev, "Stopping queue %d\n", queue_num);
+ netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
netif_stop_subqueue(netdev, queue_num);
}
@@ -1480,7 +1580,16 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_bytes += skb->len;
txq->trans_start = jiffies;
ret = NETDEV_TX_OK;
+ goto out;
+tx_err_out:
+	/* Roll back the consumer index and restore the free map entry */
+ if (tx_pool->consumer_index == 0)
+ tx_pool->consumer_index =
+ tx_pool->num_buffers - 1;
+ else
+ tx_pool->consumer_index--;
+ tx_pool->free_map[tx_pool->consumer_index] = index;
out:
netdev->stats.tx_dropped += tx_dropped;
netdev->stats.tx_bytes += tx_bytes;
@@ -1599,16 +1708,19 @@ static int do_reset(struct ibmvnic_adapter *adapter,
rc = ibmvnic_reenable_crq_queue(adapter);
if (rc)
return 0;
+ ibmvnic_cleanup(netdev);
+ } else if (rwi->reset_reason == VNIC_RESET_FAILOVER) {
+ ibmvnic_cleanup(netdev);
+ } else {
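+		/* For an ordinary reset the device is still reachable, so do a full close */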
+ rc = __ibmvnic_close(netdev);
+ if (rc)
+ return rc;
}
- rc = __ibmvnic_close(netdev);
- if (rc)
- return rc;
-
if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
adapter->wait_for_reset) {
release_resources(adapter);
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
}
@@ -1646,6 +1758,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
release_tx_pools(adapter);
init_rx_pools(netdev);
init_tx_pools(netdev);
+
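+		/* The NAPI instances are sized to the rx queue count, so rebuild them too */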
+ release_napi(adapter);
+ init_napi(adapter);
} else {
rc = reset_tx_pools(adapter);
if (rc)
@@ -1654,12 +1769,14 @@ static int do_reset(struct ibmvnic_adapter *adapter,
rc = reset_rx_pools(adapter);
if (rc)
return rc;
-
- if (reset_state == VNIC_CLOSED)
- return 0;
}
}
+ adapter->state = VNIC_CLOSED;
+
+ if (reset_state == VNIC_CLOSED)
+ return 0;
+
rc = __ibmvnic_open(netdev);
if (rc) {
if (list_empty(&adapter->rwi_list))
@@ -1670,8 +1787,6 @@ static int do_reset(struct ibmvnic_adapter *adapter,
return 0;
}
- netif_carrier_on(netdev);
-
/* kick napi */
for (i = 0; i < adapter->req_rx_queues; i++)
napi_schedule(&adapter->napi[i]);
@@ -1679,6 +1794,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (adapter->reset_reason != VNIC_RESET_FAILOVER)
netdev_notify_peers(netdev);
+ netif_carrier_on(netdev);
+
return 0;
}
@@ -1853,6 +1970,12 @@ restart_poll:
be16_to_cpu(next->rx_comp.rc));
/* free the entry */
next->rx_comp.first = 0;
+ dev_kfree_skb_any(rx_buff->skb);
+ remove_buff_from_pool(adapter, rx_buff);
+ continue;
+ } else if (!rx_buff->skb) {
+ /* free the entry */
+ next->rx_comp.first = 0;
remove_buff_from_pool(adapter, rx_buff);
continue;
}
@@ -1960,6 +2083,23 @@ static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
return wait_for_reset(adapter);
}
+static netdev_features_t ibmvnic_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+	/* Some backing hardware adapters cannot
+	 * handle packets with an MSS less than 224
+	 * or with only one segment.
+ */
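+	/* Dropping the GSO bits forces the stack to segment the skb in software */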
+ if (skb_is_gso(skb)) {
+ if (skb_shinfo(skb)->gso_size < 224 ||
+ skb_shinfo(skb)->gso_segs == 1)
+ features &= ~NETIF_F_GSO_MASK;
+ }
+
+ return features;
+}
+
static const struct net_device_ops ibmvnic_netdev_ops = {
.ndo_open = ibmvnic_open,
.ndo_stop = ibmvnic_close,
@@ -1972,6 +2112,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
.ndo_poll_controller = ibmvnic_netpoll_controller,
#endif
.ndo_change_mtu = ibmvnic_change_mtu,
+ .ndo_features_check = ibmvnic_features_check,
};
/* ethtool functions */
@@ -2244,24 +2385,27 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
}
static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
- struct ibmvnic_sub_crq_queue *scrq)
+ struct ibmvnic_sub_crq_queue *scrq,
+ bool do_h_free)
{
struct device *dev = &adapter->vdev->dev;
long rc;
netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");
- /* Close the sub-crqs */
- do {
- rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
- adapter->vdev->unit_address,
- scrq->crq_num);
- } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
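+	/* do_h_free is false when the sub-CRQs are already gone on the hypervisor side */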
+ if (do_h_free) {
+ /* Close the sub-crqs */
+ do {
+ rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
+ adapter->vdev->unit_address,
+ scrq->crq_num);
+ } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
- if (rc) {
- netdev_err(adapter->netdev,
- "Failed to release sub-CRQ %16lx, rc = %ld\n",
- scrq->crq_num, rc);
+ if (rc) {
+ netdev_err(adapter->netdev,
+ "Failed to release sub-CRQ %16lx, rc = %ld\n",
+ scrq->crq_num, rc);
+ }
}
dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
@@ -2329,12 +2473,12 @@ zero_page_failed:
return NULL;
}
-static void release_sub_crqs(struct ibmvnic_adapter *adapter)
+static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
{
int i;
if (adapter->tx_scrq) {
- for (i = 0; i < adapter->req_tx_queues; i++) {
+ for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
if (!adapter->tx_scrq[i])
continue;
@@ -2347,15 +2491,17 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
adapter->tx_scrq[i]->irq = 0;
}
- release_sub_crq_queue(adapter, adapter->tx_scrq[i]);
+ release_sub_crq_queue(adapter, adapter->tx_scrq[i],
+ do_h_free);
}
kfree(adapter->tx_scrq);
adapter->tx_scrq = NULL;
+ adapter->num_active_tx_scrqs = 0;
}
if (adapter->rx_scrq) {
- for (i = 0; i < adapter->req_rx_queues; i++) {
+ for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
if (!adapter->rx_scrq[i])
continue;
@@ -2368,11 +2514,13 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
adapter->rx_scrq[i]->irq = 0;
}
- release_sub_crq_queue(adapter, adapter->rx_scrq[i]);
+ release_sub_crq_queue(adapter, adapter->rx_scrq[i],
+ do_h_free);
}
kfree(adapter->rx_scrq);
adapter->rx_scrq = NULL;
+ adapter->num_active_rx_scrqs = 0;
}
}
@@ -2413,6 +2561,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
struct ibmvnic_sub_crq_queue *scrq)
{
struct device *dev = &adapter->vdev->dev;
+ struct ibmvnic_tx_pool *tx_pool;
struct ibmvnic_tx_buff *txbuff;
union sub_crq *next;
int index;
@@ -2422,6 +2571,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
restart_loop:
while (pending_scrq(adapter, scrq)) {
unsigned int pool = scrq->pool_index;
+ int num_entries = 0;
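+		/* Sum descriptor entries so scrq->used matches the atomic_add_return() in xmit */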
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
@@ -2431,7 +2581,14 @@ restart_loop:
continue;
}
index = be32_to_cpu(next->tx_comp.correlators[i]);
- txbuff = &adapter->tx_pool[pool].tx_buff[index];
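+			/* The correlator's high bit selects the TSO pool */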
+ if (index & IBMVNIC_TSO_POOL_MASK) {
+ tx_pool = &adapter->tso_pool[pool];
+ index &= ~IBMVNIC_TSO_POOL_MASK;
+ } else {
+ tx_pool = &adapter->tx_pool[pool];
+ }
+
+ txbuff = &tx_pool->tx_buff[index];
for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
if (!txbuff->data_dma[j])
@@ -2452,22 +2609,23 @@ restart_loop:
txbuff->skb = NULL;
}
- adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
- producer_index] = index;
- adapter->tx_pool[pool].producer_index =
- (adapter->tx_pool[pool].producer_index + 1) %
- adapter->req_tx_entries_per_subcrq;
+ num_entries += txbuff->num_entries;
+
+ tx_pool->free_map[tx_pool->producer_index] = index;
+ tx_pool->producer_index =
+ (tx_pool->producer_index + 1) %
+ tx_pool->num_buffers;
}
		/* remove tx_comp scrq */
next->tx_comp.first = 0;
- if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <=
+ if (atomic_sub_return(num_entries, &scrq->used) <=
(adapter->req_tx_entries_per_subcrq / 2) &&
__netif_subqueue_stopped(adapter->netdev,
scrq->pool_index)) {
netif_wake_subqueue(adapter->netdev, scrq->pool_index);
- netdev_info(adapter->netdev, "Started queue %d\n",
- scrq->pool_index);
+ netdev_dbg(adapter->netdev, "Started queue %d\n",
+ scrq->pool_index);
}
}
@@ -2539,7 +2697,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
scrq->irq, rc);
irq_dispose_mapping(scrq->irq);
- goto req_rx_irq_failed;
+ goto req_tx_irq_failed;
}
}
@@ -2575,7 +2733,7 @@ req_tx_irq_failed:
free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
irq_dispose_mapping(adapter->rx_scrq[j]->irq);
}
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
return rc;
}
@@ -2637,6 +2795,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter)
for (i = 0; i < adapter->req_tx_queues; i++) {
adapter->tx_scrq[i] = allqueues[i];
adapter->tx_scrq[i]->pool_index = i;
+ adapter->num_active_tx_scrqs++;
}
adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
@@ -2647,6 +2806,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter)
for (i = 0; i < adapter->req_rx_queues; i++) {
adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
adapter->rx_scrq[i]->scrq_num = i;
+ adapter->num_active_rx_scrqs++;
}
kfree(allqueues);
@@ -2657,7 +2817,7 @@ rx_failed:
adapter->tx_scrq = NULL;
tx_failed:
for (i = 0; i < registered_queues; i++)
- release_sub_crq_queue(adapter, allqueues[i]);
+ release_sub_crq_queue(adapter, allqueues[i], 1);
kfree(allqueues);
return -1;
}
@@ -2997,7 +3157,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
strncpy(&vlcd->name, adapter->netdev->name, len);
}
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
struct ibmvnic_login_buffer *login_buffer;
@@ -3013,6 +3173,13 @@ static void send_login(struct ibmvnic_adapter *adapter)
struct vnic_login_client_data *vlcd;
int i;
+ if (!adapter->tx_scrq || !adapter->rx_scrq) {
+ netdev_err(adapter->netdev,
+ "RX or TX queues are not allocated, device login failed\n");
+ return -1;
+ }
+
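+	/* Free any response buffer left over from a previous login attempt */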
+ release_login_rsp_buffer(adapter);
client_data_len = vnic_client_data_len(adapter);
buffer_size =
@@ -3109,7 +3276,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
crq.login.len = cpu_to_be32(buffer_size);
ibmvnic_send_crq(adapter, &crq);
- return;
+ return 0;
buf_rsp_map_failed:
kfree(login_rsp_buffer);
@@ -3118,7 +3285,7 @@ buf_rsp_alloc_failed:
buf_map_failed:
kfree(login_buffer);
buf_alloc_failed:
- return;
+ return -1;
}
static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -3738,6 +3905,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
ibmvnic_remove(adapter->vdev);
return -EIO;
}
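+	/* The login buffer has served its purpose once the response is handled */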
+ release_login_buffer(adapter);
complete(&adapter->init_done);
return 0;
@@ -4282,6 +4450,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
{
struct device *dev = &adapter->vdev->dev;
unsigned long timeout = msecs_to_jiffies(30000);
+ u64 old_num_rx_queues, old_num_tx_queues;
int rc;
if (adapter->resetting && !adapter->wait_for_reset) {
@@ -4299,6 +4468,9 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
adapter->from_passive_init = false;
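+	/* Remember the negotiated queue counts so a reset can detect a resize */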
+ old_num_rx_queues = adapter->req_rx_queues;
+ old_num_tx_queues = adapter->req_tx_queues;
+
init_completion(&adapter->init_done);
adapter->init_done_rc = 0;
ibmvnic_send_crq_init(adapter);
@@ -4318,10 +4490,18 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
return -1;
}
- if (adapter->resetting && !adapter->wait_for_reset)
- rc = reset_sub_crq_queues(adapter);
- else
+ if (adapter->resetting && !adapter->wait_for_reset) {
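+		/* If the queue counts changed, the old sub-CRQs cannot simply be reset */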
+ if (adapter->req_rx_queues != old_num_rx_queues ||
+ adapter->req_tx_queues != old_num_tx_queues) {
+ release_sub_crqs(adapter, 0);
+ rc = init_sub_crqs(adapter);
+ } else {
+ rc = reset_sub_crq_queues(adapter);
+ }
+ } else {
rc = init_sub_crqs(adapter);
+ }
+
if (rc) {
dev_err(dev, "Initialization of sub crqs failed\n");
release_crq_queue(adapter);
@@ -4334,6 +4514,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
release_crq_queue(adapter);
}
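+	/* Statistics buffers are allocated once at init time rather than on each open */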
+ rc = init_stats_buffers(adapter);
+ if (rc)
+ return rc;
+
+ rc = init_stats_token(adapter);
+ if (rc)
+ return rc;
+
return rc;
}
@@ -4421,7 +4609,7 @@ ibmvnic_register_fail:
device_remove_file(&dev->dev, &dev_attr_failover);
ibmvnic_init_fail:
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
free_netdev(netdev);
@@ -4438,9 +4626,12 @@ static int ibmvnic_remove(struct vio_dev *dev)
mutex_lock(&adapter->reset_lock);
release_resources(adapter);
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
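+	/* Stats resources persist for the adapter's lifetime; free them only on remove */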
+ release_stats_token(adapter);
+ release_stats_buffers(adapter);
+
adapter->state = VNIC_REMOVED;
mutex_unlock(&adapter->reset_lock);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index fe21a6e2ddae..89efe700eafe 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -43,6 +43,7 @@
#define IBMVNIC_TSO_BUF_SZ 65536
#define IBMVNIC_TSO_BUFS 64
+#define IBMVNIC_TSO_POOL_MASK 0x80000000
#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
#define IBMVNIC_BUFFER_HLEN 500
@@ -909,6 +910,7 @@ struct ibmvnic_tx_buff {
union sub_crq indir_arr[6];
u8 hdr_data[140];
dma_addr_t indir_dma;
+ int num_entries;
};
struct ibmvnic_tx_pool {
@@ -916,11 +918,9 @@ struct ibmvnic_tx_pool {
int *free_map;
int consumer_index;
int producer_index;
- wait_queue_head_t ibmvnic_tx_comp_q;
- struct task_struct *work_thread;
struct ibmvnic_long_term_buff long_term_buff;
- struct ibmvnic_long_term_buff tso_ltb;
- int tso_index;
+ int num_buffers;
+ int buf_size;
};
struct ibmvnic_rx_buff {
@@ -1043,6 +1043,7 @@ struct ibmvnic_adapter {
u64 promisc;
struct ibmvnic_tx_pool *tx_pool;
+ struct ibmvnic_tx_pool *tso_pool;
struct completion init_done;
int init_done_rc;
@@ -1091,8 +1092,11 @@ struct ibmvnic_adapter {
u64 opt_rxba_entries_per_subcrq;
__be64 tx_rx_desc_req;
u8 map_id;
- u64 num_active_rx_pools;
- u64 num_active_tx_pools;
+ u32 num_active_rx_scrqs;
+ u32 num_active_rx_pools;
+ u32 num_active_rx_napi;
+ u32 num_active_tx_scrqs;
+ u32 num_active_tx_pools;
struct tasklet_struct tasklet;
enum vnic_state state;
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 1feb54b6d92e..14d287bed33c 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -251,6 +251,20 @@ config I40EVF
will be called i40evf. MSI-X interrupt support is required
for this driver to work correctly.
+config ICE
+ tristate "Intel(R) Ethernet Connection E800 Series Support"
+ default n
+ depends on PCI_MSI
+ ---help---
+ This driver supports Intel(R) Ethernet Connection E800 Series of
+ devices. For more information on how to identify your adapter, go
+ to the Adapter & Driver ID Guide that can be located at:
+
+ <http://support.intel.com>
+
+ To compile this driver as a module, choose M here. The module
+ will be called ice.
+
config FM10K
tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support"
default n
diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile
index 90af7757a885..807a4f8c7e4e 100644
--- a/drivers/net/ethernet/intel/Makefile
+++ b/drivers/net/ethernet/intel/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_I40E) += i40e/
obj-$(CONFIG_IXGB) += ixgb/
obj-$(CONFIG_I40EVF) += i40evf/
obj-$(CONFIG_FM10K) += fm10k/
+obj-$(CONFIG_ICE) += ice/
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 29486478836e..41ad56edfb96 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel PRO/100 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/Makefile b/drivers/net/ethernet/intel/e1000/Makefile
index 4a6ab1522451..c7caadd3c8af 100644
--- a/drivers/net/ethernet/intel/e1000/Makefile
+++ b/drivers/net/ethernet/intel/e1000/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h
index 8fd2458060a0..3a0feea2df54 100644
--- a/drivers/net/ethernet/intel/e1000/e1000.h
+++ b/drivers/net/ethernet/intel/e1000/e1000.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index dc71e87c3260..3e80ca170dd7 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2006 Intel Corporation.
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index 3bac9df1c099..6e7e923d57bf 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.h b/drivers/net/ethernet/intel/e1000/e1000_hw.h
index 5cf7268cc4e1..f09c569ec19b 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.h
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 3dd4aeb2706d..d5eb19b86a0a 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_osdep.h b/drivers/net/ethernet/intel/e1000/e1000_osdep.h
index 33e7c45a4fe4..ae0559b8b011 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_osdep.h
+++ b/drivers/net/ethernet/intel/e1000/e1000_osdep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_param.c b/drivers/net/ethernet/intel/e1000/e1000_param.c
index c9cde352b1c8..345f23927bcc 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_param.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_param.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
index cd391376036c..953e99df420c 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.h b/drivers/net/ethernet/intel/e1000e/80003es2lan.h
index a2162e11673e..ee6d1256fda4 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.h
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 6b03c8553e59..924f2c8dfa6c 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/82571.h b/drivers/net/ethernet/intel/e1000e/82571.h
index abc6a9abff98..9a24c645f726 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.h
+++ b/drivers/net/ethernet/intel/e1000e/82571.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/Makefile b/drivers/net/ethernet/intel/e1000e/Makefile
index 106de493373c..24e391a4ac68 100644
--- a/drivers/net/ethernet/intel/e1000e/Makefile
+++ b/drivers/net/ethernet/intel/e1000e/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index afb7ebe20b24..22883015a695 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
@@ -400,6 +401,10 @@
#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */
#define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */
#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */
+#define E1000_ICR_MDAC 0x00000200 /* MDIO Access Complete */
+#define E1000_ICR_SRPD 0x00010000 /* Small Receive Packet Detected */
+#define E1000_ICR_ACK 0x00020000 /* Receive ACK Frame Detected */
+#define E1000_ICR_MNG 0x00040000 /* Manageability Event Detected */
#define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */
/* If this bit asserted, the driver should claim the interrupt */
#define E1000_ICR_INT_ASSERTED 0x80000000
@@ -407,7 +412,7 @@
#define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */
#define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */
#define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */
-#define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */
+#define E1000_ICR_OTHER 0x01000000 /* Other Interrupt */
/* PBA ECC Register */
#define E1000_PBA_ECC_COUNTER_MASK 0xFFF00000 /* ECC counter mask */
@@ -431,12 +436,27 @@
E1000_IMS_RXSEQ | \
E1000_IMS_LSC)
+/* These are all of the events related to the OTHER interrupt.
+ */
+#define IMS_OTHER_MASK ( \
+ E1000_IMS_LSC | \
+ E1000_IMS_RXO | \
+ E1000_IMS_MDAC | \
+ E1000_IMS_SRPD | \
+ E1000_IMS_ACK | \
+ E1000_IMS_MNG)
+
/* Interrupt Mask Set */
#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */
#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */
#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */
#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */
+#define E1000_IMS_RXO E1000_ICR_RXO /* Receiver Overrun */
#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */
+#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO Access Complete */
+#define E1000_IMS_SRPD E1000_ICR_SRPD /* Small Receive Packet */
+#define E1000_IMS_ACK E1000_ICR_ACK /* Receive ACK Frame Detected */
+#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability Event */
#define E1000_IMS_ECCER E1000_ICR_ECCER /* Uncorrectable ECC Error */
#define E1000_IMS_RXQ0 E1000_ICR_RXQ0 /* Rx Queue 0 Interrupt */
#define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 2311b31bdcac..da88555ba1fd 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 003cbd605799..64dc0c11147f 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index d803b1a12349..21802396bed6 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 31277d3bb7dc..1551d6ce5341 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
@@ -1367,9 +1368,6 @@ out:
 * Checks to see if the link status of the hardware has changed. If a
* change in link status has been detected, then we read the PHY registers
* to get the current speed/duplex if link exists.
- *
- * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
- * up).
**/
static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
{
@@ -1385,7 +1383,8 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
* Change or Rx Sequence Error interrupt.
*/
if (!mac->get_link_status)
- return 1;
+ return 0;
+ mac->get_link_status = false;
/* First we want to see if the MII Status Register reports
* link. If so, then we want to get the current speed/duplex
@@ -1393,12 +1392,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
*/
ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
if (ret_val)
- return ret_val;
+ goto out;
if (hw->mac.type == e1000_pchlan) {
ret_val = e1000_k1_gig_workaround_hv(hw, link);
if (ret_val)
- return ret_val;
+ goto out;
}
/* When connected at 10Mbps half-duplex, some parts are excessively
@@ -1431,7 +1430,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
- return ret_val;
+ goto out;
if (hw->mac.type == e1000_pch2lan)
emi_addr = I82579_RX_CONFIG;
@@ -1453,7 +1452,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
hw->phy.ops.release(hw);
if (ret_val)
- return ret_val;
+ goto out;
if (hw->mac.type >= e1000_pch_spt) {
u16 data;
@@ -1462,14 +1461,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
if (speed == SPEED_1000) {
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
- return ret_val;
+ goto out;
ret_val = e1e_rphy_locked(hw,
PHY_REG(776, 20),
&data);
if (ret_val) {
hw->phy.ops.release(hw);
- return ret_val;
+ goto out;
}
ptr_gap = (data & (0x3FF << 2)) >> 2;
@@ -1483,18 +1482,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
}
hw->phy.ops.release(hw);
if (ret_val)
- return ret_val;
+ goto out;
} else {
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
- return ret_val;
+ goto out;
ret_val = e1e_wphy_locked(hw,
PHY_REG(776, 20),
0xC023);
hw->phy.ops.release(hw);
if (ret_val)
- return ret_val;
+ goto out;
}
}
@@ -1521,7 +1520,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
(hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) {
ret_val = e1000_k1_workaround_lpt_lp(hw, link);
if (ret_val)
- return ret_val;
+ goto out;
}
if (hw->mac.type >= e1000_pch_lpt) {
/* Set platform power management values for
@@ -1529,7 +1528,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
*/
ret_val = e1000_platform_pm_pch_lpt(hw, link);
if (ret_val)
- return ret_val;
+ goto out;
}
/* Clear link partner's EEE ability */
@@ -1552,9 +1551,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
}
if (!link)
- return 0; /* No link detected */
-
- mac->get_link_status = false;
+ goto out;
switch (hw->mac.type) {
case e1000_pch2lan:
@@ -1616,12 +1613,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
* different link partner.
*/
ret_val = e1000e_config_fc_after_link_up(hw);
- if (ret_val) {
+ if (ret_val)
e_dbg("Error configuring flow control\n");
- return ret_val;
- }
- return 1;
+ return ret_val;
+
+out:
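+	/* Leave get_link_status set so the link is re-checked on the next watchdog run */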
+ mac->get_link_status = true;
+ return ret_val;
}
static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 00a36df02a3f..3c4f82c21084 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index f457c5703d0c..b293464a9f27 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
@@ -410,9 +411,6 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw)
 * Checks to see if the link status of the hardware has changed. If a
* change in link status has been detected, then we read the PHY registers
* to get the current speed/duplex if link exists.
- *
- * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
- * up).
**/
s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
{
@@ -426,20 +424,16 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
* Change or Rx Sequence Error interrupt.
*/
if (!mac->get_link_status)
- return 1;
+ return 0;
+ mac->get_link_status = false;
/* First we want to see if the MII Status Register reports
* link. If so, then we want to get the current speed/duplex
* of the PHY.
*/
ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
- if (ret_val)
- return ret_val;
-
- if (!link)
- return 0; /* No link detected */
-
- mac->get_link_status = false;
+ if (ret_val || !link)
+ goto out;
/* Check if there was DownShift, must be checked
* immediately after link-up
@@ -464,12 +458,14 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
* different link partner.
*/
ret_val = e1000e_config_fc_after_link_up(hw);
- if (ret_val) {
+ if (ret_val)
e_dbg("Error configuring flow control\n");
- return ret_val;
- }
- return 1;
+ return ret_val;
+
+out:
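+	/* Leave get_link_status set so the link is re-checked on the next pass */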
+ mac->get_link_status = true;
+ return ret_val;
}
/**
diff --git a/drivers/net/ethernet/intel/e1000e/mac.h b/drivers/net/ethernet/intel/e1000e/mac.h
index 8284618af9ff..cb0abf6c76a5 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.h
+++ b/drivers/net/ethernet/intel/e1000e/mac.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c
index cc9b3befc2bc..e027660aeb92 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.c
+++ b/drivers/net/ethernet/intel/e1000e/manage.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/manage.h b/drivers/net/ethernet/intel/e1000e/manage.h
index 0b9ea5952b07..3268f2e58593 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.h
+++ b/drivers/net/ethernet/intel/e1000e/manage.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 1298b69f990b..ec4a9759a6f2 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
@@ -1914,30 +1915,20 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
- u32 icr;
- bool enable = true;
-
- icr = er32(ICR);
- if (icr & E1000_ICR_RXO) {
- ew32(ICR, E1000_ICR_RXO);
- enable = false;
- /* napi poll will re-enable Other, make sure it runs */
- if (napi_schedule_prep(&adapter->napi)) {
- adapter->total_rx_bytes = 0;
- adapter->total_rx_packets = 0;
- __napi_schedule(&adapter->napi);
- }
- }
+ u32 icr = er32(ICR);
+
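+	/* Re-raise any auto-cleared queue interrupts via ICS so their handlers still run */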
+ if (icr & adapter->eiac_mask)
+ ew32(ICS, (icr & adapter->eiac_mask));
+
if (icr & E1000_ICR_LSC) {
- ew32(ICR, E1000_ICR_LSC);
hw->mac.get_link_status = true;
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
- if (enable && !test_bit(__E1000_DOWN, &adapter->state))
- ew32(IMS, E1000_IMS_OTHER);
+ if (!test_bit(__E1000_DOWN, &adapter->state))
+ ew32(IMS, E1000_IMS_OTHER | IMS_OTHER_MASK);
return IRQ_HANDLED;
}
@@ -2040,7 +2031,6 @@ static void e1000_configure_msix(struct e1000_adapter *adapter)
hw->hw_addr + E1000_EITR_82574(vector));
else
writel(1, hw->hw_addr + E1000_EITR_82574(vector));
- adapter->eiac_mask |= E1000_IMS_OTHER;
/* Cause Tx interrupts on every write back */
ivar |= BIT(31);
@@ -2265,7 +2255,8 @@ static void e1000_irq_enable(struct e1000_adapter *adapter)
if (adapter->msix_entries) {
ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
- ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC);
+ ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER |
+ IMS_OTHER_MASK);
} else if (hw->mac.type >= e1000_pch_lpt) {
ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
} else {
@@ -2333,8 +2324,8 @@ static int e1000_alloc_ring_dma(struct e1000_adapter *adapter,
{
struct pci_dev *pdev = adapter->pdev;
- ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma,
- GFP_KERNEL);
+ ring->desc = dma_zalloc_coherent(&pdev->dev, ring->size, &ring->dma,
+ GFP_KERNEL);
if (!ring->desc)
return -ENOMEM;
@@ -2707,8 +2698,7 @@ static int e1000e_poll(struct napi_struct *napi, int weight)
napi_complete_done(napi, work_done);
if (!test_bit(__E1000_DOWN, &adapter->state)) {
if (adapter->msix_entries)
- ew32(IMS, adapter->rx_ring->ims_val |
- E1000_IMS_OTHER);
+ ew32(IMS, adapter->rx_ring->ims_val);
else
e1000_irq_enable(adapter);
}
@@ -5101,7 +5091,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter)
case e1000_media_type_copper:
if (hw->mac.get_link_status) {
ret_val = hw->mac.ops.check_for_link(hw);
- link_active = ret_val > 0;
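+			/* check_for_link() clears get_link_status only when link is up */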
+ link_active = !hw->mac.get_link_status;
} else {
link_active = true;
}
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
index 2efd80dfd88e..68949bb41b7b 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.c
+++ b/drivers/net/ethernet/intel/e1000e/nvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.h b/drivers/net/ethernet/intel/e1000e/nvm.h
index 5d46967e0d1f..8e082028be7d 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.h
+++ b/drivers/net/ethernet/intel/e1000e/nvm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
index 47da51864543..2def33eba9e6 100644
--- a/drivers/net/ethernet/intel/e1000e/param.c
+++ b/drivers/net/ethernet/intel/e1000e/param.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 86ff0969efb6..b8226ed0e338 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h
index 3027f63ee793..d4180b5e9196 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.h
+++ b/drivers/net/ethernet/intel/e1000e/phy.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index b366885487a8..f941e5085f44 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
index 0cb4d365e5ad..16afc3c2a986 100644
--- a/drivers/net/ethernet/intel/e1000e/regs.h
+++ b/drivers/net/ethernet/intel/e1000e/regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel PRO/1000 Linux driver
* Copyright(c) 1999 - 2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/Makefile b/drivers/net/ethernet/intel/fm10k/Makefile
index cac645329cea..93277cb99cb7 100644
--- a/drivers/net/ethernet/intel/fm10k/Makefile
+++ b/drivers/net/ethernet/intel/fm10k/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel(R) Ethernet Switch Host Interface Driver
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 46973fb234c5..a9cdf763c59d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2017 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index 736a9f087bc9..e303d88720ef 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -262,6 +263,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
* fm10k_read_hw_stats_32b - Reads value of 32-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing a 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of the register and returns the delta
* between the base and the current value.
@@ -281,6 +283,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
* fm10k_read_hw_stats_48b - Reads value of 48-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing the lower 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of 2 registers, combined to represent a 48-bit
* statistical value. Extra processing is required to handle overflowing.
@@ -461,7 +464,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
/**
* fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- * @hw: pointer to the hardware structure
* @q: pointer to the ring of hardware statistics queue
* @idx: index pointing to the start of the ring iteration
* @count: number of queues to iterate over
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
index d51f9c7a47ff..2bdb24d2ca9d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2016 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
index db4bd8bf9722..c4f733452ef2 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2016 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index 14df09e2d964..43e8d839831f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2016 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index c7234f35f8ff..28b6b4e56487 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2017 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index 760cfa52d02c..30395f5e5e87 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2017 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 8e12aae065d8..df8607097e4a 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2017 Intel Corporation.
*
@@ -28,13 +29,13 @@
#include "fm10k.h"
-#define DRV_VERSION "0.22.1-k"
+#define DRV_VERSION "0.23.4-k"
#define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] = DRV_SUMMARY;
static const char fm10k_copyright[] =
- "Copyright(c) 2013 - 2017 Intel Corporation.";
+ "Copyright(c) 2013 - 2018 Intel Corporation.";
MODULE_AUTHOR("Intel Corporation, <[email protected]>");
MODULE_DESCRIPTION(DRV_SUMMARY);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index 244d3ad58ca7..c01bf30a0c9e 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2017 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
index 35c1dbad1330..007e1dfa9b7a 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2016 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index a38ae5c54da3..45793491d4ba 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -486,7 +487,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports,
/**
* fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
* @ti: Tunnel endpoint information
*
* This function is called when a new UDP tunnel port has been added.
@@ -518,8 +519,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev,
/**
* fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
*
* This function is called when a new UDP tunnel port is deleted. The freed
* port will be removed from the list, then we reprogram the offloaded port
@@ -803,7 +804,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
* @glort: the target glort for this update
* @addr: the address to update
* @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
*
* This function queues up a MAC request for sending to the switch manager.
* A separate thread monitors the queue and sends updates to the switch
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index a434fecfdfeb..cffcb187cb76 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -29,7 +30,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
[fm10k_device_vf] = &fm10k_vf_info,
};
-/**
+/*
* fm10k_pci_tbl - PCI Device ID Table
*
* Wildcard entries (PCI_ANY_ID) should come last
@@ -211,7 +212,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface)
/**
* fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
**/
static void fm10k_service_timer(struct timer_list *t)
{
@@ -649,7 +650,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
/**
* fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
{
@@ -679,7 +680,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
/**
* fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
{
@@ -703,7 +704,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
/**
* fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
*
* This function serves two purposes. First it strobes the interrupt lines
* in order to make certain interrupts are occurring. Secondly it sets the
@@ -1995,6 +1996,7 @@ skip_tx_dma_drain:
/**
* fm10k_sw_init - Initialize general software structures
* @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
*
* fm10k_sw_init initializes the interface private data structure.
* Fields are initialized based on PCI device information and
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index d6406fc31ffb..7ba54c534f8c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -1180,7 +1181,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
/**
* fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
* @vid: VLAN ID to correct
*
* Will report an error if the VLAN ID is out of range. For VID = 0, it will
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
index e04d41f1a532..ae81f9a16602 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2017 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index f8e87bf086b9..725ecb7abccd 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -120,6 +121,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
* @msg: Pointer to message block
* @attr_id: Attribute ID
* @mac_addr: MAC address to be stored
+ * @vlan: VLAN to be stored
*
 * This function will reorder a MAC address into CPU-endian order and store it
* in the attribute buffer. It will return success if provided with a
@@ -155,8 +157,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
/**
* fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
* @attr: Pointer to attribute
- * @attr_id: Attribute ID
* @mac_addr: location of buffer to store MAC address
+ * @vlan: location of buffer to store VLAN
*
* This function pulls the MAC address back out of the attribute and will
 * place it in the array pointed to by mac_addr. It will return success
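This put/get pair works by packing the 6-byte MAC and the 16-bit VLAN into consecutive 32-bit attribute words so the mailbox payload is endian-stable. A minimal sketch of the idea; the exact word layout below is an assumption for illustration, not the device's actual attribute format:

	/* Hypothetical packing: MAC bytes 0-3 in word 0, bytes 4-5 plus
	 * the VLAN ID in word 1.
	 */
	static void pack_mac_vlan(u32 *attr, const u8 *mac, u16 vlan)
	{
		attr[0] = mac[0] | (mac[1] << 8) | (mac[2] << 16) |
			  ((u32)mac[3] << 24);
		attr[1] = mac[4] | (mac[5] << 8) | ((u32)vlan << 16);
	}

	static void unpack_mac_vlan(const u32 *attr, u8 *mac, u16 *vlan)
	{
		int i;

		for (i = 0; i < 4; i++)
			mac[i] = (attr[0] >> (8 * i)) & 0xFF;
		mac[4] = attr[1] & 0xFF;
		mac[5] = (attr[1] >> 8) & 0xFF;
		*vlan = attr[1] >> 16;
	}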
@@ -549,7 +551,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
* @hw: Pointer to hardware structure
* @msg: Pointer to message
* @mbx: Pointer to mailbox information structure
- * @func: Function array containing list of message handling functions
+ * @data: Pointer to message handler data structure
*
* This function should be the first function called upon receiving a
* message. The handler will identify the message type and call the correct
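The renamed @data parameter is the table that drives dispatch: each entry pairs a message ID with a handler, and parsing walks the table until an ID matches (tables conventionally end in a catch-all entry). A sketch of the pattern; the entry layout and the not-implemented return code are simplified assumptions:

	struct msg_handler {
		unsigned int id;	/* message type this entry accepts */
		s32 (*func)(struct fm10k_hw *, u32 **,
			    struct fm10k_mbx_info *);
	};

	static s32 dispatch(const struct msg_handler *data, struct fm10k_hw *hw,
			    u32 **results, struct fm10k_mbx_info *mbx,
			    unsigned int msg_id)
	{
		for (; data->func; data++)
			if (data->id == msg_id)
				return data->func(hw, results, mbx);

		return FM10K_NOT_IMPLEMENTED;	/* assumed error code */
	}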
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
index a1f1027fe184..5d2ee759507e 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2016 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index 6bb16c13d9d6..dd23af11e2c1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2016 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
index 337ba65a9411..f06913630b39 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2016 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
index 2662f33c0c71..66a66b73a2f1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Ethernet Switch Host Interface Driver
* Copyright(c) 2013 - 2016 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index 3da482c3d68d..75437768a07c 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel Ethernet Controller XL710 Family Linux Driver
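The SPDX identifiers added throughout take whatever comment style the file type dictates, per the kernel's license-rules documentation:

	.c sources:   // SPDX-License-Identifier: GPL-2.0
	.h headers:   /* SPDX-License-Identifier: GPL-2.0 */
	Makefiles:    # SPDX-License-Identifier: GPL-2.0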
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index ebe795a7f5f9..a44139c1de80 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -158,9 +159,17 @@ enum i40e_state_t {
__I40E_BAD_EEPROM,
__I40E_DOWN_REQUESTED,
__I40E_FD_FLUSH_REQUESTED,
+ __I40E_FD_ATR_AUTO_DISABLED,
+ __I40E_FD_SB_AUTO_DISABLED,
__I40E_RESET_FAILED,
__I40E_PORT_SUSPENDED,
__I40E_VF_DISABLE,
+ __I40E_MACVLAN_SYNC_PENDING,
+ __I40E_UDP_FILTER_SYNC_PENDING,
+ __I40E_TEMP_LINK_POLLING,
+ __I40E_CLIENT_SERVICE_REQUESTED,
+ __I40E_CLIENT_L2_CHANGE,
+ __I40E_CLIENT_RESET,
/* This must be last as it determines the size of the BITMAP */
__I40E_STATE_SIZE__,
};
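Keeping __I40E_STATE_SIZE__ last lets the sentinel double as the bit count: the state field is declared as a bitmap sized by it, so the six new entries grow the map automatically and are flipped with atomic bitops. A minimal sketch of the pattern:

	#include <linux/types.h>	/* DECLARE_BITMAP() */
	#include <linux/bitops.h>	/* set_bit(), test_bit() */

	struct state_holder {
		/* Rounds up to whole unsigned longs automatically. */
		DECLARE_BITMAP(state, __I40E_STATE_SIZE__);
	};

	/* Usage, as in the hunks below:
	 *	set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
	 *	if (test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
	 *		...
	 */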
@@ -507,41 +516,34 @@ struct i40e_pf {
#define I40E_HW_STOP_FW_LLDP BIT(16)
#define I40E_HW_PORT_ID_VALID BIT(17)
#define I40E_HW_RESTART_AUTONEG BIT(18)
-
- u64 flags;
-#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0)
-#define I40E_FLAG_MSI_ENABLED BIT_ULL(1)
-#define I40E_FLAG_MSIX_ENABLED BIT_ULL(2)
-#define I40E_FLAG_RSS_ENABLED BIT_ULL(3)
-#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(4)
-#define I40E_FLAG_FILTER_SYNC BIT_ULL(5)
-#define I40E_FLAG_SRIOV_ENABLED BIT_ULL(6)
-#define I40E_FLAG_DCB_CAPABLE BIT_ULL(7)
-#define I40E_FLAG_DCB_ENABLED BIT_ULL(8)
-#define I40E_FLAG_FD_SB_ENABLED BIT_ULL(9)
-#define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(10)
-#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT_ULL(11)
-#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT_ULL(12)
-#define I40E_FLAG_MFP_ENABLED BIT_ULL(13)
-#define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(14)
-#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT_ULL(15)
-#define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(16)
-#define I40E_FLAG_VEB_STATS_ENABLED BIT_ULL(17)
-#define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(18)
-#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT_ULL(19)
-#define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(20)
-#define I40E_FLAG_LEGACY_RX BIT_ULL(21)
-#define I40E_FLAG_PTP BIT_ULL(22)
-#define I40E_FLAG_IWARP_ENABLED BIT_ULL(23)
-#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(24)
-#define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(25)
-#define I40E_FLAG_CLIENT_RESET BIT_ULL(26)
-#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT_ULL(27)
-#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT_ULL(28)
-#define I40E_FLAG_TC_MQPRIO BIT_ULL(29)
-#define I40E_FLAG_FD_SB_INACTIVE BIT_ULL(30)
-#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT_ULL(31)
-#define I40E_FLAG_DISABLE_FW_LLDP BIT_ULL(32)
+#define I40E_HW_STOPPABLE_FW_LLDP BIT(19)
+
+ u32 flags;
+#define I40E_FLAG_RX_CSUM_ENABLED BIT(0)
+#define I40E_FLAG_MSI_ENABLED BIT(1)
+#define I40E_FLAG_MSIX_ENABLED BIT(2)
+#define I40E_FLAG_RSS_ENABLED BIT(3)
+#define I40E_FLAG_VMDQ_ENABLED BIT(4)
+#define I40E_FLAG_SRIOV_ENABLED BIT(5)
+#define I40E_FLAG_DCB_CAPABLE BIT(6)
+#define I40E_FLAG_DCB_ENABLED BIT(7)
+#define I40E_FLAG_FD_SB_ENABLED BIT(8)
+#define I40E_FLAG_FD_ATR_ENABLED BIT(9)
+#define I40E_FLAG_MFP_ENABLED BIT(10)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT(11)
+#define I40E_FLAG_VEB_MODE_ENABLED BIT(12)
+#define I40E_FLAG_VEB_STATS_ENABLED BIT(13)
+#define I40E_FLAG_LINK_POLLING_ENABLED BIT(14)
+#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT(15)
+#define I40E_FLAG_LEGACY_RX BIT(16)
+#define I40E_FLAG_PTP BIT(17)
+#define I40E_FLAG_IWARP_ENABLED BIT(18)
+#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(19)
+#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(20)
+#define I40E_FLAG_TC_MQPRIO BIT(21)
+#define I40E_FLAG_FD_SB_INACTIVE BIT(22)
+#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT(23)
+#define I40E_FLAG_DISABLE_FW_LLDP BIT(24)
struct i40e_client_instance *cinst;
bool stat_offsets_loaded;
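The flags field can shrink from u64 (BIT_ULL) to u32 (BIT) because every bit that toggles at runtime has moved into the pf->state bitmap, leaving pf->flags as configuration that is written only under RTNL. The resulting division of labor, sketched with hypothetical helper names:

	/* Configuration: plain bit test; writes are RTNL-serialized. */
	if (pf->flags & I40E_FLAG_FD_SB_ENABLED)
		i40e_setup_sb_filters(pf);	/* hypothetical helper */

	/* Runtime status: atomic bitops, safe to flip from any context. */
	if (test_and_clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
		i40e_flush_fd_table(pf);	/* hypothetical helper */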
@@ -1040,6 +1042,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+void i40e_client_update_msix_info(struct i40e_pf *pf);
int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id);
/**
* i40e_irq_dynamic_enable - Enable default interrupt generation settings
@@ -1108,4 +1111,10 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
#endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index e78971605e0b..843fc7781ef8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index 2349fbe04bd2..0a8749ee9fd3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index a852775d3059..0244923edeb8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -1914,6 +1915,43 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_DEFAULT = 0xFF,
};
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+ BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XFI) | \
+ BIT_ULL(I40E_PHY_TYPE_SFI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+ BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+ BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+ BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
#define I40E_LINK_SPEED_100MB_SHIFT 0x1
#define I40E_LINK_SPEED_1000MB_SHIFT 0x2
#define I40E_LINK_SPEED_10GB_SHIFT 0x3
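Because each enum i40e_aq_phy_type value is used as a bit index, I40E_PHY_TYPES_BITMASK reduces "is this a recognized PHY type" to a single 64-bit AND. A sketch, guarding against I40E_PHY_TYPE_DEFAULT (0xFF), whose index would overflow the shift:

	static bool i40e_phy_type_known(enum i40e_aq_phy_type t)
	{
		/* BIT_ULL() keeps indices above 31 valid everywhere. */
		return t < 64 && (I40E_PHY_TYPES_BITMASK & BIT_ULL(t));
	}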
diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
index 926811ad44ac..abed0c52e782 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 0de9610c1d8d..d8ce4999864f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -287,6 +288,17 @@ out:
return capable;
}
+void i40e_client_update_msix_info(struct i40e_pf *pf)
+{
+ struct i40e_client_instance *cdev = pf->cinst;
+
+ if (!cdev || !cdev->client)
+ return;
+
+ cdev->lan_info.msix_count = pf->num_iwarp_msix;
+ cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+}
+
/**
* i40e_client_add_instance - add a client instance struct to the instance list
* @pf: pointer to the board struct
@@ -328,9 +340,6 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
return;
}
- cdev->lan_info.msix_count = pf->num_iwarp_msix;
- cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
-
mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
struct netdev_hw_addr, list);
if (mac)
@@ -340,6 +349,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
cdev->client = registered_client;
pf->cinst = cdev;
+
+ i40e_client_update_msix_info(pf);
}
/**
@@ -365,9 +376,8 @@ void i40e_client_subtask(struct i40e_pf *pf)
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
int ret = 0;
- if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
+ if (!test_and_clear_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state))
return;
- pf->flags &= ~I40E_FLAG_SERVICE_CLIENT_REQUESTED;
cdev = pf->cinst;
/* If we're down or resetting, just bail */
@@ -448,7 +458,7 @@ int i40e_lan_add_device(struct i40e_pf *pf)
* added, we can schedule a subtask to go initiate the clients if
* they can be launched at probe time.
*/
- pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+ set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
i40e_service_event_schedule(pf);
out:
@@ -543,7 +553,7 @@ static void i40e_client_prepare(struct i40e_client *client)
pf = ldev->pf;
i40e_client_add_instance(pf);
/* Start the client subtask */
- pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+ set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
i40e_service_event_schedule(pf);
}
mutex_unlock(&i40e_device_mutex);
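test_and_clear_bit() turns "check for and consume a service request" into one atomic step, so a request raised between a separate test and a separate clear can no longer be lost. The producer/consumer pairing used above:

	/* Producer (any context): raise the request and kick the task. */
	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
	i40e_service_event_schedule(pf);

	/* Consumer (service task): consume it atomically. */
	if (!test_and_clear_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state))
		return;	/* nothing pending; a later set_bit reschedules */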
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
index ba55c889e4c5..9d464d40bc17 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index ef5a868aae46..c0a3dae8a2db 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -1208,6 +1209,29 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
return media;
}
+/**
+ * i40e_poll_globr - Poll for Global Reset completion
+ * @hw: pointer to the hardware structure
+ * @retry_limit: how many times to retry before failure
+ **/
+static i40e_status i40e_poll_globr(struct i40e_hw *hw,
+ u32 retry_limit)
+{
+ u32 cnt, reg = 0;
+
+ for (cnt = 0; cnt < retry_limit; cnt++) {
+ reg = rd32(hw, I40E_GLGEN_RSTAT);
+ if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+ return 0;
+ msleep(100);
+ }
+
+ hw_dbg(hw, "Global reset failed.\n");
+ hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg);
+
+ return I40E_ERR_RESET_FAILED;
+}
+
#define I40E_PF_RESET_WAIT_COUNT_A0 200
#define I40E_PF_RESET_WAIT_COUNT 200
/**
@@ -1284,14 +1308,14 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw)
if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
break;
reg2 = rd32(hw, I40E_GLGEN_RSTAT);
- if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
- hw_dbg(hw, "Core reset upcoming. Skipping PF reset request.\n");
- hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg2);
- return I40E_ERR_NOT_READY;
- }
+ if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK)
+ break;
usleep_range(1000, 2000);
}
- if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
+ if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+ if (i40e_poll_globr(hw, grst_del))
+ return I40E_ERR_RESET_FAILED;
+ } else if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
hw_dbg(hw, "PF reset polling failed to complete.\n");
return I40E_ERR_RESET_FAILED;
}
@@ -2415,6 +2439,6 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
 * i40e_aq_set_switch_config
 * @hw: pointer to the hardware structure
 * @flags: bit flag values to set
* @valid_flags: which bit flags to set
* @mode: cloud filter mode
* @cmd_details: pointer to command details structure or NULL
@@ -3200,9 +3225,10 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
u32 valid_functions, num_functions;
u32 number, logical_id, phys_id;
struct i40e_hw_capabilities *p;
+ u16 id, ocp_cfg_word0;
+ i40e_status status;
u8 major_rev;
u32 i = 0;
- u16 id;
cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
@@ -3389,6 +3415,26 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
hw->num_ports++;
}
+ /* OCP cards case: if a mezz is removed, the Ethernet port is left in
+ * a disabled state in the PRTGEN_CNF register. An additional NVM
+ * read is needed to check whether we are dealing with an OCP card.
+ * Those cards have at least 4 PFs, so using PRTGEN_CNF to count
+ * physical ports yields a wrong partition id calculation and thus
+ * breaks WoL support.
+ */
+ if (hw->mac.type == I40E_MAC_X722) {
+ if (!i40e_acquire_nvm(hw, I40E_RESOURCE_READ)) {
+ status = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR,
+ 2 * I40E_SR_OCP_CFG_WORD0,
+ sizeof(ocp_cfg_word0),
+ &ocp_cfg_word0, true, NULL);
+ if (!status &&
+ (ocp_cfg_word0 & I40E_SR_OCP_ENABLED))
+ hw->num_ports = 4;
+ i40e_release_nvm(hw);
+ }
+ }
+
valid_functions = p->valid_functions;
num_functions = 0;
while (valid_functions) {
@@ -5531,7 +5577,7 @@ i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
* function.
*
**/
-i40e_status
+enum i40e_status_code
i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_cloud_filters_element_bb *filters,
u8 filter_count)
@@ -5625,7 +5671,7 @@ i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
* function.
*
**/
-i40e_status
+enum i40e_status_code
i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_cloud_filters_element_bb *filters,
u8 filter_count)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 55079fe3ed63..9fec728dc4b9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index 92d01042c1f6..4f806386cb22 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index 886e667f2f1c..502818e3da78 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index e9fc51bd6c95..d494dcaf18d0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -155,8 +156,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n",
(unsigned long int)nd->vlan_features);
}
- dev_info(&pf->pdev->dev,
- " vlgrp: & = %p\n", vsi->active_vlans);
+ dev_info(&pf->pdev->dev, " active_vlans is %s\n",
+ vsi->active_vlans ? "<valid>" : "<null>");
dev_info(&pf->pdev->dev,
" flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags);
@@ -270,14 +271,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
continue;
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: desc = %p\n",
- i, rx_ring->desc);
- dev_info(&pf->pdev->dev,
- " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
- i, rx_ring->dev,
- rx_ring->netdev,
- rx_ring->rx_bi);
- dev_info(&pf->pdev->dev,
" rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
i, *rx_ring->state,
rx_ring->queue_index,
@@ -307,13 +300,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
rx_ring->rx_stats.realloc_count,
rx_ring->rx_stats.page_reuse_count);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: size = %i, dma = 0x%08lx\n",
- i, rx_ring->size,
- (unsigned long int)rx_ring->dma);
- dev_info(&pf->pdev->dev,
- " rx_rings[%i]: vsi = %p, q_vector = %p\n",
- i, rx_ring->vsi,
- rx_ring->q_vector);
+ " rx_rings[%i]: size = %i\n",
+ i, rx_ring->size);
dev_info(&pf->pdev->dev,
" rx_rings[%i]: itr_setting = %d (%s)\n",
i, rx_ring->itr_setting,
@@ -326,14 +314,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
continue;
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: desc = %p\n",
- i, tx_ring->desc);
- dev_info(&pf->pdev->dev,
- " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
- i, tx_ring->dev,
- tx_ring->netdev,
- tx_ring->tx_bi);
- dev_info(&pf->pdev->dev,
" tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
i, *tx_ring->state,
tx_ring->queue_index,
@@ -355,13 +335,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
tx_ring->tx_stats.tx_busy,
tx_ring->tx_stats.tx_done_old);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: size = %i, dma = 0x%08lx\n",
- i, tx_ring->size,
- (unsigned long int)tx_ring->dma);
- dev_info(&pf->pdev->dev,
- " tx_rings[%i]: vsi = %p, q_vector = %p\n",
- i, tx_ring->vsi,
- tx_ring->q_vector);
+ " tx_rings[%i]: size = %i\n",
+ i, tx_ring->size);
dev_info(&pf->pdev->dev,
" tx_rings[%i]: DCB tc = %d\n",
i, tx_ring->dcb_tc);
@@ -466,8 +441,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
vsi->info.resp_reserved[6], vsi->info.resp_reserved[7],
vsi->info.resp_reserved[8], vsi->info.resp_reserved[9],
vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]);
- if (vsi->back)
- dev_info(&pf->pdev->dev, " PF = %p\n", vsi->back);
dev_info(&pf->pdev->dev, " idx = %d\n", vsi->idx);
dev_info(&pf->pdev->dev,
" tc_config: numtc = %d, enabled_tc = 0x%x\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index 8e46098bad57..ad6a66ccb576 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
index 76ed56641864..df3e60470f8b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
index 0b5911652084..be8341763475 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 29a7412b2fa6..b974482ff630 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -230,6 +231,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+ I40E_PRIV_FLAG("link-down-on-close",
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
I40E_PRIV_FLAG("disable-source-pruning",
I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
@@ -857,7 +860,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
if (hw->device_id == I40E_DEV_ID_KX_B ||
hw->device_id == I40E_DEV_ID_KX_C ||
hw->device_id == I40E_DEV_ID_20G_KR2 ||
- hw->device_id == I40E_DEV_ID_20G_KR2_A) {
+ hw->device_id == I40E_DEV_ID_20G_KR2_A ||
+ hw->device_id == I40E_DEV_ID_25G_B ||
+ hw->device_id == I40E_DEV_ID_KX_X722) {
netdev_info(netdev, "Changing settings is not supported on backplane.\n");
return -EOPNOTSUPP;
}
@@ -868,23 +873,21 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* save autoneg out of ksettings */
autoneg = copy_ks.base.autoneg;
- memset(&safe_ks, 0, sizeof(safe_ks));
+ /* get our own copy of the bits to check against */
+ memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+ safe_ks.base.cmd = copy_ks.base.cmd;
+ safe_ks.base.link_mode_masks_nwords =
+ copy_ks.base.link_mode_masks_nwords;
+ i40e_get_link_ksettings(netdev, &safe_ks);
+
/* Get link modes supported by hardware and check against modes
* requested by the user. Return an error if unsupported mode was set.
*/
- i40e_phy_type_to_ethtool(pf, &safe_ks);
if (!bitmap_subset(copy_ks.link_modes.advertising,
safe_ks.link_modes.supported,
__ETHTOOL_LINK_MODE_MASK_NBITS))
return -EINVAL;
- /* get our own copy of the bits to check against */
- memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
- safe_ks.base.cmd = copy_ks.base.cmd;
- safe_ks.base.link_mode_masks_nwords =
- copy_ks.base.link_mode_masks_nwords;
- i40e_get_link_ksettings(netdev, &safe_ks);
-
/* set autoneg back to what it currently is */
copy_ks.base.autoneg = safe_ks.base.autoneg;
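The reorder populates safe_ks through i40e_get_link_ksettings() before the subset test, so the supported mask reflects everything the driver reports rather than only the PHY-type translation it used before. bitmap_subset(a, b, n) is true iff every bit set in a is also set in b over n bits (a & ~b == 0); in miniature:

	DECLARE_BITMAP(adv, BITS_PER_LONG);
	DECLARE_BITMAP(sup, BITS_PER_LONG);

	bitmap_zero(adv, BITS_PER_LONG);
	bitmap_zero(sup, BITS_PER_LONG);
	__set_bit(3, adv);
	__set_bit(3, sup);
	__set_bit(9, sup);
	/* true: every advertised bit is also supported */
	bitmap_subset(adv, sup, BITS_PER_LONG);

	__set_bit(7, adv);
	/* false: bit 7 advertised but unsupported -> the -EINVAL path */
	bitmap_subset(adv, sup, BITS_PER_LONG);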
@@ -3948,7 +3951,7 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
return -EOPNOTSUPP;
- if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED)
+ if (test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
return -ENOSPC;
if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
@@ -4426,36 +4429,19 @@ flags_complete:
* unsupported FW versions.
*/
if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
- if (pf->hw.func_caps.npar_enable) {
- dev_warn(&pf->pdev->dev,
- "Unable to change FW LLDP if NPAR active\n");
- return -EOPNOTSUPP;
- }
-
- if (pf->hw.aq.api_maj_ver < 1 ||
- (pf->hw.aq.api_maj_ver == 1 &&
- pf->hw.aq.api_min_ver < 7)) {
+ if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) {
dev_warn(&pf->pdev->dev,
- "FW ver does not support changing FW LLDP\n");
+ "Device does not support changing FW LLDP\n");
return -EOPNOTSUPP;
}
}
- /* Compare and exchange the new flags into place. If we failed, that
- * is if cmpxchg returns anything but the old value, this means that
- * something else has modified the flags variable since we copied it
- * originally. We'll just punt with an error and log something in the
- * message buffer.
- *
- * This is the point of no return for this function. We need to have
- * checked any discrepancies or misconfigurations and returned
- * EOPNOTSUPP before updating pf->flags here.
+ /* Now that we've checked to ensure that the new flags are valid, load
+ * them into place. Since we only modify flags either (a) during
+ * initialization or (b) while holding the RTNL lock, we don't need
+ * anything fancy here.
*/
- if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
- dev_warn(&pf->pdev->dev,
- "Unable to update pf->flags as it was modified by another thread...\n");
- return -EAGAIN;
- }
+ pf->flags = new_flags;
/* Process any additional changes needed as a result of flag changes.
* The changed_flags value reflects the list of bits that were
@@ -4465,7 +4451,7 @@ flags_complete:
/* Flush current ATR settings if ATR was disabled */
if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) &&
!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) {
- pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+ set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
}
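Dropping cmpxchg64() leans on the locking spelled out in the new comment: ethtool operations are entered with RTNL held, so no concurrent writer to pf->flags can exist and a plain store is already race-free. The assumption, made explicit:

	/* set_priv_flags is an ethtool op, called under rtnl_lock(): */
	ASSERT_RTNL();		/* documents the serialization assumption */
	pf->flags = new_flags;	/* plain store; no atomic RMW needed */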
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
deleted file mode 100644
index 2d1253c5b7a1..000000000000
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ /dev/null
@@ -1,1571 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <[email protected]>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#include <linux/if_ether.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/fc/fc_fs.h>
-#include <scsi/fc/fc_fip.h>
-#include <scsi/fc/fc_fcoe.h>
-#include <scsi/libfc.h>
-#include <scsi/libfcoe.h>
-#include <uapi/linux/dcbnl.h>
-
-#include "i40e.h"
-#include "i40e_fcoe.h"
-
-/**
- * i40e_fcoe_sof_is_class2 - returns true if this is a FC Class 2 SOF
- * @sof: the FCoE start of frame delimiter
- **/
-static inline bool i40e_fcoe_sof_is_class2(u8 sof)
-{
- return (sof == FC_SOF_I2) || (sof == FC_SOF_N2);
-}
-
-/**
- * i40e_fcoe_sof_is_class3 - returns true if this is a FC Class 3 SOF
- * @sof: the FCoE start of frame delimiter
- **/
-static inline bool i40e_fcoe_sof_is_class3(u8 sof)
-{
- return (sof == FC_SOF_I3) || (sof == FC_SOF_N3);
-}
-
-/**
- * i40e_fcoe_sof_is_supported - returns true if the FC SOF is supported by HW
- * @sof: the input SOF value from the frame
- **/
-static inline bool i40e_fcoe_sof_is_supported(u8 sof)
-{
- return i40e_fcoe_sof_is_class2(sof) ||
- i40e_fcoe_sof_is_class3(sof);
-}
-
-/**
- * i40e_fcoe_fc_sof - pull the SOF from FCoE header in the frame
- * @skb: the frame whose EOF is to be pulled from
- **/
-static inline int i40e_fcoe_fc_sof(struct sk_buff *skb, u8 *sof)
-{
- *sof = ((struct fcoe_hdr *)skb_network_header(skb))->fcoe_sof;
-
- if (!i40e_fcoe_sof_is_supported(*sof))
- return -EINVAL;
- return 0;
-}
-
-/**
- * i40e_fcoe_eof_is_supported - returns true if the EOF is supported by HW
- * @eof: the input EOF value from the frame
- **/
-static inline bool i40e_fcoe_eof_is_supported(u8 eof)
-{
- return (eof == FC_EOF_N) || (eof == FC_EOF_T) ||
- (eof == FC_EOF_NI) || (eof == FC_EOF_A);
-}
-
-/**
- * i40e_fcoe_fc_eof - pull EOF from FCoE trailer in the frame
- * @skb: the frame whose EOF is to be pulled from
- **/
-static inline int i40e_fcoe_fc_eof(struct sk_buff *skb, u8 *eof)
-{
- /* the first byte of the last dword is EOF */
- skb_copy_bits(skb, skb->len - 4, eof, 1);
-
- if (!i40e_fcoe_eof_is_supported(*eof))
- return -EINVAL;
- return 0;
-}
-
-/**
- * i40e_fcoe_ctxt_eof - convert input FC EOF for descriptor programming
- * @eof: the input eof value from the frame
- *
- * The FC EOF is converted to the value understood by HW for descriptor
- * programming. Never call this w/o calling i40e_fcoe_eof_is_supported()
- * first and that already checks for all supported valid eof values.
- **/
-static inline u32 i40e_fcoe_ctxt_eof(u8 eof)
-{
- switch (eof) {
- case FC_EOF_N:
- return I40E_TX_DESC_CMD_L4T_EOFT_EOF_N;
- case FC_EOF_T:
- return I40E_TX_DESC_CMD_L4T_EOFT_EOF_T;
- case FC_EOF_NI:
- return I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI;
- case FC_EOF_A:
- return I40E_TX_DESC_CMD_L4T_EOFT_EOF_A;
- default:
- /* Supported valid eof shall be already checked by
- * calling i40e_fcoe_eof_is_supported() first,
- * therefore this default case shall never hit.
- */
- WARN_ON(1);
- return -EINVAL;
- }
-}
-
-/**
- * i40e_fcoe_xid_is_valid - returns true if the exchange id is valid
- * @xid: the exchange id
- **/
-static inline bool i40e_fcoe_xid_is_valid(u16 xid)
-{
- return (xid != FC_XID_UNKNOWN) && (xid < I40E_FCOE_DDP_MAX);
-}
-
-/**
- * i40e_fcoe_ddp_unmap - unmap the mapped sglist associated
- * @pf: pointer to PF
- * @ddp: sw DDP context
- *
- * Unmap the scatter-gather list associated with the given SW DDP context
- *
- * Returns: data length already ddp-ed in bytes
- *
- **/
-static inline void i40e_fcoe_ddp_unmap(struct i40e_pf *pf,
- struct i40e_fcoe_ddp *ddp)
-{
- if (test_and_set_bit(__I40E_FCOE_DDP_UNMAPPED, &ddp->flags))
- return;
-
- if (ddp->sgl) {
- dma_unmap_sg(&pf->pdev->dev, ddp->sgl, ddp->sgc,
- DMA_FROM_DEVICE);
- ddp->sgl = NULL;
- ddp->sgc = 0;
- }
-
- if (ddp->pool) {
- dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
- ddp->pool = NULL;
- }
-}
-
-/**
- * i40e_fcoe_ddp_clear - clear the given SW DDP context
- * @ddp - SW DDP context
- **/
-static inline void i40e_fcoe_ddp_clear(struct i40e_fcoe_ddp *ddp)
-{
- memset(ddp, 0, sizeof(struct i40e_fcoe_ddp));
- ddp->xid = FC_XID_UNKNOWN;
- ddp->flags = __I40E_FCOE_DDP_NONE;
-}
-
-/**
- * i40e_fcoe_progid_is_fcoe - check if the prog_id is for FCoE
- * @id: the prog id for the programming status Rx descriptor write-back
- **/
-static inline bool i40e_fcoe_progid_is_fcoe(u8 id)
-{
- return (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
- (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS);
-}
-
-/**
- * i40e_fcoe_fc_get_xid - get xid from the frame header
- * @fh: the fc frame header
- *
- * In case the incoming frame's exchange is originated from
- * the initiator, then received frame's exchange id is ANDed
- * with fc_cpu_mask bits to get the same cpu on which exchange
- * was originated, otherwise just use the current cpu.
- *
- * Returns ox_id if exchange originator, rx_id if responder
- **/
-static inline u16 i40e_fcoe_fc_get_xid(struct fc_frame_header *fh)
-{
- u32 f_ctl = ntoh24(fh->fh_f_ctl);
-
- return (f_ctl & FC_FC_EX_CTX) ?
- be16_to_cpu(fh->fh_ox_id) :
- be16_to_cpu(fh->fh_rx_id);
-}
-
-/**
- * i40e_fcoe_fc_frame_header - get fc frame header from skb
- * @skb: packet
- *
- * This checks if there is a VLAN header and returns the data
- * pointer to the start of the fc_frame_header.
- *
- * Returns pointer to the fc_frame_header
- **/
-static inline struct fc_frame_header *i40e_fcoe_fc_frame_header(
- struct sk_buff *skb)
-{
- void *fh = skb->data + sizeof(struct fcoe_hdr);
-
- if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
- fh += sizeof(struct vlan_hdr);
-
- return (struct fc_frame_header *)fh;
-}
-
-/**
- * i40e_fcoe_ddp_put - release the DDP context for a given exchange id
- * @netdev: the corresponding net_device
- * @xid: the exchange id that corresponding DDP context will be released
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_done
- * and it is expected to be called by ULD, i.e., FCP layer of libfc
- * to release the corresponding ddp context when the I/O is done.
- *
- * Returns : data length already ddp-ed in bytes
- **/
-static int i40e_fcoe_ddp_put(struct net_device *netdev, u16 xid)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_pf *pf = np->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- int len = 0;
- struct i40e_fcoe_ddp *ddp = &fcoe->ddp[xid];
-
- if (!fcoe || !ddp)
- goto out;
-
- if (test_bit(__I40E_FCOE_DDP_DONE, &ddp->flags))
- len = ddp->len;
- i40e_fcoe_ddp_unmap(pf, ddp);
-out:
- return len;
-}
-
-/**
- * i40e_fcoe_sw_init - sets up the HW for FCoE
- * @pf: pointer to PF
- **/
-void i40e_init_pf_fcoe(struct i40e_pf *pf)
-{
- struct i40e_hw *hw = &pf->hw;
- u32 val;
-
- pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
- pf->num_fcoe_qps = 0;
- pf->fcoe_hmc_cntx_num = 0;
- pf->fcoe_hmc_filt_num = 0;
-
- if (!pf->hw.func_caps.fcoe) {
- dev_dbg(&pf->pdev->dev, "FCoE capability is disabled\n");
- return;
- }
-
- if (!pf->hw.func_caps.dcb) {
- dev_warn(&pf->pdev->dev,
- "Hardware is not DCB capable not enabling FCoE.\n");
- return;
- }
-
- /* enable FCoE hash filter */
- val = i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1));
- val |= BIT(I40E_FILTER_PCTYPE_FCOE_OX - 32);
- val |= BIT(I40E_FILTER_PCTYPE_FCOE_RX - 32);
- val &= I40E_PFQF_HENA_PTYPE_ENA_MASK;
- i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), val);
-
- /* enable flag */
- pf->flags |= I40E_FLAG_FCOE_ENABLED;
- pf->num_fcoe_qps = I40E_DEFAULT_FCOE;
-
- /* Reserve 4K DDP contexts and 20K filter size for FCoE */
- pf->fcoe_hmc_cntx_num = BIT(I40E_DMA_CNTX_SIZE_4K) *
- I40E_DMA_CNTX_BASE_SIZE;
- pf->fcoe_hmc_filt_num = pf->fcoe_hmc_cntx_num +
- BIT(I40E_HASH_FILTER_SIZE_16K) *
- I40E_HASH_FILTER_BASE_SIZE;
-
- /* FCoE object: max 16K filter buckets and 4K DMA contexts */
- pf->filter_settings.fcoe_filt_num = I40E_HASH_FILTER_SIZE_16K;
- pf->filter_settings.fcoe_cntx_num = I40E_DMA_CNTX_SIZE_4K;
-
- /* Setup max frame with FCoE_MTU plus L2 overheads */
- val = i40e_read_rx_ctl(hw, I40E_GLFCOE_RCTL);
- val &= ~I40E_GLFCOE_RCTL_MAX_SIZE_MASK;
- val |= ((FCOE_MTU + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
- << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT);
- i40e_write_rx_ctl(hw, I40E_GLFCOE_RCTL, val);
-
- dev_info(&pf->pdev->dev, "FCoE is supported.\n");
-}
-
-/**
- * i40e_get_fcoe_tc_map - Return TC map for FCoE APP
- * @pf: pointer to PF
- *
- **/
-u8 i40e_get_fcoe_tc_map(struct i40e_pf *pf)
-{
- struct i40e_dcb_app_priority_table app;
- struct i40e_hw *hw = &pf->hw;
- u8 enabled_tc = 0;
- u8 tc, i;
- /* Get the FCoE APP TLV */
- struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
-
- for (i = 0; i < dcbcfg->numapps; i++) {
- app = dcbcfg->app[i];
- if (app.selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
- app.protocolid == ETH_P_FCOE) {
- tc = dcbcfg->etscfg.prioritytable[app.priority];
- enabled_tc |= BIT(tc);
- break;
- }
- }
-
- /* TC0 if there is no TC defined for FCoE APP TLV */
- enabled_tc = enabled_tc ? enabled_tc : 0x1;
-
- return enabled_tc;
-}
-
-/**
- * i40e_fcoe_vsi_init - prepares the VSI context for creating a FCoE VSI
- * @vsi: pointer to the associated VSI struct
- * @ctxt: pointer to the associated VSI context to be passed to HW
- *
- * Returns 0 on success or < 0 on error
- **/
-int i40e_fcoe_vsi_init(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt)
-{
- struct i40e_aqc_vsi_properties_data *info = &ctxt->info;
- struct i40e_pf *pf = vsi->back;
- struct i40e_hw *hw = &pf->hw;
- u8 enabled_tc = 0;
-
- if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
- dev_err(&pf->pdev->dev,
- "FCoE is not enabled for this device\n");
- return -EPERM;
- }
-
- /* initialize the hardware for FCoE */
- ctxt->pf_num = hw->pf_id;
- ctxt->vf_num = 0;
- ctxt->uplink_seid = vsi->uplink_seid;
- ctxt->connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
- ctxt->flags = I40E_AQ_VSI_TYPE_PF;
-
- /* FCoE VSI would need the following sections */
- info->valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
-
- /* FCoE VSI does not need these sections */
- info->valid_sections &= cpu_to_le16(~(I40E_AQ_VSI_PROP_SECURITY_VALID |
- I40E_AQ_VSI_PROP_VLAN_VALID |
- I40E_AQ_VSI_PROP_CAS_PV_VALID |
- I40E_AQ_VSI_PROP_INGRESS_UP_VALID |
- I40E_AQ_VSI_PROP_EGRESS_UP_VALID));
-
- if (i40e_is_vsi_uplink_mode_veb(vsi)) {
- info->valid_sections |=
- cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
- info->switch_id =
- cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
- }
- enabled_tc = i40e_get_fcoe_tc_map(pf);
- i40e_vsi_setup_queue_map(vsi, ctxt, enabled_tc, true);
-
- /* set up queue option section: only enable FCoE */
- info->queueing_opt_flags = I40E_AQ_VSI_QUE_OPT_FCOE_ENA;
-
- return 0;
-}
-
-/**
- * i40e_fcoe_enable - this is the implementation of ndo_fcoe_enable,
- * indicating the upper FCoE protocol stack is ready to use FCoE
- * offload features.
- *
- * @netdev: pointer to the netdev that FCoE is created on
- *
- * Returns 0 on success
- *
- * in RTNL
- *
- **/
-int i40e_fcoe_enable(struct net_device *netdev)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_vsi *vsi = np->vsi;
- struct i40e_pf *pf = vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
-
- if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
- netdev_err(netdev, "HW does not support FCoE.\n");
- return -ENODEV;
- }
-
- if (vsi->type != I40E_VSI_FCOE) {
- netdev_err(netdev, "interface does not support FCoE.\n");
- return -EBUSY;
- }
-
- atomic_inc(&fcoe->refcnt);
-
- return 0;
-}
-
-/**
- * i40e_fcoe_disable- disables FCoE for upper FCoE protocol stack.
- * @dev: pointer to the netdev that FCoE is created on
- *
- * Returns 0 on success
- *
- **/
-int i40e_fcoe_disable(struct net_device *netdev)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_vsi *vsi = np->vsi;
- struct i40e_pf *pf = vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
-
- if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
- netdev_err(netdev, "device does not support FCoE\n");
- return -ENODEV;
- }
- if (vsi->type != I40E_VSI_FCOE)
- return -EBUSY;
-
- if (!atomic_dec_and_test(&fcoe->refcnt))
- return -EINVAL;
-
- netdev_info(netdev, "FCoE disabled\n");
-
- return 0;
-}
-
-/**
- * i40e_fcoe_dma_pool_free - free the per cpu pool for FCoE DDP
- * @fcoe: the FCoE sw object
- * @dev: the device that the pool is associated with
- * @cpu: the cpu for this pool
- *
- **/
-static void i40e_fcoe_dma_pool_free(struct i40e_fcoe *fcoe,
- struct device *dev,
- unsigned int cpu)
-{
- struct i40e_fcoe_ddp_pool *ddp_pool;
-
- ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
- if (!ddp_pool->pool) {
- dev_warn(dev, "DDP pool already freed for cpu %d\n", cpu);
- return;
- }
- dma_pool_destroy(ddp_pool->pool);
- ddp_pool->pool = NULL;
-}
-
-/**
- * i40e_fcoe_dma_pool_create - per cpu pool for FCoE DDP
- * @fcoe: the FCoE sw object
- * @dev: the device that the pool is associated with
- * @cpu: the cpu for this pool
- *
- * Returns 0 on successful or non zero on failure
- *
- **/
-static int i40e_fcoe_dma_pool_create(struct i40e_fcoe *fcoe,
- struct device *dev,
- unsigned int cpu)
-{
- struct i40e_fcoe_ddp_pool *ddp_pool;
- struct dma_pool *pool;
- char pool_name[32];
-
- ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
- if (ddp_pool && ddp_pool->pool) {
- dev_warn(dev, "DDP pool already allocated for cpu %d\n", cpu);
- return 0;
- }
- snprintf(pool_name, sizeof(pool_name), "i40e_fcoe_ddp_%d", cpu);
- pool = dma_pool_create(pool_name, dev, I40E_FCOE_DDP_PTR_MAX,
- I40E_FCOE_DDP_PTR_ALIGN, PAGE_SIZE);
- if (!pool) {
- dev_err(dev, "dma_pool_create %s failed\n", pool_name);
- return -ENOMEM;
- }
- ddp_pool->pool = pool;
- return 0;
-}
-
-/**
- * i40e_fcoe_free_ddp_resources - release FCoE DDP resources
- * @vsi: the vsi FCoE is associated with
- *
- **/
-void i40e_fcoe_free_ddp_resources(struct i40e_vsi *vsi)
-{
- struct i40e_pf *pf = vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- int cpu, i;
-
- /* do nothing if not FCoE VSI */
- if (vsi->type != I40E_VSI_FCOE)
- return;
-
- /* do nothing if no DDP pools were allocated */
- if (!fcoe->ddp_pool)
- return;
-
- for (i = 0; i < I40E_FCOE_DDP_MAX; i++)
- i40e_fcoe_ddp_put(vsi->netdev, i);
-
- for_each_possible_cpu(cpu)
- i40e_fcoe_dma_pool_free(fcoe, &pf->pdev->dev, cpu);
-
- free_percpu(fcoe->ddp_pool);
- fcoe->ddp_pool = NULL;
-
- netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources released\n",
- vsi->id, vsi->seid);
-}
-
-/**
- * i40e_fcoe_setup_ddp_resources - allocate per cpu DDP resources
- * @vsi: the VSI FCoE is associated with
- *
- * Returns 0 on successful or non zero on failure
- *
- **/
-int i40e_fcoe_setup_ddp_resources(struct i40e_vsi *vsi)
-{
- struct i40e_pf *pf = vsi->back;
- struct device *dev = &pf->pdev->dev;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- unsigned int cpu;
- int i;
-
- if (vsi->type != I40E_VSI_FCOE)
- return -ENODEV;
-
- /* do nothing if no DDP pools were allocated */
- if (fcoe->ddp_pool)
- return -EEXIST;
-
- /* allocate per CPU memory to track DDP pools */
- fcoe->ddp_pool = alloc_percpu(struct i40e_fcoe_ddp_pool);
- if (!fcoe->ddp_pool) {
- dev_err(&pf->pdev->dev, "failed to allocate percpu DDP\n");
- return -ENOMEM;
- }
-
- /* allocate pci pool for each cpu */
- for_each_possible_cpu(cpu) {
- if (!i40e_fcoe_dma_pool_create(fcoe, dev, cpu))
- continue;
-
- dev_err(dev, "failed to alloc DDP pool on cpu:%d\n", cpu);
- i40e_fcoe_free_ddp_resources(vsi);
- return -ENOMEM;
- }
-
- /* initialize the sw context */
- for (i = 0; i < I40E_FCOE_DDP_MAX; i++)
- i40e_fcoe_ddp_clear(&fcoe->ddp[i]);
-
- netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources allocated\n",
- vsi->id, vsi->seid);
-
- return 0;
-}
-
-/**
- * i40e_fcoe_handle_status - check the Programming Status for FCoE
- * @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for Programming Status, not a packet descriptor.
- *
- * Check if this is the Rx Programming Status descriptor write-back for FCoE.
- * This is used to verify if the context/filter programming or invalidation
- * requested by SW to the HW is successful or not and take actions accordingly.
- **/
-void i40e_fcoe_handle_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc, u8 prog_id)
-{
- struct i40e_pf *pf = rx_ring->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- struct i40e_fcoe_ddp *ddp;
- u32 error;
- u16 xid;
- u64 qw;
-
- /* we only care for FCoE here */
- if (!i40e_fcoe_progid_is_fcoe(prog_id))
- return;
-
- xid = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param) &
- (I40E_FCOE_DDP_MAX - 1);
-
- if (!i40e_fcoe_xid_is_valid(xid))
- return;
-
- ddp = &fcoe->ddp[xid];
- WARN_ON(xid != ddp->xid);
-
- qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
- I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
-
- /* DDP context programming status: failure or success */
- if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) {
- if (I40E_RX_PROG_FCOE_ERROR_TBL_FULL(error)) {
- dev_err(&pf->pdev->dev, "xid %x ddp->xid %x TABLE FULL\n",
- xid, ddp->xid);
- ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT;
- }
- if (I40E_RX_PROG_FCOE_ERROR_CONFLICT(error)) {
- dev_err(&pf->pdev->dev, "xid %x ddp->xid %x CONFLICT\n",
- xid, ddp->xid);
- ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT;
- }
- }
-
- /* DDP context invalidation status: failure or success */
- if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS) {
- if (I40E_RX_PROG_FCOE_ERROR_INVLFAIL(error)) {
- dev_err(&pf->pdev->dev, "xid %x ddp->xid %x INVALIDATION FAILURE\n",
- xid, ddp->xid);
- ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT;
- }
- /* clear the flag so we can retry invalidation */
- clear_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags);
- }
-
- /* unmap DMA */
- i40e_fcoe_ddp_unmap(pf, ddp);
- i40e_fcoe_ddp_clear(ddp);
-}
-
-/**
- * i40e_fcoe_handle_offload - check ddp status and mark it done
- * @adapter: i40e adapter
- * @rx_desc: advanced rx descriptor
- * @skb: the skb holding the received data
- *
- * This checks ddp status.
- *
- * Returns : < 0 indicates an error or not a FCOE ddp, 0 indicates
- * not passing the skb to ULD, > 0 indicates is the length of data
- * being ddped.
- *
- **/
-int i40e_fcoe_handle_offload(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- struct i40e_pf *pf = rx_ring->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- struct fc_frame_header *fh = NULL;
- struct i40e_fcoe_ddp *ddp = NULL;
- u32 status, fltstat;
- u32 error, fcerr;
- int rc = -EINVAL;
- u16 ptype;
- u16 xid;
- u64 qw;
-
- /* check this rxd is for programming status */
- qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- /* packet descriptor, check packet type */
- ptype = (qw & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
- if (!i40e_rx_is_fcoe(ptype))
- goto out_no_ddp;
-
- error = (qw & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT;
- fcerr = (error >> I40E_RX_DESC_ERROR_L3L4E_SHIFT) &
- I40E_RX_DESC_FCOE_ERROR_MASK;
-
- /* check stateless offload error */
- if (unlikely(fcerr == I40E_RX_DESC_ERROR_L3L4E_PROT)) {
- dev_err(&pf->pdev->dev, "Protocol Error\n");
- skb->ip_summed = CHECKSUM_NONE;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
-
- /* check hw status on ddp */
- status = (qw & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT;
- fltstat = (status >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
- I40E_RX_DESC_FLTSTAT_FCMASK;
-
- /* now we are ready to check DDP */
- fh = i40e_fcoe_fc_frame_header(skb);
- xid = i40e_fcoe_fc_get_xid(fh);
- if (!i40e_fcoe_xid_is_valid(xid))
- goto out_no_ddp;
-
- /* non DDP normal receive, return to the protocol stack */
- if (fltstat == I40E_RX_DESC_FLTSTAT_NOMTCH)
- goto out_no_ddp;
-
- /* do we have a sw ddp context setup ? */
- ddp = &fcoe->ddp[xid];
- if (!ddp->sgl)
- goto out_no_ddp;
-
- /* fetch xid from hw rxd wb, which should match up the sw ctxt */
- xid = le16_to_cpu(rx_desc->wb.qword0.lo_dword.mirr_fcoe.fcoe_ctx_id);
- if (ddp->xid != xid) {
- dev_err(&pf->pdev->dev, "xid 0x%x does not match ctx_xid 0x%x\n",
- ddp->xid, xid);
- goto out_put_ddp;
- }
-
- /* the same exchange has already errored out */
- if (ddp->fcerr) {
- dev_err(&pf->pdev->dev, "xid 0x%x fcerr 0x%x reported fcer 0x%x\n",
- xid, ddp->fcerr, fcerr);
- goto out_put_ddp;
- }
-
- /* fcoe param is valid by now with correct DDPed length */
- ddp->len = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param);
- ddp->fcerr = fcerr;
- /* header posting only, useful only for target mode and debugging */
- if (fltstat == I40E_RX_DESC_FLTSTAT_DDP) {
- /* For target mode, we get header of the last packet but it
- * does not have the FCoE trailer field, i.e., CRC and EOF
- * Ordered Set since they are offloaded by the HW, so fill
- * it up correspondingly to allow the packet to pass through
- * to the upper protocol stack.
- */
- u32 f_ctl = ntoh24(fh->fh_f_ctl);
-
- if ((f_ctl & FC_FC_END_SEQ) &&
- (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA)) {
- struct fcoe_crc_eof *crc = NULL;
-
- crc = skb_put(skb, sizeof(*crc));
- crc->fcoe_eof = FC_EOF_T;
- } else {
- /* otherwise, drop the header only frame */
- rc = 0;
- goto out_no_ddp;
- }
- }
-
-out_put_ddp:
- /* either we got RSP or we have an error, unmap DMA in both cases */
- i40e_fcoe_ddp_unmap(pf, ddp);
- if (ddp->len && !ddp->fcerr) {
- int pkts;
-
- rc = ddp->len;
- i40e_fcoe_ddp_clear(ddp);
- ddp->len = rc;
- pkts = DIV_ROUND_UP(rc, 2048);
- rx_ring->stats.bytes += rc;
- rx_ring->stats.packets += pkts;
- rx_ring->q_vector->rx.total_bytes += rc;
- rx_ring->q_vector->rx.total_packets += pkts;
- set_bit(__I40E_FCOE_DDP_DONE, &ddp->flags);
- }
-
-out_no_ddp:
- return rc;
-}
-
-/**
- * i40e_fcoe_ddp_setup - called to set up ddp context
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- * @target_mode: indicates this is a DDP request for target
- *
- * Returns : 1 for success and 0 for no DDP on this I/O
- **/
-static int i40e_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc,
- int target_mode)
-{
- static const unsigned int bufflen = I40E_FCOE_DDP_BUF_MIN;
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_fcoe_ddp_pool *ddp_pool;
- struct i40e_pf *pf = np->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- unsigned int i, j, dmacount;
- struct i40e_fcoe_ddp *ddp;
- unsigned int firstoff = 0;
- unsigned int thisoff = 0;
- unsigned int thislen = 0;
- struct scatterlist *sg;
- dma_addr_t addr = 0;
- unsigned int len;
-
- if (xid >= I40E_FCOE_DDP_MAX) {
- dev_warn(&pf->pdev->dev, "xid=0x%x out-of-range\n", xid);
- return 0;
- }
-
- /* no DDP if we are already down or resetting */
- if (test_bit(__I40E_DOWN, &pf->state) ||
- test_bit(__I40E_NEEDS_RESTART, &pf->state)) {
- dev_info(&pf->pdev->dev, "xid=0x%x device in reset/down\n",
- xid);
- return 0;
- }
-
- ddp = &fcoe->ddp[xid];
- if (ddp->sgl) {
- dev_info(&pf->pdev->dev, "xid 0x%x w/ non-null sgl=%p nents=%d\n",
- xid, ddp->sgl, ddp->sgc);
- return 0;
- }
- i40e_fcoe_ddp_clear(ddp);
-
- if (!fcoe->ddp_pool) {
- dev_info(&pf->pdev->dev, "No DDP pool, xid 0x%x\n", xid);
- return 0;
- }
-
- ddp_pool = per_cpu_ptr(fcoe->ddp_pool, get_cpu());
- if (!ddp_pool->pool) {
- dev_info(&pf->pdev->dev, "No percpu ddp pool, xid 0x%x\n", xid);
- goto out_noddp;
- }
-
- /* setup dma from scsi command sgl */
- dmacount = dma_map_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
- if (dmacount == 0) {
- dev_info(&pf->pdev->dev, "dma_map_sg for sgl %p, sgc %d failed\n",
- sgl, sgc);
- goto out_noddp_unmap;
- }
-
- /* alloc the udl from our ddp pool */
- ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp);
- if (!ddp->udl) {
- dev_info(&pf->pdev->dev,
- "Failed allocated ddp context, xid 0x%x\n", xid);
- goto out_noddp_unmap;
- }
-
- j = 0;
- ddp->len = 0;
- for_each_sg(sgl, sg, dmacount, i) {
- addr = sg_dma_address(sg);
- len = sg_dma_len(sg);
- ddp->len += len;
- while (len) {
- /* max number of buffers allowed in one DDP context */
- if (j >= I40E_FCOE_DDP_BUFFCNT_MAX) {
- dev_info(&pf->pdev->dev,
- "xid=%x:%d,%d,%d:addr=%llx not enough descriptors\n",
- xid, i, j, dmacount, (u64)addr);
- goto out_noddp_free;
- }
-
- /* get the offset of length of current buffer */
- thisoff = addr & ((dma_addr_t)bufflen - 1);
- thislen = min_t(unsigned int, (bufflen - thisoff), len);
- /* all but the 1st buffer (j == 0)
- * must be aligned on bufflen
- */
- if ((j != 0) && (thisoff))
- goto out_noddp_free;
-
- /* all but the last buffer
- * ((i == (dmacount - 1)) && (thislen == len))
- * must end at bufflen
- */
- if (((i != (dmacount - 1)) || (thislen != len)) &&
- ((thislen + thisoff) != bufflen))
- goto out_noddp_free;
-
- ddp->udl[j] = (u64)(addr - thisoff);
- /* only the first buffer may have none-zero offset */
- if (j == 0)
- firstoff = thisoff;
- len -= thislen;
- addr += thislen;
- j++;
- }
- }
- /* only the last buffer may have non-full bufflen */
- ddp->lastsize = thisoff + thislen;
- ddp->firstoff = firstoff;
- ddp->list_len = j;
- ddp->pool = ddp_pool->pool;
- ddp->sgl = sgl;
- ddp->sgc = sgc;
- ddp->xid = xid;
- if (target_mode)
- set_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags);
- set_bit(__I40E_FCOE_DDP_INITALIZED, &ddp->flags);
-
- put_cpu();
- return 1; /* Success */
-
-out_noddp_free:
- dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
- i40e_fcoe_ddp_clear(ddp);
-
-out_noddp_unmap:
- dma_unmap_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
-out_noddp:
- put_cpu();
- return 0;
-}
-
-/**
- * i40e_fcoe_ddp_get - called to set up ddp context in initiator mode
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_setup
- * and is expected to be called from ULD, e.g., FCP layer of libfc
- * to set up ddp for the corresponding xid of the given sglist for
- * the corresponding I/O.
- *
- * Returns : 1 for success and 0 for no ddp
- **/
-static int i40e_fcoe_ddp_get(struct net_device *netdev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc)
-{
- return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 0);
-}
-
-/**
- * i40e_fcoe_ddp_target - called to set up ddp context in target mode
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_target
- * and is expected to be called from the ULD, e.g., the FCP layer of
- * libfc, to set up DDP for the given xid and sglist of the
- * corresponding I/O. DDP in target mode handles a write I/O request
- * from the initiator.
- *
- * Returns : 1 for success and 0 for no ddp
- **/
-static int i40e_fcoe_ddp_target(struct net_device *netdev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc)
-{
- return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 1);
-}
-
-/**
- * i40e_fcoe_program_ddp - programs the HW DDP related descriptors
- * @tx_ring: transmit ring for this packet
- * @skb: the packet to be sent out
- * @ddp: the SW DDP context for this DDP
- * @sof: the SOF to indicate class of service
- *
- * Determines whether this is a READ/WRITE command, and whether there
- * is a matching SW DDP context for it. DDP is applicable only to
- * READ in the initiator case or to WRITE in the responder case (via
- * checking XFER_RDY).
- *
- * Note: caller checks sof and ddp sw context
- *
- * Returns : none
- *
- **/
-static void i40e_fcoe_program_ddp(struct i40e_ring *tx_ring,
- struct sk_buff *skb,
- struct i40e_fcoe_ddp *ddp, u8 sof)
-{
- struct i40e_fcoe_filter_context_desc *filter_desc = NULL;
- struct i40e_fcoe_queue_context_desc *queue_desc = NULL;
- struct i40e_fcoe_ddp_context_desc *ddp_desc = NULL;
- struct i40e_pf *pf = tx_ring->vsi->back;
- u16 i = tx_ring->next_to_use;
- struct fc_frame_header *fh;
- u64 flags_rsvd_lanq = 0;
- bool target_mode;
-
- /* check if abort is still pending */
- if (test_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags)) {
- dev_warn(&pf->pdev->dev,
- "DDP abort is still pending xid:%hx and ddp->flags:%lx:\n",
- ddp->xid, ddp->flags);
- return;
- }
-
- /* set the flag to indicate this is programmed */
- if (test_and_set_bit(__I40E_FCOE_DDP_PROGRAMMED, &ddp->flags)) {
- dev_warn(&pf->pdev->dev,
- "DDP is already programmed for xid:%hx and ddp->flags:%lx:\n",
- ddp->xid, ddp->flags);
- return;
- }
-
- /* Prepare the DDP context descriptor */
- ddp_desc = I40E_DDP_CONTEXT_DESC(tx_ring, i);
- i++;
- if (i == tx_ring->count)
- i = 0;
-
- ddp_desc->type_cmd_foff_lsize =
- cpu_to_le64(I40E_TX_DESC_DTYPE_DDP_CTX |
- ((u64)I40E_FCOE_DDP_CTX_DESC_BSIZE_4K <<
- I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT) |
- ((u64)ddp->firstoff <<
- I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT) |
- ((u64)ddp->lastsize <<
- I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT));
- ddp_desc->rsvd = cpu_to_le64(0);
-
- /* target mode needs last packet in the sequence */
- target_mode = test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags);
- if (target_mode)
- ddp_desc->type_cmd_foff_lsize |=
- cpu_to_le64(I40E_FCOE_DDP_CTX_DESC_LASTSEQH);
-
- /* Prepare queue_context descriptor */
- queue_desc = I40E_QUEUE_CONTEXT_DESC(tx_ring, i++);
- if (i == tx_ring->count)
- i = 0;
- queue_desc->dmaindx_fbase = cpu_to_le64(ddp->xid | ((u64)ddp->udp));
- queue_desc->flen_tph = cpu_to_le64(ddp->list_len |
- ((u64)(I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC |
- I40E_FCOE_QUEUE_CTX_DESC_TPHDATA) <<
- I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT));
-
- /* Prepare filter_context_desc */
- filter_desc = I40E_FILTER_CONTEXT_DESC(tx_ring, i);
- i++;
- if (i == tx_ring->count)
- i = 0;
-
- fh = (struct fc_frame_header *)skb_transport_header(skb);
- filter_desc->param = cpu_to_le32(ntohl(fh->fh_parm_offset));
- filter_desc->seqn = cpu_to_le16(ntohs(fh->fh_seq_cnt));
- filter_desc->rsvd_dmaindx = cpu_to_le16(ddp->xid <<
- I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT);
-
- flags_rsvd_lanq = I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP;
- flags_rsvd_lanq |= (u64)(target_mode ?
- I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP :
- I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT);
-
- flags_rsvd_lanq |= (u64)((sof == FC_SOF_I2 || sof == FC_SOF_N2) ?
- I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2 :
- I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3);
-
- flags_rsvd_lanq |= ((u64)skb->queue_mapping <<
- I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT);
- filter_desc->flags_rsvd_lanq = cpu_to_le64(flags_rsvd_lanq);
-
-	/* By this time, all offload-related descriptors have been programmed */
- tx_ring->next_to_use = i;
-}
-
-/**
- * i40e_fcoe_invalidate_ddp - invalidates DDP in case of abort
- * @tx_ring: transmit ring for this packet
- * @skb: the packet associated w/ this DDP invalidation, i.e., ABTS
- * @ddp: the SW DDP context for this DDP
- *
- * Programs the Tx context descriptor to do DDP invalidation.
- **/
-static void i40e_fcoe_invalidate_ddp(struct i40e_ring *tx_ring,
- struct sk_buff *skb,
- struct i40e_fcoe_ddp *ddp)
-{
- struct i40e_tx_context_desc *context_desc;
- int i;
-
- if (test_and_set_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags))
- return;
-
- i = tx_ring->next_to_use;
- context_desc = I40E_TX_CTXTDESC(tx_ring, i);
- i++;
- if (i == tx_ring->count)
- i = 0;
-
- context_desc->tunneling_params = cpu_to_le32(0);
- context_desc->l2tag2 = cpu_to_le16(0);
- context_desc->rsvd = cpu_to_le16(0);
- context_desc->type_cmd_tso_mss = cpu_to_le64(
- I40E_TX_DESC_DTYPE_FCOE_CTX |
- (I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL <<
- I40E_TXD_CTX_QW1_CMD_SHIFT) |
- (I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND <<
- I40E_TXD_CTX_QW1_CMD_SHIFT));
- tx_ring->next_to_use = i;
-}
-
-/**
- * i40e_fcoe_handle_ddp - check whether we should set up or invalidate DDP
- * @tx_ring: transmit ring for this packet
- * @skb: the packet to be sent out
- * @sof: the SOF to indicate class of service
- *
- * Determines whether this is an ABTS/READ/XFER_RDY, and whether there
- * is a matching SW DDP context for it. DDP is applicable only to
- * READ in the initiator case or to WRITE in the responder case (via
- * checking XFER_RDY). If this is an ABTS, just invalidate the
- * context.
- **/
-static void i40e_fcoe_handle_ddp(struct i40e_ring *tx_ring,
- struct sk_buff *skb, u8 sof)
-{
- struct i40e_pf *pf = tx_ring->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- struct fc_frame_header *fh;
- struct i40e_fcoe_ddp *ddp;
- u32 f_ctl;
- u8 r_ctl;
- u16 xid;
-
- fh = (struct fc_frame_header *)skb_transport_header(skb);
- f_ctl = ntoh24(fh->fh_f_ctl);
- r_ctl = fh->fh_r_ctl;
- ddp = NULL;
-
- if ((r_ctl == FC_RCTL_DD_DATA_DESC) && (f_ctl & FC_FC_EX_CTX)) {
- /* exchange responder? if so, XFER_RDY for write */
- xid = ntohs(fh->fh_rx_id);
- if (i40e_fcoe_xid_is_valid(xid)) {
- ddp = &fcoe->ddp[xid];
- if ((ddp->xid == xid) &&
- (test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
- i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof);
- }
- } else if (r_ctl == FC_RCTL_DD_UNSOL_CMD) {
- /* exchange originator, check READ cmd */
- xid = ntohs(fh->fh_ox_id);
- if (i40e_fcoe_xid_is_valid(xid)) {
- ddp = &fcoe->ddp[xid];
- if ((ddp->xid == xid) &&
- (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
- i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof);
- }
- } else if (r_ctl == FC_RCTL_BA_ABTS) {
- /* exchange originator, check ABTS */
- xid = ntohs(fh->fh_ox_id);
- if (i40e_fcoe_xid_is_valid(xid)) {
- ddp = &fcoe->ddp[xid];
- if ((ddp->xid == xid) &&
- (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
- i40e_fcoe_invalidate_ddp(tx_ring, skb, ddp);
- }
- }
-}
-
-/**
- * i40e_fcoe_tso - set up FCoE TSO
- * @tx_ring: ring to send buffer on
- * @skb: send buffer
- * @tx_flags: collected send information
- * @hdr_len: the tso header length
- * @sof: the SOF to indicate class of service
- *
- * Note: sof must already have been checked to be either class 2 or
- * class 3 before calling this function.
- *
- * Returns 1 to indicate sequence segmentation offload is properly set
- * up, or 0 to indicate no TSO is needed; otherwise returns an error
- * code indicating the frame should be dropped.
- **/
-static int i40e_fcoe_tso(struct i40e_ring *tx_ring,
- struct sk_buff *skb,
- u32 tx_flags, u8 *hdr_len, u8 sof)
-{
- struct i40e_tx_context_desc *context_desc;
- u32 cd_type, cd_cmd, cd_tso_len, cd_mss;
- struct fc_frame_header *fh;
- u64 cd_type_cmd_tso_mss;
-
- /* must match gso type as FCoE */
- if (!skb_is_gso(skb))
- return 0;
-
-	/* is it the expected gso type for FCoE? */
- if (skb_shinfo(skb)->gso_type != SKB_GSO_FCOE) {
- netdev_err(skb->dev,
-			   "wrong gso type %d: expecting SKB_GSO_FCOE\n",
- skb_shinfo(skb)->gso_type);
- return -EINVAL;
- }
-
- /* header and trailer are inserted by hw */
- *hdr_len = skb_transport_offset(skb) + sizeof(struct fc_frame_header) +
- sizeof(struct fcoe_crc_eof);
-
- /* check sof to decide a class 2 or 3 TSO */
- if (likely(i40e_fcoe_sof_is_class3(sof)))
- cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3;
- else
- cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2;
-
- /* param field valid? */
- fh = (struct fc_frame_header *)skb_transport_header(skb);
- if (fh->fh_f_ctl[2] & FC_FC_REL_OFF)
- cd_cmd |= I40E_FCOE_TX_CTX_DESC_RELOFF;
-
- /* fill the field values */
- cd_type = I40E_TX_DESC_DTYPE_FCOE_CTX;
- cd_tso_len = skb->len - *hdr_len;
- cd_mss = skb_shinfo(skb)->gso_size;
- cd_type_cmd_tso_mss =
- ((u64)cd_type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
- ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
- ((u64)cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
- ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
-
- /* grab the next descriptor */
- context_desc = I40E_TX_CTXTDESC(tx_ring, tx_ring->next_to_use);
- tx_ring->next_to_use++;
- if (tx_ring->next_to_use == tx_ring->count)
- tx_ring->next_to_use = 0;
-
- context_desc->tunneling_params = 0;
- context_desc->l2tag2 = cpu_to_le16((tx_flags & I40E_TX_FLAGS_VLAN_MASK)
- >> I40E_TX_FLAGS_VLAN_SHIFT);
- context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
-
- return 1;
-}
-
-/**
- * i40e_fcoe_tx_map - build the tx descriptor
- * @tx_ring: ring to send buffer on
- * @skb: send buffer
- * @first: first buffer info buffer to use
- * @tx_flags: collected send information
- * @hdr_len: ptr to the size of the packet header
- * @eof: the frame eof value
- *
- * Note, for FCoE, sof and eof are already checked
- **/
-static void i40e_fcoe_tx_map(struct i40e_ring *tx_ring,
- struct sk_buff *skb,
- struct i40e_tx_buffer *first,
- u32 tx_flags, u8 hdr_len, u8 eof)
-{
- u32 td_offset = 0;
- u32 td_cmd = 0;
- u32 maclen;
-
- /* insert CRC */
- td_cmd = I40E_TX_DESC_CMD_ICRC;
-
- /* setup MACLEN */
- maclen = skb_network_offset(skb);
- if (tx_flags & I40E_TX_FLAGS_SW_VLAN)
- maclen += sizeof(struct vlan_hdr);
-
- if (skb->protocol == htons(ETH_P_FCOE)) {
- /* for FCoE, maclen should exclude ether type */
- maclen -= 2;
- /* setup type as FCoE and EOF insertion */
- td_cmd |= (I40E_TX_DESC_CMD_FCOET | i40e_fcoe_ctxt_eof(eof));
- /* setup FCoELEN and FCLEN */
- td_offset |= ((((sizeof(struct fcoe_hdr) + 2) >> 2) <<
- I40E_TX_DESC_LENGTH_IPLEN_SHIFT) |
- ((sizeof(struct fc_frame_header) >> 2) <<
- I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT));
- /* trim to exclude trailer */
- pskb_trim(skb, skb->len - sizeof(struct fcoe_crc_eof));
- }
-
- /* MACLEN is ether header length in words not bytes */
- td_offset |= (maclen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
-
- i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset);
-}
-
-/**
- * i40e_fcoe_set_skb_header - adjust the skb header pointers for FIP/FCoE/FC
- * @skb: the skb to be adjusted
- *
- * Returns 0 if this skb is an FCoE/FIP frame (possibly VLAN tagged)
- * and adjusts the skb header pointers accordingly; otherwise returns
- * -EINVAL.
- **/
-static inline int i40e_fcoe_set_skb_header(struct sk_buff *skb)
-{
- __be16 protocol = skb->protocol;
-
- skb_reset_mac_header(skb);
- skb->mac_len = sizeof(struct ethhdr);
- if (protocol == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)eth_hdr(skb);
-
- protocol = veth->h_vlan_encapsulated_proto;
- skb->mac_len += sizeof(struct vlan_hdr);
- }
-
- /* FCoE or FIP only */
- if ((protocol != htons(ETH_P_FIP)) &&
- (protocol != htons(ETH_P_FCOE)))
- return -EINVAL;
-
- /* set header to L2 of FCoE/FIP */
- skb_set_network_header(skb, skb->mac_len);
- if (protocol == htons(ETH_P_FIP))
- return 0;
-
- /* set header to L3 of FC */
- skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr));
- return 0;
-}
-
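For reference, the header layout the function above establishes can be shown with a small standalone C sketch; the sizes are those of the uapi structs, and an untagged frame is assumed (illustrative only):

	#include <stdio.h>

	/* i40e_fcoe_set_skb_header() leaves the MAC header at 0, the
	 * network header at the FCoE encapsulation header, and the
	 * transport header at the FC frame header.
	 */
	int main(void)
	{
		const int eth_hlen = 14;	/* sizeof(struct ethhdr) */
		const int fcoe_hlen = 14;	/* sizeof(struct fcoe_hdr) */

		printf("network header offset:   %d\n", eth_hlen);
		printf("transport header offset: %d\n", eth_hlen + fcoe_hlen);
		/* with an 802.1Q tag, both offsets grow by 4 bytes */
		return 0;
	}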
-/**
- * i40e_fcoe_xmit_frame - transmit buffer
- * @skb: send buffer
- * @netdev: the fcoe netdev
- *
- * Returns NETDEV_TX_OK if the frame was sent or dropped, or
- * NETDEV_TX_BUSY if the ring is full
- **/
-static netdev_tx_t i40e_fcoe_xmit_frame(struct sk_buff *skb,
- struct net_device *netdev)
-{
- struct i40e_netdev_priv *np = netdev_priv(skb->dev);
- struct i40e_vsi *vsi = np->vsi;
- struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
- struct i40e_tx_buffer *first;
- u32 tx_flags = 0;
- int fso, count;
- u8 hdr_len = 0;
- u8 sof = 0;
- u8 eof = 0;
-
- if (i40e_fcoe_set_skb_header(skb))
- goto out_drop;
-
- count = i40e_xmit_descriptor_count(skb);
- if (i40e_chk_linearize(skb, count)) {
- if (__skb_linearize(skb))
- goto out_drop;
- count = i40e_txd_use_count(skb->len);
- tx_ring->tx_stats.tx_linearize++;
- }
-
- /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
- * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
- * + 4 desc gap to avoid the cache line where head is,
- * + 1 desc for context descriptor,
- * otherwise try next time
- */
- if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
- tx_ring->tx_stats.tx_busy++;
- return NETDEV_TX_BUSY;
- }
-
- /* prepare the xmit flags */
- if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
- goto out_drop;
-
- /* record the location of the first descriptor for this packet */
- first = &tx_ring->tx_bi[tx_ring->next_to_use];
-
-	/* FIP is regular L2 traffic without offload */
- if (skb->protocol == htons(ETH_P_FIP))
- goto out_send;
-
- /* check sof and eof, only supports FC Class 2 or 3 */
- if (i40e_fcoe_fc_sof(skb, &sof) || i40e_fcoe_fc_eof(skb, &eof)) {
- netdev_err(netdev, "SOF/EOF error:%02x - %02x\n", sof, eof);
- goto out_drop;
- }
-
- /* always do FCCRC for FCoE */
- tx_flags |= I40E_TX_FLAGS_FCCRC;
-
-	/* check whether we should do sequence offload */
- fso = i40e_fcoe_tso(tx_ring, skb, tx_flags, &hdr_len, sof);
- if (fso < 0)
- goto out_drop;
- else if (fso)
- tx_flags |= I40E_TX_FLAGS_FSO;
- else
- i40e_fcoe_handle_ddp(tx_ring, skb, sof);
-
-out_send:
- /* send out the packet */
- i40e_fcoe_tx_map(tx_ring, skb, first, tx_flags, hdr_len, eof);
-
- i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
- return NETDEV_TX_OK;
-
-out_drop:
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
-}
-
-/**
- * i40e_fcoe_change_mtu - NDO callback to change the Maximum Transmission Unit
- * @netdev: network interface device structure
- * @new_mtu: new value for maximum frame size
- *
- * Returns -EPERM, as changing the MTU is not permitted on FCoE interfaces
- *
- **/
-static int i40e_fcoe_change_mtu(struct net_device *netdev, int new_mtu)
-{
- netdev_warn(netdev, "MTU change is not supported on FCoE interfaces\n");
- return -EPERM;
-}
-
-/**
- * i40e_fcoe_set_features - set the netdev feature flags
- * @netdev: ptr to the netdev being adjusted
- * @features: the feature set that the stack is suggesting
- *
- **/
-static int i40e_fcoe_set_features(struct net_device *netdev,
- netdev_features_t features)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_vsi *vsi = np->vsi;
-
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- i40e_vlan_stripping_enable(vsi);
- else
- i40e_vlan_stripping_disable(vsi);
-
- return 0;
-}
-
-static const struct net_device_ops i40e_fcoe_netdev_ops = {
- .ndo_open = i40e_open,
- .ndo_stop = i40e_close,
- .ndo_get_stats64 = i40e_get_netdev_stats_struct,
- .ndo_set_rx_mode = i40e_set_rx_mode,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = i40e_set_mac,
- .ndo_change_mtu = i40e_fcoe_change_mtu,
- .ndo_do_ioctl = i40e_ioctl,
- .ndo_tx_timeout = i40e_tx_timeout,
- .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid,
- .ndo_setup_tc = __i40e_setup_tc,
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = i40e_netpoll,
-#endif
- .ndo_start_xmit = i40e_fcoe_xmit_frame,
- .ndo_fcoe_enable = i40e_fcoe_enable,
- .ndo_fcoe_disable = i40e_fcoe_disable,
- .ndo_fcoe_ddp_setup = i40e_fcoe_ddp_get,
- .ndo_fcoe_ddp_done = i40e_fcoe_ddp_put,
- .ndo_fcoe_ddp_target = i40e_fcoe_ddp_target,
- .ndo_set_features = i40e_fcoe_set_features,
-};
-
-/* fcoe network device type */
-static struct device_type fcoe_netdev_type = {
- .name = "fcoe",
-};
-
-/**
- * i40e_fcoe_config_netdev - prepares the netdev for the FCoE VSI
- * @netdev: the net_device to be configured
- * @vsi: pointer to the associated VSI struct
- *
- * Returns nothing; only acts when vsi->type is I40E_VSI_FCOE.
- **/
-void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi)
-{
- struct i40e_hw *hw = &vsi->back->hw;
- struct i40e_pf *pf = vsi->back;
-
- if (vsi->type != I40E_VSI_FCOE)
- return;
-
- netdev->features = (NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER);
-
- netdev->vlan_features = netdev->features;
- netdev->vlan_features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER);
- netdev->fcoe_ddp_xid = I40E_FCOE_DDP_MAX - 1;
- netdev->features |= NETIF_F_ALL_FCOE;
- netdev->vlan_features |= NETIF_F_ALL_FCOE;
- netdev->hw_features |= netdev->features;
- netdev->priv_flags |= IFF_UNICAST_FLT;
- netdev->priv_flags |= IFF_SUPP_NOFCS;
-
- strlcpy(netdev->name, "fcoe%d", IFNAMSIZ-1);
- netdev->mtu = FCOE_MTU;
- SET_NETDEV_DEV(netdev, &pf->pdev->dev);
- SET_NETDEV_DEVTYPE(netdev, &fcoe_netdev_type);
-	/* Set a dev_port value of 1 for the FCoE netdev, distinct from the
-	 * default dev_port value of 0 for the PF netdev; this helps the
-	 * biosdevname user tool differentiate them correctly while both are
-	 * attached to the same PCI function.
-	 */
- netdev->dev_port = 1;
- spin_lock_bh(&vsi->mac_filter_hash_lock);
- i40e_add_filter(vsi, hw->mac.san_addr, 0);
- i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0);
- i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0);
- i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0);
- spin_unlock_bh(&vsi->mac_filter_hash_lock);
-
- /* use san mac */
- ether_addr_copy(netdev->dev_addr, hw->mac.san_addr);
- ether_addr_copy(netdev->perm_addr, hw->mac.san_addr);
- /* fcoe netdev ops */
- netdev->netdev_ops = &i40e_fcoe_netdev_ops;
-}
-
-/**
- * i40e_fcoe_vsi_setup - allocate and set up FCoE VSI
- * @pf: the PF that VSI is associated with
- *
- **/
-void i40e_fcoe_vsi_setup(struct i40e_pf *pf)
-{
- struct i40e_vsi *vsi;
- u16 seid;
- int i;
-
- if (!(pf->flags & I40E_FLAG_FCOE_ENABLED))
- return;
-
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- vsi = pf->vsi[i];
- if (vsi && vsi->type == I40E_VSI_FCOE) {
- dev_warn(&pf->pdev->dev,
- "FCoE VSI already created\n");
- return;
- }
- }
-
- seid = pf->vsi[pf->lan_vsi]->seid;
- vsi = i40e_vsi_setup(pf, I40E_VSI_FCOE, seid, 0);
- if (vsi) {
- dev_dbg(&pf->pdev->dev,
- "Successfully created FCoE VSI seid %d id %d uplink_seid %d PF seid %d\n",
- vsi->seid, vsi->id, vsi->uplink_seid, seid);
- } else {
- dev_info(&pf->pdev->dev, "Failed to create FCoE VSI\n");
- }
-}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h b/drivers/net/ethernet/intel/i40e/i40e_fcoe.h
deleted file mode 100644
index a93174ddeaba..000000000000
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <[email protected]>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#ifndef _I40E_FCOE_H_
-#define _I40E_FCOE_H_
-
-/* FCoE HW context helper macros */
-#define I40E_DDP_CONTEXT_DESC(R, i) \
- (&(((struct i40e_fcoe_ddp_context_desc *)((R)->desc))[i]))
-
-#define I40E_QUEUE_CONTEXT_DESC(R, i) \
- (&(((struct i40e_fcoe_queue_context_desc *)((R)->desc))[i]))
-
-#define I40E_FILTER_CONTEXT_DESC(R, i) \
- (&(((struct i40e_fcoe_filter_context_desc *)((R)->desc))[i]))
-
-/* receive queue descriptor filter status for FCoE */
-#define I40E_RX_DESC_FLTSTAT_FCMASK 0x3
-#define I40E_RX_DESC_FLTSTAT_NOMTCH 0x0 /* no ddp context match */
-#define I40E_RX_DESC_FLTSTAT_NODDP 0x1 /* no ddp due to error */
-#define I40E_RX_DESC_FLTSTAT_DDP 0x2 /* DDPed payload, post header */
-#define I40E_RX_DESC_FLTSTAT_FCPRSP 0x3 /* FCP_RSP */
-
-/* receive queue descriptor error codes for FCoE */
-#define I40E_RX_DESC_FCOE_ERROR_MASK \
- (I40E_RX_DESC_ERROR_L3L4E_PROT | \
- I40E_RX_DESC_ERROR_L3L4E_FC | \
- I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR | \
- I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN)
-
-/* receive queue descriptor programming error */
-#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL(e) \
- (((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT) & 0x1)
-
-#define I40E_RX_PROG_FCOE_ERROR_CONFLICT(e) \
- (((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT) & 0x1)
-
-#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT \
- BIT(I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT)
-#define I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT \
- BIT(I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT)
-
-#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL(e) \
- I40E_RX_PROG_FCOE_ERROR_CONFLICT(e)
-#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT \
- I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT
-
-/* FCoE DDP related definitions */
-#define I40E_FCOE_MIN_XID 0x0000 /* the min xid supported by fcoe_sw */
-#define I40E_FCOE_MAX_XID 0x0FFF /* the max xid supported by fcoe_sw */
-#define I40E_FCOE_DDP_BUFFCNT_MAX 512 /* 9 bits bufcnt */
-#define I40E_FCOE_DDP_PTR_ALIGN 16
-#define I40E_FCOE_DDP_PTR_MAX (I40E_FCOE_DDP_BUFFCNT_MAX * sizeof(dma_addr_t))
-#define I40E_FCOE_DDP_BUF_MIN 4096
-#define I40E_FCOE_DDP_MAX 2048
-#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT 8
-
-/* supported netdev features for FCoE */
-#define I40E_FCOE_NETIF_FEATURES (NETIF_F_ALL_FCOE | \
- NETIF_F_HW_VLAN_CTAG_TX | \
- NETIF_F_HW_VLAN_CTAG_RX | \
- NETIF_F_HW_VLAN_CTAG_FILTER)
-
-/* DDP context flags */
-enum i40e_fcoe_ddp_flags {
- __I40E_FCOE_DDP_NONE = 1,
- __I40E_FCOE_DDP_TARGET,
- __I40E_FCOE_DDP_INITALIZED,
- __I40E_FCOE_DDP_PROGRAMMED,
- __I40E_FCOE_DDP_DONE,
- __I40E_FCOE_DDP_ABORTED,
- __I40E_FCOE_DDP_UNMAPPED,
-};
-
-/* DDP SW context struct */
-struct i40e_fcoe_ddp {
- int len;
- u16 xid;
- u16 firstoff;
- u16 lastsize;
- u16 list_len;
- u8 fcerr;
- u8 prerr;
- unsigned long flags;
- unsigned int sgc;
- struct scatterlist *sgl;
- dma_addr_t udp;
- u64 *udl;
- struct dma_pool *pool;
-
-};
-
-struct i40e_fcoe_ddp_pool {
- struct dma_pool *pool;
-};
-
-struct i40e_fcoe {
- unsigned long mode;
- atomic_t refcnt;
- struct i40e_fcoe_ddp_pool __percpu *ddp_pool;
- struct i40e_fcoe_ddp ddp[I40E_FCOE_DDP_MAX];
-};
-
-#endif /* _I40E_FCOE_H_ */
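The ddp_pool above is a per-CPU wrapper around a struct dma_pool. A hedged kernel-context sketch of how such pools are typically created and used (not compilable standalone; "dev" stands for the PF's struct device, and error handling is trimmed):

	struct i40e_fcoe_ddp_pool __percpu *ddp_pool;
	int cpu;

	ddp_pool = alloc_percpu(struct i40e_fcoe_ddp_pool);
	for_each_possible_cpu(cpu) {
		struct i40e_fcoe_ddp_pool *p = per_cpu_ptr(ddp_pool, cpu);

		/* one pool of 16-byte-aligned UDL arrays per CPU */
		p->pool = dma_pool_create("i40e_fcoe_ddp", dev,
					  I40E_FCOE_DDP_PTR_MAX,
					  I40E_FCOE_DDP_PTR_ALIGN, PAGE_SIZE);
	}

	/* allocation path, as in i40e_fcoe_ddp_setup() above */
	ddp->udl = dma_pool_alloc(per_cpu_ptr(ddp_pool, get_cpu())->pool,
				  GFP_ATOMIC, &ddp->udp);
	put_cpu();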
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
index a7c7b1d9b7c8..6d4b590f851b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
index d90669211392..7b5fd33d70ae 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index daa9204426d4..cd40dc487b38 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
index e74128db5be5..79e1396735d9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 70ecd9c3a163..16229998fb1e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -69,12 +70,6 @@ static int i40e_reset(struct i40e_pf *pf);
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
static void i40e_fdir_sb_setup(struct i40e_pf *pf);
static int i40e_veb_get_bw_info(struct i40e_veb *veb);
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add);
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add);
static int i40e_get_capabilities(struct i40e_pf *pf,
enum i40e_admin_queue_opc list_type);
@@ -215,8 +210,8 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
dev_info(&pf->pdev->dev,
- "param err: pile=%p needed=%d id=0x%04x\n",
- pile, needed, id);
+ "param err: pile=%s needed=%d id=0x%04x\n",
+ pile ? "<valid>" : "<null>", needed, id);
return -EINVAL;
}
@@ -1088,13 +1083,13 @@ static void i40e_update_pf_stats(struct i40e_pf *pf)
&osd->rx_lpi_count, &nsd->rx_lpi_count);
if (pf->flags & I40E_FLAG_FD_SB_ENABLED &&
- !(pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED))
+ !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
nsd->fd_sb_status = true;
else
nsd->fd_sb_status = false;
if (pf->flags & I40E_FLAG_FD_ATR_ENABLED &&
- !(pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
+ !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
nsd->fd_atr_status = true;
else
nsd->fd_atr_status = false;
@@ -1380,21 +1375,14 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
ether_addr_copy(f->macaddr, macaddr);
f->vlan = vlan;
- /* If we're in overflow promisc mode, set the state directly
- * to failed, so we don't bother to try sending the filter
- * to the hardware.
- */
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state))
- f->state = I40E_FILTER_FAILED;
- else
- f->state = I40E_FILTER_NEW;
+ f->state = I40E_FILTER_NEW;
INIT_HLIST_NODE(&f->hlist);
key = i40e_addr_to_hkey(macaddr);
hash_add(vsi->mac_filter_hash, &f->hlist, key);
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
- vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+ set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
}
/* If we're asked to add a filter that has been marked for removal, it
@@ -1444,7 +1432,7 @@ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
}
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
- vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+ set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->state);
}
/**
@@ -1967,7 +1955,7 @@ static void i40e_set_rx_mode(struct net_device *netdev)
/* check for other flag changes */
if (vsi->current_netdev_flags != vsi->netdev->flags) {
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
- vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+ set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
}
}
@@ -2116,17 +2104,16 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
* @list: the list of filters to send to firmware
* @add_head: Position in the add hlist
* @num_add: the number of filters to add
- * @promisc_change: set to true on exit if promiscuous mode was forced on
*
* Send a request to firmware via AdminQ to add a chunk of filters. Will set
- * promisc_changed to true if the firmware has run out of space for more
- * filters.
+ * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of
+ * space for more filters.
*/
static
void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
struct i40e_aqc_add_macvlan_element_data *list,
struct i40e_new_mac_filter *add_head,
- int num_add, bool *promisc_changed)
+ int num_add)
{
struct i40e_hw *hw = &vsi->back->hw;
int aq_err, fcnt;
@@ -2136,7 +2123,6 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
fcnt = i40e_update_filter_state(num_add, list, add_head);
if (fcnt != num_add) {
- *promisc_changed = true;
set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev,
"Error %s adding RX filters on %s, promiscuous mode forced on\n",
@@ -2177,11 +2163,13 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
NULL);
}
- if (aq_ret)
+ if (aq_ret) {
+ set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev,
- "Error %s setting broadcast promiscuous mode on %s\n",
+ "Error %s, forcing overflow promiscuous on %s\n",
i40e_aq_str(hw, hw->aq.asq_last_status),
vsi_name);
+ }
return aq_ret;
}
@@ -2267,9 +2255,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
struct i40e_mac_filter *f;
struct i40e_new_mac_filter *new, *add_head = NULL;
struct i40e_hw *hw = &vsi->back->hw;
+ bool old_overflow, new_overflow;
unsigned int failed_filters = 0;
unsigned int vlan_filters = 0;
- bool promisc_changed = false;
char vsi_name[16] = "PF";
int filter_list_len = 0;
i40e_status aq_ret = 0;
@@ -2291,6 +2279,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
usleep_range(1000, 2000);
pf = vsi->back;
+ old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
if (vsi->netdev) {
changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
vsi->current_netdev_flags = vsi->netdev->flags;
@@ -2423,12 +2413,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
num_add = 0;
hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC,
- vsi->state)) {
- new->state = I40E_FILTER_FAILED;
- continue;
- }
-
/* handle broadcast filters by updating the broadcast
* promiscuous flag instead of adding a MAC filter.
*/
@@ -2464,15 +2448,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
/* flush a full buffer */
if (num_add == filter_list_len) {
i40e_aqc_add_filters(vsi, vsi_name, add_list,
- add_head, num_add,
- &promisc_changed);
+ add_head, num_add);
memset(add_list, 0, list_size);
num_add = 0;
}
}
if (num_add) {
i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
- num_add, &promisc_changed);
+ num_add);
}
/* Now move all of the filters from the temp add list back to
* the VSI's list.
@@ -2501,24 +2484,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
- /* If promiscuous mode has changed, we need to calculate a new
- * threshold for when we are safe to exit
- */
- if (promisc_changed)
- vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
-
/* Check if we are able to exit overflow promiscuous mode. We can
* safely exit if we didn't just enter, we no longer have any failed
* filters, and we have reduced filters below the threshold value.
*/
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) &&
- !promisc_changed && !failed_filters &&
- (vsi->active_filters < vsi->promisc_threshold)) {
+ if (old_overflow && !failed_filters &&
+ vsi->active_filters < vsi->promisc_threshold) {
dev_info(&pf->pdev->dev,
"filter logjam cleared on %s, leaving overflow promiscuous mode\n",
vsi_name);
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
- promisc_changed = true;
vsi->promisc_threshold = 0;
}
@@ -2528,6 +2503,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
goto out;
}
+ new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+ /* If we are entering overflow promiscuous, we need to calculate a new
+ * threshold for when we are safe to exit
+ */
+ if (!old_overflow && new_overflow)
+ vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
+
/* check for changes in promiscuous modes */
if (changed_flags & IFF_ALLMULTI) {
bool cur_multipromisc;
@@ -2548,12 +2531,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
}
}
- if ((changed_flags & IFF_PROMISC) || promisc_changed) {
+ if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
bool cur_promisc;
cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
- test_bit(__I40E_VSI_OVERFLOW_PROMISC,
- vsi->state));
+ new_overflow);
aq_ret = i40e_set_promiscuous(pf, cur_promisc);
if (aq_ret) {
retval = i40e_aq_rc_to_posix(aq_ret,
@@ -2595,9 +2577,10 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
{
int v;
- if (!pf || !(pf->flags & I40E_FLAG_FILTER_SYNC))
+ if (!pf)
+ return;
+ if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
return;
- pf->flags &= ~I40E_FLAG_FILTER_SYNC;
for (v = 0; v < pf->num_alloc_vsi; v++) {
if (pf->vsi[v] &&
@@ -2606,7 +2589,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
if (ret) {
/* come back and try again later */
- pf->flags |= I40E_FLAG_FILTER_SYNC;
+ set_bit(__I40E_MACVLAN_SYNC_PENDING,
+ pf->state);
break;
}
}
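The pattern this patch applies throughout, replacing read-modify-write updates of pf->flags with atomic state bits, looks like this in isolation (a sketch of the before/after shapes, not literal driver code):

	/* Before: not atomic; two contexts can race on pf->flags. */
	if (pf->flags & I40E_FLAG_FILTER_SYNC) {
		pf->flags &= ~I40E_FLAG_FILTER_SYNC;
		/* ... do the sync ... */
	}

	/* After: test-and-clear is a single atomic op on the pf->state bitmap. */
	if (test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state)) {
		/* ... do the sync ... */
	}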
@@ -2650,8 +2634,8 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
netdev->mtu = new_mtu;
if (netif_running(netdev))
i40e_vsi_reinit_locked(vsi);
- pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED |
- I40E_FLAG_CLIENT_L2_CHANGE);
+ set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+ set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
return 0;
}
@@ -2738,22 +2722,6 @@ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
}
/**
- * i40e_vlan_rx_register - Setup or shutdown vlan offload
- * @netdev: network interface to be adjusted
- * @features: netdev features to test if VLAN offload is enabled or not
- **/
-static void i40e_vlan_rx_register(struct net_device *netdev, u32 features)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_vsi *vsi = np->vsi;
-
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- i40e_vlan_stripping_enable(vsi);
- else
- i40e_vlan_stripping_disable(vsi);
-}
-
-/**
* i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
* @vsi: the vsi being configured
* @vid: vlan id to be added (0 = untagged only , -1 = any)
@@ -2928,7 +2896,10 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi)
if (!vsi->netdev)
return;
- i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features);
+ if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+ i40e_vlan_stripping_enable(vsi);
+ else
+ i40e_vlan_stripping_disable(vsi);
for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
i40e_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q),
@@ -4751,9 +4722,9 @@ static void i40e_vsi_close(struct i40e_vsi *vsi)
i40e_vsi_free_tx_resources(vsi);
i40e_vsi_free_rx_resources(vsi);
vsi->current_netdev_flags = 0;
- pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+ set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
- pf->flags |= I40E_FLAG_CLIENT_RESET;
+ set_bit(__I40E_CLIENT_RESET, pf->state);
}
/**
@@ -5381,7 +5352,7 @@ out:
* @vsi: VSI to be configured
*
**/
-int i40e_get_link_speed(struct i40e_vsi *vsi)
+static int i40e_get_link_speed(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
@@ -6524,7 +6495,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
/* On the next run of the service_task, notify any clients of the new
* opened netdev
*/
- pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+ set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
i40e_service_event_schedule(pf);
return 0;
@@ -6566,6 +6537,75 @@ int i40e_up(struct i40e_vsi *vsi)
}
/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_aq_set_phy_config config = {0};
+ struct i40e_hw *hw = &pf->hw;
+ i40e_status err;
+ u64 mask;
+
+ /* Get the current phy config */
+ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+ NULL);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "failed to get phy cap., ret = %s last_status = %s\n",
+ i40e_stat_str(hw, err),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return err;
+ }
+
+ /* If link needs to go up, but was not forced to go down,
+ * no need for a flap
+ */
+ if (is_up && abilities.phy_type != 0)
+ return I40E_SUCCESS;
+
+ /* To force link we need to set bits for all supported PHY types,
+ * but there are now more than 32, so we need to split the bitmap
+ * across two fields.
+ */
+ mask = I40E_PHY_TYPES_BITMASK;
+ config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+ config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+	/* Copy the old settings, except for phy_type */
+ config.abilities = abilities.abilities;
+ config.link_speed = abilities.link_speed;
+ config.eee_capability = abilities.eee_capability;
+ config.eeer = abilities.eeer_val;
+ config.low_power_ctrl = abilities.d3_lpan;
+ err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "set phy config ret = %s last_status = %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return err;
+ }
+
+ /* Update the link info */
+ err = i40e_update_link_info(hw);
+ if (err) {
+ /* Wait a little bit (on 40G cards it sometimes takes a really
+ * long time for link to come back from the atomic reset)
+ * and try once more
+ */
+ msleep(1000);
+ i40e_update_link_info(hw);
+ }
+
+ i40e_aq_set_link_restart_an(hw, true, NULL);
+
+ return I40E_SUCCESS;
+}
+
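The mask split in i40e_force_link_state() can also be written with the kernel's lower_32_bits()/upper_32_bits() helpers; an equivalent sketch of the two assignments above (same variables, restated for clarity):

	u64 mask = I40E_PHY_TYPES_BITMASK;

	/* low 32 PHY-type bits go in phy_type, the remainder in phy_type_ext */
	config.phy_type = is_up ? cpu_to_le32(lower_32_bits(mask)) : 0;
	config.phy_type_ext = is_up ? (u8)(upper_32_bits(mask) & 0xff) : 0;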
+/**
* i40e_down - Shutdown the connection processing
* @vsi: the VSI being stopped
**/
@@ -6582,6 +6622,9 @@ void i40e_down(struct i40e_vsi *vsi)
}
i40e_vsi_disable_irq(vsi);
i40e_vsi_stop_rings(vsi);
+ if (vsi->type == I40E_VSI_MAIN &&
+ vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+ i40e_force_link_state(vsi->back, false);
i40e_napi_disable_all(vsi);
for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -6854,8 +6897,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter,
* Add or delete a cloud filter for a specific flow spec.
 * Returns 0 if the filter was successfully added.
**/
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter, bool add)
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter, bool add)
{
struct i40e_aqc_cloud_filters_element_data cld_filter;
struct i40e_pf *pf = vsi->back;
@@ -6921,9 +6964,9 @@ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
 * Add or delete a cloud filter for a specific flow spec using a big buffer.
 * Returns 0 if the filter was successfully added.
**/
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add)
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add)
{
struct i40e_aqc_cloud_filters_element_bb cld_filter;
struct i40e_pf *pf = vsi->back;
@@ -7543,6 +7586,9 @@ int i40e_open(struct net_device *netdev)
netif_carrier_off(netdev);
+ if (i40e_force_link_state(pf, true))
+ return -EAGAIN;
+
err = i40e_vsi_open(vsi);
if (err)
return err;
@@ -7991,8 +8037,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
i40e_service_event_schedule(pf);
} else {
i40e_pf_unquiesce_all_vsi(pf);
- pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED |
- I40E_FLAG_CLIENT_L2_CHANGE);
+ set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+ set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
}
exit:
@@ -8093,6 +8139,85 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf)
}
/**
+ * i40e_reenable_fdir_sb - Restore FDir SB capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
+{
+ if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
+ if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+ (I40E_DEBUG_FD & pf->hw.debug_mask))
+ dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
+}
+
+/**
+ * i40e_reenable_fdir_atr - Restore FDir ATR capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
+{
+ if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) {
+ /* ATR uses the same filtering logic as SB rules. It only
+ * functions properly if the input set mask is at the default
+ * settings. It is safe to restore the default input set
+ * because there are no active TCPv4 filter rules.
+ */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+ I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+ if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+ (I40E_DEBUG_FD & pf->hw.debug_mask))
+ dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
+ }
+}
+
+/**
+ * i40e_delete_invalid_filter - Delete an invalid FDIR filter
+ * @pf: board private structure
+ * @filter: FDir filter to remove
+ */
+static void i40e_delete_invalid_filter(struct i40e_pf *pf,
+ struct i40e_fdir_filter *filter)
+{
+ /* Update counters */
+ pf->fdir_pf_active_filters--;
+ pf->fd_inv = 0;
+
+ switch (filter->flow_type) {
+ case TCP_V4_FLOW:
+ pf->fd_tcp4_filter_cnt--;
+ break;
+ case UDP_V4_FLOW:
+ pf->fd_udp4_filter_cnt--;
+ break;
+ case SCTP_V4_FLOW:
+ pf->fd_sctp4_filter_cnt--;
+ break;
+ case IP_USER_FLOW:
+ switch (filter->ip4_proto) {
+ case IPPROTO_TCP:
+ pf->fd_tcp4_filter_cnt--;
+ break;
+ case IPPROTO_UDP:
+ pf->fd_udp4_filter_cnt--;
+ break;
+ case IPPROTO_SCTP:
+ pf->fd_sctp4_filter_cnt--;
+ break;
+ case IPPROTO_IP:
+ pf->fd_ip4_filter_cnt--;
+ break;
+ }
+ break;
+ }
+
+ /* Remove the filter from the list and free memory */
+ hlist_del(&filter->fdir_node);
+ kfree(filter);
+}
+
+/**
 * i40e_fdir_check_and_reenable - Function to re-enable FD ATR or SB if disabled
* @pf: board private structure
**/
@@ -8110,40 +8235,23 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
fcnt_avail = pf->fdir_pf_filter_count;
if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
(pf->fd_add_err == 0) ||
- (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) {
- if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
- pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
- if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
- (I40E_DEBUG_FD & pf->hw.debug_mask))
- dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
- }
- }
+ (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
+ i40e_reenable_fdir_sb(pf);
/* We should wait for even more space before re-enabling ATR.
* Additionally, we cannot enable ATR as long as we still have TCP SB
* rules active.
*/
if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) &&
- (pf->fd_tcp4_filter_cnt == 0)) {
- if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
- pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
- if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
- (I40E_DEBUG_FD & pf->hw.debug_mask))
- dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
- }
- }
+ (pf->fd_tcp4_filter_cnt == 0))
+ i40e_reenable_fdir_atr(pf);
/* if hw had a problem adding a filter, delete it */
if (pf->fd_inv > 0) {
hlist_for_each_entry_safe(filter, node,
- &pf->fdir_filter_list, fdir_node) {
- if (filter->fd_id == pf->fd_inv) {
- hlist_del(&filter->fdir_node);
- kfree(filter);
- pf->fdir_pf_active_filters--;
- pf->fd_inv = 0;
- }
- }
+ &pf->fdir_filter_list, fdir_node)
+ if (filter->fd_id == pf->fd_inv)
+ i40e_delete_invalid_filter(pf, filter);
}
}
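The invalid-filter walk above relies on the _safe iterator because i40e_delete_invalid_filter() unlinks the current node; in isolation the pattern is (sketch):

	struct i40e_fdir_filter *filter;
	struct hlist_node *node;

	/* 'node' caches the next pointer, so hlist_del() on the current
	 * entry cannot derail the walk.
	 */
	hlist_for_each_entry_safe(filter, node, &pf->fdir_filter_list,
				  fdir_node) {
		if (filter->fd_id == pf->fd_inv)
			i40e_delete_invalid_filter(pf, filter);
	}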
@@ -8180,7 +8288,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
}
pf->fd_flush_timestamp = jiffies;
- pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+ set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
/* flush all filters */
wr32(&pf->hw, I40E_PFQF_CTL_1,
I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
@@ -8200,7 +8308,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
/* replay sideband filters */
i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
if (!disable_atr && !pf->fd_tcp4_filter_cnt)
- pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
+ clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
@@ -8324,13 +8432,12 @@ static void i40e_link_event(struct i40e_pf *pf)
/* On success, disable temp link polling */
if (status == I40E_SUCCESS) {
- if (pf->flags & I40E_FLAG_TEMP_LINK_POLLING)
- pf->flags &= ~I40E_FLAG_TEMP_LINK_POLLING;
+ clear_bit(__I40E_TEMP_LINK_POLLING, pf->state);
} else {
/* Enable link polling temporarily until i40e_get_link_status
* returns I40E_SUCCESS
*/
- pf->flags |= I40E_FLAG_TEMP_LINK_POLLING;
+ set_bit(__I40E_TEMP_LINK_POLLING, pf->state);
dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n",
status);
return;
@@ -8382,7 +8489,7 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf)
pf->service_timer_previous = jiffies;
if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) ||
- (pf->flags & I40E_FLAG_TEMP_LINK_POLLING))
+ test_bit(__I40E_TEMP_LINK_POLLING, pf->state))
i40e_link_event(pf);
/* Update the stats for active netdevs so the network stack
@@ -9610,7 +9717,7 @@ static void i40e_sync_udp_filters(struct i40e_pf *pf)
pf->pending_udp_bitmap |= BIT_ULL(i);
}
- pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+ set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
}
/**
@@ -9624,11 +9731,9 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
u16 port;
int i;
- if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC))
+ if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state))
return;
- pf->flags &= ~I40E_FLAG_UDP_FILTER_SYNC;
-
for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
if (pf->pending_udp_bitmap & BIT_ULL(i)) {
pf->pending_udp_bitmap &= ~BIT_ULL(i);
@@ -9680,17 +9785,15 @@ static void i40e_service_task(struct work_struct *work)
i40e_vc_process_vflr_event(pf);
i40e_watchdog_subtask(pf);
i40e_fdir_reinit_subtask(pf);
- if (pf->flags & I40E_FLAG_CLIENT_RESET) {
+ if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) {
/* Client subtask will reopen next time through. */
i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], true);
- pf->flags &= ~I40E_FLAG_CLIENT_RESET;
} else {
i40e_client_subtask(pf);
- if (pf->flags & I40E_FLAG_CLIENT_L2_CHANGE) {
+ if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE,
+ pf->state))
i40e_notify_client_of_l2_param_changes(
pf->vsi[pf->lan_vsi]);
- pf->flags &= ~I40E_FLAG_CLIENT_L2_CHANGE;
- }
}
i40e_sync_filters_subtask(pf);
i40e_sync_udp_filters_subtask(pf);
@@ -9954,18 +10057,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
mutex_lock(&pf->switch_mutex);
if (!pf->vsi[vsi->idx]) {
- dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n",
- vsi->idx, vsi->idx, vsi, vsi->type);
+ dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
+ vsi->idx, vsi->idx, vsi->type);
goto unlock_vsi;
}
if (pf->vsi[vsi->idx] != vsi) {
dev_err(&pf->pdev->dev,
- "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n",
+ "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
pf->vsi[vsi->idx]->idx,
- pf->vsi[vsi->idx],
pf->vsi[vsi->idx]->type,
- vsi->idx, vsi, vsi->type);
+ vsi->idx, vsi->type);
goto unlock_vsi;
}
@@ -10487,6 +10589,9 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
if (err)
goto err_unwind;
+ if (pf->flags & I40E_FLAG_IWARP_ENABLED)
+ i40e_client_update_msix_info(pf);
+
return 0;
err_unwind:
@@ -11103,6 +11208,16 @@ static int i40e_sw_init(struct i40e_pf *pf)
/* IWARP needs one extra vector for CQP just like MISC.*/
pf->num_iwarp_msix = (int)num_online_cpus() + 1;
}
+ /* Stopping the FW LLDP engine is only supported on the
+	 * XL710 with FW version >= 1.7. Also, stopping the FW LLDP
+	 * engine is not supported if NPAR is functioning on this
+	 * part.
+ */
+ if (pf->hw.mac.type == I40E_MAC_XL710 &&
+ !pf->hw.func_caps.npar_enable &&
+ (pf->hw.aq.api_maj_ver > 1 ||
+ (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6)))
+ pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP;
#ifdef CONFIG_PCI_IOV
if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
@@ -11170,20 +11285,18 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
need_reset = true;
i40e_fdir_filter_exit(pf);
}
- pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED |
- I40E_FLAG_FD_SB_AUTO_DISABLED);
+ pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+ clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state);
pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
/* reset fd counters */
pf->fd_add_err = 0;
pf->fd_atr_cnt = 0;
/* if ATR was auto disabled it can be re-enabled. */
- if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
- pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
+ if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
- }
}
return need_reset;
}
@@ -11316,7 +11429,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev,
/* New port: add it and mark its index in the bitmap */
pf->udp_ports[next_idx].port = port;
pf->pending_udp_bitmap |= BIT_ULL(next_idx);
- pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+ set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
}
/**
@@ -11357,7 +11470,7 @@ static void i40e_udp_tunnel_del(struct net_device *netdev,
*/
pf->udp_ports[idx].port = 0;
pf->pending_udp_bitmap |= BIT_ULL(idx);
- pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+ set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
return;
not_found:
@@ -11702,6 +11815,8 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_bridge_getlink = i40e_ndo_bridge_getlink,
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
.ndo_bpf = i40e_xdp,
+ .ndo_xdp_xmit = i40e_xdp_xmit,
+ .ndo_xdp_flush = i40e_xdp_flush,
};
/**
@@ -12119,7 +12234,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
if (f_count) {
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
- pf->flags |= I40E_FLAG_FILTER_SYNC;
+ set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
}
/* Update VSI BW information */
@@ -14227,10 +14342,21 @@ static int __maybe_unused i40e_suspend(struct device *dev)
del_timer_sync(&pf->service_timer);
cancel_work_sync(&pf->service_task);
+ /* Client close must be called explicitly here because the timer
+ * has been stopped.
+ */
+ i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
+
if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
i40e_enable_mc_magic_wake(pf);
- i40e_prep_for_reset(pf, false);
+ /* Since we're going to destroy queues during the
+	 * i40e_clear_interrupt_scheme(), we should hold the RTNL lock for this
+ * whole section
+ */
+ rtnl_lock();
+
+ i40e_prep_for_reset(pf, true);
wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
@@ -14242,6 +14368,8 @@ static int __maybe_unused i40e_suspend(struct device *dev)
*/
i40e_clear_interrupt_scheme(pf);
+ rtnl_unlock();
+
return 0;
}
@@ -14259,6 +14387,11 @@ static int __maybe_unused i40e_resume(struct device *dev)
if (!test_bit(__I40E_SUSPENDED, pf->state))
return 0;
+ /* We need to hold the RTNL lock prior to restoring interrupt schemes,
+ * since we're going to be restoring queues
+ */
+ rtnl_lock();
+
/* We cleared the interrupt scheme when we suspended, so we need to
* restore it now to resume device functionality.
*/
@@ -14269,7 +14402,9 @@ static int __maybe_unused i40e_resume(struct device *dev)
}
clear_bit(__I40E_DOWN, pf->state);
- i40e_reset_and_rebuild(pf, false, false);
+ i40e_reset_and_rebuild(pf, false, true);
+
+ rtnl_unlock();
/* Clear suspended state last after everything is recovered */
clear_bit(__I40E_SUSPENDED, pf->state);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 76a5cb04e4fe..ba9687c03795 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
index 80e66da6b145..9c3c3b0d3ac4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 83798b7841b9..2ec24188d6e2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -287,7 +288,7 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
-i40e_status
+enum i40e_status_code
i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_cloud_filters_element_bb *filters,
u8 filter_count);
@@ -299,7 +300,7 @@ enum i40e_status_code
i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
struct i40e_aqc_cloud_filters_element_data *filters,
u8 filter_count);
-i40e_status
+enum i40e_status_code
i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_cloud_filters_element_bb *filters,
u8 filter_count);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 97381238eb7c..5b47dd1f75a5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index c234758dad15..b3e206e49cc2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_status.h b/drivers/net/ethernet/intel/i40e/i40e_status.h
index afb72e711d43..10c86f63dc52 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_status.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h
index d3e55f54a05e..410ba13bcf21 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel(R) 40-10 Gigabit Ethernet Connection Network Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 1ec9b1d8023d..f174c72480ab 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -335,7 +336,7 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
- pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+ set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
} else {
pf->fd_tcp4_filter_cnt--;
}
@@ -593,8 +594,14 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
- pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
- pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+ test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
+ /* These set_bit() calls aren't atomic with the
+		 * test_bit() here, but worst case we potentially
+ * disable ATR and queue a flush right after SB
+ * support is re-enabled. That shouldn't cause an
+ * issue in practice
+ */
+ set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
}
@@ -607,11 +614,10 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
*/
if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
- !(pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED)) {
- pf->flags |= I40E_FLAG_FD_SB_AUTO_DISABLED;
+ !test_and_set_bit(__I40E_FD_SB_AUTO_DISABLED,
+ pf->state))
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
- }
}
} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
if (I40E_DEBUG_FD & pf->hw.debug_mask)
@@ -708,16 +714,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
+ * @in_sw: use the SW head/tail copies instead of the HW head register
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
u32 head, tail;
- head = i40e_get_head(ring);
- tail = readl(ring->tail);
+ if (!in_sw) {
+ head = i40e_get_head(ring);
+ tail = readl(ring->tail);
+ } else {
+ head = ring->next_to_clean;
+ tail = ring->next_to_use;
+ }
if (head != tail)
return (head < tail) ?
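
The tail of this function handles ring wraparound. A minimal sketch of the computation, not part of the patch ('count' stands in for ring->count):

    /* Pending descriptors on a ring of 'count' entries, covering the
     * case where tail has wrapped around past head.
     */
    static u32 pending_descs(u32 head, u32 tail, u32 count)
    {
        if (head == tail)
            return 0;
        return (head < tail) ? tail - head : tail + count - head;
    }
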
@@ -774,7 +786,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40e_get_tx_pending(tx_ring) ? packets : -1;
+ i40e_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
@@ -898,7 +910,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
- unsigned int j = i40e_get_tx_pending(tx_ring);
+ unsigned int j = i40e_get_tx_pending(tx_ring, false);
if (budget &&
((j / WB_STRIDE) == 0) && (j > 0) &&
@@ -1576,9 +1588,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
bi->dma = dma;
bi->page = page;
bi->page_offset = i40e_rx_offset(rx_ring);
-
- /* initialize pagecnt_bias to 1 representing we fully own page */
- bi->pagecnt_bias = 1;
+ page_ref_add(page, USHRT_MAX - 1);
+ bi->pagecnt_bias = USHRT_MAX;
return true;
}
@@ -1944,8 +1955,8 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(!pagecnt_bias)) {
- page_ref_add(page, USHRT_MAX);
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX - 1);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
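
Seeding the page with USHRT_MAX references up front turns the per-packet refcount update into a cheap local bias decrement; the atomic page_ref_add() then runs only once per roughly 64K uses, when the bias is refreshed as above. A sketch of the reuse test this accounting enables (simplified; the real check also looks at the page's NUMA node and pfmemalloc status):

    /* The page may be recycled only once every reference handed to the
     * stack has come back, i.e. nothing beyond the driver's own
     * bias-accounted references is outstanding.
     */
    static bool bias_allows_reuse(struct page *page, unsigned int pagecnt_bias)
    {
        return page_count(page) - pagecnt_bias <= 1;
    }
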
@@ -2203,7 +2214,7 @@ static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
struct xdp_buff *xdp)
{
- int result = I40E_XDP_PASS;
+ int err, result = I40E_XDP_PASS;
struct i40e_ring *xdp_ring;
struct bpf_prog *xdp_prog;
u32 act;
@@ -2222,6 +2233,10 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
result = i40e_xmit_xdp_ring(xdp, xdp_ring);
break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ result = !err ? I40E_XDP_TX : I40E_XDP_CONSUMED;
+ break;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
@@ -2257,6 +2272,15 @@ static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
#endif
}
+static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
+{
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch.
+ */
+ wmb();
+ writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
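
Factoring the tail bump into a helper lets both transmit paths batch several frames behind a single doorbell, and writel_relaxed() is sufficient because the explicit wmb() already orders the descriptor writes ahead of the MMIO write. A hypothetical batching sketch (frames[] and n are illustrative):

    /* Post several XDP buffers, then ring the doorbell once. */
    for (i = 0; i < n; i++)
        i40e_xmit_xdp_ring(frames[i], xdp_ring);
    i40e_xdp_ring_update_tail(xdp_ring);
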
/**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
@@ -2391,16 +2415,11 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
}
if (xdp_xmit) {
- struct i40e_ring *xdp_ring;
-
- xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch.
- */
- wmb();
+ struct i40e_ring *xdp_ring =
+ rx_ring->vsi->xdp_rings[rx_ring->queue_index];
- writel(xdp_ring->next_to_use, xdp_ring->tail);
+ i40e_xdp_ring_update_tail(xdp_ring);
+ xdp_do_flush_map();
}
rx_ring->skb = skb;
@@ -2640,7 +2659,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
return;
- if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED)
+ if (test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
return;
/* if sampling is disabled do nothing */
@@ -2680,7 +2699,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
th = (struct tcphdr *)(hdr.network + hlen);
/* Due to lack of space, no more new filters can be programmed */
- if (th->syn && (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
+ if (th->syn && test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
return;
if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) {
/* HW ATR eviction will take care of removing filters on FIN
@@ -3648,3 +3667,49 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
return i40e_xmit_frame_ring(skb, tx_ring);
}
+
+/**
+ * i40e_xdp_xmit - Implements ndo_xdp_xmit
+ * @dev: netdev
+ * @xdp: XDP buffer
+ *
+ * Returns zero if sent, or a negative error code otherwise
+ **/
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+ struct i40e_netdev_priv *np = netdev_priv(dev);
+ unsigned int queue_index = smp_processor_id();
+ struct i40e_vsi *vsi = np->vsi;
+ int err;
+
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+ return -ENXIO;
+
+ err = i40e_xmit_xdp_ring(xdp, vsi->xdp_rings[queue_index]);
+ if (err != I40E_XDP_TX)
+ return -ENOSPC;
+
+ return 0;
+}
+
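
Splitting transmit from flush lets the core amortize the doorbell across a NAPI poll; the Rx path above reaches the flush through xdp_do_flush_map(). A simplified sketch of the caller side (fwd is an assumed target net_device):

    err = fwd->netdev_ops->ndo_xdp_xmit(fwd, xdp);
    if (!err)
        fwd->netdev_ops->ndo_xdp_flush(fwd);
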
+/**
+ * i40e_xdp_flush - Implements ndo_xdp_flush
+ * @dev: netdev
+ **/
+void i40e_xdp_flush(struct net_device *dev)
+{
+ struct i40e_netdev_priv *np = netdev_priv(dev);
+ unsigned int queue_index = smp_processor_id();
+ struct i40e_vsi *vsi = np->vsi;
+
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+ return;
+
+ i40e_xdp_ring_update_tail(vsi->xdp_rings[queue_index]);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index f75a8fe68fcf..3043483ec426 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -505,10 +506,12 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp);
+void i40e_xdp_flush(struct net_device *dev);
/**
* i40e_get_head - Retrieve head from head writeback
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index cd294e6a8587..bfb80092b352 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -39,7 +40,7 @@
#define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
#define I40E_MAX_VSI_QP 16
-#define I40E_MAX_VF_VSI 3
+#define I40E_MAX_VF_VSI 4
#define I40E_MAX_CHAINED_RX_BUFFERS 5
#define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16
@@ -1336,6 +1337,9 @@ struct i40e_hw_port_stats {
#define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE 1024
#define I40E_SR_CONTROL_WORD_1_SHIFT 0x06
#define I40E_SR_CONTROL_WORD_1_MASK (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_PTR_TYPE BIT(15)
+#define I40E_SR_OCP_CFG_WORD0 0x2B
+#define I40E_SR_OCP_ENABLED BIT(15)
/* Shadow RAM related */
#define I40E_SR_SECTOR_SIZE_IN_WORDS 0x800
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index e9309fb9084b..35173cbe80f7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -258,6 +259,38 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id,
}
/**
+ * i40e_get_real_pf_qid
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @queue_id: queue number
+ *
+ * wrapper function to get pf_queue_id handling ADq code as well
+ **/
+static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id)
+{
+ int i;
+
+ if (vf->adq_enabled) {
+		/* Although the VF considers all queues (1 to 16) its own,
+		 * they may actually belong to different VSIs (up to 4).
+		 * We need to find which queue belongs to which VSI.
+ */
+ for (i = 0; i < vf->num_tc; i++) {
+ if (queue_id < vf->ch[i].num_qps) {
+ vsi_id = vf->ch[i].vsi_id;
+ break;
+ }
+			/* find the right queue id relative to
+			 * the given VSI.
+			 */
+ queue_id -= vf->ch[i].num_qps;
+ }
+ }
+
+ return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id);
+}
+
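
A worked example of the walk above, under a hypothetical ADq layout with four queue pairs per channel: VF queue 9 skips channel 0 (9 - 4 = 5) and channel 1 (5 - 4 = 1), landing on channel 2 as local queue 1. The same logic as a standalone sketch ('qps[]' is an illustrative per-channel queue-count array):

    /* Return the channel owning *qid and rewrite *qid to be
     * channel-relative; -1 if the queue is out of range.
     */
    static int resolve_channel(u16 *qid, const u16 *qps, int num_tc)
    {
        int i;

        for (i = 0; i < num_tc; i++) {
            if (*qid < qps[i])
                return i;
            *qid -= qps[i];
        }
        return -1;
    }
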
+/**
* i40e_config_irq_link_list
* @vf: pointer to the VF info
* @vsi_id: id of VSI as given by the FW
@@ -310,7 +343,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
- pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+ pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id);
reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
wr32(hw, reg_idx, reg);
@@ -333,8 +366,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
if (next_q < size) {
vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
- pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id,
- vsi_queue_id);
+ pf_queue_id = i40e_get_real_pf_qid(vf,
+ vsi_id,
+ vsi_queue_id);
} else {
pf_queue_id = I40E_QUEUE_END_OF_LIST;
qtype = 0;
@@ -669,18 +703,20 @@ error_param:
/**
* i40e_alloc_vsi_res
* @vf: pointer to the VF info
- * @type: type of VSI to allocate
+ * @idx: VSI index, applies only for ADq mode, zero otherwise
*
* alloc VF vsi context & resources
**/
-static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
+static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
{
struct i40e_mac_filter *f = NULL;
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi;
+ u64 max_tx_rate = 0;
int ret = 0;
- vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id);
+ vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid,
+ vf->vf_id);
if (!vsi) {
dev_err(&pf->pdev->dev,
@@ -689,7 +725,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
ret = -ENOENT;
goto error_alloc_vsi_res;
}
- if (type == I40E_VSI_SRIOV) {
+
+ if (!idx) {
u64 hena = i40e_pf_get_default_rss_hena(pf);
u8 broadcast[ETH_ALEN];
@@ -721,17 +758,29 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
spin_unlock_bh(&vsi->mac_filter_hash_lock);
wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena);
wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32));
+ /* program mac filter only for VF VSI */
+ ret = i40e_sync_vsi_filters(vsi);
+ if (ret)
+ dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
}
- /* program mac filter */
- ret = i40e_sync_vsi_filters(vsi);
- if (ret)
- dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+	/* store the VSI index and id for ADq, and don't apply the MAC filter */
+ if (vf->adq_enabled) {
+ vf->ch[idx].vsi_idx = vsi->idx;
+ vf->ch[idx].vsi_id = vsi->id;
+ }
/* Set VF bandwidth if specified */
if (vf->tx_rate) {
+ max_tx_rate = vf->tx_rate;
+ } else if (vf->ch[idx].max_tx_rate) {
+ max_tx_rate = vf->ch[idx].max_tx_rate;
+ }
+
+ if (max_tx_rate) {
+ max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR);
ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
- vf->tx_rate / 50, 0, NULL);
+ max_tx_rate, 0, NULL);
if (ret)
dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n",
vf->vf_id, ret);
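
The divisor converts a Mbps cap into the scheduler's credit units, replacing the old open-coded division by 50. Arithmetic sketch, assuming I40E_BW_CREDIT_DIVISOR is 50 as that replaced constant suggests:

    u64 credits = div_u64(300, I40E_BW_CREDIT_DIVISOR); /* 300 Mbps -> 6 credits */
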
@@ -742,6 +791,92 @@ error_alloc_vsi_res:
}
/**
+ * i40e_map_pf_queues_to_vsi
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of first part VSILAN_QTABLE, mapping pf queues to VSI.
+ **/
+static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+ u32 reg, num_tc = 1; /* VF has at least one traffic class */
+ u16 vsi_id, qps;
+ int i, j;
+
+ if (vf->adq_enabled)
+ num_tc = vf->num_tc;
+
+ for (i = 0; i < num_tc; i++) {
+ if (vf->adq_enabled) {
+ qps = vf->ch[i].num_qps;
+ vsi_id = vf->ch[i].vsi_id;
+ } else {
+ qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ vsi_id = vf->lan_vsi_id;
+ }
+
+ for (j = 0; j < 7; j++) {
+ if (j * 2 >= qps) {
+ /* end of list */
+ reg = 0x07FF07FF;
+ } else {
+ u16 qid = i40e_vc_get_pf_queue_id(vf,
+ vsi_id,
+ j * 2);
+ reg = qid;
+ qid = i40e_vc_get_pf_queue_id(vf, vsi_id,
+ (j * 2) + 1);
+ reg |= qid << 16;
+ }
+ i40e_write_rx_ctl(hw,
+ I40E_VSILAN_QTABLE(j, vsi_id),
+ reg);
+ }
+ }
+}
+
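
Each VSILAN_QTABLE register carries two 11-bit PF queue ids, one per halfword, so the seven registers written above cover up to 14 queues per VSI; 0x7FF flags an unused slot, hence the 0x07FF07FF fill. A packing sketch (qid0/qid1 are illustrative even/odd queue ids):

    u32 reg = (qid0 & 0x7FF) | ((qid1 & 0x7FF) << 16);
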
+/**
+ * i40e_map_pf_to_vf_queues
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of the second part VPLAN_QTABLE & completes VF mappings.
+ **/
+static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+ u32 reg, total_qps = 0;
+ u32 qps, num_tc = 1; /* VF has at least one traffic class */
+ u16 vsi_id, qid;
+ int i, j;
+
+ if (vf->adq_enabled)
+ num_tc = vf->num_tc;
+
+ for (i = 0; i < num_tc; i++) {
+ if (vf->adq_enabled) {
+ qps = vf->ch[i].num_qps;
+ vsi_id = vf->ch[i].vsi_id;
+ } else {
+ qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ vsi_id = vf->lan_vsi_id;
+ }
+
+ for (j = 0; j < qps; j++) {
+ qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j);
+
+ reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
+ wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id),
+ reg);
+ total_qps++;
+ }
+ }
+}
+
+/**
* i40e_enable_vf_mappings
* @vf: pointer to the VF info
*
@@ -751,8 +886,7 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
{
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
- u32 reg, total_queue_pairs = 0;
- int j;
+ u32 reg;
/* Tell the hardware we're using noncontiguous mapping. HW requires
* that VF queues be mapped using this method, even when they are
@@ -765,30 +899,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg);
- /* map PF queues to VF queues */
- for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) {
- u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j);
-
- reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
- wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg);
- total_queue_pairs++;
- }
-
- /* map PF queues to VSI */
- for (j = 0; j < 7; j++) {
- if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) {
- reg = 0x07FF07FF; /* unused */
- } else {
- u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
- j * 2);
- reg = qid;
- qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
- (j * 2) + 1);
- reg |= qid << 16;
- }
- i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id),
- reg);
- }
+ i40e_map_pf_to_vf_queues(vf);
+ i40e_map_pf_queues_to_vsi(vf);
i40e_flush(hw);
}
@@ -824,7 +936,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
u32 reg_idx, reg;
- int i, msix_vf;
+ int i, j, msix_vf;
/* Start by disabling VF's configuration API to prevent the OS from
* accessing the VF's VSI after it's freed / invalidated.
@@ -846,6 +958,20 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
vf->lan_vsi_id = 0;
vf->num_mac = 0;
}
+
+	/* do the accounting and remove the additional ADq VSIs */
+ if (vf->adq_enabled && vf->ch[0].vsi_idx) {
+ for (j = 0; j < vf->num_tc; j++) {
+			/* At this point VSI0 is already released, so don't
+			 * release it again; only clear its values in the
+			 * structure variables.
+			 */
+ if (j)
+ i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]);
+ vf->ch[j].vsi_idx = 0;
+ vf->ch[j].vsi_id = 0;
+ }
+ }
msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
/* disable interrupts so the VF starts in a known state */
@@ -891,7 +1017,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
{
struct i40e_pf *pf = vf->pf;
int total_queue_pairs = 0;
- int ret;
+ int ret, idx;
if (vf->num_req_queues &&
vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
@@ -900,11 +1026,30 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
/* allocate hw vsi context & associated resources */
- ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV);
+ ret = i40e_alloc_vsi_res(vf, 0);
if (ret)
goto error_alloc;
total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ /* allocate additional VSIs based on tc information for ADq */
+ if (vf->adq_enabled) {
+ if (pf->queues_left >=
+ (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) {
+ /* TC 0 always belongs to VF VSI */
+ for (idx = 1; idx < vf->num_tc; idx++) {
+ ret = i40e_alloc_vsi_res(vf, idx);
+ if (ret)
+ goto error_alloc;
+ }
+ /* send correct number of queues */
+ total_queue_pairs = I40E_MAX_VF_QUEUES;
+ } else {
+ dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n",
+ vf->vf_id);
+ vf->adq_enabled = false;
+ }
+ }
+
/* We account for each VF to get a default number of queue pairs. If
* the VF has now requested more, we need to account for that to make
* certain we never request more queues than we actually have left in
@@ -1537,6 +1682,27 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
}
/**
+ * i40e_del_qch - delete all the additional VSIs created as a part of ADq
+ * @vf: pointer to VF structure
+ **/
+static void i40e_del_qch(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ int i;
+
+	/* The first element in the array belongs to the primary VF VSI and we
+	 * shouldn't delete it. We should, however, delete the rest of the VSIs.
+ */
+ for (i = 1; i < vf->num_tc; i++) {
+ if (vf->ch[i].vsi_idx) {
+ i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]);
+ vf->ch[i].vsi_idx = 0;
+ vf->ch[i].vsi_id = 0;
+ }
+ }
+}
+
+/**
* i40e_vc_get_vf_resources_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -1631,6 +1797,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ;
+
vfres->num_vsis = num_vsis;
vfres->num_queue_pairs = vf->num_queue_pairs;
vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
@@ -1855,27 +2024,37 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
(struct virtchnl_vsi_queue_config_info *)msg;
struct virtchnl_queue_pair_info *qpi;
struct i40e_pf *pf = vf->pf;
- u16 vsi_id, vsi_queue_id;
+ u16 vsi_id, vsi_queue_id = 0;
i40e_status aq_ret = 0;
- int i;
+ int i, j = 0, idx = 0;
+
+ vsi_id = qci->vsi_id;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
- vsi_id = qci->vsi_id;
if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
+
for (i = 0; i < qci->num_queue_pairs; i++) {
qpi = &qci->qpair[i];
- vsi_queue_id = qpi->txq.queue_id;
- if ((qpi->txq.vsi_id != vsi_id) ||
- (qpi->rxq.vsi_id != vsi_id) ||
- (qpi->rxq.queue_id != vsi_queue_id) ||
- !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
+
+ if (!vf->adq_enabled) {
+ vsi_queue_id = qpi->txq.queue_id;
+
+ if (qpi->txq.vsi_id != qci->vsi_id ||
+ qpi->rxq.vsi_id != qci->vsi_id ||
+ qpi->rxq.queue_id != vsi_queue_id) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+ }
+
+ if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
@@ -1887,9 +2066,33 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
+
+		/* For ADq there can be up to 4 VSIs with max 4 queues each.
+		 * The VF does not know about these additional VSIs; all it
+		 * cares about is its own queues. The PF maps these queues to
+		 * the appropriate VSIs based on the TC mapping.
+		 */
+ if (vf->adq_enabled) {
+ if (j == (vf->ch[idx].num_qps - 1)) {
+ idx++;
+ j = 0; /* resetting the queue count */
+ vsi_queue_id = 0;
+ } else {
+ j++;
+ vsi_queue_id++;
+ }
+ vsi_id = vf->ch[idx].vsi_id;
+ }
}
/* set vsi num_queue_pairs in use to num configured by VF */
- pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs;
+ if (!vf->adq_enabled) {
+ pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
+ qci->num_queue_pairs;
+ } else {
+ for (i = 0; i < vf->num_tc; i++)
+ pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
+ vf->ch[i].num_qps;
+ }
error_param:
/* send the response to the VF */
@@ -1898,6 +2101,33 @@ error_param:
}
/**
+ * i40e_validate_queue_map
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @queuemap: Tx or Rx queue map
+ *
+ * check if Tx or Rx queue map is valid
+ **/
+static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id,
+ unsigned long queuemap)
+{
+ u16 vsi_queue_id, queue_id;
+
+ for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) {
+ if (vf->adq_enabled) {
+ vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id;
+ queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF);
+ } else {
+ queue_id = vsi_queue_id;
+ }
+
+ if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
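
In the ADq branch the bit index itself encodes the channel: dividing by I40E_MAX_VF_VSI (4) selects the channel, while the remainder modulo I40E_DEFAULT_QUEUES_PER_VF (assumed to keep its usual value of 4) gives the channel-relative queue, so bit 9 resolves to channel 2, queue 1. A usage sketch with a hypothetical queue map:

    /* Hypothetical Rx map with queues 0, 1 and 9 set. */
    unsigned long map = BIT(0) | BIT(1) | BIT(9);

    if (i40e_validate_queue_map(vf, vsi_id, map))
        aq_ret = I40E_ERR_PARAM;
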
+/**
* i40e_vc_config_irq_map_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -1911,9 +2141,8 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
struct virtchnl_irq_map_info *irqmap_info =
(struct virtchnl_irq_map_info *)msg;
struct virtchnl_vector_map *map;
- u16 vsi_id, vsi_queue_id, vector_id;
+ u16 vsi_id, vector_id;
i40e_status aq_ret = 0;
- unsigned long tempmap;
int i;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
@@ -1923,7 +2152,6 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
for (i = 0; i < irqmap_info->num_vectors; i++) {
map = &irqmap_info->vecmap[i];
-
vector_id = map->vector_id;
vsi_id = map->vsi_id;
/* validate msg params */
@@ -1933,23 +2161,14 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
goto error_param;
}
- /* lookout for the invalid queue index */
- tempmap = map->rxq_map;
- for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
- if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
- vsi_queue_id)) {
- aq_ret = I40E_ERR_PARAM;
- goto error_param;
- }
+ if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
}
- tempmap = map->txq_map;
- for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
- if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
- vsi_queue_id)) {
- aq_ret = I40E_ERR_PARAM;
- goto error_param;
- }
+ if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
}
i40e_config_irq_link_list(vf, vsi_id, map);
@@ -1975,6 +2194,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
struct i40e_pf *pf = vf->pf;
u16 vsi_id = vqs->vsi_id;
i40e_status aq_ret = 0;
+ int i;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
aq_ret = I40E_ERR_PARAM;
@@ -1993,6 +2213,16 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
aq_ret = I40E_ERR_TIMEOUT;
+
+ /* need to start the rings for additional ADq VSI's as well */
+ if (vf->adq_enabled) {
+ /* zero belongs to LAN VSI */
+ for (i = 1; i < vf->num_tc; i++) {
+ if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx]))
+ aq_ret = I40E_ERR_TIMEOUT;
+ }
+ }
+
error_param:
/* send the response to the VF */
return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
@@ -2139,25 +2369,47 @@ error_param:
/**
* i40e_check_vf_permission
* @vf: pointer to the VF info
- * @macaddr: pointer to the MAC Address being checked
+ * @al: MAC address list from virtchnl
*
- * Check if the VF has permission to add or delete unicast MAC address
- * filters and return error code -EPERM if not. Then check if the
- * address filter requested is broadcast or zero and if so return
- * an invalid MAC address error code.
+ * Check that the given list of MAC addresses is allowed. Returns
+ * I40E_ERR_INVALID_MAC_ADDR for a bad address and -EPERM for a permission
+ * failure. Checks the following conditions:
+ * 1) broadcast and zero addresses are never valid
+ * 2) unicast addresses are not allowed if the VMM has administratively set
+ *    the VF MAC address, unless the VF is marked as privileged
+ * 3) there is enough space to add all the addresses
+ *
+ * Note that to guarantee consistency, it is expected that this function be
+ * called while holding the mac_filter_hash_lock, as otherwise the current
+ * number of addresses might not be accurate.
**/
-static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
+static inline int i40e_check_vf_permission(struct i40e_vf *vf,
+ struct virtchnl_ether_addr_list *al)
{
struct i40e_pf *pf = vf->pf;
- int ret = 0;
+ int i;
+
+ /* If this VF is not privileged, then we can't add more than a limited
+ * number of addresses. Check to make sure that the additions do not
+ * push us over the limit.
+ */
+ if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+ (vf->num_mac + al->num_elements) > I40E_VC_MAX_MAC_ADDR_PER_VF) {
+ dev_err(&pf->pdev->dev,
+ "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+ return -EPERM;
+ }
+
+ for (i = 0; i < al->num_elements; i++) {
+ u8 *addr = al->list[i].addr;
+
+ if (is_broadcast_ether_addr(addr) ||
+ is_zero_ether_addr(addr)) {
+ dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n",
+ addr);
+ return I40E_ERR_INVALID_MAC_ADDR;
+ }
- if (is_broadcast_ether_addr(macaddr) ||
- is_zero_ether_addr(macaddr)) {
- dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", macaddr);
- ret = I40E_ERR_INVALID_MAC_ADDR;
- } else if (vf->pf_set_mac && !is_multicast_ether_addr(macaddr) &&
- !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
- !ether_addr_equal(macaddr, vf->default_lan_addr.addr)) {
/* If the host VMM administrator has set the VF MAC address
* administratively via the ndo_set_vf_mac command then deny
* permission to the VF to add or delete unicast MAC addresses.
@@ -2165,16 +2417,16 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
* The VF may request to set the MAC address filter already
* assigned to it so do not return an error in that case.
*/
- dev_err(&pf->pdev->dev,
- "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
- ret = -EPERM;
- } else if ((vf->num_mac >= I40E_VC_MAX_MAC_ADDR_PER_VF) &&
- !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
- dev_err(&pf->pdev->dev,
- "VF is not trusted, switch the VF to trusted to add more functionality\n");
- ret = -EPERM;
+ if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+ !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
+ !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ dev_err(&pf->pdev->dev,
+ "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
+ return -EPERM;
+ }
}
- return ret;
+
+ return 0;
}
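
Checking the whole batch before touching the filter hash keeps the list from being half-applied. Worked example, assuming I40E_VC_MAX_MAC_ADDR_PER_VF keeps its usual value of 12: an untrusted VF already holding 10 filters that requests 4 more fails the batch check (10 + 4 > 12) before any individual address is examined.
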
/**
@@ -2201,11 +2453,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
goto error_param;
}
- for (i = 0; i < al->num_elements; i++) {
- ret = i40e_check_vf_permission(vf, al->list[i].addr);
- if (ret)
- goto error_param;
- }
vsi = pf->vsi[vf->lan_vsi_idx];
/* Lock once, because all function inside for loop accesses VSI's
@@ -2213,6 +2460,12 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
*/
spin_lock_bh(&vsi->mac_filter_hash_lock);
+ ret = i40e_check_vf_permission(vf, al);
+ if (ret) {
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
+ goto error_param;
+ }
+
/* add new addresses to the list */
for (i = 0; i < al->num_elements; i++) {
struct i40e_mac_filter *f;
@@ -2688,6 +2941,618 @@ err:
}
/**
+ * i40e_validate_cloud_filter
+ * @vf: pointer to the VF info
+ * @tc_filter: TC filter to validate
+ *
+ * This function validates cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_validate_cloud_filter(struct i40e_vf *vf,
+ struct virtchnl_filter *tc_filter)
+{
+ struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec;
+ struct virtchnl_l4_spec data = tc_filter->data.tcp_spec;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct i40e_mac_filter *f;
+ struct hlist_node *h;
+ bool found = false;
+ int bkt;
+
+ if (!tc_filter->action) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Currently ADq doesn't support Drop Action\n",
+ vf->vf_id);
+ goto err;
+ }
+
+ /* action_meta is TC number here to which the filter is applied */
+ if (!tc_filter->action_meta ||
+ tc_filter->action_meta > I40E_MAX_VF_VSI) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
+ vf->vf_id, tc_filter->action_meta);
+ goto err;
+ }
+
+	/* Check whether the filter is programmed for advanced or basic mode.
+	 * There are two ADq modes (for VF only):
+	 * 1. Basic mode: intended to allow as many filter options as possible
+	 *		  to be added to a VF in non-trusted mode. The main goal
+	 *		  is to add filters to the VF's own MAC and VLAN id.
+	 * 2. Advanced mode: allows filters to be applied to fields other than
+	 *		  the VF's own MAC or VLAN. This mode requires the VF
+	 *		  to be trusted.
+	 */
+ if (mask.dst_mac[0] && !mask.dst_ip[0]) {
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ f = i40e_find_mac(vsi, data.dst_mac);
+
+ if (!f) {
+ dev_info(&pf->pdev->dev,
+ "Destination MAC %pM doesn't belong to VF %d\n",
+ data.dst_mac, vf->vf_id);
+ goto err;
+ }
+
+ if (mask.vlan_id) {
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f,
+ hlist) {
+ if (f->vlan == ntohs(data.vlan_id)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ dev_info(&pf->pdev->dev,
+ "VF %d doesn't have any VLAN id %u\n",
+ vf->vf_id, ntohs(data.vlan_id));
+ goto err;
+ }
+ }
+ } else {
+ /* Check if VF is trusted */
+ if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+ dev_err(&pf->pdev->dev,
+ "VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n",
+ vf->vf_id);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask.dst_mac[0] & data.dst_mac[0]) {
+ if (is_broadcast_ether_addr(data.dst_mac) ||
+ is_zero_ether_addr(data.dst_mac)) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n",
+ vf->vf_id, data.dst_mac);
+ goto err;
+ }
+ }
+
+ if (mask.src_mac[0] & data.src_mac[0]) {
+ if (is_broadcast_ether_addr(data.src_mac) ||
+ is_zero_ether_addr(data.src_mac)) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n",
+ vf->vf_id, data.src_mac);
+ goto err;
+ }
+ }
+
+ if (mask.dst_port & data.dst_port) {
+ if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ if (mask.src_port & data.src_port) {
+ if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW &&
+ tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n",
+ vf->vf_id);
+ goto err;
+ }
+
+ if (mask.vlan_id & data.vlan_id) {
+ if (ntohs(data.vlan_id) > I40E_MAX_VLANID) {
+ dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ return I40E_SUCCESS;
+err:
+ return I40E_ERR_CONFIG;
+}
+
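
For illustration, a hypothetical basic-mode filter that would pass this validation: steer TCP/IPv4 traffic addressed to the VF's own MAC to TC 1 (action_meta carries the target TC; vf_mac is an assumed array holding the VF's address):

    struct virtchnl_filter f = {};

    f.flow_type = VIRTCHNL_TCP_V4_FLOW;
    f.action = VIRTCHNL_ACTION_TC_REDIRECT;
    f.action_meta = 1;                            /* target traffic class */
    eth_broadcast_addr(f.mask.tcp_spec.dst_mac);  /* match all six bytes */
    ether_addr_copy(f.data.tcp_spec.dst_mac, vf_mac);
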
+/**
+ * i40e_find_vsi_from_seid - searches for the vsi with the given seid
+ * @vf: pointer to the VF info
+ * @seid: seid of the VSI to search for
+ **/
+static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ int i;
+
+	for (i = 0; i < vf->num_tc; i++) {
+ vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
+ if (vsi && vsi->seid == seid)
+ return vsi;
+ }
+ return NULL;
+}
+
+/**
+ * i40e_del_all_cloud_filters
+ * @vf: pointer to the VF info
+ *
+ * This function deletes all cloud filters
+ **/
+static void i40e_del_all_cloud_filters(struct i40e_vf *vf)
+{
+ struct i40e_cloud_filter *cfilter = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct hlist_node *node;
+ int ret;
+
+ hlist_for_each_entry_safe(cfilter, node,
+ &vf->cloud_filter_list, cloud_node) {
+ vsi = i40e_find_vsi_from_seid(vf, cfilter->seid);
+
+ if (!vsi) {
+ dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n",
+ vf->vf_id, cfilter->seid);
+ continue;
+ }
+
+ if (cfilter->dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+ false);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+
+ hlist_del(&cfilter->cloud_node);
+ kfree(cfilter);
+ vf->num_cloud_filters--;
+ }
+}
+
+/**
+ * i40e_vc_del_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function deletes a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+ struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+ struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+ struct i40e_cloud_filter cfilter, *cf = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct hlist_node *node;
+ i40e_status aq_ret = 0;
+ int i, ret;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!vf->adq_enabled) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: ADq not enabled, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (i40e_validate_cloud_filter(vf, vcf)) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Invalid input, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ memset(&cfilter, 0, sizeof(cfilter));
+ /* parse destination mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+ /* parse source mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+ cfilter.vlan_id = mask.vlan_id & tcf.vlan_id;
+ cfilter.dst_port = mask.dst_port & tcf.dst_port;
+ cfilter.src_port = mask.src_port & tcf.src_port;
+
+ switch (vcf->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ cfilter.n_proto = ETH_P_IP;
+ if (mask.dst_ip[0] & tcf.dst_ip[0])
+ memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+		else if (mask.src_ip[0] & tcf.src_ip[0])
+ memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ cfilter.n_proto = ETH_P_IPV6;
+ if (mask.dst_ip[3] & tcf.dst_ip[3])
+ memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip,
+ sizeof(cfilter.ip.v6.dst_ip6));
+ if (mask.src_ip[3] & tcf.src_ip[3])
+ memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip,
+ sizeof(cfilter.ip.v6.src_ip6));
+ break;
+ default:
+		/* A TC filter can be configured from different field
+		 * combinations, and in this case IP is not part of the config.
+ */
+ dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+ vf->vf_id);
+ }
+
+	/* get the VSI to which the TC belongs */
+ vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+ cfilter.seid = vsi->seid;
+ cfilter.flags = vcf->field_flags;
+
+ /* Deleting TC filter */
+ if (tcf.dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, &cfilter, false);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto err;
+ }
+
+ hlist_for_each_entry_safe(cf, node,
+ &vf->cloud_filter_list, cloud_node) {
+ if (cf->seid != cfilter.seid)
+ continue;
+ if (mask.dst_port)
+ if (cfilter.dst_port != cf->dst_port)
+ continue;
+ if (mask.dst_mac[0])
+			if (!ether_addr_equal(cf->dst_mac, cfilter.dst_mac))
+ continue;
+		/* for ipv4 data to be valid, only the first word of the mask is set */
+ if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0])
+ if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip)))
+ continue;
+ /* for ipv6, mask is set for all sixteen bytes (4 words) */
+ if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3])
+ if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6,
+				   sizeof(cfilter.ip.v6.dst_ip6)))
+ continue;
+ if (mask.vlan_id)
+ if (cfilter.vlan_id != cf->vlan_id)
+ continue;
+
+ hlist_del(&cf->cloud_node);
+ kfree(cf);
+ vf->num_cloud_filters--;
+ }
+
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_add_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function adds a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+ struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+ struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+ struct i40e_cloud_filter *cfilter = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ i40e_status aq_ret = 0;
+ int i, ret;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!vf->adq_enabled) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: ADq is not enabled, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (i40e_validate_cloud_filter(vf, vcf)) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Invalid input/s, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
+ if (!cfilter)
+ return -ENOMEM;
+
+ /* parse destination mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+ /* parse source mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+ cfilter->vlan_id = mask.vlan_id & tcf.vlan_id;
+ cfilter->dst_port = mask.dst_port & tcf.dst_port;
+ cfilter->src_port = mask.src_port & tcf.src_port;
+
+ switch (vcf->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ cfilter->n_proto = ETH_P_IP;
+ if (mask.dst_ip[0] & tcf.dst_ip[0])
+ memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+		else if (mask.src_ip[0] & tcf.src_ip[0])
+ memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ cfilter->n_proto = ETH_P_IPV6;
+ if (mask.dst_ip[3] & tcf.dst_ip[3])
+ memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip,
+ sizeof(cfilter->ip.v6.dst_ip6));
+ if (mask.src_ip[3] & tcf.src_ip[3])
+ memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip,
+ sizeof(cfilter->ip.v6.src_ip6));
+ break;
+ default:
+		/* A TC filter can be configured from different field
+		 * combinations, and in this case IP is not part of the config.
+ */
+ dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+ vf->vf_id);
+ }
+
+	/* get the VSI to which the TC belongs */
+ vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+ cfilter->seid = vsi->seid;
+ cfilter->flags = vcf->field_flags;
+
+ /* Adding cloud filter programmed as TC filter */
+ if (tcf.dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to add cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto err;
+ }
+
+ INIT_HLIST_NODE(&cfilter->cloud_node);
+ hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list);
+ vf->num_cloud_filters++;
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_add_qch_msg: Add queue channel and enable ADq
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_tc_info *tci =
+ (struct virtchnl_tc_info *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_link_status *ls = &pf->hw.phy.link_info;
+ int i, adq_request_qps = 0, speed = 0;
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* ADq cannot be applied if spoof check is ON */
+ if (vf->spoofchk) {
+ dev_err(&pf->pdev->dev,
+ "Spoof check is ON, turn it OFF to enable ADq\n");
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+ dev_err(&pf->pdev->dev,
+ "VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* max number of traffic classes for VF currently capped at 4 */
+ if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) {
+ dev_err(&pf->pdev->dev,
+ "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n",
+ vf->vf_id, tci->num_tc);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* validate queues for each TC */
+ for (i = 0; i < tci->num_tc; i++)
+ if (!tci->list[i].count ||
+ tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n",
+ vf->vf_id, i, tci->list[i].count);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+	/* need the max VF queues, but we already have the default number */
+ adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF;
+
+ if (pf->queues_left < adq_request_qps) {
+ dev_err(&pf->pdev->dev,
+ "No queues left to allocate to VF %d\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ } else {
+		/* we need to allocate the max VF queues to enable ADq, so as
+		 * to make sure an ADq-enabled VF always gets its queues back
+		 * when it goes through a reset.
+ */
+ vf->num_queue_pairs = I40E_MAX_VF_QUEUES;
+ }
+
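
Worked example of the queue accounting, assuming I40E_MAX_VF_QUEUES is 16 and I40E_DEFAULT_QUEUES_PER_VF is 4: enabling ADq asks the PF pool for 16 - 4 = 12 additional queue pairs, and num_queue_pairs is pinned at the full 16 so a later reset cannot shrink an ADq-enabled VF.
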
+	/* get the link speed in Mbps to validate the rate limit */
+ switch (ls->link_speed) {
+ case VIRTCHNL_LINK_SPEED_100MB:
+ speed = SPEED_100;
+ break;
+ case VIRTCHNL_LINK_SPEED_1GB:
+ speed = SPEED_1000;
+ break;
+ case VIRTCHNL_LINK_SPEED_10GB:
+ speed = SPEED_10000;
+ break;
+ case VIRTCHNL_LINK_SPEED_20GB:
+ speed = SPEED_20000;
+ break;
+ case VIRTCHNL_LINK_SPEED_25GB:
+ speed = SPEED_25000;
+ break;
+ case VIRTCHNL_LINK_SPEED_40GB:
+ speed = SPEED_40000;
+ break;
+ default:
+ dev_err(&pf->pdev->dev,
+ "Cannot detect link speed\n");
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* parse data from the queue channel info */
+ vf->num_tc = tci->num_tc;
+ for (i = 0; i < vf->num_tc; i++) {
+ if (tci->list[i].max_tx_rate) {
+ if (tci->list[i].max_tx_rate > speed) {
+ dev_err(&pf->pdev->dev,
+ "Invalid max tx rate %llu specified for VF %d.",
+ tci->list[i].max_tx_rate,
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ } else {
+ vf->ch[i].max_tx_rate =
+ tci->list[i].max_tx_rate;
+ }
+ }
+ vf->ch[i].num_qps = tci->list[i].count;
+ }
+
+ /* set this flag only after making sure all inputs are sane */
+ vf->adq_enabled = true;
+	/* num_req_queues is set when the user changes the number of queues
+	 * via ethtool, and this causes an issue for the default VSI (which
+	 * depends on this variable) when ADq is enabled, hence reset it.
+	 */
+ vf->num_req_queues = 0;
+
+ /* reset the VF in order to allocate resources */
+ i40e_vc_notify_vf_reset(vf);
+ i40e_reset_vf(vf, false);
+
+ return I40E_SUCCESS;
+
+ /* send the response to the VF */
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_del_qch_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+ struct i40e_pf *pf = vf->pf;
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (vf->adq_enabled) {
+ i40e_del_all_cloud_filters(vf);
+ i40e_del_qch(vf);
+ vf->adq_enabled = false;
+ vf->num_tc = 0;
+ dev_info(&pf->pdev->dev,
+ "Deleting Queue Channels and cloud filters for ADq on VF %d\n",
+ vf->vf_id);
+ } else {
+ dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ }
+
+ /* reset the VF in order to allocate resources */
+ i40e_vc_notify_vf_reset(vf);
+ i40e_reset_vf(vf, false);
+
+ return I40E_SUCCESS;
+
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS,
+ aq_ret);
+}
+
+/**
* i40e_vc_process_vf_msg
* @pf: pointer to the PF structure
* @vf_id: source VF id
@@ -2816,7 +3681,18 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
case VIRTCHNL_OP_REQUEST_QUEUES:
ret = i40e_vc_request_queues_msg(vf, msg, msglen);
break;
-
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ ret = i40e_vc_add_qch_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ ret = i40e_vc_del_qch_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+ ret = i40e_vc_add_cloud_filter(vf, msg);
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+ ret = i40e_vc_del_cloud_filter(vf, msg);
+ break;
case VIRTCHNL_OP_UNKNOWN:
default:
dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
@@ -2889,6 +3765,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
int ret = 0;
struct hlist_node *h;
int bkt;
+ u8 i;
/* validate the request */
if (vf_id >= pf->num_alloc_vfs) {
@@ -2900,6 +3777,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
vf = &(pf->vf[vf_id]);
vsi = pf->vsi[vf->lan_vsi_idx];
+
+	/* When the VF is resetting, wait until it is done.
+ * It can take up to 200 milliseconds,
+ * but wait for up to 300 milliseconds to be safe.
+ */
+ for (i = 0; i < 15; i++) {
+ if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
+ break;
+ msleep(20);
+ }
if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
vf_id);
@@ -3382,6 +4269,16 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
i40e_vc_disable_vf(vf);
dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
vf_id, setting ? "" : "un");
+
+	if (vf->adq_enabled && !vf->trusted) {
+		dev_info(&pf->pdev->dev,
+			 "VF %u no longer trusted, deleting all cloud filters\n",
+			 vf_id);
+		i40e_del_all_cloud_filters(vf);
+	}
+
out:
return ret;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 5efc4f92bb37..57f727bb9e36 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
@@ -69,6 +70,19 @@ enum i40e_vf_capabilities {
I40E_VIRTCHNL_VF_CAP_IWARP,
};
+/* In ADq, a max of 4 VSIs can be allocated per VF, including the primary
+ * VF VSI. These variables store the indices, ids and number of queues
+ * for each VSI, including those of the primary VF VSI. Each traffic class
+ * is termed a channel, and each channel can in turn have 4 queues, which
+ * means a max of 16 queues overall per VF.
+ */
+struct i40evf_channel {
+ u16 vsi_idx; /* index in PF struct for all channel VSIs */
+ u16 vsi_id; /* VSI ID used by firmware */
+ u16 num_qps; /* number of queue pairs requested by user */
+ u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
+};
+
/* VF information structure */
struct i40e_vf {
struct i40e_pf *pf;
@@ -111,6 +125,13 @@ struct i40e_vf {
u16 num_mac;
u16 num_vlan;
+ /* ADq related variables */
+ bool adq_enabled; /* flag to enable adq */
+ u8 num_tc;
+ struct i40evf_channel ch[I40E_MAX_VF_VSI];
+ struct hlist_head cloud_filter_list;
+ u16 num_cloud_filters;
+
/* RDMA Client */
struct virtchnl_iwarp_qvlist_info *qvlist_info;
};
diff --git a/drivers/net/ethernet/intel/i40evf/Makefile b/drivers/net/ethernet/intel/i40evf/Makefile
index a393f4a07f06..1e89c5487676 100644
--- a/drivers/net/ethernet/intel/i40evf/Makefile
+++ b/drivers/net/ethernet/intel/i40evf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index d1aab6b8bfb1..6fd677efa9da 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
index e0bfaa3d4a21..a7137c165256 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 815de8d9c3fb..439e71882049 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
index 8e6a6dd9212b..7e0fddd8af36 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 67bf5cebb76f..67140cdbcd7a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
index 0469e4bfd3ec..352dd3f3eb6a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
index 00ed24bfce13..7432596164f4 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
index a5d79877354c..ddac0e4908d3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
index a90737786c34..8668ad6c1a65 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 47c429931a57..72501bd0f1a9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_register.h b/drivers/net/ethernet/intel/i40evf/i40e_register.h
index 10febcfd7cd8..c9c935659758 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_register.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_status.h b/drivers/net/ethernet/intel/i40evf/i40e_status.h
index 5b222246e08b..0d7993ecb99a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_status.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_trace.h b/drivers/net/ethernet/intel/i40evf/i40e_trace.h
index 9a5100b2b7c7..ece01dd12a3c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel(R) 40-10 Gigabit Ethernet Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index eb8f3e327f6b..12bd937861e7 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -196,7 +197,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+ i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 9129447d079b..5790897eae2e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 54951c84a481..449de4b0058e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index b6991e8014d8..3a7a1e77bf39 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -52,7 +53,10 @@
#include <linux/socket.h>
#include <linux/jiffies.h>
#include <net/ip6_checksum.h>
+#include <net/pkt_cls.h>
#include <net/udp.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
#include "i40e_type.h"
#include <linux/avf/virtchnl.h>
@@ -106,6 +110,7 @@ struct i40e_vsi {
#define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40EVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */
/* MAX_MSIX_Q_VECTORS of these are allocated,
* but we only use one per queue-specific vector.
@@ -168,6 +173,28 @@ struct i40evf_vlan_filter {
bool add; /* filter needs to be added */
};
+#define I40EVF_MAX_TRAFFIC_CLASS 4
+/* State of traffic class creation */
+enum i40evf_tc_state_t {
+ __I40EVF_TC_INVALID, /* no traffic class, default state */
+ __I40EVF_TC_RUNNING, /* traffic classes have been created */
+};
+
+/* channel info */
+struct i40evf_channel_config {
+ struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS];
+ enum i40evf_tc_state_t state;
+ u8 total_qps;
+};
+
+/* State of cloud filter */
+enum i40evf_cloud_filter_state_t {
+ __I40EVF_CF_INVALID, /* cloud filter not added */
+ __I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
+ __I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
+ __I40EVF_CF_ACTIVE, /* cloud filter is active */
+};
+
/* Driver state. The order of these is important! */
enum i40evf_state_t {
__I40EVF_STARTUP, /* driver loaded, probe complete */
@@ -189,6 +216,36 @@ enum i40evf_critical_section_t {
__I40EVF_IN_REMOVE_TASK, /* device being removed */
};
+#define I40EVF_CLOUD_FIELD_OMAC 0x01
+#define I40EVF_CLOUD_FIELD_IMAC 0x02
+#define I40EVF_CLOUD_FIELD_IVLAN 0x04
+#define I40EVF_CLOUD_FIELD_TEN_ID 0x08
+#define I40EVF_CLOUD_FIELD_IIP 0x10
+
+#define I40EVF_CF_FLAGS_OMAC I40EVF_CLOUD_FIELD_OMAC
+#define I40EVF_CF_FLAGS_IMAC I40EVF_CLOUD_FIELD_IMAC
+#define I40EVF_CF_FLAGS_IMAC_IVLAN (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_IVLAN)
+#define I40EVF_CF_FLAGS_IMAC_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC (I40EVF_CLOUD_FIELD_OMAC |\
+ I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_IVLAN |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP
+
+/* bookkeeping of cloud filters */
+struct i40evf_cloud_filter {
+ enum i40evf_cloud_filter_state_t state;
+ struct list_head list;
+ struct virtchnl_filter f;
+ unsigned long cookie;
+ bool del; /* filter needs to be deleted */
+ bool add; /* filter needs to be added */
+};
+
/* board specific private data structure */
struct i40evf_adapter {
struct timer_list watchdog_timer;
@@ -224,13 +281,10 @@ struct i40evf_adapter {
u32 flags;
#define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0)
-#define I40EVF_FLAG_IMIR_ENABLED BIT(1)
-#define I40EVF_FLAG_MQ_CAPABLE BIT(2)
#define I40EVF_FLAG_PF_COMMS_FAILED BIT(3)
#define I40EVF_FLAG_RESET_PENDING BIT(4)
#define I40EVF_FLAG_RESET_NEEDED BIT(5)
#define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(6)
-#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(7)
#define I40EVF_FLAG_ADDR_SET_BY_PF BIT(8)
#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(9)
#define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(10)
@@ -240,6 +294,7 @@ struct i40evf_adapter {
#define I40EVF_FLAG_ALLMULTI_ON BIT(14)
#define I40EVF_FLAG_LEGACY_RX BIT(15)
#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16)
+#define I40EVF_FLAG_QUEUES_DISABLED BIT(17)
/* duplicates for common code */
#define I40E_FLAG_DCB_ENABLED 0
#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED
@@ -268,6 +323,10 @@ struct i40evf_adapter {
#define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18)
#define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19)
#define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20)
+#define I40EVF_FLAG_AQ_ENABLE_CHANNELS BIT(21)
+#define I40EVF_FLAG_AQ_DISABLE_CHANNELS BIT(22)
+#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23)
+#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24)
/* OS defined structs */
struct net_device *netdev;
@@ -313,6 +372,13 @@ struct i40evf_adapter {
u16 rss_lut_size;
u8 *rss_key;
u8 *rss_lut;
+ /* ADQ related members */
+ struct i40evf_channel_config ch_config;
+ u8 num_tc;
+ struct list_head cloud_filter_list;
+	/* lock to protect access to the cloud filter list */
+ spinlock_t cloud_filter_list_lock;
+ u16 num_cloud_filters;
};
@@ -379,4 +445,8 @@ void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len);
void i40evf_notify_client_l2_params(struct i40e_vsi *vsi);
void i40evf_notify_client_open(struct i40e_vsi *vsi);
void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
+void i40evf_enable_channels(struct i40evf_adapter *adapter);
+void i40evf_disable_channels(struct i40evf_adapter *adapter);
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter);
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter);
#endif /* _I40EVF_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index aded3ad7763e..dc4cde274fb8 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -695,6 +696,12 @@ static int i40evf_set_channels(struct net_device *netdev,
return -EINVAL;
}
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+ dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
+ return -EINVAL;
+ }
+
/* All of these should have already been checked by ethtool before this
* even gets to us, but just to be sure.
*/
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 6fd09926181a..5f71532be7f1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -785,7 +786,7 @@ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev,
**/
static struct
i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
- u8 *macaddr)
+ const u8 *macaddr)
{
struct i40evf_mac_filter *f;
@@ -808,20 +809,18 @@ i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
**/
static struct
i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
- u8 *macaddr)
+ const u8 *macaddr)
{
struct i40evf_mac_filter *f;
if (!macaddr)
return NULL;
- spin_lock_bh(&adapter->mac_vlan_list_lock);
-
f = i40evf_find_filter(adapter, macaddr);
if (!f) {
f = kzalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
- goto clearout;
+ return f;
ether_addr_copy(f->macaddr, macaddr);
@@ -832,8 +831,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
f->remove = false;
}
-clearout:
- spin_unlock_bh(&adapter->mac_vlan_list_lock);
return f;
}
@@ -868,9 +865,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
}
+ f = i40evf_add_filter(adapter, addr->sa_data);
+
spin_unlock_bh(&adapter->mac_vlan_list_lock);
- f = i40evf_add_filter(adapter, addr->sa_data);
if (f) {
ether_addr_copy(hw->mac.addr, addr->sa_data);
ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
@@ -880,50 +878,64 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
}
/**
- * i40evf_set_rx_mode - NDO callback to set the netdev filters
- * @netdev: network interface device structure
- **/
-static void i40evf_set_rx_mode(struct net_device *netdev)
+ * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and are guaranteed to hold the
+ * hash lock.
+ */
+static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
- struct i40evf_mac_filter *f, *ftmp;
- struct netdev_hw_addr *uca;
- struct netdev_hw_addr *mca;
- struct netdev_hw_addr *ha;
-
- /* add addr if not already in the filter list */
- netdev_for_each_uc_addr(uca, netdev) {
- i40evf_add_filter(adapter, uca->addr);
- }
- netdev_for_each_mc_addr(mca, netdev) {
- i40evf_add_filter(adapter, mca->addr);
- }
- spin_lock_bh(&adapter->mac_vlan_list_lock);
-
- list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
- netdev_for_each_mc_addr(mca, netdev)
- if (ether_addr_equal(mca->addr, f->macaddr))
- goto bottom_of_search_loop;
-
- netdev_for_each_uc_addr(uca, netdev)
- if (ether_addr_equal(uca->addr, f->macaddr))
- goto bottom_of_search_loop;
+ if (i40evf_add_filter(adapter, addr))
+ return 0;
+ else
+ return -ENOMEM;
+}
- for_each_dev_addr(netdev, ha)
- if (ether_addr_equal(ha->addr, f->macaddr))
- goto bottom_of_search_loop;
+/**
+ * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to remove
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and are guaranteed to hold the
+ * hash lock.
+ */
+static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct i40evf_mac_filter *f;
- if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr))
- goto bottom_of_search_loop;
+ /* Under some circumstances, we might receive a request to delete
+ * our own device address from our uc list. Because we store the
+ * device address in the VSI's MAC/VLAN filter list, we need to ignore
+ * such requests and not delete our device address from this list.
+ */
+ if (ether_addr_equal(addr, netdev->dev_addr))
+ return 0;
- /* f->macaddr wasn't found in uc, mc, or ha list so delete it */
+ f = i40evf_find_filter(adapter, addr);
+ if (f) {
f->remove = true;
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-
-bottom_of_search_loop:
- continue;
}
+ return 0;
+}
+
+/**
+ * i40evf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ **/
+static void i40evf_set_rx_mode(struct net_device *netdev)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ __dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+ __dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
if (netdev->flags & IFF_PROMISC &&
!(adapter->flags & I40EVF_FLAG_PROMISC_ON))
@@ -938,8 +950,6 @@ bottom_of_search_loop:
else if (!(netdev->flags & IFF_ALLMULTI) &&
adapter->flags & I40EVF_FLAG_ALLMULTI_ON)
adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI;
-
- spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
/**
@@ -1027,7 +1037,9 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter)
void i40evf_down(struct i40evf_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
+ struct i40evf_vlan_filter *vlf;
struct i40evf_mac_filter *f;
+ struct i40evf_cloud_filter *cf;
if (adapter->state <= __I40EVF_DOWN_PENDING)
return;
@@ -1040,17 +1052,29 @@ void i40evf_down(struct i40evf_adapter *adapter)
spin_lock_bh(&adapter->mac_vlan_list_lock);
+ /* clear the sync flag on all filters */
+ __dev_uc_unsync(adapter->netdev, NULL);
+ __dev_mc_unsync(adapter->netdev, NULL);
+
/* remove all MAC filters */
list_for_each_entry(f, &adapter->mac_filter_list, list) {
f->remove = true;
}
+
/* remove all VLAN filters */
- list_for_each_entry(f, &adapter->vlan_filter_list, list) {
- f->remove = true;
+ list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
+ vlf->remove = true;
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ /* remove all cloud filters */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ cf->del = true;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
adapter->state != __I40EVF_RESETTING) {
/* cancel any current operation */
@@ -1061,6 +1085,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
*/
adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
+ adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
}
@@ -1146,6 +1171,9 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
*/
if (adapter->num_req_queues)
num_active_queues = adapter->num_req_queues;
+ else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc)
+ num_active_queues = adapter->ch_config.total_qps;
else
num_active_queues = min_t(int,
adapter->vsi_res->num_queue_pairs,
@@ -1473,6 +1501,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter)
goto err_alloc_q_vectors;
}
+	/* If we've made it this far with the ADq flag set, then we haven't
+	 * bailed out anywhere along the way, and ADq isn't just enabled but
+	 * its resources have actually been allocated in the reset path.
+	 * Now we can truly claim that ADq is enabled.
+ */
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc)
+ dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
+ adapter->num_tc);
+
dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
(adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
adapter->num_active_queues);
@@ -1714,6 +1752,27 @@ static void i40evf_watchdog_task(struct work_struct *work)
i40evf_set_promiscuous(adapter, 0);
goto watchdog_done;
}
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) {
+ i40evf_enable_channels(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) {
+ i40evf_disable_channels(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) {
+ i40evf_add_cloud_filter(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) {
+ i40evf_del_cloud_filter(adapter);
+ goto watchdog_done;
+ }
+
schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
if (adapter->state == __I40EVF_RUNNING)
@@ -1737,6 +1796,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
{
struct i40evf_mac_filter *f, *ftmp;
struct i40evf_vlan_filter *fv, *fvtmp;
+ struct i40evf_cloud_filter *cf, *cftmp;
adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
@@ -1758,7 +1818,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
spin_lock_bh(&adapter->mac_vlan_list_lock);
- /* Delete all of the filters, both MAC and VLAN. */
+ /* Delete all of the filters */
list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
list_del(&f->list);
kfree(f);
@@ -1771,6 +1831,14 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
i40evf_free_misc_irq(adapter);
i40evf_reset_interrupt_capability(adapter);
i40evf_free_queues(adapter);
@@ -1800,9 +1868,11 @@ static void i40evf_reset_task(struct work_struct *work)
struct i40evf_adapter *adapter = container_of(work,
struct i40evf_adapter,
reset_task);
+ struct virtchnl_vf_resource *vfres = adapter->vf_res;
struct net_device *netdev = adapter->netdev;
struct i40e_hw *hw = &adapter->hw;
struct i40evf_vlan_filter *vlf;
+ struct i40evf_cloud_filter *cf;
struct i40evf_mac_filter *f;
u32 reg_val;
int i = 0, err;
@@ -1895,6 +1965,7 @@ continue_reset:
i40evf_free_all_rx_resources(adapter);
i40evf_free_all_tx_resources(adapter);
+ adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED;
/* kill and reinit the admin queue */
i40evf_shutdown_adminq(hw);
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
@@ -1926,8 +1997,19 @@ continue_reset:
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ /* check if TCs are running and re-add all cloud filters */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ cf->add = true;
+ }
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+ adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
i40evf_misc_irq_enable(adapter);
mod_timer(&adapter->watchdog_timer, jiffies + 2);
@@ -2193,6 +2275,712 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
}
/**
+ * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth
+ * @adapter: board private structure
+ * @max_tx_rate: max Tx bw for a tc
+ **/
+static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter,
+ u64 max_tx_rate)
+{
+ int speed = 0, ret = 0;
+
+ switch (adapter->link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ speed = 40000;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ speed = 25000;
+ break;
+ case I40E_LINK_SPEED_20GB:
+ speed = 20000;
+ break;
+ case I40E_LINK_SPEED_10GB:
+ speed = 10000;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ speed = 1000;
+ break;
+ case I40E_LINK_SPEED_100MB:
+ speed = 100;
+ break;
+ default:
+ break;
+ }
+
+ if (max_tx_rate > speed) {
+ dev_err(&adapter->pdev->dev,
+ "Invalid tx rate specified\n");
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/**
+ * i40evf_validate_ch_config - validate queue mapping info
+ * @adapter: board private structure
+ * @mqprio_qopt: queue parameters
+ *
+ * This function validates the queue channel configuration provided by
+ * the user. Returns 0 on a valid config.
+ **/
+static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+ u64 total_max_rate = 0;
+ int i, num_qps = 0;
+ u64 tx_rate = 0;
+ int ret = 0;
+
+ if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS ||
+ mqprio_qopt->qopt.num_tc < 1)
+ return -EINVAL;
+
+	for (i = 0; i < mqprio_qopt->qopt.num_tc; i++) {
+ if (!mqprio_qopt->qopt.count[i] ||
+ mqprio_qopt->qopt.offset[i] != num_qps)
+ return -EINVAL;
+ if (mqprio_qopt->min_rate[i]) {
+ dev_err(&adapter->pdev->dev,
+ "Invalid min tx rate (greater than 0) specified\n");
+ return -EINVAL;
+ }
+		/* convert to Mbps */
+ tx_rate = div_u64(mqprio_qopt->max_rate[i],
+ I40EVF_MBPS_DIVISOR);
+ total_max_rate += tx_rate;
+ num_qps += mqprio_qopt->qopt.count[i];
+ }
+ if (num_qps > MAX_QUEUES)
+ return -EINVAL;
+
+ ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate);
+ return ret;
+}
+
+/**
+ * i40evf_del_all_cloud_filters - delete all cloud filters on the
+ * traffic classes
+ * @adapter: board private structure
+ **/
+static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+ list) {
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
+/**
+ * __i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type_data: tc offload data
+ *
+ * This function processes the config information provided by the
+ * user to configure traffic classes/queue channels and packages the
+ * information to request the PF to setup traffic classes.
+ *
+ * Returns 0 on success.
+ **/
+static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
+{
+ struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct virtchnl_vf_resource *vfres = adapter->vf_res;
+ u8 num_tc = 0, total_qps = 0;
+ int ret = 0, netdev_tc = 0;
+ u64 max_tx_rate;
+ u16 mode;
+ int i;
+
+ num_tc = mqprio_qopt->qopt.num_tc;
+ mode = mqprio_qopt->mode;
+
+ /* delete queue_channel */
+ if (!mqprio_qopt->qopt.hw) {
+ if (adapter->ch_config.state == __I40EVF_TC_RUNNING) {
+ /* reset the tc configuration */
+ netdev_reset_tc(netdev);
+ adapter->num_tc = 0;
+ netif_tx_stop_all_queues(netdev);
+ netif_tx_disable(netdev);
+ i40evf_del_all_cloud_filters(adapter);
+ adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+ goto exit;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ /* add queue channel */
+ if (mode == TC_MQPRIO_MODE_CHANNEL) {
+ if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+ dev_err(&adapter->pdev->dev, "ADq not supported\n");
+ return -EOPNOTSUPP;
+ }
+ if (adapter->ch_config.state != __I40EVF_TC_INVALID) {
+ dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
+ return -EINVAL;
+ }
+
+ ret = i40evf_validate_ch_config(adapter, mqprio_qopt);
+ if (ret)
+ return ret;
+ /* Return if same TC config is requested */
+ if (adapter->num_tc == num_tc)
+ return 0;
+ adapter->num_tc = num_tc;
+
+ for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+ if (i < num_tc) {
+ adapter->ch_config.ch_info[i].count =
+ mqprio_qopt->qopt.count[i];
+ adapter->ch_config.ch_info[i].offset =
+ mqprio_qopt->qopt.offset[i];
+ total_qps += mqprio_qopt->qopt.count[i];
+ max_tx_rate = mqprio_qopt->max_rate[i];
+ /* convert to Mbps */
+ max_tx_rate = div_u64(max_tx_rate,
+ I40EVF_MBPS_DIVISOR);
+ adapter->ch_config.ch_info[i].max_tx_rate =
+ max_tx_rate;
+ } else {
+ adapter->ch_config.ch_info[i].count = 1;
+ adapter->ch_config.ch_info[i].offset = 0;
+ }
+ }
+ adapter->ch_config.total_qps = total_qps;
+ netif_tx_stop_all_queues(netdev);
+ netif_tx_disable(netdev);
+ adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+ netdev_reset_tc(netdev);
+ /* Report the tc mapping up the stack */
+ netdev_set_num_tc(adapter->netdev, num_tc);
+ for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+ u16 qcount = mqprio_qopt->qopt.count[i];
+ u16 qoffset = mqprio_qopt->qopt.offset[i];
+
+ if (i < num_tc)
+ netdev_set_tc_queue(netdev, netdev_tc++, qcount,
+ qoffset);
+ }
+ }
+exit:
+ return ret;
+}
+
+/**
+ * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @adapter: board private structure
+ * @f: pointer to struct tc_cls_flower_offload
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *f,
+ struct i40evf_cloud_filter *filter)
+{
+ u16 n_proto_mask = 0;
+ u16 n_proto_key = 0;
+ u8 field_flags = 0;
+ u16 addr_type = 0;
+ u16 n_proto = 0;
+ int i = 0;
+ struct virtchnl_filter *vf = &filter->f;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+ dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
+ f->dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+
+ if (mask->keyid != 0)
+ field_flags |= I40EVF_CLOUD_FIELD_TEN_ID;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ n_proto_key = ntohs(key->n_proto);
+ n_proto_mask = ntohs(mask->n_proto);
+
+ if (n_proto_key == ETH_P_ALL) {
+ n_proto_key = 0;
+ n_proto_mask = 0;
+ }
+ n_proto = n_proto_key & n_proto_mask;
+ if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
+ return -EINVAL;
+ if (n_proto == ETH_P_IPV6) {
+ /* specify flow type as TCP IPv6 */
+ vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
+ }
+
+ if (key->ip_proto != IPPROTO_TCP) {
+ dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
+ return -EINVAL;
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+
+ struct flow_dissector_key_eth_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+ /* use is_broadcast and is_zero to check for all 0xf or 0 */
+ if (!is_zero_ether_addr(mask->dst)) {
+ if (is_broadcast_ether_addr(mask->dst)) {
+ field_flags |= I40EVF_CLOUD_FIELD_OMAC;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+ mask->dst);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (!is_zero_ether_addr(mask->src)) {
+ if (is_broadcast_ether_addr(mask->src)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IMAC;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+ mask->src);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (!is_zero_ether_addr(key->dst))
+ if (is_valid_ether_addr(key->dst) ||
+ is_multicast_ether_addr(key->dst)) {
+ /* set the mask if a valid dst_mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.dst_mac,
+ key->dst);
+ }
+
+ if (!is_zero_ether_addr(key->src))
+ if (is_valid_ether_addr(key->src) ||
+ is_multicast_ether_addr(key->src)) {
+				/* set the mask if a valid src_mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ vf->mask.tcp_spec.src_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.src_mac,
+ key->src);
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
+
+ if (mask->vlan_id) {
+ if (mask->vlan_id == VLAN_VID_MASK) {
+ field_flags |= I40EVF_CLOUD_FIELD_IVLAN;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+ mask->vlan_id);
+ return I40E_ERR_CONFIG;
+ }
+ }
+ vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->key);
+
+ addr_type = key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->mask);
+
+ if (mask->dst) {
+ if (mask->dst == cpu_to_be32(0xffffffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+ be32_to_cpu(mask->dst));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask->src) {
+ if (mask->src == cpu_to_be32(0xffffffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+					be32_to_cpu(mask->src));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) {
+ dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+ return I40E_ERR_CONFIG;
+ }
+ if (key->dst) {
+ vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.dst_ip[0] = key->dst;
+ }
+ if (key->src) {
+ vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.src_ip[0] = key->src;
+ }
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->mask);
+
+ /* validate mask, make sure it is not IPV6_ADDR_ANY */
+ if (ipv6_addr_any(&mask->dst)) {
+ dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+ IPV6_ADDR_ANY);
+ return I40E_ERR_CONFIG;
+ }
+
+ /* src and dest IPv6 address should not be LOOPBACK
+ * (0:0:0:0:0:0:0:1) which can be represented as ::1
+ */
+ if (ipv6_addr_loopback(&key->dst) ||
+ ipv6_addr_loopback(&key->src)) {
+ dev_err(&adapter->pdev->dev,
+ "ipv6 addr should not be loopback\n");
+ return I40E_ERR_CONFIG;
+ }
+ if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+ sizeof(vf->data.tcp_spec.dst_ip));
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+ sizeof(vf->data.tcp_spec.src_ip));
+ }
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->mask);
+
+ if (mask->src) {
+ if (mask->src == cpu_to_be16(0xffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
+ be16_to_cpu(mask->src));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask->dst) {
+ if (mask->dst == cpu_to_be16(0xffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
+ be16_to_cpu(mask->dst));
+ return I40E_ERR_CONFIG;
+ }
+ }
+ if (key->dst) {
+ vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.dst_port = key->dst;
+ }
+
+ if (key->src) {
+ vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.src_port = key->src;
+ }
+ }
+ vf->field_flags = field_flags;
+
+ return 0;
+}
+
+/**
+ * i40evf_handle_tclass - Forward to a traffic class on the device
+ * @adapter: board private structure
+ * @tc: traffic class index on the device
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc,
+ struct i40evf_cloud_filter *filter)
+{
+ if (tc == 0)
+ return 0;
+ if (tc < adapter->num_tc) {
+ if (!filter->f.data.tcp_spec.dst_port) {
+ dev_err(&adapter->pdev->dev,
+ "Specify destination port to redirect to traffic class other than TC0\n");
+ return -EINVAL;
+ }
+ }
+ /* redirect to a traffic class on the same device */
+ filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
+ filter->f.action_meta = tc;
+ return 0;
+}
+
+/**
+ * i40evf_configure_clsflower - Add tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+ struct i40evf_cloud_filter *filter = NULL;
+ int err = -EINVAL, count = 50;
+
+ if (tc < 0) {
+ dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
+ return -EINVAL;
+ }
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return -ENOMEM;
+
+ while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
+ &adapter->crit_section)) {
+ if (--count == 0)
+ goto err;
+ udelay(1);
+ }
+
+ filter->cookie = cls_flower->cookie;
+
+ /* set the mask to all zeroes to begin with */
+ memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
+ /* start out with flow type and eth type IPv4 to begin with */
+ filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
+ err = i40evf_parse_cls_flower(adapter, cls_flower, filter);
+ if (err < 0)
+ goto err;
+
+ err = i40evf_handle_tclass(adapter, tc, filter);
+ if (err < 0)
+ goto err;
+
+ /* add filter to the list */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_add_tail(&filter->list, &adapter->cloud_filter_list);
+ adapter->num_cloud_filters++;
+ filter->add = true;
+ adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+err:
+ if (err)
+ kfree(filter);
+
+ clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ return err;
+}
+
+/**
+ * i40evf_find_cf - Find the cloud filter in the list
+ * @adapter: board private structure
+ * @cookie: filter specific cookie
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * cloud_filter_list_lock.
+ */
+static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter,
+ unsigned long *cookie)
+{
+ struct i40evf_cloud_filter *filter = NULL;
+
+ if (!cookie)
+ return NULL;
+
+ list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
+ if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+ return filter;
+ }
+ return NULL;
+}
+
+/**
+ * i40evf_delete_clsflower - Remove tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_delete_clsflower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ struct i40evf_cloud_filter *filter = NULL;
+ int err = 0;
+
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ filter = i40evf_find_cf(adapter, &cls_flower->cookie);
+ if (filter) {
+ filter->del = true;
+ adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+ } else {
+ err = -EINVAL;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+ return err;
+}
+
+/**
+ * i40evf_setup_tc_cls_flower - flower classifier offloads
+ * @adapter: board private structure
+ * @cls_flower: pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ if (cls_flower->common.chain_index)
+ return -EOPNOTSUPP;
+
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return i40evf_configure_clsflower(adapter, cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return i40evf_delete_clsflower(adapter, cls_flower);
+ case TC_CLSFLOWER_STATS:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * i40evf_setup_tc_block_cb - block callback for tc
+ * @type: type of offload
+ * @type_data: offload data
+ * @cb_priv: adapter board private structure
+ *
+ * This function is the block callback for traffic classes
+ **/
+static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return i40evf_setup_tc_cls_flower(cb_priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * i40evf_setup_tc_block - register callbacks for tc
+ * @dev: network interface device structure
+ * @f: tc offload data
+ *
+ * This function registers block callbacks for tc
+ * offloads
+ **/
+static int i40evf_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct i40evf_adapter *adapter = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
+ adapter, adapter);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
+ adapter);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type: type of offload
+ * @type_data: tc offload data
+ *
+ * This function is the callback to ndo_setup_tc in the
+ * netdev_ops.
+ *
+ * Returns 0 on success
+ **/
+static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_QDISC_MQPRIO:
+ return __i40evf_setup_tc(netdev, type_data);
+ case TC_SETUP_BLOCK:
+ return i40evf_setup_tc_block(netdev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
* i40evf_open - Called when a network interface is made active
* @netdev: network interface device structure
*
@@ -2238,7 +3026,12 @@ static int i40evf_open(struct net_device *netdev)
if (err)
goto err_req_irq;
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_configure(adapter);
i40evf_up_complete(adapter);
@@ -2459,6 +3252,7 @@ static const struct net_device_ops i40evf_netdev_ops = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = i40evf_netpoll,
#endif
+ .ndo_setup_tc = i40evf_setup_tc,
};
/**
@@ -2573,6 +3367,9 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX);
+ /* Enable cloud filter if ADQ is supported */
+ if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
+ hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= hw_features;
@@ -2940,9 +3737,11 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
mutex_init(&hw->aq.arq_mutex);
spin_lock_init(&adapter->mac_vlan_list_lock);
+ spin_lock_init(&adapter->cloud_filter_list_lock);
INIT_LIST_HEAD(&adapter->mac_filter_list);
INIT_LIST_HEAD(&adapter->vlan_filter_list);
+ INIT_LIST_HEAD(&adapter->cloud_filter_list);
INIT_WORK(&adapter->reset_task, i40evf_reset_task);
INIT_WORK(&adapter->adminq_task, i40evf_adminq_task);
@@ -3067,7 +3866,9 @@ static void i40evf_remove(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct i40evf_vlan_filter *vlf, *vlftmp;
struct i40evf_mac_filter *f, *ftmp;
+ struct i40evf_cloud_filter *cf, *cftmp;
struct i40e_hw *hw = &adapter->hw;
int err;
/* Indicate we are in remove and not to run reset_task */
@@ -3089,6 +3890,7 @@ static void i40evf_remove(struct pci_dev *pdev)
/* Shut down all the garbage mashers on the detention level */
adapter->state = __I40EVF_REMOVE;
adapter->aq_required = 0;
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
i40evf_request_reset(adapter);
msleep(50);
/* If the FW isn't responding, kick it once, but only once. */
@@ -3129,13 +3931,21 @@ static void i40evf_remove(struct pci_dev *pdev)
list_del(&f->list);
kfree(f);
}
- list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
- list_del(&f->list);
- kfree(f);
+ list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+ list) {
+ list_del(&vlf->list);
+ kfree(vlf);
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ list_del(&cf->list);
+ kfree(cf);
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
free_netdev(netdev);
pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index d57a67285505..26a59890532f 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -161,7 +162,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
VIRTCHNL_VF_OFFLOAD_ENCAP |
VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
- VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+ VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+ VIRTCHNL_VF_OFFLOAD_ADQ;
adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
@@ -465,7 +467,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
more = true;
}
- veal = kzalloc(len, GFP_KERNEL);
+ veal = kzalloc(len, GFP_ATOMIC);
if (!veal) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
@@ -538,7 +540,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
(count * sizeof(struct virtchnl_ether_addr));
more = true;
}
- veal = kzalloc(len, GFP_KERNEL);
+ veal = kzalloc(len, GFP_ATOMIC);
if (!veal) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
@@ -612,7 +614,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
(count * sizeof(u16));
more = true;
}
- vvfl = kzalloc(len, GFP_KERNEL);
+ vvfl = kzalloc(len, GFP_ATOMIC);
if (!vvfl) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
@@ -684,7 +686,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
(count * sizeof(u16));
more = true;
}
- vvfl = kzalloc(len, GFP_KERNEL);
+ vvfl = kzalloc(len, GFP_ATOMIC);
if (!vvfl) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
@@ -973,6 +975,205 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter)
}
/**
+ * i40evf_enable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable channels as specified by
+ * the user via the tc tool.
+ **/
+void i40evf_enable_channels(struct i40evf_adapter *adapter)
+{
+ struct virtchnl_tc_info *vti = NULL;
+ u16 len;
+ int i;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+
+ len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
+ sizeof(struct virtchnl_tc_info);
+
+ vti = kzalloc(len, GFP_KERNEL);
+ if (!vti)
+ return;
+ vti->num_tc = adapter->num_tc;
+ for (i = 0; i < vti->num_tc; i++) {
+ vti->list[i].count = adapter->ch_config.ch_info[i].count;
+ vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
+ vti->list[i].pad = 0;
+ vti->list[i].max_tx_rate =
+ adapter->ch_config.ch_info[i].max_tx_rate;
+ }
+
+ adapter->ch_config.state = __I40EVF_TC_RUNNING;
+ adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+ i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS,
+ (u8 *)vti, len);
+ kfree(vti);
+}
+
+/**
+ * i40evf_disable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF disable the channels that are configured
+ **/
+void i40evf_disable_channels(struct i40evf_adapter *adapter)
+{
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+
+ adapter->ch_config.state = __I40EVF_TC_INVALID;
+ adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+ i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS,
+ NULL, 0);
+}
+
+/**
+ * i40evf_print_cloud_filter
+ * @adapter: adapter structure
+ * @f: cloud filter to print
+ *
+ * Print the cloud filter
+ **/
+static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
+ struct virtchnl_filter *f)
+{
+ switch (f->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip[0],
+ &f->data.tcp_spec.src_ip[0],
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip,
+ &f->data.tcp_spec.src_ip,
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
+ break;
+ }
+}
+
+/**
+ * i40evf_add_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF add cloud filters as specified
+ * by the user via the tc tool.
+ **/
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf;
+ struct virtchnl_filter *f;
+ int len = 0, count = 0;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->add) {
+ count++;
+ break;
+ }
+ }
+ if (!count) {
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER;
+
+ len = sizeof(struct virtchnl_filter);
+ f = kzalloc(len, GFP_KERNEL);
+ if (!f)
+ return;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->add) {
+ memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+ cf->add = false;
+ cf->state = __I40EVF_CF_ADD_PENDING;
+ i40evf_send_pf_msg(adapter,
+ VIRTCHNL_OP_ADD_CLOUD_FILTER,
+ (u8 *)f, len);
+ }
+ }
+ kfree(f);
+}
+
+/**
+ * i40evf_del_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF delete cloud filters as specified
+ * by the user via the tc tool.
+ **/
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf, *cftmp;
+ struct virtchnl_filter *f;
+ int len = 0, count = 0;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->del) {
+ count++;
+ break;
+ }
+ }
+ if (!count) {
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER;
+
+ len = sizeof(struct virtchnl_filter);
+ f = kzalloc(len, GFP_KERNEL);
+ if (!f)
+ return;
+
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ if (cf->del) {
+ memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+ cf->del = false;
+ cf->state = __I40EVF_CF_DEL_PENDING;
+ i40evf_send_pf_msg(adapter,
+ VIRTCHNL_OP_DEL_CLOUD_FILTER,
+ (u8 *)f, len);
+ }
+ }
+ kfree(f);
+}
+
+/**
* i40evf_request_reset
* @adapter: adapter structure
*
@@ -1017,14 +1218,25 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
if (adapter->link_up == link_up)
break;
- /* If we get link up message and start queues before
- * our queues are configured it will trigger a TX hang.
- * In that case, just ignore the link status message,
- * we'll get another one after we enable queues and
- * actually prepared to send traffic.
- */
- if (link_up && adapter->state != __I40EVF_RUNNING)
- break;
+ if (link_up) {
+				/* If we get a link up message and start
+				 * queues before our queues are configured it
+				 * will trigger a TX hang. In that case, just
+				 * ignore the link status message; we'll get
+				 * another one after we enable queues and are
+				 * actually prepared to send traffic.
+ */
+ if (adapter->state != __I40EVF_RUNNING)
+ break;
+
+ /* For ADq enabled VF, we reconfigure VSIs and
+ * re-allocate queues. Hence wait till all
+ * queues are enabled.
+ */
+ if (adapter->flags &
+ I40EVF_FLAG_QUEUES_DISABLED)
+ break;
+ }
adapter->link_up = link_up;
if (link_up) {
@@ -1037,7 +1249,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
i40evf_print_link_message(adapter);
break;
case VIRTCHNL_EVENT_RESET_IMPENDING:
- dev_info(&adapter->pdev->dev, "PF reset warning received\n");
+ dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
adapter->flags |= I40EVF_FLAG_RESET_PENDING;
dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
@@ -1069,6 +1281,57 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
i40evf_stat_str(&adapter->hw, v_retval));
break;
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
+ i40evf_stat_str(&adapter->hw, v_retval));
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->ch_config.state = __I40EVF_TC_INVALID;
+ netdev_reset_tc(netdev);
+ netif_tx_start_all_queues(netdev);
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
+ i40evf_stat_str(&adapter->hw, v_retval));
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->ch_config.state = __I40EVF_TC_RUNNING;
+ netif_tx_start_all_queues(netdev);
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ list_for_each_entry_safe(cf, cftmp,
+ &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_ADD_PENDING) {
+ cf->state = __I40EVF_CF_INVALID;
+ dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
+ i40evf_stat_str(&adapter->hw,
+ v_retval));
+ i40evf_print_cloud_filter(adapter,
+ &cf->f);
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ }
+ }
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_DEL_PENDING) {
+ cf->state = __I40EVF_CF_ACTIVE;
+ dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n",
+ i40evf_stat_str(&adapter->hw,
+ v_retval));
+ i40evf_print_cloud_filter(adapter,
+ &cf->f);
+ }
+ }
+ }
+ break;
default:
dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
v_retval,
@@ -1108,6 +1371,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
case VIRTCHNL_OP_ENABLE_QUEUES:
/* enable transmits */
i40evf_irq_enable(adapter, true);
+ adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED;
break;
case VIRTCHNL_OP_DISABLE_QUEUES:
i40evf_free_all_tx_resources(adapter);
@@ -1162,6 +1426,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
}
}
break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->state == __I40EVF_CF_ADD_PENDING)
+ cf->state = __I40EVF_CF_ACTIVE;
+ }
+ }
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_DEL_PENDING) {
+ cf->state = __I40EVF_CF_INVALID;
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ }
+ }
+ break;
default:
if (adapter->current_op && (v_opcode != adapter->current_op))
dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
new file mode 100644
index 000000000000..4058673fd853
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018, Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection E800 Series Linux Driver
+#
+
+obj-$(CONFIG_ICE) += ice.o
+
+ice-y := ice_main.o \
+ ice_controlq.o \
+ ice_common.o \
+ ice_nvm.o \
+ ice_switch.o \
+ ice_sched.o \
+ ice_txrx.o \
+ ice_ethtool.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
new file mode 100644
index 000000000000..d8b5fff581e7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_H_
+#define _ICE_H_
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/compiler.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/cpumask.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/workqueue.h>
+#include <linux/aer.h>
+#include <linux/interrupt.h>
+#include <linux/ethtool.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/bitmap.h>
+#include <linux/log2.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_bridge.h>
+#include <net/ipv6.h>
+#include "ice_devids.h"
+#include "ice_type.h"
+#include "ice_txrx.h"
+#include "ice_switch.h"
+#include "ice_common.h"
+#include "ice_sched.h"
+
+extern const char ice_drv_ver[];
+#define ICE_BAR0 0
+#define ICE_DFLT_NUM_DESC 128
+#define ICE_MIN_NUM_DESC 8
+#define ICE_MAX_NUM_DESC 8160
+#define ICE_REQ_DESC_MULTIPLE 32
+#define ICE_DFLT_TRAFFIC_CLASS BIT(0)
+#define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16)
+#define ICE_ETHTOOL_FWVER_LEN 32
+#define ICE_AQ_LEN 64
+#define ICE_MIN_MSIX 2
+#define ICE_NO_VSI 0xffff
+#define ICE_MAX_VSI_ALLOC 130
+#define ICE_MAX_TXQS 2048
+#define ICE_MAX_RXQS 2048
+#define ICE_VSI_MAP_CONTIG 0
+#define ICE_VSI_MAP_SCATTER 1
+#define ICE_MAX_SCATTER_TXQS 16
+#define ICE_MAX_SCATTER_RXQS 16
+#define ICE_Q_WAIT_RETRY_LIMIT 10
+#define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT)
+#define ICE_MAX_LG_RSS_QS 256
+#define ICE_MAX_SMALL_RSS_QS 8
+#define ICE_RES_VALID_BIT 0x8000
+#define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1)
+#define ICE_INVAL_Q_INDEX 0xffff
+
+#define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4)
+
+#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
+ ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN)
+
+#define ICE_UP_TABLE_TRANSLATE(val, i) \
+ (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
+ ICE_AQ_VSI_UP_TABLE_UP##i##_M)
+
+#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
+#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
+#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
+
+/* Macro for each VSI in a PF */
+#define ice_for_each_vsi(pf, i) \
+ for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
+
+/* Macros for each tx/rx ring in a VSI */
+#define ice_for_each_txq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
+
+#define ice_for_each_rxq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
+
+struct ice_tc_info {
+ u16 qoffset;
+ u16 qcount;
+};
+
+struct ice_tc_cfg {
+ u8 numtc; /* Total number of enabled TCs */
+ u8 ena_tc; /* TX map */
+ struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
+};
+
+struct ice_res_tracker {
+ u16 num_entries;
+ u16 search_hint;
+ u16 list[1];
+};
+
+struct ice_sw {
+ struct ice_pf *pf;
+ u16 sw_id; /* switch ID for this switch */
+ u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */
+};
+
+enum ice_state {
+ __ICE_DOWN,
+ __ICE_NEEDS_RESTART,
+ __ICE_RESET_RECOVERY_PENDING, /* set by driver when reset starts */
+ __ICE_PFR_REQ, /* set by driver and peers */
+ __ICE_CORER_REQ, /* set by driver and peers */
+ __ICE_GLOBR_REQ, /* set by driver and peers */
+ __ICE_CORER_RECV, /* set by OICR handler */
+ __ICE_GLOBR_RECV, /* set by OICR handler */
+ __ICE_EMPR_RECV, /* set by OICR handler */
+ __ICE_SUSPENDED, /* set on module remove path */
+ __ICE_RESET_FAILED, /* set by reset/rebuild */
+ __ICE_ADMINQ_EVENT_PENDING,
+ __ICE_FLTR_OVERFLOW_PROMISC,
+ __ICE_CFG_BUSY,
+ __ICE_SERVICE_SCHED,
+ __ICE_STATE_NBITS /* must be last */
+};
+
+enum ice_vsi_flags {
+ ICE_VSI_FLAG_UMAC_FLTR_CHANGED,
+ ICE_VSI_FLAG_MMAC_FLTR_CHANGED,
+ ICE_VSI_FLAG_VLAN_FLTR_CHANGED,
+ ICE_VSI_FLAG_PROMISC_CHANGED,
+ ICE_VSI_FLAG_NBITS /* must be last */
+};
+
+/* struct that defines a VSI, associated with a dev */
+struct ice_vsi {
+ struct net_device *netdev;
+ struct ice_sw *vsw; /* switch this VSI is on */
+ struct ice_pf *back; /* back pointer to PF */
+ struct ice_port_info *port_info; /* back pointer to port_info */
+ struct ice_ring **rx_rings; /* rx ring array */
+ struct ice_ring **tx_rings; /* tx ring array */
+ struct ice_q_vector **q_vectors; /* q_vector array */
+
+ irqreturn_t (*irq_handler)(int irq, void *data);
+
+ u64 tx_linearize;
+ DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+ DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS);
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ unsigned int current_netdev_flags;
+ u32 tx_restart;
+ u32 tx_busy;
+ u32 rx_buf_failed;
+ u32 rx_page_failed;
+ int num_q_vectors;
+ int base_vector;
+ enum ice_vsi_type type;
+ u16 vsi_num; /* HW (absolute) index of this VSI */
+ u16 idx; /* software index in pf->vsi[] */
+
+ /* Interrupt thresholds */
+ u16 work_lmt;
+
+ /* RSS config */
+ u16 rss_table_size; /* HW RSS table size */
+ u16 rss_size; /* Allocated RSS queues */
+ u8 *rss_hkey_user; /* User configured hash keys */
+ u8 *rss_lut_user; /* User configured lookup table entries */
+ u8 rss_lut_type; /* used to configure Get/Set RSS LUT AQ call */
+
+ u16 max_frame;
+ u16 rx_buf_len;
+
+ struct ice_aqc_vsi_props info; /* VSI properties */
+
+ /* VSI stats */
+ struct rtnl_link_stats64 net_stats;
+ struct ice_eth_stats eth_stats;
+ struct ice_eth_stats eth_stats_prev;
+
+ struct list_head tmp_sync_list; /* MAC filters to be synced */
+ struct list_head tmp_unsync_list; /* MAC filters to be unsynced */
+
+ bool irqs_ready;
+ bool current_isup; /* Sync 'link up' logging */
+ bool stat_offsets_loaded;
+
+ /* queue information */
+ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+ u8 rx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+ u16 txq_map[ICE_MAX_TXQS]; /* index in pf->avail_txqs */
+ u16 rxq_map[ICE_MAX_RXQS]; /* index in pf->avail_rxqs */
+ u16 alloc_txq; /* Allocated Tx queues */
+ u16 num_txq; /* Used Tx queues */
+ u16 alloc_rxq; /* Allocated Rx queues */
+ u16 num_rxq; /* Used Rx queues */
+ u16 num_desc;
+ struct ice_tc_cfg tc_cfg;
+} ____cacheline_internodealigned_in_smp;
+
+/* struct that defines an interrupt vector */
+struct ice_q_vector {
+ struct ice_vsi *vsi;
+ cpumask_t affinity_mask;
+ struct napi_struct napi;
+ struct ice_ring_container rx;
+ struct ice_ring_container tx;
+ struct irq_affinity_notify affinity_notify;
+ u16 v_idx; /* index in the vsi->q_vector array. */
+ u8 num_ring_tx; /* total number of tx rings in vector */
+ u8 num_ring_rx; /* total number of rx rings in vector */
+ char name[ICE_INT_NAME_STR_LEN];
+} ____cacheline_internodealigned_in_smp;
+
+enum ice_pf_flags {
+ ICE_FLAG_MSIX_ENA,
+ ICE_FLAG_FLTR_SYNC,
+ ICE_FLAG_RSS_ENA,
+ ICE_PF_FLAGS_NBITS /* must be last */
+};
+
+struct ice_pf {
+ struct pci_dev *pdev;
+ struct msix_entry *msix_entries;
+ struct ice_res_tracker *irq_tracker;
+ struct ice_vsi **vsi; /* VSIs created by the driver */
+ struct ice_sw *first_sw; /* first switch created by firmware */
+ DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+ DECLARE_BITMAP(avail_txqs, ICE_MAX_TXQS);
+ DECLARE_BITMAP(avail_rxqs, ICE_MAX_RXQS);
+ DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
+ unsigned long serv_tmr_period;
+ unsigned long serv_tmr_prev;
+ struct timer_list serv_tmr;
+ struct work_struct serv_task;
+ struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */
+ struct mutex sw_mutex; /* lock for protecting VSI alloc flow */
+ u32 msg_enable;
+ u32 hw_csum_rx_error;
+ u32 oicr_idx; /* Other interrupt cause vector index */
+ u32 num_lan_msix; /* Total MSIX vectors for base driver */
+ u32 num_avail_msix; /* remaining MSIX vectors left unclaimed */
+ u16 num_lan_tx; /* num lan tx queues setup */
+ u16 num_lan_rx; /* num lan rx queues setup */
+ u16 q_left_tx; /* remaining num tx queues left unclaimed */
+ u16 q_left_rx; /* remaining num rx queues left unclaimed */
+ u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */
+ u16 num_alloc_vsi;
+ u16 corer_count; /* Core reset count */
+ u16 globr_count; /* Global reset count */
+ u16 empr_count; /* EMP reset count */
+ u16 pfr_count; /* PF reset count */
+
+ struct ice_hw_port_stats stats;
+ struct ice_hw_port_stats stats_prev;
+ struct ice_hw hw;
+ bool stat_prev_loaded; /* has previous stats been loaded */
+ char int_name[ICE_INT_NAME_STR_LEN];
+};
+
+struct ice_netdev_priv {
+ struct ice_vsi *vsi;
+};
+
+/**
+ * ice_irq_dynamic_ena - Enable default interrupt generation settings
+ * @hw: pointer to hw struct
+ * @vsi: pointer to vsi struct, can be NULL
+ * @q_vector: pointer to q_vector, can be NULL
+ */
+static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
+ struct ice_q_vector *q_vector)
+{
+ u32 vector = (vsi && q_vector) ? vsi->base_vector + q_vector->v_idx :
+ ((struct ice_pf *)hw->back)->oicr_idx;
+ int itr = ICE_ITR_NONE;
+ u32 val;
+
+ /* clear the PBA here, as this function is meant to clean out all
+ * previous interrupts and enable the interrupt
+ */
+ val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+ (itr << GLINT_DYN_CTL_ITR_INDX_S);
+ if (vsi && test_bit(__ICE_DOWN, vsi->state))
+ return;
+ wr32(hw, GLINT_DYN_CTL(vector), val);
+}
+
+static inline void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
+{
+ vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+ vsi->tc_cfg.numtc = 1;
+}
+
+void ice_set_ethtool_ops(struct net_device *netdev);
+int ice_up(struct ice_vsi *vsi);
+int ice_down(struct ice_vsi *vsi);
+int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+
+#endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
new file mode 100644
index 000000000000..5b13ca1bd85f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -0,0 +1,1352 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_ADMINQ_CMD_H_
+#define _ICE_ADMINQ_CMD_H_
+
+/* This header file defines the Admin Queue commands, error codes and
+ * descriptor format. It is shared between Firmware and Software.
+ */
+
+#define ICE_MAX_VSI 768
+#define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9
+#define ICE_AQ_SET_MAC_FRAME_SIZE_MAX 9728
+
+struct ice_aqc_generic {
+ __le32 param0;
+ __le32 param1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Get version (direct 0x0001) */
+struct ice_aqc_get_ver {
+ __le32 rom_ver;
+ __le32 fw_build;
+ u8 fw_branch;
+ u8 fw_major;
+ u8 fw_minor;
+ u8 fw_patch;
+ u8 api_branch;
+ u8 api_major;
+ u8 api_minor;
+ u8 api_patch;
+};
+
+/* Queue Shutdown (direct 0x0003) */
+struct ice_aqc_q_shutdown {
+#define ICE_AQC_DRIVER_UNLOADING BIT(0)
+ __le32 driver_unloading;
+ u8 reserved[12];
+};
+
+/* Request resource ownership (direct 0x0008)
+ * Release resource ownership (direct 0x0009)
+ */
+struct ice_aqc_req_res {
+ __le16 res_id;
+#define ICE_AQC_RES_ID_NVM 1
+#define ICE_AQC_RES_ID_SDP 2
+#define ICE_AQC_RES_ID_CHNG_LOCK 3
+#define ICE_AQC_RES_ID_GLBL_LOCK 4
+ __le16 access_type;
+#define ICE_AQC_RES_ACCESS_READ 1
+#define ICE_AQC_RES_ACCESS_WRITE 2
+
+ /* Upon successful completion, FW writes this value and the driver is
+ * expected to release the resource before the timeout expires. The
+ * value is in milliseconds.
+ */
+ __le32 timeout;
+#define ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS 3000
+#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS 180000
+#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS 1000
+#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS 3000
+ /* For SDP: pin id of the SDP */
+ __le32 res_number;
+ /* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */
+ __le16 status;
+#define ICE_AQ_RES_GLBL_SUCCESS 0
+#define ICE_AQ_RES_GLBL_IN_PROG 1
+#define ICE_AQ_RES_GLBL_DONE 2
+ u8 reserved[2];
+};
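A minimal sketch of how a caller might fill this descriptor to take NVM ownership for a read. ice_fill_dflt_direct_cmd_desc() and ice_aq_send_cmd() are added later in this patch, and ice_aqc_opc_req_res is defined near the end of this header; ice_demo_acquire_nvm() itself is a hypothetical name, not the driver's real helper:

	static enum ice_status ice_demo_acquire_nvm(struct ice_hw *hw)
	{
		struct ice_aqc_req_res *cmd;
		struct ice_aq_desc desc;

		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res);
		cmd = &desc.params.res_owner;
		cmd->res_id = cpu_to_le16(ICE_AQC_RES_ID_NVM);
		cmd->access_type = cpu_to_le16(ICE_AQC_RES_ACCESS_READ);
		cmd->timeout = cpu_to_le32(ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS);

		/* firmware writes back the granted timeout on success */
		return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
	}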
+
+/* Get function capabilities (indirect 0x000A)
+ * Get device capabilities (indirect 0x000B)
+ */
+struct ice_aqc_list_caps {
+ u8 cmd_flags;
+ u8 pf_index;
+ u8 reserved[2];
+ __le32 count;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Device/Function buffer entry, repeated per reported capability */
+struct ice_aqc_list_caps_elem {
+ __le16 cap;
+#define ICE_AQC_CAPS_VSI 0x0017
+#define ICE_AQC_CAPS_RSS 0x0040
+#define ICE_AQC_CAPS_RXQS 0x0041
+#define ICE_AQC_CAPS_TXQS 0x0042
+#define ICE_AQC_CAPS_MSIX 0x0043
+#define ICE_AQC_CAPS_MAX_MTU 0x0047
+
+ u8 major_ver;
+ u8 minor_ver;
+ /* Number of resources described by this capability */
+ __le32 number;
+ /* Only meaningful for some types of resources */
+ __le32 logical_id;
+ /* Only meaningful for some types of resources */
+ __le32 phys_id;
+ __le64 rsvd1;
+ __le64 rsvd2;
+};
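A sketch of walking such a response buffer, assuming buf and cap_count were obtained from a completed 0x000A/0x000B command (cap_count comes from the descriptor's count field; the loop body is illustrative only):

	struct ice_aqc_list_caps_elem *elem = buf;
	u32 i;

	for (i = 0; i < cap_count; i++, elem++) {
		switch (le16_to_cpu(elem->cap)) {
		case ICE_AQC_CAPS_VSI:
			/* le32_to_cpu(elem->number) = VSIs granted */
			break;
		default:
			/* unknown capabilities are simply skipped */
			break;
		}
	}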
+
+/* Manage MAC address, read command - indirect (0x0107)
+ * This struct is also used for the response
+ */
+struct ice_aqc_manage_mac_read {
+ __le16 flags; /* Zeroed by device driver */
+#define ICE_AQC_MAN_MAC_LAN_ADDR_VALID BIT(4)
+#define ICE_AQC_MAN_MAC_SAN_ADDR_VALID BIT(5)
+#define ICE_AQC_MAN_MAC_PORT_ADDR_VALID BIT(6)
+#define ICE_AQC_MAN_MAC_WOL_ADDR_VALID BIT(7)
+#define ICE_AQC_MAN_MAC_READ_S 4
+#define ICE_AQC_MAN_MAC_READ_M (0xF << ICE_AQC_MAN_MAC_READ_S)
+ u8 lport_num;
+ u8 lport_num_valid;
+#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID BIT(0)
+ u8 num_addr; /* Used in response */
+ u8 reserved[3];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Response buffer format for manage MAC read command */
+struct ice_aqc_manage_mac_read_resp {
+ u8 lport_num;
+ u8 addr_type;
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_LAN 0
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_WOL 1
+ u8 mac_addr[ETH_ALEN];
+};
+
+/* Manage MAC address, write command - direct (0x0108) */
+struct ice_aqc_manage_mac_write {
+ u8 port_num;
+ u8 flags;
+#define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0)
+#define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1)
+#define ICE_AQC_MAN_MAC_WR_S 6
+#define ICE_AQC_MAN_MAC_WR_M (3 << ICE_AQC_MAN_MAC_WR_S)
+#define ICE_AQC_MAN_MAC_UPDATE_LAA 0
+#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL (BIT(0) << ICE_AQC_MAN_MAC_WR_S)
+ /* High 16 bits of MAC address in big endian order */
+ __be16 sah;
+ /* Low 32 bits of MAC address in big endian order */
+ __be32 sal;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Clear PXE Command and response (direct 0x0110) */
+struct ice_aqc_clear_pxe {
+ u8 rx_cnt;
+#define ICE_AQC_CLEAR_PXE_RX_CNT 0x2
+ u8 reserved[15];
+};
+
+/* Get switch configuration (0x0200) */
+struct ice_aqc_get_sw_cfg {
+ /* Reserved for command and copy of request flags for response */
+ __le16 flags;
+ /* First desc in case of command, next_elem in case of response.
+ * A non-zero value in a response means not all of the configuration
+ * was returned, and a new command should be sent with this value in
+ * the 'first desc' field
+ */
+ __le16 element;
+ /* Reserved for command, only used for response */
+ __le16 num_elems;
+ __le16 rsvd;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Each entry in the response buffer is of the following type: */
+struct ice_aqc_get_sw_cfg_resp_elem {
+ /* VSI/Port Number */
+ __le16 vsi_port_num;
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S 0
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M \
+ (0x3FF << ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_S 14
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_M (0x3 << ICE_AQC_GET_SW_CONF_RESP_TYPE_S)
+#define ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT 0
+#define ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT 1
+#define ICE_AQC_GET_SW_CONF_RESP_VSI 2
+
+ /* SWID VSI/Port belongs to */
+ __le16 swid;
+
+ /* Bit 14..0 : PF/VF number VSI belongs to
+ * Bit 15 : VF indication bit
+ */
+ __le16 pf_vf_num;
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S 0
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M \
+ (0x7FFF << ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_IS_VF BIT(15)
+};
+
+/* The response buffer is as follows. Note that the length of the
+ * elements array varies with the length of the command response.
+ */
+struct ice_aqc_get_sw_cfg_resp {
+ struct ice_aqc_get_sw_cfg_resp_elem elements[1];
+};
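The element field implements simple pagination: a non-zero value in a response means firmware has more entries, and the caller re-issues the command starting from that element. An illustrative loop, where ice_demo_get_sw_cfg_page() is a hypothetical helper that sends one 0x0200 command with the given starting element and returns the response descriptor's element field:

	u16 next_elem = 0;

	do {
		next_elem = ice_demo_get_sw_cfg_page(hw, next_elem);
	} while (next_elem);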
+
+/* These resource type defines are used for all switch resource
+ * commands where a resource type is required, such as:
+ * Get Resource Allocation command (indirect 0x0204)
+ * Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ * Get Allocated Resource Descriptors Command (indirect 0x020A)
+ */
+#define ICE_AQC_RES_TYPE_VSI_LIST_REP 0x03
+#define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE 0x04
+
+/* Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ */
+struct ice_aqc_alloc_free_res_cmd {
+ __le16 num_entries; /* Number of Resource entries */
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Resource descriptor */
+struct ice_aqc_res_elem {
+ union {
+ __le16 sw_resp;
+ __le16 flu_resp;
+ } e;
+};
+
+/* Buffer for Allocate/Free Resources commands */
+struct ice_aqc_alloc_free_res_elem {
+ __le16 res_type; /* Types defined above cmd 0x0204 */
+#define ICE_AQC_RES_TYPE_SHARED_S 7
+#define ICE_AQC_RES_TYPE_SHARED_M (0x1 << ICE_AQC_RES_TYPE_SHARED_S)
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S 8
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_M \
+ (0xF << ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S)
+ __le16 num_elems;
+ struct ice_aqc_res_elem elem[1];
+};
+
+/* Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Get VSI (indirect 0x0212)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_get_update_free_vsi {
+ __le16 vsi_num;
+#define ICE_AQ_VSI_NUM_S 0
+#define ICE_AQ_VSI_NUM_M (0x03FF << ICE_AQ_VSI_NUM_S)
+#define ICE_AQ_VSI_IS_VALID BIT(15)
+ __le16 cmd_flags;
+#define ICE_AQ_VSI_KEEP_ALLOC 0x1
+ u8 vf_id;
+ u8 reserved;
+ __le16 vsi_flags;
+#define ICE_AQ_VSI_TYPE_S 0
+#define ICE_AQ_VSI_TYPE_M (0x3 << ICE_AQ_VSI_TYPE_S)
+#define ICE_AQ_VSI_TYPE_VF 0x0
+#define ICE_AQ_VSI_TYPE_VMDQ2 0x1
+#define ICE_AQ_VSI_TYPE_PF 0x2
+#define ICE_AQ_VSI_TYPE_EMP_MNG 0x3
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Response descriptor for:
+ * Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_update_free_vsi_resp {
+ __le16 vsi_num;
+ __le16 ext_status;
+ __le16 vsi_used;
+ __le16 vsi_free;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_vsi_props {
+ __le16 valid_sections;
+#define ICE_AQ_VSI_PROP_SW_VALID BIT(0)
+#define ICE_AQ_VSI_PROP_SECURITY_VALID BIT(1)
+#define ICE_AQ_VSI_PROP_VLAN_VALID BIT(2)
+#define ICE_AQ_VSI_PROP_OUTER_TAG_VALID BIT(3)
+#define ICE_AQ_VSI_PROP_INGRESS_UP_VALID BIT(4)
+#define ICE_AQ_VSI_PROP_EGRESS_UP_VALID BIT(5)
+#define ICE_AQ_VSI_PROP_RXQ_MAP_VALID BIT(6)
+#define ICE_AQ_VSI_PROP_Q_OPT_VALID BIT(7)
+#define ICE_AQ_VSI_PROP_OUTER_UP_VALID BIT(8)
+#define ICE_AQ_VSI_PROP_FLOW_DIR_VALID BIT(11)
+#define ICE_AQ_VSI_PROP_PASID_VALID BIT(12)
+ /* switch section */
+ u8 sw_id;
+ u8 sw_flags;
+#define ICE_AQ_VSI_SW_FLAG_ALLOW_LB BIT(5)
+#define ICE_AQ_VSI_SW_FLAG_LOCAL_LB BIT(6)
+#define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE BIT(7)
+ u8 sw_flags2;
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S 0
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M \
+ (0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S)
+#define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA BIT(0)
+#define ICE_AQ_VSI_SW_FLAG_LAN_ENA BIT(4)
+ u8 veb_stat_id;
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_S 0
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_M (0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S)
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID BIT(5)
+ /* security section */
+ u8 sec_flags;
+#define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD BIT(0)
+#define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF BIT(2)
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S 4
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M (0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)
+#define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA BIT(0)
+ u8 sec_reserved;
+ /* VLAN section */
+ __le16 pvid; /* VLANs include priority bits */
+ u8 pvlan_reserved[2];
+ u8 port_vlan_flags;
+#define ICE_AQ_VSI_PVLAN_MODE_S 0
+#define ICE_AQ_VSI_PVLAN_MODE_M (0x3 << ICE_AQ_VSI_PVLAN_MODE_S)
+#define ICE_AQ_VSI_PVLAN_MODE_UNTAGGED 0x1
+#define ICE_AQ_VSI_PVLAN_MODE_TAGGED 0x2
+#define ICE_AQ_VSI_PVLAN_MODE_ALL 0x3
+#define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2)
+#define ICE_AQ_VSI_PVLAN_EMOD_S 3
+#define ICE_AQ_VSI_PVLAN_EMOD_M (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_PVLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_PVLAN_EMOD_STR (0x2 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_PVLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S)
+ u8 pvlan_reserved2[3];
+ /* ingress egress up sections */
+ __le32 ingress_table; /* bitmap, 3 bits per up */
+#define ICE_AQ_VSI_UP_TABLE_UP0_S 0
+#define ICE_AQ_VSI_UP_TABLE_UP0_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S)
+#define ICE_AQ_VSI_UP_TABLE_UP1_S 3
+#define ICE_AQ_VSI_UP_TABLE_UP1_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S)
+#define ICE_AQ_VSI_UP_TABLE_UP2_S 6
+#define ICE_AQ_VSI_UP_TABLE_UP2_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S)
+#define ICE_AQ_VSI_UP_TABLE_UP3_S 9
+#define ICE_AQ_VSI_UP_TABLE_UP3_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S)
+#define ICE_AQ_VSI_UP_TABLE_UP4_S 12
+#define ICE_AQ_VSI_UP_TABLE_UP4_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S)
+#define ICE_AQ_VSI_UP_TABLE_UP5_S 15
+#define ICE_AQ_VSI_UP_TABLE_UP5_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S)
+#define ICE_AQ_VSI_UP_TABLE_UP6_S 18
+#define ICE_AQ_VSI_UP_TABLE_UP6_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S)
+#define ICE_AQ_VSI_UP_TABLE_UP7_S 21
+#define ICE_AQ_VSI_UP_TABLE_UP7_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S)
+ __le32 egress_table; /* same defines as for ingress table */
+ /* outer tags section */
+ __le16 outer_tag;
+ u8 outer_tag_flags;
+#define ICE_AQ_VSI_OUTER_TAG_MODE_S 0
+#define ICE_AQ_VSI_OUTER_TAG_MODE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_MODE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NOTHING 0x0
+#define ICE_AQ_VSI_OUTER_TAG_REMOVE 0x1
+#define ICE_AQ_VSI_OUTER_TAG_COPY 0x2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_S 2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NONE 0x0
+#define ICE_AQ_VSI_OUTER_TAG_STAG 0x1
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100 0x2
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100 0x3
+#define ICE_AQ_VSI_OUTER_TAG_INSERT BIT(4)
+#define ICE_AQ_VSI_OUTER_TAG_ACCEPT_HOST BIT(6)
+ u8 outer_tag_reserved;
+ /* queue mapping section */
+ __le16 mapping_flags;
+#define ICE_AQ_VSI_Q_MAP_CONTIG 0x0
+#define ICE_AQ_VSI_Q_MAP_NONCONTIG BIT(0)
+ __le16 q_mapping[16];
+#define ICE_AQ_VSI_Q_S 0
+#define ICE_AQ_VSI_Q_M (0x7FF << ICE_AQ_VSI_Q_S)
+ __le16 tc_mapping[8];
+#define ICE_AQ_VSI_TC_Q_OFFSET_S 0
+#define ICE_AQ_VSI_TC_Q_OFFSET_M (0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S)
+#define ICE_AQ_VSI_TC_Q_NUM_S 11
+#define ICE_AQ_VSI_TC_Q_NUM_M (0xF << ICE_AQ_VSI_TC_Q_NUM_S)
+ /* queueing option section */
+ u8 q_opt_rss;
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S 0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI 0x0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF 0x2
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL 0x3
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+ u8 q_opt_tc;
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S)
+#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR BIT(7)
+ u8 q_opt_flags;
+#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN BIT(0)
+ u8 q_opt_reserved[3];
+ /* outer up section */
+ __le32 outer_up_table; /* same structure and defines as ingress tbl */
+ /* section 10 */
+ __le16 sect_10_reserved;
+ /* flow director section */
+ __le16 fd_options;
+#define ICE_AQ_VSI_FD_ENABLE BIT(0)
+#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE BIT(1)
+#define ICE_AQ_VSI_FD_PROG_ENABLE BIT(3)
+ __le16 max_fd_fltr_dedicated;
+ __le16 max_fd_fltr_shared;
+ __le16 fd_def_q;
+#define ICE_AQ_VSI_FD_DEF_Q_S 0
+#define ICE_AQ_VSI_FD_DEF_Q_M (0x7FF << ICE_AQ_VSI_FD_DEF_Q_S)
+#define ICE_AQ_VSI_FD_DEF_GRP_S 12
+#define ICE_AQ_VSI_FD_DEF_GRP_M (0x7 << ICE_AQ_VSI_FD_DEF_GRP_S)
+ __le16 fd_report_opt;
+#define ICE_AQ_VSI_FD_REPORT_Q_S 0
+#define ICE_AQ_VSI_FD_REPORT_Q_M (0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S)
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_S 12
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_M (0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S)
+#define ICE_AQ_VSI_FD_DEF_DROP BIT(15)
+ /* PASID section */
+ __le32 pasid_id;
+#define ICE_AQ_VSI_PASID_ID_S 0
+#define ICE_AQ_VSI_PASID_ID_M (0xFFFFF << ICE_AQ_VSI_PASID_ID_S)
+#define ICE_AQ_VSI_PASID_ID_VALID BIT(31)
+ u8 reserved[24];
+};
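As one example of these fields in use, the ingress/egress UP tables can be built with the ICE_UP_TABLE_TRANSLATE() macro from ice.h earlier in this patch. The sketch below programs an identity mapping (UP i stays UP i) into a hypothetical ctxt variable that carries a struct ice_aqc_vsi_props as info:

	u32 table = ICE_UP_TABLE_TRANSLATE(0, 0) |
		    ICE_UP_TABLE_TRANSLATE(1, 1) |
		    ICE_UP_TABLE_TRANSLATE(2, 2) |
		    ICE_UP_TABLE_TRANSLATE(3, 3) |
		    ICE_UP_TABLE_TRANSLATE(4, 4) |
		    ICE_UP_TABLE_TRANSLATE(5, 5) |
		    ICE_UP_TABLE_TRANSLATE(6, 6) |
		    ICE_UP_TABLE_TRANSLATE(7, 7);

	ctxt->info.ingress_table = cpu_to_le32(table);
	ctxt->info.egress_table = cpu_to_le32(table);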
+
+/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
+ */
+struct ice_aqc_sw_rules {
+ /* ops: add switch rules, referring to the number of rules.
+ * ops: update switch rules, referring to the number of filters.
+ * ops: remove switch rules, referring to the entry index.
+ * ops: get switch rules, referring to the number of filters.
+ */
+ __le16 num_rules_fltr_entry_index;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Add/Update/Get/Remove lookup Rx/Tx command/response entry
+ * This structure describes the lookup rules and associated actions. "index"
+ * is returned as part of a response to a successful Add command, and can be
+ * used to identify the rule for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lkup_rx_tx {
+ __le16 recipe_id;
+#define ICE_SW_RECIPE_LOGICAL_PORT_FWD 10
+ /* Source port for LOOKUP_RX and source VSI in case of LOOKUP_TX */
+ __le16 src;
+ __le32 act;
+
+ /* Bit 0:1 - Action type */
+#define ICE_SINGLE_ACT_TYPE_S 0x00
+#define ICE_SINGLE_ACT_TYPE_M (0x3 << ICE_SINGLE_ACT_TYPE_S)
+
+ /* Bit 2 - Loop back enable
+ * Bit 3 - LAN enable
+ */
+#define ICE_SINGLE_ACT_LB_ENABLE BIT(2)
+#define ICE_SINGLE_ACT_LAN_ENABLE BIT(3)
+
+ /* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_SINGLE_ACT_VSI_FORWARDING 0x0
+
+#define ICE_SINGLE_ACT_VSI_ID_S 4
+#define ICE_SINGLE_ACT_VSI_ID_M (0x3FF << ICE_SINGLE_ACT_VSI_ID_S)
+#define ICE_SINGLE_ACT_VSI_LIST_ID_S 4
+#define ICE_SINGLE_ACT_VSI_LIST_ID_M (0x3FF << ICE_SINGLE_ACT_VSI_LIST_ID_S)
+ /* This bit needs to be set if action is forward to VSI list */
+#define ICE_SINGLE_ACT_VSI_LIST BIT(14)
+#define ICE_SINGLE_ACT_VALID_BIT BIT(17)
+#define ICE_SINGLE_ACT_DROP BIT(18)
+
+ /* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_SINGLE_ACT_TO_Q 0x1
+#define ICE_SINGLE_ACT_Q_INDEX_S 4
+#define ICE_SINGLE_ACT_Q_INDEX_M (0x7FF << ICE_SINGLE_ACT_Q_INDEX_S)
+#define ICE_SINGLE_ACT_Q_REGION_S 15
+#define ICE_SINGLE_ACT_Q_REGION_M (0x7 << ICE_SINGLE_ACT_Q_REGION_S)
+#define ICE_SINGLE_ACT_Q_PRIORITY BIT(18)
+
+ /* Action type = 2 - Prune */
+#define ICE_SINGLE_ACT_PRUNE 0x2
+#define ICE_SINGLE_ACT_EGRESS BIT(15)
+#define ICE_SINGLE_ACT_INGRESS BIT(16)
+#define ICE_SINGLE_ACT_PRUNET BIT(17)
+ /* Bit 18 should be set to 0 for this action */
+
+ /* Action type = 2 - Pointer */
+#define ICE_SINGLE_ACT_PTR 0x2
+#define ICE_SINGLE_ACT_PTR_VAL_S 4
+#define ICE_SINGLE_ACT_PTR_VAL_M (0x1FFF << ICE_SINGLE_ACT_PTR_VAL_S)
+ /* Bit 18 should be set to 1 */
+#define ICE_SINGLE_ACT_PTR_BIT BIT(18)
+
+ /* Action type = 3 - Other actions. Last two bits
+ * are other action identifier
+ */
+#define ICE_SINGLE_ACT_OTHER_ACTS 0x3
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S 17
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M \
+ (0x3 << ICE_SINGLE_OTHER_ACT_IDENTIFIER_S)
+
+ /* Bit 17:18 - Defines other actions */
+ /* Other action = 0 - Mirror VSI */
+#define ICE_SINGLE_OTHER_ACT_MIRROR 0
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_S 4
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_M \
+ (0x3FF << ICE_SINGLE_ACT_MIRROR_VSI_ID_S)
+
+ /* Other action = 3 - Set Stat count */
+#define ICE_SINGLE_OTHER_ACT_STAT_COUNT 3
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_S 4
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_M \
+ (0x7F << ICE_SINGLE_ACT_STAT_COUNT_INDEX_S)
+
+ __le16 index; /* The index of the rule in the lookup table */
+ /* Length and values of the header to be matched per recipe or
+ * lookup-type
+ */
+ __le16 hdr_len;
+ u8 hdr[1];
+} __packed;
+
+/* Add/Update/Remove large action command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the action for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lg_act {
+ __le16 index; /* Index in large action table */
+ __le16 size;
+ __le32 act[1]; /* array of size for actions */
+ /* Max number of large actions */
+#define ICE_MAX_LG_ACT 4
+ /* Bits 0:2 - Action type */
+#define ICE_LG_ACT_TYPE_S 0
+#define ICE_LG_ACT_TYPE_M (0x7 << ICE_LG_ACT_TYPE_S)
+
+ /* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_LG_ACT_VSI_FORWARDING 0
+#define ICE_LG_ACT_VSI_ID_S 3
+#define ICE_LG_ACT_VSI_ID_M (0x3FF << ICE_LG_ACT_VSI_ID_S)
+#define ICE_LG_ACT_VSI_LIST_ID_S 3
+#define ICE_LG_ACT_VSI_LIST_ID_M (0x3FF << ICE_LG_ACT_VSI_LIST_ID_S)
+ /* This bit needs to be set if action is forward to VSI list */
+#define ICE_LG_ACT_VSI_LIST BIT(13)
+
+#define ICE_LG_ACT_VALID_BIT BIT(16)
+
+ /* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_LG_ACT_TO_Q 0x1
+#define ICE_LG_ACT_Q_INDEX_S 3
+#define ICE_LG_ACT_Q_INDEX_M (0x7FF << ICE_LG_ACT_Q_INDEX_S)
+#define ICE_LG_ACT_Q_REGION_S 14
+#define ICE_LG_ACT_Q_REGION_M (0x7 << ICE_LG_ACT_Q_REGION_S)
+#define ICE_LG_ACT_Q_PRIORITY_SET BIT(17)
+
+ /* Action type = 2 - Prune */
+#define ICE_LG_ACT_PRUNE 0x2
+#define ICE_LG_ACT_EGRESS BIT(14)
+#define ICE_LG_ACT_INGRESS BIT(15)
+#define ICE_LG_ACT_PRUNET BIT(16)
+
+ /* Action type = 3 - Mirror VSI */
+#define ICE_LG_OTHER_ACT_MIRROR 0x3
+#define ICE_LG_ACT_MIRROR_VSI_ID_S 3
+#define ICE_LG_ACT_MIRROR_VSI_ID_M (0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S)
+
+ /* Action type = 5 - Large Action */
+#define ICE_LG_ACT_GENERIC 0x5
+#define ICE_LG_ACT_GENERIC_VALUE_S 3
+#define ICE_LG_ACT_GENERIC_VALUE_M (0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S)
+#define ICE_LG_ACT_GENERIC_OFFSET_S 19
+#define ICE_LG_ACT_GENERIC_OFFSET_M (0x7 << ICE_LG_ACT_GENERIC_OFFSET_S)
+#define ICE_LG_ACT_GENERIC_PRIORITY_S 22
+#define ICE_LG_ACT_GENERIC_PRIORITY_M (0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S)
+
+ /* Action = 7 - Set Stat count */
+#define ICE_LG_ACT_STAT_COUNT 0x7
+#define ICE_LG_ACT_STAT_COUNT_S 3
+#define ICE_LG_ACT_STAT_COUNT_M (0x7F << ICE_LG_ACT_STAT_COUNT_S)
+};
+
+/* Add/Update/Remove VSI list command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the VSI list for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_vsi_list {
+ __le16 index; /* Index of VSI/Prune list */
+ __le16 number_vsi;
+ __le16 vsi[1]; /* Array of number_vsi VSI numbers */
+};
+
+/* Query VSI list command/response entry */
+struct ice_sw_rule_vsi_list_query {
+ __le16 index;
+ DECLARE_BITMAP(vsi_list, ICE_MAX_VSI);
+} __packed;
+
+/* Add switch rule response:
+ * The content of the return buffer is the same as the input buffer. The
+ * status field and LUT index are updated as part of the response.
+ */
+struct ice_aqc_sw_rules_elem {
+ __le16 type; /* Switch rule type, one of T_... */
+#define ICE_AQC_SW_RULES_T_LKUP_RX 0x0
+#define ICE_AQC_SW_RULES_T_LKUP_TX 0x1
+#define ICE_AQC_SW_RULES_T_LG_ACT 0x2
+#define ICE_AQC_SW_RULES_T_VSI_LIST_SET 0x3
+#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR 0x4
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET 0x5
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR 0x6
+ __le16 status;
+ union {
+ struct ice_sw_rule_lkup_rx_tx lkup_tx_rx;
+ struct ice_sw_rule_lg_act lg_act;
+ struct ice_sw_rule_vsi_list vsi_list;
+ struct ice_sw_rule_vsi_list_query vsi_list_query;
+ } __packed pdata;
+};
+
+/* Get Default Topology (indirect 0x0400) */
+struct ice_aqc_get_topo {
+ u8 port_num;
+ u8 num_branches;
+ __le16 reserved1;
+ __le32 reserved2;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Update TSE (indirect 0x0403)
+ * Get TSE (indirect 0x0404)
+ */
+struct ice_aqc_get_cfg_elem {
+ __le16 num_elem_req; /* Used by commands */
+ __le16 num_elem_resp; /* Used by responses */
+ __le32 reserved;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* This is the buffer for:
+ * Suspend Nodes (indirect 0x0409)
+ * Resume Nodes (indirect 0x040A)
+ */
+struct ice_aqc_suspend_resume_elem {
+ __le32 teid[1];
+};
+
+/* Add TSE (indirect 0x0401)
+ * Delete TSE (indirect 0x040F)
+ * Move TSE (indirect 0x0408)
+ */
+struct ice_aqc_add_move_delete_elem {
+ __le16 num_grps_req;
+ __le16 num_grps_updated;
+ __le32 reserved;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_elem_info_bw {
+ __le16 bw_profile_idx;
+ __le16 bw_alloc;
+};
+
+struct ice_aqc_txsched_elem {
+ u8 elem_type; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_TYPE_UNDEFINED 0x0
+#define ICE_AQC_ELEM_TYPE_ROOT_PORT 0x1
+#define ICE_AQC_ELEM_TYPE_TC 0x2
+#define ICE_AQC_ELEM_TYPE_SE_GENERIC 0x3
+#define ICE_AQC_ELEM_TYPE_ENTRY_POINT 0x4
+#define ICE_AQC_ELEM_TYPE_LEAF 0x5
+#define ICE_AQC_ELEM_TYPE_SE_PADDED 0x6
+ u8 valid_sections;
+#define ICE_AQC_ELEM_VALID_GENERIC BIT(0)
+#define ICE_AQC_ELEM_VALID_CIR BIT(1)
+#define ICE_AQC_ELEM_VALID_EIR BIT(2)
+#define ICE_AQC_ELEM_VALID_SHARED BIT(3)
+ u8 generic;
+#define ICE_AQC_ELEM_GENERIC_MODE_M 0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_S 0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_M (0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S)
+#define ICE_AQC_ELEM_GENERIC_SP_S 0x4
+#define ICE_AQC_ELEM_GENERIC_SP_M (0x1 << ICE_AQC_ELEM_GENERIC_SP_S)
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S 0x5
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M \
+ (0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S)
+ u8 flags; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_FLAG_SUSPEND_M 0x1
+ struct ice_aqc_elem_info_bw cir_bw;
+ struct ice_aqc_elem_info_bw eir_bw;
+ __le16 srl_id;
+ __le16 reserved2;
+};
+
+struct ice_aqc_txsched_elem_data {
+ __le32 parent_teid;
+ __le32 node_teid;
+ struct ice_aqc_txsched_elem data;
+};
+
+struct ice_aqc_txsched_topo_grp_info_hdr {
+ __le32 parent_teid;
+ __le16 num_elems;
+ __le16 reserved2;
+};
+
+struct ice_aqc_add_elem {
+ struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+ struct ice_aqc_txsched_elem_data generic[1];
+};
+
+struct ice_aqc_get_topo_elem {
+ struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+ struct ice_aqc_txsched_elem_data
+ generic[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
+struct ice_aqc_delete_elem {
+ struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+ __le32 teid[1];
+};
+
+/* Query Scheduler Resource Allocation (indirect 0x0412)
+ * This indirect command retrieves the scheduler resources allocated by
+ * EMP Firmware to the given PF.
+ */
+struct ice_aqc_query_txsched_res {
+ u8 reserved[8];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_generic_sched_props {
+ __le16 phys_levels;
+ __le16 logical_levels;
+ u8 flattening_bitmap;
+ u8 max_device_cgds;
+ u8 max_pf_cgds;
+ u8 rsvd0;
+ __le16 rdma_qsets;
+ u8 rsvd1[22];
+};
+
+struct ice_aqc_layer_props {
+ u8 logical_layer;
+ u8 chunk_size;
+ __le16 max_device_nodes;
+ __le16 max_pf_nodes;
+ u8 rsvd0[2];
+ __le16 max_shared_rate_lmtr;
+ __le16 max_children;
+ __le16 max_cir_rl_profiles;
+ __le16 max_eir_rl_profiles;
+ __le16 max_srl_profiles;
+ u8 rsvd1[14];
+};
+
+struct ice_aqc_query_txsched_res_resp {
+ struct ice_aqc_generic_sched_props sched_props;
+ struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
+/* Get PHY capabilities (indirect 0x0600) */
+struct ice_aqc_get_phy_caps {
+ u8 lport_num;
+ u8 reserved;
+ __le16 param0;
+ /* 18.0 - Report qualified modules */
+#define ICE_AQC_GET_PHY_RQM BIT(0)
+ /* 18.1 - 18.2 : Report mode
+ * 00b - Report NVM capabilities
+ * 01b - Report topology capabilities
+ * 10b - Report SW configured
+ */
+#define ICE_AQC_REPORT_MODE_S 1
+#define ICE_AQC_REPORT_MODE_M (3 << ICE_AQC_REPORT_MODE_S)
+#define ICE_AQC_REPORT_NVM_CAP 0
+#define ICE_AQC_REPORT_TOPO_CAP BIT(1)
+#define ICE_AQC_REPORT_SW_CFG BIT(2)
+ __le32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* PHY type defines (extended). The first set of defines below is for
+ * phy_type_low.
+ */
+#define ICE_PHY_TYPE_LOW_100BASE_TX BIT_ULL(0)
+#define ICE_PHY_TYPE_LOW_100M_SGMII BIT_ULL(1)
+#define ICE_PHY_TYPE_LOW_1000BASE_T BIT_ULL(2)
+#define ICE_PHY_TYPE_LOW_1000BASE_SX BIT_ULL(3)
+#define ICE_PHY_TYPE_LOW_1000BASE_LX BIT_ULL(4)
+#define ICE_PHY_TYPE_LOW_1000BASE_KX BIT_ULL(5)
+#define ICE_PHY_TYPE_LOW_1G_SGMII BIT_ULL(6)
+#define ICE_PHY_TYPE_LOW_2500BASE_T BIT_ULL(7)
+#define ICE_PHY_TYPE_LOW_2500BASE_X BIT_ULL(8)
+#define ICE_PHY_TYPE_LOW_2500BASE_KX BIT_ULL(9)
+#define ICE_PHY_TYPE_LOW_5GBASE_T BIT_ULL(10)
+#define ICE_PHY_TYPE_LOW_5GBASE_KR BIT_ULL(11)
+#define ICE_PHY_TYPE_LOW_10GBASE_T BIT_ULL(12)
+#define ICE_PHY_TYPE_LOW_10G_SFI_DA BIT_ULL(13)
+#define ICE_PHY_TYPE_LOW_10GBASE_SR BIT_ULL(14)
+#define ICE_PHY_TYPE_LOW_10GBASE_LR BIT_ULL(15)
+#define ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 BIT_ULL(16)
+#define ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC BIT_ULL(17)
+#define ICE_PHY_TYPE_LOW_10G_SFI_C2C BIT_ULL(18)
+#define ICE_PHY_TYPE_LOW_25GBASE_T BIT_ULL(19)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR BIT_ULL(20)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR_S BIT_ULL(21)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR1 BIT_ULL(22)
+#define ICE_PHY_TYPE_LOW_25GBASE_SR BIT_ULL(23)
+#define ICE_PHY_TYPE_LOW_25GBASE_LR BIT_ULL(24)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR BIT_ULL(25)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR_S BIT_ULL(26)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR1 BIT_ULL(27)
+#define ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC BIT_ULL(28)
+#define ICE_PHY_TYPE_LOW_25G_AUI_C2C BIT_ULL(29)
+#define ICE_PHY_TYPE_LOW_40GBASE_CR4 BIT_ULL(30)
+#define ICE_PHY_TYPE_LOW_40GBASE_SR4 BIT_ULL(31)
+#define ICE_PHY_TYPE_LOW_40GBASE_LR4 BIT_ULL(32)
+#define ICE_PHY_TYPE_LOW_40GBASE_KR4 BIT_ULL(33)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC BIT_ULL(34)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI BIT_ULL(35)
+#define ICE_PHY_TYPE_LOW_MAX_INDEX 63
+
+struct ice_aqc_get_phy_caps_data {
+ __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+ __le64 reserved;
+ u8 caps;
+#define ICE_AQC_PHY_EN_TX_LINK_PAUSE BIT(0)
+#define ICE_AQC_PHY_EN_RX_LINK_PAUSE BIT(1)
+#define ICE_AQC_PHY_LOW_POWER_MODE BIT(2)
+#define ICE_AQC_PHY_EN_LINK BIT(3)
+#define ICE_AQC_PHY_AN_MODE BIT(4)
+#define ICE_AQC_GET_PHY_EN_MOD_QUAL BIT(5)
+ u8 low_power_ctrl;
+#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0)
+ __le16 eee_cap;
+#define ICE_AQC_PHY_EEE_EN_100BASE_TX BIT(0)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_T BIT(1)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_T BIT(2)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_KX BIT(3)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_KR BIT(4)
+#define ICE_AQC_PHY_EEE_EN_25GBASE_KR BIT(5)
+#define ICE_AQC_PHY_EEE_EN_40GBASE_KR4 BIT(6)
+ __le16 eeer_value;
+ u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */
+ u8 link_fec_options;
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN BIT(0)
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1)
+#define ICE_AQC_PHY_FEC_25G_RS_528_REQ BIT(2)
+#define ICE_AQC_PHY_FEC_25G_KR_REQ BIT(3)
+#define ICE_AQC_PHY_FEC_25G_RS_544_REQ BIT(4)
+#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6)
+#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7)
+ u8 extended_compliance_code;
+#define ICE_MODULE_TYPE_TOTAL_BYTE 3
+ u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+#define ICE_AQC_MOD_TYPE_BYTE0_SFP_PLUS 0xA0
+#define ICE_AQC_MOD_TYPE_BYTE0_QSFP_PLUS 0x80
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE BIT(0)
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE BIT(1)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_SR BIT(4)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LR BIT(5)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LRM BIT(6)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_ER BIT(7)
+#define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS 0xA0
+#define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS 0x86
+ u8 qualified_module_count;
+#define ICE_AQC_QUAL_MOD_COUNT_MAX 16
+ struct {
+ u8 v_oui[3];
+ u8 rsvd1;
+ u8 v_part[16];
+ __le32 v_rev;
+ __le64 rsvd8;
+ } qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
+};
+
+/* Set PHY capabilities (direct 0x0601)
+ * NOTE: This command must be followed by setup link and restart auto-neg
+ */
+struct ice_aqc_set_phy_cfg {
+ u8 lport_num;
+ u8 reserved[7];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Set PHY config command data structure */
+struct ice_aqc_set_phy_cfg_data {
+ __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+ __le64 rsvd0;
+ u8 caps;
+#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0)
+#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1)
+#define ICE_AQ_PHY_ENA_LOW_POWER BIT(2)
+#define ICE_AQ_PHY_ENA_LINK BIT(3)
+#define ICE_AQ_PHY_ENA_ATOMIC_LINK BIT(5)
+ u8 low_power_ctrl;
+ __le16 eee_cap; /* Value from ice_aqc_get_phy_caps */
+ __le16 eeer_value;
+ u8 link_fec_opt; /* Use defines from ice_aqc_get_phy_caps */
+ u8 rsvd1;
+};
+
+/* Restart AN command data structure (direct 0x0605)
+ * Also used for response, with only the lport_num field present.
+ */
+struct ice_aqc_restart_an {
+ u8 lport_num;
+ u8 reserved;
+ u8 cmd_flags;
+#define ICE_AQC_RESTART_AN_LINK_RESTART BIT(1)
+#define ICE_AQC_RESTART_AN_LINK_ENABLE BIT(2)
+ u8 reserved2[13];
+};
+
+/* Get link status (indirect 0x0607), also used for Link Status Event */
+struct ice_aqc_get_link_status {
+ u8 lport_num;
+ u8 reserved;
+ __le16 cmd_flags;
+#define ICE_AQ_LSE_M 0x3
+#define ICE_AQ_LSE_NOP 0x0
+#define ICE_AQ_LSE_DIS 0x2
+#define ICE_AQ_LSE_ENA 0x3
+ /* only response uses this flag */
+#define ICE_AQ_LSE_IS_ENABLED 0x1
+ __le32 reserved2;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Get link status response data structure, also used for Link Status Event */
+struct ice_aqc_get_link_status_data {
+ u8 topo_media_conflict;
+#define ICE_AQ_LINK_TOPO_CONFLICT BIT(0)
+#define ICE_AQ_LINK_MEDIA_CONFLICT BIT(1)
+#define ICE_AQ_LINK_TOPO_CORRUPT BIT(2)
+ u8 reserved1;
+ u8 link_info;
+#define ICE_AQ_LINK_UP BIT(0) /* Link Status */
+#define ICE_AQ_LINK_FAULT BIT(1)
+#define ICE_AQ_LINK_FAULT_TX BIT(2)
+#define ICE_AQ_LINK_FAULT_RX BIT(3)
+#define ICE_AQ_LINK_FAULT_REMOTE BIT(4)
+#define ICE_AQ_LINK_UP_PORT BIT(5) /* External Port Link Status */
+#define ICE_AQ_MEDIA_AVAILABLE BIT(6)
+#define ICE_AQ_SIGNAL_DETECT BIT(7)
+ u8 an_info;
+#define ICE_AQ_AN_COMPLETED BIT(0)
+#define ICE_AQ_LP_AN_ABILITY BIT(1)
+#define ICE_AQ_PD_FAULT BIT(2) /* Parallel Detection Fault */
+#define ICE_AQ_FEC_EN BIT(3)
+#define ICE_AQ_PHY_LOW_POWER BIT(4) /* Low Power State */
+#define ICE_AQ_LINK_PAUSE_TX BIT(5)
+#define ICE_AQ_LINK_PAUSE_RX BIT(6)
+#define ICE_AQ_QUALIFIED_MODULE BIT(7)
+ u8 ext_info;
+#define ICE_AQ_LINK_PHY_TEMP_ALARM BIT(0)
+#define ICE_AQ_LINK_EXCESSIVE_ERRORS BIT(1) /* Excessive Link Errors */
+ /* Port TX Suspended */
+#define ICE_AQ_LINK_TX_S 2
+#define ICE_AQ_LINK_TX_M (0x03 << ICE_AQ_LINK_TX_S)
+#define ICE_AQ_LINK_TX_ACTIVE 0
+#define ICE_AQ_LINK_TX_DRAINED 1
+#define ICE_AQ_LINK_TX_FLUSHED 3
+ u8 reserved2;
+ __le16 max_frame_size;
+ u8 cfg;
+#define ICE_AQ_LINK_25G_KR_FEC_EN BIT(0)
+#define ICE_AQ_LINK_25G_RS_528_FEC_EN BIT(1)
+#define ICE_AQ_LINK_25G_RS_544_FEC_EN BIT(2)
+ /* Pacing Config */
+#define ICE_AQ_CFG_PACING_S 3
+#define ICE_AQ_CFG_PACING_M (0xF << ICE_AQ_CFG_PACING_S)
+#define ICE_AQ_CFG_PACING_TYPE_M BIT(7)
+#define ICE_AQ_CFG_PACING_TYPE_AVG 0
+#define ICE_AQ_CFG_PACING_TYPE_FIXED ICE_AQ_CFG_PACING_TYPE_M
+ /* External Device Power Ability */
+ u8 power_desc;
+#define ICE_AQ_PWR_CLASS_M 0x3
+#define ICE_AQ_LINK_PWR_BASET_LOW_HIGH 0
+#define ICE_AQ_LINK_PWR_BASET_HIGH 1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_1 0
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_2 1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_3 2
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_4 3
+ __le16 link_speed;
+#define ICE_AQ_LINK_SPEED_10MB BIT(0)
+#define ICE_AQ_LINK_SPEED_100MB BIT(1)
+#define ICE_AQ_LINK_SPEED_1000MB BIT(2)
+#define ICE_AQ_LINK_SPEED_2500MB BIT(3)
+#define ICE_AQ_LINK_SPEED_5GB BIT(4)
+#define ICE_AQ_LINK_SPEED_10GB BIT(5)
+#define ICE_AQ_LINK_SPEED_20GB BIT(6)
+#define ICE_AQ_LINK_SPEED_25GB BIT(7)
+#define ICE_AQ_LINK_SPEED_40GB BIT(8)
+#define ICE_AQ_LINK_SPEED_UNKNOWN BIT(15)
+ __le32 reserved3; /* Aligns next field to 8-byte boundary */
+ __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+ __le64 reserved4;
+};
+
+/* Set event mask command (direct 0x0613) */
+struct ice_aqc_set_event_mask {
+ u8 lport_num;
+ u8 reserved[7];
+ __le16 event_mask;
+#define ICE_AQ_LINK_EVENT_UPDOWN BIT(1)
+#define ICE_AQ_LINK_EVENT_MEDIA_NA BIT(2)
+#define ICE_AQ_LINK_EVENT_LINK_FAULT BIT(3)
+#define ICE_AQ_LINK_EVENT_PHY_TEMP_ALARM BIT(4)
+#define ICE_AQ_LINK_EVENT_EXCESSIVE_ERRORS BIT(5)
+#define ICE_AQ_LINK_EVENT_SIGNAL_DETECT BIT(6)
+#define ICE_AQ_LINK_EVENT_AN_COMPLETED BIT(7)
+#define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL BIT(8)
+#define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED BIT(9)
+ u8 reserved1[6];
+};
+
+/* NVM Read command (indirect 0x0701)
+ * NVM Erase commands (direct 0x0702)
+ * NVM Update commands (indirect 0x0703)
+ */
+struct ice_aqc_nvm {
+ u8 cmd_flags;
+#define ICE_AQC_NVM_LAST_CMD BIT(0)
+#define ICE_AQC_NVM_PCIR_REQ BIT(0) /* Used by NVM Update reply */
+#define ICE_AQC_NVM_PRESERVATION_S 1
+#define ICE_AQC_NVM_PRESERVATION_M (3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_NO_PRESERVATION (0 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_ALL BIT(1)
+#define ICE_AQC_NVM_PRESERVE_SELECTED (3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_FLASH_ONLY BIT(7)
+ u8 module_typeid;
+ __le16 length;
+#define ICE_AQC_NVM_ERASE_LEN 0xFFFF
+ __le32 offset;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Get/Set RSS key (indirect 0x0B04/0x0B02) */
+struct ice_aqc_get_set_rss_key {
+#define ICE_AQC_GSET_RSS_KEY_VSI_VALID BIT(15)
+#define ICE_AQC_GSET_RSS_KEY_VSI_ID_S 0
+#define ICE_AQC_GSET_RSS_KEY_VSI_ID_M (0x3FF << ICE_AQC_GSET_RSS_KEY_VSI_ID_S)
+ __le16 vsi_id;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE 0x28
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE 0xC
+
+struct ice_aqc_get_set_rss_keys {
+ u8 standard_rss_key[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
+ u8 extended_hash_key[ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE];
+};
+
+/* Get/Set RSS LUT (indirect 0x0B05/0x0B03) */
+struct ice_aqc_get_set_rss_lut {
+#define ICE_AQC_GSET_RSS_LUT_VSI_VALID BIT(15)
+#define ICE_AQC_GSET_RSS_LUT_VSI_ID_S 0
+#define ICE_AQC_GSET_RSS_LUT_VSI_ID_M (0x1FF << ICE_AQC_GSET_RSS_LUT_VSI_ID_S)
+ __le16 vsi_id;
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M \
+ (0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S)
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF 1
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL 2
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S 2
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M \
+ (0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S)
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128 128
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512 512
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG 1
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K 2048
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG 2
+
+#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S 4
+#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M \
+ (0xF << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S)
+
+ __le16 flags;
+ __le32 reserved;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Add TX LAN Queues (indirect 0x0C30) */
+struct ice_aqc_add_txqs {
+ u8 num_qgrps;
+ u8 reserved[3];
+ __le32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* This is the descriptor of each queue entry for the Add TX LAN Queues
+ * command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp.
+ */
+struct ice_aqc_add_txqs_perq {
+ __le16 txq_id;
+ u8 rsvd[2];
+ __le32 q_teid;
+ u8 txq_ctx[22];
+ u8 rsvd2[2];
+ struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add TX LAN Queues (0x0C30)
+ * is an array of the following structs. Note that the length of each
+ * struct ice_aqc_add_tx_qgrp varies with the number of queues in the
+ * group.
+ */
+struct ice_aqc_add_tx_qgrp {
+ __le32 parent_teid;
+ u8 num_txqs;
+ u8 rsvd[3];
+ struct ice_aqc_add_txqs_perq txqs[1];
+};
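Because of the variable-length txqs[] tail, the buffer size for a group carrying n queues has to be computed rather than taken from sizeof; a sketch of the arithmetic:

	/* bytes needed for one Add TX LAN Queues group carrying n queues */
	size_t grp_sz = offsetof(struct ice_aqc_add_tx_qgrp, txqs) +
			n * sizeof(struct ice_aqc_add_txqs_perq);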
+
+/* Disable TX LAN Queues (indirect 0x0C31) */
+struct ice_aqc_dis_txqs {
+ u8 cmd_type;
+#define ICE_AQC_Q_DIS_CMD_S 0
+#define ICE_AQC_Q_DIS_CMD_M (0x3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_NO_FUNC_RESET (0 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VM_RESET BIT(ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VF_RESET (2 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_PF_RESET (3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_SUBSEQ_CALL BIT(2)
+#define ICE_AQC_Q_DIS_CMD_FLUSH_PIPE BIT(3)
+ u8 num_entries;
+ __le16 vmvf_and_timeout;
+#define ICE_AQC_Q_DIS_VMVF_NUM_S 0
+#define ICE_AQC_Q_DIS_VMVF_NUM_M (0x3FF << ICE_AQC_Q_DIS_VMVF_NUM_S)
+#define ICE_AQC_Q_DIS_TIMEOUT_S 10
+#define ICE_AQC_Q_DIS_TIMEOUT_M (0x3F << ICE_AQC_Q_DIS_TIMEOUT_S)
+ __le32 blocked_cgds;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* The buffer for Disable TX LAN Queues (indirect 0x0C31)
+ * contains the following structures, arrayed one after the
+ * other.
+ * Note: Since the q_id is 16 bits wide, if the
+ * number of queues is even, then 2 bytes of alignment MUST be
+ * added before the start of the next group, to allow correct
+ * alignment of the parent_teid field.
+ */
+struct ice_aqc_dis_txq_item {
+ __le32 parent_teid;
+ u8 num_qs;
+ u8 rsvd;
+ /* The length of the q_id array varies according to num_qs */
+ __le16 q_id[1];
+ /* This only applies from F8 onward */
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S 15
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_LAN_Q \
+ (0 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET \
+ (1 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+};
+
+struct ice_aqc_dis_txq {
+ struct ice_aqc_dis_txq_item qgrps[1];
+};
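A sketch of the sizing rule from the comment above: 6 bytes of group header plus 2 bytes per queue ID, padded to a 4-byte boundary so the next group's parent_teid stays aligned (the pad is only non-zero when num_qs is even; the helper name is hypothetical):

	static u16 ice_demo_dis_txq_grp_size(u8 num_qs)
	{
		/* 4 (parent_teid) + 1 (num_qs) + 1 (rsvd) + 2 * num_qs */
		return ALIGN(6 + 2 * (u16)num_qs, 4);
	}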
+
+/**
+ * struct ice_aq_desc - Admin Queue (AQ) descriptor
+ * @flags: ICE_AQ_FLAG_* flags
+ * @opcode: AQ command opcode
+ * @datalen: length in bytes of indirect/external data buffer
+ * @retval: return value from firmware
+ * @cookie_high: opaque data high-half
+ * @cookie_low: opaque data low-half
+ * @params: command-specific parameters
+ *
+ * Descriptor format for commands the driver posts on the Admin Transmit Queue
+ * (ATQ). The firmware writes back onto the command descriptor and returns
+ * the result of the command. Asynchronous events that are not an immediate
+ * result of the command are written to the Admin Receive Queue (ARQ) using
+ * the same descriptor format. Descriptors are in little-endian notation with
+ * 32-bit words.
+ */
+struct ice_aq_desc {
+ __le16 flags;
+ __le16 opcode;
+ __le16 datalen;
+ __le16 retval;
+ __le32 cookie_high;
+ __le32 cookie_low;
+ union {
+ u8 raw[16];
+ struct ice_aqc_generic generic;
+ struct ice_aqc_get_ver get_ver;
+ struct ice_aqc_q_shutdown q_shutdown;
+ struct ice_aqc_req_res res_owner;
+ struct ice_aqc_manage_mac_read mac_read;
+ struct ice_aqc_manage_mac_write mac_write;
+ struct ice_aqc_clear_pxe clear_pxe;
+ struct ice_aqc_list_caps get_cap;
+ struct ice_aqc_get_phy_caps get_phy;
+ struct ice_aqc_set_phy_cfg set_phy;
+ struct ice_aqc_restart_an restart_an;
+ struct ice_aqc_get_sw_cfg get_sw_conf;
+ struct ice_aqc_sw_rules sw_rules;
+ struct ice_aqc_get_topo get_topo;
+ struct ice_aqc_get_cfg_elem get_update_elem;
+ struct ice_aqc_query_txsched_res query_sched_res;
+ struct ice_aqc_add_move_delete_elem add_move_delete_elem;
+ struct ice_aqc_nvm nvm;
+ struct ice_aqc_get_set_rss_lut get_set_rss_lut;
+ struct ice_aqc_get_set_rss_key get_set_rss_key;
+ struct ice_aqc_add_txqs add_txqs;
+ struct ice_aqc_dis_txqs dis_txqs;
+ struct ice_aqc_add_get_update_free_vsi vsi_cmd;
+ struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
+ struct ice_aqc_set_event_mask set_event_mask;
+ struct ice_aqc_get_link_status get_link_status;
+ } params;
+};
+
+/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */
+#define ICE_AQ_LG_BUF 512
+
+#define ICE_AQ_FLAG_ERR_S 2
+#define ICE_AQ_FLAG_LB_S 9
+#define ICE_AQ_FLAG_RD_S 10
+#define ICE_AQ_FLAG_BUF_S 12
+#define ICE_AQ_FLAG_SI_S 13
+
+#define ICE_AQ_FLAG_ERR BIT(ICE_AQ_FLAG_ERR_S) /* 0x4 */
+#define ICE_AQ_FLAG_LB BIT(ICE_AQ_FLAG_LB_S) /* 0x200 */
+#define ICE_AQ_FLAG_RD BIT(ICE_AQ_FLAG_RD_S) /* 0x400 */
+#define ICE_AQ_FLAG_BUF BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
+#define ICE_AQ_FLAG_SI BIT(ICE_AQ_FLAG_SI_S) /* 0x2000 */
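A sketch of how these flags combine for an indirect command, as far as this patch's control-queue code appears to use them (not the verbatim send path): ICE_AQ_FLAG_BUF marks an attached external buffer, ICE_AQ_FLAG_RD marks that the buffer carries data for firmware to read rather than space for a response, and ICE_AQ_FLAG_LB marks a buffer larger than ICE_AQ_LG_BUF:

	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
	if (buf_holds_command_data)	/* hypothetical condition */
		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
	if (buf_size > ICE_AQ_LG_BUF)
		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_LB);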
+
+/* error codes */
+enum ice_aq_err {
+ ICE_AQ_RC_OK = 0, /* success */
+ ICE_AQ_RC_ENOMEM = 9, /* Out of memory */
+ ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */
+ ICE_AQ_RC_EEXIST = 13, /* object already exists */
+ ICE_AQ_RC_ENOSPC = 16, /* No space left or allocation failure */
+};
+
+/* Admin Queue command opcodes */
+enum ice_adminq_opc {
+ /* AQ commands */
+ ice_aqc_opc_get_ver = 0x0001,
+ ice_aqc_opc_q_shutdown = 0x0003,
+
+ /* resource ownership */
+ ice_aqc_opc_req_res = 0x0008,
+ ice_aqc_opc_release_res = 0x0009,
+
+ /* device/function capabilities */
+ ice_aqc_opc_list_func_caps = 0x000A,
+ ice_aqc_opc_list_dev_caps = 0x000B,
+
+ /* manage MAC address */
+ ice_aqc_opc_manage_mac_read = 0x0107,
+ ice_aqc_opc_manage_mac_write = 0x0108,
+
+ /* PXE */
+ ice_aqc_opc_clear_pxe_mode = 0x0110,
+
+ /* internal switch commands */
+ ice_aqc_opc_get_sw_cfg = 0x0200,
+
+ /* Alloc/Free/Get Resources */
+ ice_aqc_opc_alloc_res = 0x0208,
+ ice_aqc_opc_free_res = 0x0209,
+
+ /* VSI commands */
+ ice_aqc_opc_add_vsi = 0x0210,
+ ice_aqc_opc_update_vsi = 0x0211,
+ ice_aqc_opc_free_vsi = 0x0213,
+
+ /* switch rules population commands */
+ ice_aqc_opc_add_sw_rules = 0x02A0,
+ ice_aqc_opc_update_sw_rules = 0x02A1,
+ ice_aqc_opc_remove_sw_rules = 0x02A2,
+
+ ice_aqc_opc_clear_pf_cfg = 0x02A4,
+
+ /* transmit scheduler commands */
+ ice_aqc_opc_get_dflt_topo = 0x0400,
+ ice_aqc_opc_add_sched_elems = 0x0401,
+ ice_aqc_opc_suspend_sched_elems = 0x0409,
+ ice_aqc_opc_resume_sched_elems = 0x040A,
+ ice_aqc_opc_delete_sched_elems = 0x040F,
+ ice_aqc_opc_query_sched_res = 0x0412,
+
+ /* PHY commands */
+ ice_aqc_opc_get_phy_caps = 0x0600,
+ ice_aqc_opc_set_phy_cfg = 0x0601,
+ ice_aqc_opc_restart_an = 0x0605,
+ ice_aqc_opc_get_link_status = 0x0607,
+ ice_aqc_opc_set_event_mask = 0x0613,
+
+ /* NVM commands */
+ ice_aqc_opc_nvm_read = 0x0701,
+
+ /* RSS commands */
+ ice_aqc_opc_set_rss_key = 0x0B02,
+ ice_aqc_opc_set_rss_lut = 0x0B03,
+ ice_aqc_opc_get_rss_key = 0x0B04,
+ ice_aqc_opc_get_rss_lut = 0x0B05,
+
+ /* TX queue handling commands/events */
+ ice_aqc_opc_add_txqs = 0x0C30,
+ ice_aqc_opc_dis_txqs = 0x0C31,
+};
+
+#endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
new file mode 100644
index 000000000000..385f5d425d19
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -0,0 +1,2233 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_adminq_cmd.h"
+
+#define ICE_PF_RESET_WAIT_COUNT 200
+
+#define ICE_NIC_FLX_ENTRY(hw, mdid, idx) \
+ wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(ICE_RXDID_FLEX_NIC), \
+ ((ICE_RX_OPC_MDID << \
+ GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \
+ GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \
+ (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \
+ GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M))
+
+#define ICE_NIC_FLX_FLG_ENTRY(hw, flg_0, flg_1, flg_2, flg_3, idx) \
+ wr32((hw), GLFLXP_RXDID_FLAGS(ICE_RXDID_FLEX_NIC, idx), \
+ (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \
+ GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \
+ (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \
+ GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M) | \
+ (((flg_2) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) & \
+ GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M) | \
+ (((flg_3) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) & \
+ GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M))
+
+/**
+ * ice_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the MAC type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ */
+static enum ice_status ice_set_mac_type(struct ice_hw *hw)
+{
+ if (hw->vendor_id != PCI_VENDOR_ID_INTEL)
+ return ICE_ERR_DEVICE_NOT_SUPPORTED;
+
+ hw->mac_type = ICE_MAC_GENERIC;
+ return 0;
+}
+
+/**
+ * ice_clear_pf_cfg - Clear PF configuration
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_clear_pf_cfg(struct ice_hw *hw)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pf_cfg);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_manage_mac_read - manage MAC address read command
+ * @hw: pointer to the hw struct
+ * @buf: a virtual buffer to hold the manage MAC read response
+ * @buf_size: Size of the virtual buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to return per PF station MAC address (0x0107).
+ * NOTE: Upon successful completion of this command, MAC address information
+ * is returned in the user-specified buffer, which should be interpreted as a
+ * "manage_mac_read" response. The returned addresses are also stored in the
+ * HW struct (port.mac). ice_aq_discover_caps is expected to be called before
+ * this function.
+ */
+static enum ice_status
+ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_manage_mac_read_resp *resp;
+ struct ice_aqc_manage_mac_read *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+ u16 flags;
+
+ cmd = &desc.params.mac_read;
+
+ if (buf_size < sizeof(*resp))
+ return ICE_ERR_BUF_TOO_SHORT;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_read);
+
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (status)
+ return status;
+
+ resp = (struct ice_aqc_manage_mac_read_resp *)buf;
+ flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M;
+
+ if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) {
+ ice_debug(hw, ICE_DBG_LAN, "got invalid MAC address\n");
+ return ICE_ERR_CFG;
+ }
+
+ ether_addr_copy(hw->port_info->mac.lan_addr, resp->mac_addr);
+ ether_addr_copy(hw->port_info->mac.perm_addr, resp->mac_addr);
+ return 0;
+}
+
+/**
+ * ice_aq_get_phy_caps - returns PHY capabilities
+ * @pi: port information structure
+ * @qual_mods: report qualified modules
+ * @report_mode: report mode capabilities
+ * @pcaps: structure for PHY capabilities to be filled
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns the various PHY capabilities supported on the Port (0x0600)
+ */
+static enum ice_status
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+ struct ice_aqc_get_phy_caps_data *pcaps,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_phy_caps *cmd;
+ u16 pcaps_size = sizeof(*pcaps);
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.get_phy;
+
+ if (!pcaps || (report_mode & ~ICE_AQC_REPORT_MODE_M) || !pi)
+ return ICE_ERR_PARAM;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
+
+ if (qual_mods)
+ cmd->param0 |= cpu_to_le16(ICE_AQC_GET_PHY_RQM);
+
+ cmd->param0 |= cpu_to_le16(report_mode);
+ status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd);
+
+ if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP)
+ pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
+
+ return status;
+}
+
+/**
+ * ice_get_media_type - Gets media type
+ * @pi: port information structure
+ */
+static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
+{
+ struct ice_link_status *hw_link_info;
+
+ if (!pi)
+ return ICE_MEDIA_UNKNOWN;
+
+ hw_link_info = &pi->phy.link_info;
+
+ if (hw_link_info->phy_type_low) {
+ switch (hw_link_info->phy_type_low) {
+ case ICE_PHY_TYPE_LOW_1000BASE_SX:
+ case ICE_PHY_TYPE_LOW_1000BASE_LX:
+ case ICE_PHY_TYPE_LOW_10GBASE_SR:
+ case ICE_PHY_TYPE_LOW_10GBASE_LR:
+ case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+ case ICE_PHY_TYPE_LOW_25GBASE_SR:
+ case ICE_PHY_TYPE_LOW_25GBASE_LR:
+ case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+ case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+ case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+ return ICE_MEDIA_FIBER;
+ case ICE_PHY_TYPE_LOW_100BASE_TX:
+ case ICE_PHY_TYPE_LOW_1000BASE_T:
+ case ICE_PHY_TYPE_LOW_2500BASE_T:
+ case ICE_PHY_TYPE_LOW_5GBASE_T:
+ case ICE_PHY_TYPE_LOW_10GBASE_T:
+ case ICE_PHY_TYPE_LOW_25GBASE_T:
+ return ICE_MEDIA_BASET;
+ case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+ case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+ return ICE_MEDIA_DA;
+ case ICE_PHY_TYPE_LOW_1000BASE_KX:
+ case ICE_PHY_TYPE_LOW_2500BASE_KX:
+ case ICE_PHY_TYPE_LOW_2500BASE_X:
+ case ICE_PHY_TYPE_LOW_5GBASE_KR:
+ case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+ case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+ return ICE_MEDIA_BACKPLANE;
+ }
+ }
+
+ return ICE_MEDIA_UNKNOWN;
+}
+
+/**
+ * ice_aq_get_link_info
+ * @pi: port information structure
+ * @ena_lse: enable/disable LinkStatusEvent reporting
+ * @link: pointer to link status structure - optional
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Link Status (0x607). Returns the link status of the adapter.
+ */
+enum ice_status
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+ struct ice_link_status *link, struct ice_sq_cd *cd)
+{
+ struct ice_link_status *hw_link_info_old, *hw_link_info;
+ struct ice_aqc_get_link_status_data link_data = { 0 };
+ struct ice_aqc_get_link_status *resp;
+ enum ice_media_type *hw_media_type;
+ struct ice_fc_info *hw_fc_info;
+ bool tx_pause, rx_pause;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+ u16 cmd_flags;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+ hw_link_info_old = &pi->phy.link_info_old;
+ hw_media_type = &pi->phy.media_type;
+ hw_link_info = &pi->phy.link_info;
+ hw_fc_info = &pi->fc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
+ cmd_flags = (ena_lse) ? ICE_AQ_LSE_ENA : ICE_AQ_LSE_DIS;
+ resp = &desc.params.get_link_status;
+ resp->cmd_flags = cpu_to_le16(cmd_flags);
+ resp->lport_num = pi->lport;
+
+ status = ice_aq_send_cmd(pi->hw, &desc, &link_data, sizeof(link_data),
+ cd);
+
+ if (status)
+ return status;
+
+ /* save off old link status information */
+ *hw_link_info_old = *hw_link_info;
+
+ /* update current link status information */
+ hw_link_info->link_speed = le16_to_cpu(link_data.link_speed);
+ hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low);
+ *hw_media_type = ice_get_media_type(pi);
+ hw_link_info->link_info = link_data.link_info;
+ hw_link_info->an_info = link_data.an_info;
+ hw_link_info->ext_info = link_data.ext_info;
+ hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+ hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M;
+
+ /* update fc info */
+ tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX);
+ rx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_RX);
+ if (tx_pause && rx_pause)
+ hw_fc_info->current_mode = ICE_FC_FULL;
+ else if (tx_pause)
+ hw_fc_info->current_mode = ICE_FC_TX_PAUSE;
+ else if (rx_pause)
+ hw_fc_info->current_mode = ICE_FC_RX_PAUSE;
+ else
+ hw_fc_info->current_mode = ICE_FC_NONE;
+
+ hw_link_info->lse_ena =
+ !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+
+ /* save link status information */
+ if (link)
+ *link = *hw_link_info;
+
+ /* flag cleared so calling functions don't call AQ again */
+ pi->phy.get_link_info = false;
+
+ return status;
+}
+
+/**
+ * ice_init_flex_parser - initialize rx flex parser
+ * @hw: pointer to the hardware structure
+ *
+ * Function to initialize flex descriptors
+ */
+static void ice_init_flex_parser(struct ice_hw *hw)
+{
+ u8 idx = 0;
+
+ ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_LOW, 0);
+ ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_HIGH, 1);
+ ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_LOWER, 2);
+ ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_HIGH, 3);
+ ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_FRG, ICE_RXFLG_UDP_GRE,
+ ICE_RXFLG_PKT_DSI, ICE_RXFLG_FIN, idx++);
+ ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_SYN, ICE_RXFLG_RST,
+ ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx++);
+ ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI,
+ ICE_RXFLG_EVLAN_x8100, ICE_RXFLG_EVLAN_x9100,
+ idx++);
+ ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_VLAN_x8100, ICE_RXFLG_TNL_VLAN,
+ ICE_RXFLG_TNL_MAC, ICE_RXFLG_TNL0, idx++);
+ ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_TNL1, ICE_RXFLG_TNL2,
+ ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx);
+}
+
+/**
+ * ice_init_fltr_mgmt_struct - initializes filter management list and locks
+ * @hw: pointer to the hw struct
+ */
+static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw)
+{
+ struct ice_switch_info *sw;
+
+ hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*hw->switch_info), GFP_KERNEL);
+ sw = hw->switch_info;
+
+ if (!sw)
+ return ICE_ERR_NO_MEMORY;
+
+ INIT_LIST_HEAD(&sw->vsi_list_map_head);
+
+ mutex_init(&sw->mac_list_lock);
+ INIT_LIST_HEAD(&sw->mac_list_head);
+
+ mutex_init(&sw->vlan_list_lock);
+ INIT_LIST_HEAD(&sw->vlan_list_head);
+
+ mutex_init(&sw->eth_m_list_lock);
+ INIT_LIST_HEAD(&sw->eth_m_list_head);
+
+ mutex_init(&sw->promisc_list_lock);
+ INIT_LIST_HEAD(&sw->promisc_list_head);
+
+ mutex_init(&sw->mac_vlan_list_lock);
+ INIT_LIST_HEAD(&sw->mac_vlan_list_head);
+
+ return 0;
+}
+
+/**
+ * ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks
+ * @hw: pointer to the hw struct
+ */
+static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_vsi_list_map_info *v_pos_map;
+ struct ice_vsi_list_map_info *v_tmp_map;
+
+ list_for_each_entry_safe(v_pos_map, v_tmp_map, &sw->vsi_list_map_head,
+ list_entry) {
+ list_del(&v_pos_map->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), v_pos_map);
+ }
+
+ mutex_destroy(&sw->mac_list_lock);
+ mutex_destroy(&sw->vlan_list_lock);
+ mutex_destroy(&sw->eth_m_list_lock);
+ mutex_destroy(&sw->promisc_list_lock);
+ mutex_destroy(&sw->mac_vlan_list_lock);
+
+ devm_kfree(ice_hw_to_dev(hw), sw);
+}
+
+/**
+ * ice_init_hw - main hardware initialization routine
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_init_hw(struct ice_hw *hw)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ enum ice_status status;
+ u16 mac_buf_len;
+ void *mac_buf;
+
+ /* Set MAC type based on DeviceID */
+ status = ice_set_mac_type(hw);
+ if (status)
+ return status;
+
+ hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) &
+ PF_FUNC_RID_FUNC_NUM_M) >>
+ PF_FUNC_RID_FUNC_NUM_S;
+
+ status = ice_reset(hw, ICE_RESET_PFR);
+ if (status)
+ return status;
+
+ /* set these values to minimum allowed */
+ hw->itr_gran_200 = ICE_ITR_GRAN_MIN_200;
+ hw->itr_gran_100 = ICE_ITR_GRAN_MIN_100;
+ hw->itr_gran_50 = ICE_ITR_GRAN_MIN_50;
+ hw->itr_gran_25 = ICE_ITR_GRAN_MIN_25;
+
+ status = ice_init_all_ctrlq(hw);
+ if (status)
+ goto err_unroll_cqinit;
+
+ status = ice_clear_pf_cfg(hw);
+ if (status)
+ goto err_unroll_cqinit;
+
+ ice_clear_pxe_mode(hw);
+
+ status = ice_init_nvm(hw);
+ if (status)
+ goto err_unroll_cqinit;
+
+ status = ice_get_caps(hw);
+ if (status)
+ goto err_unroll_cqinit;
+
+ hw->port_info = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*hw->port_info), GFP_KERNEL);
+ if (!hw->port_info) {
+ status = ICE_ERR_NO_MEMORY;
+ goto err_unroll_cqinit;
+ }
+
+ /* set the back pointer to hw */
+ hw->port_info->hw = hw;
+
+ /* Initialize port_info struct with switch configuration data */
+ status = ice_get_initial_sw_cfg(hw);
+ if (status)
+ goto err_unroll_alloc;
+
+ hw->evb_veb = true;
+
+ /* Query the allocated resources for tx scheduler */
+ status = ice_sched_query_res_alloc(hw);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SCHED,
+ "Failed to get scheduler allocated resources\n");
+ goto err_unroll_alloc;
+ }
+
+ /* Initialize port_info struct with scheduler data */
+ status = ice_sched_init_port(hw->port_info);
+ if (status)
+ goto err_unroll_sched;
+
+ pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps) {
+ status = ICE_ERR_NO_MEMORY;
+ goto err_unroll_sched;
+ }
+
+ /* Initialize port_info struct with PHY capabilities */
+ status = ice_aq_get_phy_caps(hw->port_info, false,
+ ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL);
+ devm_kfree(ice_hw_to_dev(hw), pcaps);
+ if (status)
+ goto err_unroll_sched;
+
+ /* Initialize port_info struct with link information */
+ status = ice_aq_get_link_info(hw->port_info, false, NULL, NULL);
+ if (status)
+ goto err_unroll_sched;
+
+ status = ice_init_fltr_mgmt_struct(hw);
+ if (status)
+ goto err_unroll_sched;
+
+ /* Get port MAC information */
+ mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp);
+ mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL);
+
+ if (!mac_buf) {
+ status = ICE_ERR_NO_MEMORY;
+ goto err_unroll_fltr_mgmt_struct;
+ }
+
+ status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
+ devm_kfree(ice_hw_to_dev(hw), mac_buf);
+
+ if (status)
+ goto err_unroll_fltr_mgmt_struct;
+
+ ice_init_flex_parser(hw);
+
+ return 0;
+
+err_unroll_fltr_mgmt_struct:
+ ice_cleanup_fltr_mgmt_struct(hw);
+err_unroll_sched:
+ ice_sched_cleanup_all(hw);
+err_unroll_alloc:
+ devm_kfree(ice_hw_to_dev(hw), hw->port_info);
+err_unroll_cqinit:
+ ice_shutdown_all_ctrlq(hw);
+ return status;
+}
+
+/**
+ * ice_deinit_hw - unroll initialization operations done by ice_init_hw
+ * @hw: pointer to the hardware structure
+ */
+void ice_deinit_hw(struct ice_hw *hw)
+{
+ ice_sched_cleanup_all(hw);
+ ice_shutdown_all_ctrlq(hw);
+
+ if (hw->port_info) {
+ devm_kfree(ice_hw_to_dev(hw), hw->port_info);
+ hw->port_info = NULL;
+ }
+
+ ice_cleanup_fltr_mgmt_struct(hw);
+}
+
+/**
+ * ice_check_reset - Check to see if a global reset is complete
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_check_reset(struct ice_hw *hw)
+{
+ u32 cnt, reg = 0, grst_delay;
+
+ /* Poll for Device Active state in case a recent CORER, GLOBR,
+ * or EMPR has occurred. The grst delay value is in 100ms units.
+ * Add 1sec for outstanding AQ commands that can take a long time.
+ */
+ grst_delay = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >>
+ GLGEN_RSTCTL_GRSTDEL_S) + 10;
+
+ for (cnt = 0; cnt < grst_delay; cnt++) {
+ mdelay(100);
+ reg = rd32(hw, GLGEN_RSTAT);
+ if (!(reg & GLGEN_RSTAT_DEVSTATE_M))
+ break;
+ }
+
+ if (cnt == grst_delay) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Global reset polling failed to complete.\n");
+ return ICE_ERR_RESET_FAILED;
+ }
+
+#define ICE_RESET_DONE_MASK (GLNVM_ULD_CORER_DONE_M | \
+ GLNVM_ULD_GLOBR_DONE_M)
+
+ /* Device is Active; check Global Reset processes are done */
+ for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
+ reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK;
+ if (reg == ICE_RESET_DONE_MASK) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Global reset processes done. %d\n", cnt);
+ break;
+ }
+ mdelay(10);
+ }
+
+ if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Wait for Reset Done timed out. GLNVM_ULD = 0x%x\n",
+ reg);
+ return ICE_ERR_RESET_FAILED;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_pf_reset - Reset the PF
+ * @hw: pointer to the hardware structure
+ *
+ * If a global reset has been triggered, this function checks
+ * for its completion and then issues the PF reset
+ */
+static enum ice_status ice_pf_reset(struct ice_hw *hw)
+{
+ u32 cnt, reg;
+
+ /* If at function entry a global reset was already in progress, i.e.
+ * state is not 'device active' or any of the reset done bits are not
+ * set in GLNVM_ULD, there is no need for a PF Reset; poll until the
+ * global reset is done.
+ */
+ if ((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) ||
+ (rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK) ^ ICE_RESET_DONE_MASK) {
+ /* poll on global reset currently in progress until done */
+ if (ice_check_reset(hw))
+ return ICE_ERR_RESET_FAILED;
+
+ return 0;
+ }
+
+ /* Reset the PF */
+ reg = rd32(hw, PFGEN_CTRL);
+
+ wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M));
+
+ for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
+ reg = rd32(hw, PFGEN_CTRL);
+ if (!(reg & PFGEN_CTRL_PFSWR_M))
+ break;
+
+ mdelay(1);
+ }
+
+ if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "PF reset polling failed to complete.\n");
+ return ICE_ERR_RESET_FAILED;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_reset - Perform different types of reset
+ * @hw: pointer to the hardware structure
+ * @req: reset request
+ *
+ * This function triggers a reset as specified by the req parameter.
+ *
+ * Note:
+ * If anything other than a PF reset is triggered, PXE mode is restored.
+ * This has to be cleared using ice_clear_pxe_mode again, once the AQ
+ * interface has been restored in the rebuild flow.
+ */
+enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req)
+{
+ u32 val = 0;
+
+ switch (req) {
+ case ICE_RESET_PFR:
+ return ice_pf_reset(hw);
+ case ICE_RESET_CORER:
+ ice_debug(hw, ICE_DBG_INIT, "CoreR requested\n");
+ val = GLGEN_RTRIG_CORER_M;
+ break;
+ case ICE_RESET_GLOBR:
+ ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n");
+ val = GLGEN_RTRIG_GLOBR_M;
+ break;
+ }
+
+ val |= rd32(hw, GLGEN_RTRIG);
+ wr32(hw, GLGEN_RTRIG, val);
+ ice_flush(hw);
+
+ /* wait for the FW to be ready */
+ return ice_check_reset(hw);
+}
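+
+/* Illustrative usage sketch (not driver code): requesting a core reset and
+ * then clearing PXE mode once the AQ interface has been restored, per the
+ * note above. The control queue rebuild step is elided.
+ *
+ * status = ice_reset(hw, ICE_RESET_CORER);
+ * if (!status) {
+ *	... reinitialize control queues ...
+ *	ice_clear_pxe_mode(hw);
+ * }
+ */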
+
+/**
+ * ice_copy_rxq_ctx_to_hw
+ * @hw: pointer to the hardware structure
+ * @ice_rxq_ctx: pointer to the rxq context
+ * @rxq_index: the index of the rx queue
+ *
+ * Copies rxq context from dense structure to hw register space
+ */
+static enum ice_status
+ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index)
+{
+ u8 i;
+
+ if (!ice_rxq_ctx)
+ return ICE_ERR_BAD_PTR;
+
+ if (rxq_index > QRX_CTRL_MAX_INDEX)
+ return ICE_ERR_PARAM;
+
+ /* Copy each dword separately to hw */
+ for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
+ wr32(hw, QRX_CONTEXT(i, rxq_index),
+ *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+
+ ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i,
+ *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+ }
+
+ return 0;
+}
+
+/* LAN Rx Queue Context */
+static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
+ /* Field Width LSB */
+ ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0),
+ ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13),
+ ICE_CTX_STORE(ice_rlan_ctx, base, 57, 32),
+ ICE_CTX_STORE(ice_rlan_ctx, qlen, 13, 89),
+ ICE_CTX_STORE(ice_rlan_ctx, dbuf, 7, 102),
+ ICE_CTX_STORE(ice_rlan_ctx, hbuf, 5, 109),
+ ICE_CTX_STORE(ice_rlan_ctx, dtype, 2, 114),
+ ICE_CTX_STORE(ice_rlan_ctx, dsize, 1, 116),
+ ICE_CTX_STORE(ice_rlan_ctx, crcstrip, 1, 117),
+ ICE_CTX_STORE(ice_rlan_ctx, l2tsel, 1, 119),
+ ICE_CTX_STORE(ice_rlan_ctx, hsplit_0, 4, 120),
+ ICE_CTX_STORE(ice_rlan_ctx, hsplit_1, 2, 124),
+ ICE_CTX_STORE(ice_rlan_ctx, showiv, 1, 127),
+ ICE_CTX_STORE(ice_rlan_ctx, rxmax, 14, 174),
+ ICE_CTX_STORE(ice_rlan_ctx, tphrdesc_ena, 1, 193),
+ ICE_CTX_STORE(ice_rlan_ctx, tphwdesc_ena, 1, 194),
+ ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena, 1, 195),
+ ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196),
+ ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198),
+ { 0 }
+};
+
+/**
+ * ice_write_rxq_ctx
+ * @hw: pointer to the hardware structure
+ * @rlan_ctx: pointer to the rxq context
+ * @rxq_index: the index of the rx queue
+ *
+ * Converts rxq context from sparse to dense structure and then writes
+ * it to hw register space
+ */
+enum ice_status
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+ u32 rxq_index)
+{
+ u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 };
+
+ ice_set_ctx((u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
+ return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index);
+}
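+
+/* Illustrative usage sketch (not driver code): filling a sparse Rx queue
+ * context and writing it out. The field values are placeholders, and the
+ * assumption that base and dbuf are programmed in 128 byte units is made
+ * for illustration only.
+ *
+ * struct ice_rlan_ctx rlan_ctx = { 0 };
+ *
+ * rlan_ctx.base = ring_dma >> 7;   // ring base, 128 byte units (assumed)
+ * rlan_ctx.qlen = ring_count;
+ * rlan_ctx.dbuf = rx_buf_len >> 7; // buffer size, 128 byte units (assumed)
+ * status = ice_write_rxq_ctx(hw, &rlan_ctx, rxq_index);
+ */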
+
+/* LAN Tx Queue Context */
+const struct ice_ctx_ele ice_tlan_ctx_info[] = {
+ /* Field Width LSB */
+ ICE_CTX_STORE(ice_tlan_ctx, base, 57, 0),
+ ICE_CTX_STORE(ice_tlan_ctx, port_num, 3, 57),
+ ICE_CTX_STORE(ice_tlan_ctx, cgd_num, 5, 60),
+ ICE_CTX_STORE(ice_tlan_ctx, pf_num, 3, 65),
+ ICE_CTX_STORE(ice_tlan_ctx, vmvf_num, 10, 68),
+ ICE_CTX_STORE(ice_tlan_ctx, vmvf_type, 2, 78),
+ ICE_CTX_STORE(ice_tlan_ctx, src_vsi, 10, 80),
+ ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena, 1, 90),
+ ICE_CTX_STORE(ice_tlan_ctx, alt_vlan, 1, 92),
+ ICE_CTX_STORE(ice_tlan_ctx, cpuid, 8, 93),
+ ICE_CTX_STORE(ice_tlan_ctx, wb_mode, 1, 101),
+ ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc, 1, 102),
+ ICE_CTX_STORE(ice_tlan_ctx, tphrd, 1, 103),
+ ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc, 1, 104),
+ ICE_CTX_STORE(ice_tlan_ctx, cmpq_id, 9, 105),
+ ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func, 14, 114),
+ ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode, 1, 128),
+ ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id, 6, 129),
+ ICE_CTX_STORE(ice_tlan_ctx, qlen, 13, 135),
+ ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx, 4, 148),
+ ICE_CTX_STORE(ice_tlan_ctx, tso_ena, 1, 152),
+ ICE_CTX_STORE(ice_tlan_ctx, tso_qnum, 11, 153),
+ ICE_CTX_STORE(ice_tlan_ctx, legacy_int, 1, 164),
+ ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165),
+ ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166),
+ ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168),
+ ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 110, 171),
+ { 0 }
+};
+
+/**
+ * ice_debug_cq
+ * @hw: pointer to the hardware structure
+ * @mask: debug mask
+ * @desc: pointer to control queue descriptor
+ * @buf: pointer to command buffer
+ * @buf_len: max length of buf
+ *
+ * Dumps debug log about control command with descriptor contents.
+ */
+void ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc,
+ void *buf, u16 buf_len)
+{
+ struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
+ u16 len;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+ if (!(mask & hw->debug_mask))
+ return;
+#endif
+
+ if (!desc)
+ return;
+
+ len = le16_to_cpu(cq_desc->datalen);
+
+ ice_debug(hw, mask,
+ "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+ le16_to_cpu(cq_desc->opcode),
+ le16_to_cpu(cq_desc->flags),
+ le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
+ ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->cookie_high),
+ le32_to_cpu(cq_desc->cookie_low));
+ ice_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->params.generic.param0),
+ le32_to_cpu(cq_desc->params.generic.param1));
+ ice_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->params.generic.addr_high),
+ le32_to_cpu(cq_desc->params.generic.addr_low));
+ if (buf && cq_desc->datalen != 0) {
+ ice_debug(hw, mask, "Buffer:\n");
+ if (buf_len < len)
+ len = buf_len;
+
+ ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len);
+ }
+}
+
+/* FW Admin Queue command wrappers */
+
+/**
+ * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
+ * @hw: pointer to the hw struct
+ * @desc: descriptor describing the command
+ * @buf: buffer to use for indirect commands (NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * Helper function to send FW Admin Queue commands to the FW Admin Queue.
+ */
+enum ice_status
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
+ u16 buf_size, struct ice_sq_cd *cd)
+{
+ return ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_aq_get_fw_ver
+ * @hw: pointer to the hw struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the firmware version (0x0001) from the admin queue commands
+ */
+enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_ver *resp;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ resp = &desc.params.get_ver;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver);
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+ if (!status) {
+ hw->fw_branch = resp->fw_branch;
+ hw->fw_maj_ver = resp->fw_major;
+ hw->fw_min_ver = resp->fw_minor;
+ hw->fw_patch = resp->fw_patch;
+ hw->fw_build = le32_to_cpu(resp->fw_build);
+ hw->api_branch = resp->api_branch;
+ hw->api_maj_ver = resp->api_major;
+ hw->api_min_ver = resp->api_minor;
+ hw->api_patch = resp->api_patch;
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_q_shutdown
+ * @hw: pointer to the hw struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well (0x0003).
+ */
+enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
+{
+ struct ice_aqc_q_shutdown *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.q_shutdown;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
+
+ if (unloading)
+ cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_req_res
+ * @hw: pointer to the hw struct
+ * @res: resource id
+ * @access: access type
+ * @sdp_number: resource number
+ * @timeout: the maximum time in ms that the driver may hold the resource
+ * @cd: pointer to command details structure or NULL
+ *
+ * requests common resource using the admin queue commands (0x0008)
+ */
+static enum ice_status
+ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+ enum ice_aq_res_access_type access, u8 sdp_number, u32 *timeout,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_req_res *cmd_resp;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd_resp = &desc.params.res_owner;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res);
+
+ cmd_resp->res_id = cpu_to_le16(res);
+ cmd_resp->access_type = cpu_to_le16(access);
+ cmd_resp->res_number = cpu_to_le32(sdp_number);
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ /* The completion specifies the maximum time in ms that the driver
+ * may hold the resource in the Timeout field.
+ * If the resource is held by someone else, the command completes with
+ * busy return value and the timeout field indicates the maximum time
+ * the current owner of the resource has to free it.
+ */
+ if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)
+ *timeout = le32_to_cpu(cmd_resp->timeout);
+
+ return status;
+}
+
+/**
+ * ice_aq_release_res
+ * @hw: pointer to the hw struct
+ * @res: resource id
+ * @sdp_number: resource number
+ * @cd: pointer to command details structure or NULL
+ *
+ * release common resource using the admin queue commands (0x0009)
+ */
+static enum ice_status
+ice_aq_release_res(struct ice_hw *hw, enum ice_aq_res_ids res, u8 sdp_number,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_req_res *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.res_owner;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_release_res);
+
+ cmd->res_id = cpu_to_le16(res);
+ cmd->res_number = cpu_to_le32(sdp_number);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_acquire_res
+ * @hw: pointer to the HW structure
+ * @res: resource id
+ * @access: access type (read or write)
+ *
+ * This function will attempt to acquire the ownership of a resource.
+ */
+enum ice_status
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+ enum ice_aq_res_access_type access)
+{
+#define ICE_RES_POLLING_DELAY_MS 10
+ u32 delay = ICE_RES_POLLING_DELAY_MS;
+ enum ice_status status;
+ u32 time_left = 0;
+ u32 timeout;
+
+ status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+ /* An admin queue return code of ICE_AQ_RC_EEXIST means that another
+ * driver has previously acquired the resource and performed any
+ * necessary updates; in this case the caller does not obtain the
+ * resource and has no further work to do.
+ */
+ if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) {
+ status = ICE_ERR_AQ_NO_WORK;
+ goto ice_acquire_res_exit;
+ }
+
+ if (status)
+ ice_debug(hw, ICE_DBG_RES,
+ "resource %d acquire type %d failed.\n", res, access);
+
+ /* If necessary, poll until the current lock owner times out */
+ timeout = time_left;
+ while (status && timeout && time_left) {
+ mdelay(delay);
+ timeout = (timeout > delay) ? timeout - delay : 0;
+ status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+ if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) {
+ /* lock free, but no work to do */
+ status = ICE_ERR_AQ_NO_WORK;
+ break;
+ }
+
+ if (!status)
+ /* lock acquired */
+ break;
+ }
+ if (status && status != ICE_ERR_AQ_NO_WORK)
+ ice_debug(hw, ICE_DBG_RES, "resource acquire timed out.\n");
+
+ice_acquire_res_exit:
+ if (status == ICE_ERR_AQ_NO_WORK) {
+ if (access == ICE_RES_WRITE)
+ ice_debug(hw, ICE_DBG_RES,
+ "resource indicates no work to do.\n");
+ else
+ ice_debug(hw, ICE_DBG_RES,
+ "Warning: ICE_ERR_AQ_NO_WORK not expected\n");
+ }
+ return status;
+}
+
+/**
+ * ice_release_res
+ * @hw: pointer to the HW structure
+ * @res: resource id
+ *
+ * This function will release a resource using the proper Admin Command.
+ */
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res)
+{
+ enum ice_status status;
+ u32 total_delay = 0;
+
+ status = ice_aq_release_res(hw, res, 0, NULL);
+
+ /* There are rare cases where trying to release the resource
+ * results in an admin queue timeout; retry until it succeeds or
+ * the command timeout elapses.
+ */
+ while ((status == ICE_ERR_AQ_TIMEOUT) &&
+ (total_delay < hw->adminq.sq_cmd_timeout)) {
+ mdelay(1);
+ status = ice_aq_release_res(hw, res, 0, NULL);
+ total_delay++;
+ }
+}
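+
+/* Illustrative usage sketch (not driver code): the intended
+ * acquire/use/release pattern. ICE_NVM_RES_ID and ICE_RES_READ are assumed
+ * identifiers from ice_type.h, used here purely for illustration.
+ *
+ * status = ice_acquire_res(hw, ICE_NVM_RES_ID, ICE_RES_READ);
+ * if (status)
+ *	return status; // includes ICE_ERR_AQ_NO_WORK: nothing left to do
+ * ... access the resource ...
+ * ice_release_res(hw, ICE_NVM_RES_ID);
+ */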
+
+/**
+ * ice_parse_caps - parse function/device capabilities
+ * @hw: pointer to the hw struct
+ * @buf: pointer to a buffer containing function/device capability records
+ * @cap_count: number of capability records in the list
+ * @opc: type of capabilities list to parse
+ *
+ * Helper function to parse function(0x000a)/device(0x000b) capabilities list.
+ */
+static void
+ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
+ enum ice_adminq_opc opc)
+{
+ struct ice_aqc_list_caps_elem *cap_resp;
+ struct ice_hw_func_caps *func_p = NULL;
+ struct ice_hw_dev_caps *dev_p = NULL;
+ struct ice_hw_common_caps *caps;
+ u32 i;
+
+ if (!buf)
+ return;
+
+ cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+
+ if (opc == ice_aqc_opc_list_dev_caps) {
+ dev_p = &hw->dev_caps;
+ caps = &dev_p->common_cap;
+ } else if (opc == ice_aqc_opc_list_func_caps) {
+ func_p = &hw->func_caps;
+ caps = &func_p->common_cap;
+ } else {
+ ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n");
+ return;
+ }
+
+ for (i = 0; caps && i < cap_count; i++, cap_resp++) {
+ u32 logical_id = le32_to_cpu(cap_resp->logical_id);
+ u32 phys_id = le32_to_cpu(cap_resp->phys_id);
+ u32 number = le32_to_cpu(cap_resp->number);
+ u16 cap = le16_to_cpu(cap_resp->cap);
+
+ switch (cap) {
+ case ICE_AQC_CAPS_VSI:
+ if (dev_p) {
+ dev_p->num_vsi_allocd_to_host = number;
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: Dev.VSI cnt = %d\n",
+ dev_p->num_vsi_allocd_to_host);
+ } else if (func_p) {
+ func_p->guaranteed_num_vsi = number;
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: Func.VSI cnt = %d\n",
+ func_p->guaranteed_num_vsi);
+ }
+ break;
+ case ICE_AQC_CAPS_RSS:
+ caps->rss_table_size = number;
+ caps->rss_table_entry_width = logical_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: RSS table size = %d\n",
+ caps->rss_table_size);
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: RSS table width = %d\n",
+ caps->rss_table_entry_width);
+ break;
+ case ICE_AQC_CAPS_RXQS:
+ caps->num_rxq = number;
+ caps->rxq_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: Num Rx Qs = %d\n", caps->num_rxq);
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: Rx first queue ID = %d\n",
+ caps->rxq_first_id);
+ break;
+ case ICE_AQC_CAPS_TXQS:
+ caps->num_txq = number;
+ caps->txq_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: Num Tx Qs = %d\n", caps->num_txq);
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: Tx first queue ID = %d\n",
+ caps->txq_first_id);
+ break;
+ case ICE_AQC_CAPS_MSIX:
+ caps->num_msix_vectors = number;
+ caps->msix_vector_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: MSIX vector count = %d\n",
+ caps->num_msix_vectors);
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: MSIX first vector index = %d\n",
+ caps->msix_vector_first_id);
+ break;
+ case ICE_AQC_CAPS_MAX_MTU:
+ caps->max_mtu = number;
+ if (dev_p)
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: Dev.MaxMTU = %d\n",
+ caps->max_mtu);
+ else if (func_p)
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: func.MaxMTU = %d\n",
+ caps->max_mtu);
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_INIT,
+ "HW caps: Unknown capability[%d]: 0x%x\n", i,
+ cap);
+ break;
+ }
+ }
+}
+
+/**
+ * ice_aq_discover_caps - query function/device capabilities
+ * @hw: pointer to the hw struct
+ * @buf: a virtual buffer to hold the capabilities
+ * @buf_size: Size of the virtual buffer
+ * @data_size: Size of the returned data, or buf size needed if AQ err==ENOMEM
+ * @opc: capabilities type to discover - pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the function(0x000a)/device(0x000b) capabilities description from
+ * the firmware.
+ */
+static enum ice_status
+ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u16 *data_size,
+ enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_list_caps *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.get_cap;
+
+ if (opc != ice_aqc_opc_list_func_caps &&
+ opc != ice_aqc_opc_list_dev_caps)
+ return ICE_ERR_PARAM;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status)
+ ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc);
+ *data_size = le16_to_cpu(desc.datalen);
+
+ return status;
+}
+
+/**
+ * ice_get_caps - get info about the HW
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_get_caps(struct ice_hw *hw)
+{
+ enum ice_status status;
+ u16 data_size = 0;
+ u16 cbuf_len;
+ u8 retries;
+
+ /* The driver doesn't know how many capabilities the device will return
+ * so the buffer size required isn't known ahead of time. The driver
+ * starts with cbuf_len and if this turns out to be insufficient, the
+ * device returns ICE_AQ_RC_ENOMEM and also the buffer size it needs.
+ * The driver then allocates the buffer of this size and retries the
+ * operation. So it follows that the retry count is 2.
+ */
+#define ICE_GET_CAP_BUF_COUNT 40
+#define ICE_GET_CAP_RETRY_COUNT 2
+
+ cbuf_len = ICE_GET_CAP_BUF_COUNT *
+ sizeof(struct ice_aqc_list_caps_elem);
+
+ retries = ICE_GET_CAP_RETRY_COUNT;
+
+ do {
+ void *cbuf;
+
+ cbuf = devm_kzalloc(ice_hw_to_dev(hw), cbuf_len, GFP_KERNEL);
+ if (!cbuf)
+ return ICE_ERR_NO_MEMORY;
+
+ status = ice_aq_discover_caps(hw, cbuf, cbuf_len, &data_size,
+ ice_aqc_opc_list_func_caps, NULL);
+ devm_kfree(ice_hw_to_dev(hw), cbuf);
+
+ if (!status || hw->adminq.sq_last_status != ICE_AQ_RC_ENOMEM)
+ break;
+
+ /* If ENOMEM is returned, try again with bigger buffer */
+ cbuf_len = data_size;
+ } while (--retries);
+
+ return status;
+}
+
+/**
+ * ice_aq_manage_mac_write - manage MAC address write command
+ * @hw: pointer to the hw struct
+ * @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address
+ * @flags: flags to control write behavior
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to write MAC address to the NVM (0x0108).
+ */
+enum ice_status
+ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_manage_mac_write *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.mac_write;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write);
+
+ cmd->flags = flags;
+
+ /* Prep values for flags, sah, sal */
+ cmd->sah = htons(*((u16 *)mac_addr));
+ cmd->sal = htonl(*((u32 *)(mac_addr + 2)));
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_clear_pxe_mode
+ * @hw: pointer to the hw struct
+ *
+ * Tell the firmware that the driver is taking over from PXE (0x0110).
+ */
+static enum ice_status ice_aq_clear_pxe_mode(struct ice_hw *hw)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pxe_mode);
+ desc.params.clear_pxe.rx_cnt = ICE_AQC_CLEAR_PXE_RX_CNT;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_clear_pxe_mode - clear pxe operations mode
+ * @hw: pointer to the hw struct
+ *
+ * Make sure all PXE mode settings are cleared, including things
+ * like descriptor fetch/write-back mode.
+ */
+void ice_clear_pxe_mode(struct ice_hw *hw)
+{
+ if (ice_check_sq_alive(hw, &hw->adminq))
+ ice_aq_clear_pxe_mode(hw);
+}
+
+/**
+ * ice_aq_set_phy_cfg
+ * @hw: pointer to the hw struct
+ * @lport: logical port number
+ * @cfg: structure with PHY configuration data to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the various PHY configuration parameters supported on the Port.
+ * One or more of the Set PHY config parameters may be ignored in MFP
+ * mode, as the PF may not have the privilege to set some of them. Such a
+ * case is indicated in the command response (0x0601).
+ */
+static enum ice_status
+ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
+ struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_phy_cfg *cmd;
+ struct ice_aq_desc desc;
+
+ if (!cfg)
+ return ICE_ERR_PARAM;
+
+ cmd = &desc.params.set_phy;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg);
+ cmd->lport_num = lport;
+
+ return ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
+}
+
+/**
+ * ice_update_link_info - update status of the HW network link
+ * @pi: port info structure of the interested logical port
+ */
+static enum ice_status
+ice_update_link_info(struct ice_port_info *pi)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_phy_info *phy_info;
+ enum ice_status status;
+ struct ice_hw *hw;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+
+ hw = pi->hw;
+
+ pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return ICE_ERR_NO_MEMORY;
+
+ phy_info = &pi->phy;
+ status = ice_aq_get_link_info(pi, true, NULL, NULL);
+ if (status)
+ goto out;
+
+ if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
+ pcaps, NULL);
+ if (status)
+ goto out;
+
+ memcpy(phy_info->link_info.module_type, &pcaps->module_type,
+ sizeof(phy_info->link_info.module_type));
+ }
+out:
+ devm_kfree(ice_hw_to_dev(hw), pcaps);
+ return status;
+}
+
+/**
+ * ice_set_fc
+ * @pi: port information structure
+ * @aq_failures: pointer to status code, specific to ice_set_fc routine
+ * @atomic_restart: enable automatic link update
+ *
+ * Set the requested flow control mode.
+ */
+enum ice_status
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart)
+{
+ struct ice_aqc_set_phy_cfg_data cfg = { 0 };
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ enum ice_status status;
+ u8 pause_mask = 0x0;
+ struct ice_hw *hw;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+ hw = pi->hw;
+ *aq_failures = ICE_SET_FC_AQ_FAIL_NONE;
+
+ switch (pi->fc.req_mode) {
+ case ICE_FC_FULL:
+ pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+ pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+ break;
+ case ICE_FC_RX_PAUSE:
+ pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+ break;
+ case ICE_FC_TX_PAUSE:
+ pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+ break;
+ default:
+ break;
+ }
+
+ pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return ICE_ERR_NO_MEMORY;
+
+ /* Get the current phy config */
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+ NULL);
+ if (status) {
+ *aq_failures = ICE_SET_FC_AQ_FAIL_GET;
+ goto out;
+ }
+
+ /* clear the old pause settings */
+ cfg.caps = pcaps->caps & ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
+ ICE_AQC_PHY_EN_RX_LINK_PAUSE);
+ /* set the new capabilities */
+ cfg.caps |= pause_mask;
+ /* If the capabilities have changed, then set the new config */
+ if (cfg.caps != pcaps->caps) {
+ int retry_count, retry_max = 10;
+
+ /* Auto restart link so settings take effect */
+ if (atomic_restart)
+ cfg.caps |= ICE_AQ_PHY_ENA_ATOMIC_LINK;
+ /* Copy over all the old settings */
+ cfg.phy_type_low = pcaps->phy_type_low;
+ cfg.low_power_ctrl = pcaps->low_power_ctrl;
+ cfg.eee_cap = pcaps->eee_cap;
+ cfg.eeer_value = pcaps->eeer_value;
+ cfg.link_fec_opt = pcaps->link_fec_options;
+
+ status = ice_aq_set_phy_cfg(hw, pi->lport, &cfg, NULL);
+ if (status) {
+ *aq_failures = ICE_SET_FC_AQ_FAIL_SET;
+ goto out;
+ }
+
+ /* Update the link info
+ * It sometimes takes a really long time for link to
+ * come back from the atomic reset. Thus, we wait a
+ * little bit.
+ */
+ for (retry_count = 0; retry_count < retry_max; retry_count++) {
+ status = ice_update_link_info(pi);
+
+ if (!status)
+ break;
+
+ mdelay(100);
+ }
+
+ if (status)
+ *aq_failures = ICE_SET_FC_AQ_FAIL_UPDATE;
+ }
+
+out:
+ devm_kfree(ice_hw_to_dev(hw), pcaps);
+ return status;
+}
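+
+/* Illustrative usage sketch (not driver code): requesting full flow
+ * control with an automatic link restart. The caller sets the requested
+ * mode in pi->fc.req_mode, which this function reads.
+ *
+ * u8 aq_failures;
+ *
+ * pi->fc.req_mode = ICE_FC_FULL;
+ * status = ice_set_fc(pi, &aq_failures, true);
+ * // on failure, aq_failures reports whether get, set or update failed
+ */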
+
+/**
+ * ice_get_link_status - get status of the HW network link
+ * @pi: port information structure
+ * @link_up: pointer to bool (true/false = linkup/linkdown)
+ *
+ * Sets link_up to true if the link is up and false if it is down. The
+ * value of link_up is invalid if the returned status is non-zero. As a
+ * result of this call, link status reporting becomes enabled.
+ */
+enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up)
+{
+ struct ice_phy_info *phy_info;
+ enum ice_status status = 0;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+
+ phy_info = &pi->phy;
+
+ if (phy_info->get_link_info) {
+ status = ice_update_link_info(pi);
+
+ if (status)
+ ice_debug(pi->hw, ICE_DBG_LINK,
+ "get link status error, status = %d\n",
+ status);
+ }
+
+ *link_up = phy_info->link_info.link_info & ICE_AQ_LINK_UP;
+
+ return status;
+}
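+
+/* Illustrative usage sketch (not driver code): forcing a refresh from
+ * firmware before reading the link state. Setting get_link_info by hand
+ * is an assumption based on the check in this function.
+ *
+ * bool link_up;
+ *
+ * pi->phy.get_link_info = true; // request fresh data from FW
+ * status = ice_get_link_status(pi, &link_up);
+ */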
+
+/**
+ * ice_aq_set_link_restart_an
+ * @pi: pointer to the port information structure
+ * @ena_link: if true: enable link, if false: disable link
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ */
+enum ice_status
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_restart_an *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.restart_an;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_restart_an);
+
+ cmd->cmd_flags = ICE_AQC_RESTART_AN_LINK_RESTART;
+ cmd->lport_num = pi->lport;
+ if (ena_link)
+ cmd->cmd_flags |= ICE_AQC_RESTART_AN_LINK_ENABLE;
+ else
+ cmd->cmd_flags &= ~ICE_AQC_RESTART_AN_LINK_ENABLE;
+
+ return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_event_mask
+ * @hw: pointer to the hw struct
+ * @port_num: port number of the physical function
+ * @mask: event mask to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set event mask (0x0613)
+ */
+enum ice_status
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_event_mask *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.set_event_mask;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask);
+
+ cmd->lport_num = port_num;
+
+ cmd->event_mask = cpu_to_le16(mask);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * __ice_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: VSI FW index
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ * @glob_lut_idx: global LUT index
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get (0x0B05) or set (0x0B03) RSS look up table
+ */
+static enum ice_status
+__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+ u16 lut_size, u8 glob_lut_idx, bool set)
+{
+ struct ice_aqc_get_set_rss_lut *cmd_resp;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+ u16 flags = 0;
+
+ cmd_resp = &desc.params.get_set_rss_lut;
+
+ if (set) {
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_lut);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ } else {
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_lut);
+ }
+
+ cmd_resp->vsi_id = cpu_to_le16(((vsi_id <<
+ ICE_AQC_GSET_RSS_LUT_VSI_ID_S) &
+ ICE_AQC_GSET_RSS_LUT_VSI_ID_M) |
+ ICE_AQC_GSET_RSS_LUT_VSI_VALID);
+
+ switch (lut_type) {
+ case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI:
+ case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF:
+ case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL:
+ flags |= ((lut_type << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) &
+ ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M);
+ break;
+ default:
+ status = ICE_ERR_PARAM;
+ goto ice_aq_get_set_rss_lut_exit;
+ }
+
+ if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL) {
+ flags |= ((glob_lut_idx << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S) &
+ ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M);
+
+ if (!set)
+ goto ice_aq_get_set_rss_lut_send;
+ } else if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) {
+ if (!set)
+ goto ice_aq_get_set_rss_lut_send;
+ } else {
+ goto ice_aq_get_set_rss_lut_send;
+ }
+
+ /* LUT size is only valid for Global and PF table types */
+ if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128) {
+ flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG <<
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+ } else if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512) {
+ flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG <<
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+ } else if ((lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K) &&
+ (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF)) {
+ flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG <<
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+ } else {
+ status = ICE_ERR_PARAM;
+ goto ice_aq_get_set_rss_lut_exit;
+ }
+
+ice_aq_get_set_rss_lut_send:
+ cmd_resp->flags = cpu_to_le16(flags);
+ status = ice_aq_send_cmd(hw, &desc, lut, lut_size, NULL);
+
+ice_aq_get_set_rss_lut_exit:
+ return status;
+}
+
+/**
+ * ice_aq_get_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: VSI FW index
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ *
+ * get the RSS lookup table, PF or VSI type
+ */
+enum ice_status
+ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+ u16 lut_size)
+{
+ return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0,
+ false);
+}
+
+/**
+ * ice_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: VSI FW index
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ *
+ * set the RSS lookup table, PF or VSI type
+ */
+enum ice_status
+ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+ u16 lut_size)
+{
+ return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0,
+ true);
+}
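+
+/* Illustrative usage sketch (not driver code): programming a 512 entry PF
+ * LUT. ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512 is assumed to be the entry
+ * count (512), matching the size checks above.
+ *
+ * u8 lut[ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512];
+ *
+ * ... fill lut[] with queue indices ...
+ * status = ice_aq_set_rss_lut(hw, vsi_id,
+ *			       ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF,
+ *			       lut, sizeof(lut));
+ */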
+
+/**
+ * __ice_aq_get_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: VSI FW index
+ * @key: pointer to key info struct
+ * @set: set true to set the key, false to get the key
+ *
+ * get (0x0B04) or set (0x0B02) the RSS key per VSI
+ */
+static enum ice_status
+__ice_aq_get_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+ struct ice_aqc_get_set_rss_keys *key, bool set)
+{
+ struct ice_aqc_get_set_rss_key *cmd_resp;
+ u16 key_size = sizeof(*key);
+ struct ice_aq_desc desc;
+
+ cmd_resp = &desc.params.get_set_rss_key;
+
+ if (set) {
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_key);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ } else {
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_key);
+ }
+
+ cmd_resp->vsi_id = cpu_to_le16(((vsi_id <<
+ ICE_AQC_GSET_RSS_KEY_VSI_ID_S) &
+ ICE_AQC_GSET_RSS_KEY_VSI_ID_M) |
+ ICE_AQC_GSET_RSS_KEY_VSI_VALID);
+
+ return ice_aq_send_cmd(hw, &desc, key, key_size, NULL);
+}
+
+/**
+ * ice_aq_get_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: VSI FW index
+ * @key: pointer to key info struct
+ *
+ * get the RSS key per VSI
+ */
+enum ice_status
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id,
+ struct ice_aqc_get_set_rss_keys *key)
+{
+ return __ice_aq_get_set_rss_key(hw, vsi_id, key, false);
+}
+
+/**
+ * ice_aq_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: VSI FW index
+ * @keys: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ */
+enum ice_status
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+ struct ice_aqc_get_set_rss_keys *keys)
+{
+ return __ice_aq_get_set_rss_key(hw, vsi_id, keys, true);
+}
+
+/**
+ * ice_aq_add_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: Number of added queue groups
+ * @qg_list: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx LAN queue (0x0C30)
+ *
+ * NOTE:
+ * Prior to calling add Tx LAN queue, initialize the following as part of
+ * the Tx queue context: Completion queue ID (if the queue uses a
+ * completion queue), Quanta profile, Cache profile and Packet shaper
+ * profile.
+ *
+ * After the add Tx LAN queue AQ command completes, interrupts should be
+ * associated with specific queues. Association of a Tx queue to a
+ * Doorbell queue is not part of the add LAN Tx queue flow.
+ */
+static enum ice_status
+ice_aq_add_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+ struct ice_aqc_add_tx_qgrp *qg_list, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ u16 i, sum_header_size, sum_q_size = 0;
+ struct ice_aqc_add_tx_qgrp *list;
+ struct ice_aqc_add_txqs *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.add_txqs;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_txqs);
+
+ if (!qg_list)
+ return ICE_ERR_PARAM;
+
+ if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+ return ICE_ERR_PARAM;
+
+ sum_header_size = num_qgrps *
+ (sizeof(*qg_list) - sizeof(*qg_list->txqs));
+
+ list = qg_list;
+ for (i = 0; i < num_qgrps; i++) {
+ struct ice_aqc_add_txqs_perq *q = list->txqs;
+
+ sum_q_size += list->num_txqs * sizeof(*q);
+ list = (struct ice_aqc_add_tx_qgrp *)(q + list->num_txqs);
+ }
+
+ if (buf_size != (sum_header_size + sum_q_size))
+ return ICE_ERR_PARAM;
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ cmd->num_qgrps = num_qgrps;
+
+ return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+}
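+
+/* Worked size example (illustrative): for a single group carrying one
+ * queue, the expected buf_size is one group header,
+ * sizeof(*qg_list) - sizeof(*qg_list->txqs), plus one
+ * struct ice_aqc_add_txqs_perq, i.e. sizeof(*qg_list) when txqs is a one
+ * element array. That is the shape ice_ena_vsi_txq accepts, since it
+ * rejects num_qgrps > 1 and num_txqs > 1.
+ */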
+
+/**
+ * ice_aq_dis_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: number of groups in the list
+ * @qg_list: the list of groups to disable
+ * @buf_size: the total size of the qg_list buffer in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * Disable LAN Tx queue (0x0C31)
+ */
+static enum ice_status
+ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+ struct ice_aqc_dis_txq_item *qg_list, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_dis_txqs *cmd;
+ struct ice_aq_desc desc;
+ u16 i, sz = 0;
+
+ cmd = &desc.params.dis_txqs;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dis_txqs);
+
+ if (!qg_list)
+ return ICE_ERR_PARAM;
+
+ if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+ return ICE_ERR_PARAM;
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ cmd->num_entries = num_qgrps;
+
+ for (i = 0; i < num_qgrps; ++i) {
+ /* Calculate the size taken up by the queue IDs in this group */
+ sz += qg_list[i].num_qs * sizeof(qg_list[i].q_id);
+
+ /* Add the size of the group header */
+ sz += sizeof(qg_list[i]) - sizeof(qg_list[i].q_id);
+
+ /* If the num of queues is even, add 2 bytes of padding */
+ if ((qg_list[i].num_qs % 2) == 0)
+ sz += 2;
+ }
+
+ if (buf_size != sz)
+ return ICE_ERR_PARAM;
+
+ return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+}
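+
+/* Worked size example (illustrative): for one group with a single queue
+ * ID, sz is 1 * sizeof(q_id) for the ID plus sizeof(qg_list[0]) minus
+ * sizeof(q_id) for the header, i.e. sizeof(struct ice_aqc_dis_txq_item)
+ * with no padding since the queue count is odd. ice_dis_vsi_txq below
+ * passes exactly sizeof(qg_list) for this case.
+ */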
+
+/* End of FW Admin Queue command wrappers */
+
+/**
+ * ice_write_byte - write a byte to a packed context structure
+ * @src_ctx: the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info: a description of the struct to be filled
+ */
+static void ice_write_byte(u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
+{
+ u8 src_byte, dest_byte, mask;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src_ctx + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+ mask = (u8)(BIT(ce_info->width) - 1);
+
+ src_byte = *from;
+ src_byte &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_byte <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = dest_ctx + (ce_info->lsb / 8);
+
+ memcpy(&dest_byte, dest, sizeof(dest_byte));
+
+ dest_byte &= ~mask; /* get the bits not changing */
+ dest_byte |= src_byte; /* add in the new bits */
+
+ /* put it all back */
+ memcpy(dest, &dest_byte, sizeof(dest_byte));
+}
+
+/**
+ * ice_write_word - write a word to a packed context structure
+ * @src_ctx: the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info: a description of the struct to be filled
+ */
+static void ice_write_word(u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
+{
+ u16 src_word, mask;
+ __le16 dest_word;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src_ctx + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+ mask = BIT(ce_info->width) - 1;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_word = *(u16 *)from;
+ src_word &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_word <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = dest_ctx + (ce_info->lsb / 8);
+
+ memcpy(&dest_word, dest, sizeof(dest_word));
+
+ dest_word &= ~(cpu_to_le16(mask)); /* get the bits not changing */
+ dest_word |= cpu_to_le16(src_word); /* add in the new bits */
+
+ /* put it all back */
+ memcpy(dest, &dest_word, sizeof(dest_word));
+}
+
+/**
+ * ice_write_dword - write a dword to a packed context structure
+ * @src_ctx: the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info: a description of the struct to be filled
+ */
+static void ice_write_dword(u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
+{
+ u32 src_dword, mask;
+ __le32 dest_dword;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src_ctx + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+
+ /* if the field width is exactly 32 on an x86 machine, then the shift
+ * operation will not work because the SHL instruction's count is
+ * masked to 5 bits so the shift will do nothing
+ */
+ if (ce_info->width < 32)
+ mask = BIT(ce_info->width) - 1;
+ else
+ mask = (u32)~0;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_dword = *(u32 *)from;
+ src_dword &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_dword <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = dest_ctx + (ce_info->lsb / 8);
+
+ memcpy(&dest_dword, dest, sizeof(dest_dword));
+
+ dest_dword &= ~(cpu_to_le32(mask)); /* get the bits not changing */
+ dest_dword |= cpu_to_le32(src_dword); /* add in the new bits */
+
+ /* put it all back */
+ memcpy(dest, &dest_dword, sizeof(dest_dword));
+}
+
+/**
+ * ice_write_qword - write a qword to a packed context structure
+ * @src_ctx: the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info: a description of the struct to be filled
+ */
+static void ice_write_qword(u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
+{
+ u64 src_qword, mask;
+ __le64 dest_qword;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src_ctx + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+
+ /* if the field width is exactly 64 on an x86 machine, then the shift
+ * operation will not work because the SHL instruction's count is
+ * masked to 6 bits so the shift will do nothing
+ */
+ if (ce_info->width < 64)
+ mask = BIT_ULL(ce_info->width) - 1;
+ else
+ mask = (u64)~0;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_qword = *(u64 *)from;
+ src_qword &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_qword <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = dest_ctx + (ce_info->lsb / 8);
+
+ memcpy(&dest_qword, dest, sizeof(dest_qword));
+
+ dest_qword &= ~(cpu_to_le64(mask)); /* get the bits not changing */
+ dest_qword |= cpu_to_le64(src_qword); /* add in the new bits */
+
+ /* put it all back */
+ memcpy(dest, &dest_qword, sizeof(dest_qword));
+}
+
+/**
+ * ice_set_ctx - set context bits in packed structure
+ * @src_ctx: pointer to a generic non-packed context structure
+ * @dest_ctx: pointer to memory for the packed structure
+ * @ce_info: a description of the structure to be transformed
+ */
+enum ice_status
+ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+ int f;
+
+ for (f = 0; ce_info[f].width; f++) {
+ /* We have to deal with each element of the FW response
+ * using the correct size so that we are correct regardless
+ * of the endianness of the machine.
+ */
+ switch (ce_info[f].size_of) {
+ case sizeof(u8):
+ ice_write_byte(src_ctx, dest_ctx, &ce_info[f]);
+ break;
+ case sizeof(u16):
+ ice_write_word(src_ctx, dest_ctx, &ce_info[f]);
+ break;
+ case sizeof(u32):
+ ice_write_dword(src_ctx, dest_ctx, &ce_info[f]);
+ break;
+ case sizeof(u64):
+ ice_write_qword(src_ctx, dest_ctx, &ce_info[f]);
+ break;
+ default:
+ return ICE_ERR_INVAL_SIZE;
+ }
+ }
+
+ return 0;
+}
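+
+/* Worked example (illustrative): packing the Rx context "head" field
+ * (width 13, LSB 0). Assuming head is a u16 in the sparse struct, as its
+ * width suggests, ice_write_word() is selected: shift_width is 0, the
+ * mask is BIT(13) - 1 = 0x1fff, and the masked value lands in bytes 0-1
+ * of the dense buffer in little endian order.
+ */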
+
+/**
+ * ice_ena_vsi_txq
+ * @pi: port information structure
+ * @vsi_id: VSI id
+ * @tc: tc number
+ * @num_qgrps: Number of added queue groups
+ * @buf: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function adds one LAN Tx queue.
+ */
+enum ice_status
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps,
+ struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_txsched_elem_data node = { 0 };
+ struct ice_sched_node *parent;
+ enum ice_status status;
+ struct ice_hw *hw;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return ICE_ERR_CFG;
+
+ if (num_qgrps > 1 || buf->num_txqs > 1)
+ return ICE_ERR_MAX_LIMIT;
+
+ hw = pi->hw;
+
+ mutex_lock(&pi->sched_lock);
+
+ /* find a parent node */
+ parent = ice_sched_get_free_qparent(pi, vsi_id, tc,
+ ICE_SCHED_NODE_OWNER_LAN);
+ if (!parent) {
+ status = ICE_ERR_PARAM;
+ goto ena_txq_exit;
+ }
+ buf->parent_teid = parent->info.node_teid;
+ node.parent_teid = parent->info.node_teid;
+ /* Mark the values in the "generic" section as valid. The default
+ * value in the "generic" section is zero. This means that:
+ * - Scheduling mode is Bytes Per Second (BPS), indicated by Bit 0.
+ * - 0 priority among siblings, indicated by Bit 1-3.
+ * - WFQ, indicated by Bit 4.
+ * - 0 Adjustment value is used in PSM credit update flow, indicated by
+ * Bit 5-6.
+ * - Bit 7 is reserved.
+ * Without setting the generic section as valid in valid_sections, the
+ * Admin Q command will fail with error code ICE_AQ_RC_EINVAL.
+ */
+ buf->txqs[0].info.valid_sections = ICE_AQC_ELEM_VALID_GENERIC;
+
+ /* add the LAN Tx queue */
+ status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd);
+ if (status)
+ goto ena_txq_exit;
+
+ node.node_teid = buf->txqs[0].q_teid;
+ node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+
+ /* add a leaf node into the scheduler tree queue layer */
+ status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
+
+ena_txq_exit:
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_dis_vsi_txq
+ * @pi: port information structure
+ * @num_queues: number of queues
+ * @q_ids: pointer to the q_id array
+ * @q_teids: pointer to queue node teids
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function removes queues and their corresponding nodes in SW DB
+ */
+enum ice_status
+ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
+ u32 *q_teids, struct ice_sq_cd *cd)
+{
+ enum ice_status status = ICE_ERR_DOES_NOT_EXIST;
+ struct ice_aqc_dis_txq_item qg_list;
+ u16 i;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return ICE_ERR_CFG;
+
+ mutex_lock(&pi->sched_lock);
+
+ for (i = 0; i < num_queues; i++) {
+ struct ice_sched_node *node;
+
+ node = ice_sched_find_node_by_teid(pi->root, q_teids[i]);
+ if (!node)
+ continue;
+ qg_list.parent_teid = node->info.parent_teid;
+ qg_list.num_qs = 1;
+ qg_list.q_id[0] = cpu_to_le16(q_ids[i]);
+ status = ice_aq_dis_lan_txq(pi->hw, 1, &qg_list,
+ sizeof(qg_list), cd);
+
+ if (status)
+ break;
+ ice_free_sched_node(pi, node);
+ }
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_cfg_vsi_qs - configure the new/existing VSI queues
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc_bitmap: TC bitmap
+ * @maxqs: max queues array per TC
+ * @owner: lan or rdma
+ *
+ * This function adds/updates the VSI queues per TC.
+ */
+static enum ice_status
+ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+ u16 *maxqs, u8 owner)
+{
+ enum ice_status status = 0;
+ u8 i;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return ICE_ERR_CFG;
+
+ mutex_lock(&pi->sched_lock);
+
+ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+ /* configuration is possible only if TC node is present */
+ if (!ice_sched_get_tc_node(pi, i))
+ continue;
+
+ status = ice_sched_cfg_vsi(pi, vsi_id, i, maxqs[i], owner,
+ ice_is_tc_ena(tc_bitmap, i));
+ if (status)
+ break;
+ }
+
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_cfg_vsi_lan - configure VSI lan queues
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc_bitmap: TC bitmap
+ * @max_lanqs: max lan queues array per TC
+ *
+ * This function adds/updates the VSI lan queues per TC.
+ */
+enum ice_status
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+ u16 *max_lanqs)
+{
+ return ice_cfg_vsi_qs(pi, vsi_id, tc_bitmap, max_lanqs,
+ ICE_SCHED_NODE_OWNER_LAN);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
new file mode 100644
index 000000000000..9a5519130af1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_COMMON_H_
+#define _ICE_COMMON_H_
+
+#include "ice.h"
+#include "ice_type.h"
+#include "ice_switch.h"
+
+void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf,
+ u16 buf_len);
+enum ice_status ice_init_hw(struct ice_hw *hw);
+void ice_deinit_hw(struct ice_hw *hw);
+enum ice_status ice_check_reset(struct ice_hw *hw);
+enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
+enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
+void ice_shutdown_all_ctrlq(struct ice_hw *hw);
+enum ice_status
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_rq_event_info *e, u16 *pending);
+enum ice_status
+ice_get_link_status(struct ice_port_info *pi, bool *link_up);
+enum ice_status
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+ enum ice_aq_res_access_type access);
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
+enum ice_status ice_init_nvm(struct ice_hw *hw);
+enum ice_status
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_aq_desc *desc, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd);
+void ice_clear_pxe_mode(struct ice_hw *hw);
+enum ice_status ice_get_caps(struct ice_hw *hw);
+enum ice_status
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+ u32 rxq_index);
+
+enum ice_status
+ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+ u16 lut_size);
+enum ice_status
+ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+ u16 lut_size);
+enum ice_status
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id,
+ struct ice_aqc_get_set_rss_keys *keys);
+enum ice_status
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+ struct ice_aqc_get_set_rss_keys *keys);
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
+enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
+extern const struct ice_ctx_ele ice_tlan_ctx_info[];
+enum ice_status
+ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info);
+enum ice_status
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
+ void *buf, u16 buf_size, struct ice_sq_cd *cd);
+enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags,
+ struct ice_sq_cd *cd);
+enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
+enum ice_status
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart);
+enum ice_status
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+ struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+ struct ice_link_status *link, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+ struct ice_sq_cd *cd);
+enum ice_status
+ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
+ u32 *q_teids, struct ice_sq_cd *cmd_details);
+enum ice_status
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+ u16 *max_lanqs);
+enum ice_status
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps,
+ struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+ struct ice_sq_cd *cd);
+#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
new file mode 100644
index 000000000000..5909a4407e38
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -0,0 +1,1066 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_adminq_init_regs - Initialize AdminQ registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_adminq_init_regs(struct ice_hw *hw)
+{
+ struct ice_ctl_q_info *cq = &hw->adminq;
+
+ cq->sq.head = PF_FW_ATQH;
+ cq->sq.tail = PF_FW_ATQT;
+ cq->sq.len = PF_FW_ATQLEN;
+ cq->sq.bah = PF_FW_ATQBAH;
+ cq->sq.bal = PF_FW_ATQBAL;
+ cq->sq.len_mask = PF_FW_ATQLEN_ATQLEN_M;
+ cq->sq.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M;
+ cq->sq.head_mask = PF_FW_ATQH_ATQH_M;
+
+ cq->rq.head = PF_FW_ARQH;
+ cq->rq.tail = PF_FW_ARQT;
+ cq->rq.len = PF_FW_ARQLEN;
+ cq->rq.bah = PF_FW_ARQBAH;
+ cq->rq.bal = PF_FW_ARQBAL;
+ cq->rq.len_mask = PF_FW_ARQLEN_ARQLEN_M;
+ cq->rq.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M;
+ cq->rq.head_mask = PF_FW_ARQH_ARQH_M;
+}
+
+/**
+ * ice_check_sq_alive
+ * @hw: pointer to the hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if Queue is enabled else false.
+ */
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ /* check both queue-length and queue-enable fields */
+ if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask)
+ return (rd32(hw, cq->sq.len) & (cq->sq.len_mask |
+ cq->sq.len_ena_mask)) ==
+ (cq->num_sq_entries | cq->sq.len_ena_mask);
+
+ return false;
+}
+
+/**
+ * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc);
+
+ cq->sq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+ &cq->sq.desc_buf.pa,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!cq->sq.desc_buf.va)
+ return ICE_ERR_NO_MEMORY;
+ cq->sq.desc_buf.size = size;
+
+ cq->sq.cmd_buf = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+ sizeof(struct ice_sq_cd), GFP_KERNEL);
+ if (!cq->sq.cmd_buf) {
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
+ cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
+ cq->sq.desc_buf.va = NULL;
+ cq->sq.desc_buf.pa = 0;
+ cq->sq.desc_buf.size = 0;
+ return ICE_ERR_NO_MEMORY;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc);
+
+ cq->rq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+ &cq->rq.desc_buf.pa,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!cq->rq.desc_buf.va)
+ return ICE_ERR_NO_MEMORY;
+ cq->rq.desc_buf.size = size;
+ return 0;
+}
+
+/**
+ * ice_free_ctrlq_sq_ring - Free Control Transmit Queue (ATQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This assumes the posted send buffers have already been cleaned
+ * and de-allocated
+ */
+static void ice_free_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
+ cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
+ cq->sq.desc_buf.va = NULL;
+ cq->sq.desc_buf.pa = 0;
+ cq->sq.desc_buf.size = 0;
+}
+
+/**
+ * ice_free_ctrlq_rq_ring - Free Control Receive Queue (ARQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This assumes the posted receive buffers have already been cleaned
+ * and de-allocated
+ */
+static void ice_free_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.desc_buf.size,
+ cq->rq.desc_buf.va, cq->rq.desc_buf.pa);
+ cq->rq.desc_buf.va = NULL;
+ cq->rq.desc_buf.pa = 0;
+ cq->rq.desc_buf.size = 0;
+}
+
+/**
+ * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int i;
+
+ /* We'll be allocating the buffer info memory first, then we can
+ * allocate the mapped buffers for the event processing
+ */
+ cq->rq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_rq_entries,
+ sizeof(cq->rq.desc_buf), GFP_KERNEL);
+ if (!cq->rq.dma_head)
+ return ICE_ERR_NO_MEMORY;
+ cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head;
+
+ /* allocate the mapped buffers */
+ for (i = 0; i < cq->num_rq_entries; i++) {
+ struct ice_aq_desc *desc;
+ struct ice_dma_mem *bi;
+
+ bi = &cq->rq.r.rq_bi[i];
+ bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+ cq->rq_buf_size, &bi->pa,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!bi->va)
+ goto unwind_alloc_rq_bufs;
+ bi->size = cq->rq_buf_size;
+
+ /* now configure the descriptors for use */
+ desc = ICE_CTL_Q_DESC(cq->rq, i);
+
+ desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+ if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+ desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+ desc->opcode = 0;
+ /* This is in accordance with Admin queue design, there is no
+ * register for buffer size configuration
+ */
+ desc->datalen = cpu_to_le16(bi->size);
+ desc->retval = 0;
+ desc->cookie_high = 0;
+ desc->cookie_low = 0;
+ desc->params.generic.addr_high =
+ cpu_to_le32(upper_32_bits(bi->pa));
+ desc->params.generic.addr_low =
+ cpu_to_le32(lower_32_bits(bi->pa));
+ desc->params.generic.param0 = 0;
+ desc->params.generic.param1 = 0;
+ }
+ return 0;
+
+unwind_alloc_rq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--) {
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
+ cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
+ cq->rq.r.rq_bi[i].va = NULL;
+ cq->rq.r.rq_bi[i].pa = 0;
+ cq->rq.r.rq_bi[i].size = 0;
+ }
+ devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
+
+ return ICE_ERR_NO_MEMORY;
+}
+
+/**
+ * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int i;
+
+ /* No mapped memory needed yet, just the buffer info structures */
+ cq->sq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+ sizeof(cq->sq.desc_buf), GFP_KERNEL);
+ if (!cq->sq.dma_head)
+ return ICE_ERR_NO_MEMORY;
+ cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head;
+
+ /* allocate the mapped buffers */
+ for (i = 0; i < cq->num_sq_entries; i++) {
+ struct ice_dma_mem *bi;
+
+ bi = &cq->sq.r.sq_bi[i];
+ bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+ cq->sq_buf_size, &bi->pa,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!bi->va)
+ goto unwind_alloc_sq_bufs;
+ bi->size = cq->sq_buf_size;
+ }
+ return 0;
+
+unwind_alloc_sq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--) {
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.r.sq_bi[i].size,
+ cq->sq.r.sq_bi[i].va, cq->sq.r.sq_bi[i].pa);
+ cq->sq.r.sq_bi[i].va = NULL;
+ cq->sq.r.sq_bi[i].pa = 0;
+ cq->sq.r.sq_bi[i].size = 0;
+ }
+ devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+
+ return ICE_ERR_NO_MEMORY;
+}
+
+/**
+ * ice_free_rq_bufs - Free ARQ buffer info elements
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static void ice_free_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int i;
+
+ /* free descriptors */
+ for (i = 0; i < cq->num_rq_entries; i++) {
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
+ cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
+ cq->rq.r.rq_bi[i].va = NULL;
+ cq->rq.r.rq_bi[i].pa = 0;
+ cq->rq.r.rq_bi[i].size = 0;
+ }
+
+ /* free the dma header */
+ devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
+}
+
+/**
+ * ice_free_sq_bufs - Free ATQ buffer info elements
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static void ice_free_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int i;
+
+ /* only unmap if the address is non-NULL */
+ for (i = 0; i < cq->num_sq_entries; i++)
+ if (cq->sq.r.sq_bi[i].pa) {
+ dmam_free_coherent(ice_hw_to_dev(hw),
+ cq->sq.r.sq_bi[i].size,
+ cq->sq.r.sq_bi[i].va,
+ cq->sq.r.sq_bi[i].pa);
+ cq->sq.r.sq_bi[i].va = NULL;
+ cq->sq.r.sq_bi[i].pa = 0;
+ cq->sq.r.sq_bi[i].size = 0;
+ }
+
+ /* free the buffer info list */
+ devm_kfree(ice_hw_to_dev(hw), cq->sq.cmd_buf);
+
+ /* free the dma header */
+ devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+}
+
+/**
+ * ice_cfg_sq_regs - configure Control ATQ registers
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the transmit queue
+ */
+static enum ice_status
+ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ u32 reg = 0;
+
+ /* Clear Head and Tail */
+ wr32(hw, cq->sq.head, 0);
+ wr32(hw, cq->sq.tail, 0);
+
+ /* set starting point */
+ wr32(hw, cq->sq.len, (cq->num_sq_entries | cq->sq.len_ena_mask));
+ wr32(hw, cq->sq.bal, lower_32_bits(cq->sq.desc_buf.pa));
+ wr32(hw, cq->sq.bah, upper_32_bits(cq->sq.desc_buf.pa));
+
+ /* Check one register to verify that config was applied */
+ reg = rd32(hw, cq->sq.bal);
+ if (reg != lower_32_bits(cq->sq.desc_buf.pa))
+ return ICE_ERR_AQ_ERROR;
+
+ return 0;
+}
+
+/**
+ * ice_cfg_rq_regs - configure Control ARQ register
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the receive (event q)
+ */
+static enum ice_status
+ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ u32 reg = 0;
+
+ /* Clear Head and Tail */
+ wr32(hw, cq->rq.head, 0);
+ wr32(hw, cq->rq.tail, 0);
+
+ /* set starting point */
+ wr32(hw, cq->rq.len, (cq->num_rq_entries | cq->rq.len_ena_mask));
+ wr32(hw, cq->rq.bal, lower_32_bits(cq->rq.desc_buf.pa));
+ wr32(hw, cq->rq.bah, upper_32_bits(cq->rq.desc_buf.pa));
+
+ /* Update tail in the HW to post pre-allocated buffers */
+ wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1));
+
+ /* Check one register to verify that config was applied */
+ reg = rd32(hw, cq->rq.bal);
+ if (reg != lower_32_bits(cq->rq.desc_buf.pa))
+ return ICE_ERR_AQ_ERROR;
+
+ return 0;
+}
+
+/**
+ * ice_init_sq - main initialization routine for Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This is the main initialization routine for the Control Send Queue
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the cq->structure:
+ * - cq->num_sq_entries
+ * - cq->sq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static enum ice_status ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ enum ice_status ret_code;
+
+ if (cq->sq.count > 0) {
+ /* queue already initialized */
+ ret_code = ICE_ERR_NOT_READY;
+ goto init_ctrlq_exit;
+ }
+
+ /* verify input for valid configuration */
+ if (!cq->num_sq_entries || !cq->sq_buf_size) {
+ ret_code = ICE_ERR_CFG;
+ goto init_ctrlq_exit;
+ }
+
+ cq->sq.next_to_use = 0;
+ cq->sq.next_to_clean = 0;
+
+ /* allocate the ring memory */
+ ret_code = ice_alloc_ctrlq_sq_ring(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_exit;
+
+ /* allocate buffers in the rings */
+ ret_code = ice_alloc_sq_bufs(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_rings;
+
+ /* initialize base registers */
+ ret_code = ice_cfg_sq_regs(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_rings;
+
+ /* success! */
+ cq->sq.count = cq->num_sq_entries;
+ goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+ ice_free_ctrlq_sq_ring(hw, cq);
+
+init_ctrlq_exit:
+ return ret_code;
+}
+
+/**
+ * ice_init_rq - initialize ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main initialization routine for the Admin Receive (Event) Queue.
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the cq->structure:
+ * - cq->num_rq_entries
+ * - cq->rq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static enum ice_status ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ enum ice_status ret_code;
+
+ if (cq->rq.count > 0) {
+ /* queue already initialized */
+ ret_code = ICE_ERR_NOT_READY;
+ goto init_ctrlq_exit;
+ }
+
+ /* verify input for valid configuration */
+ if (!cq->num_rq_entries || !cq->rq_buf_size) {
+ ret_code = ICE_ERR_CFG;
+ goto init_ctrlq_exit;
+ }
+
+ cq->rq.next_to_use = 0;
+ cq->rq.next_to_clean = 0;
+
+ /* allocate the ring memory */
+ ret_code = ice_alloc_ctrlq_rq_ring(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_exit;
+
+ /* allocate buffers in the rings */
+ ret_code = ice_alloc_rq_bufs(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_rings;
+
+ /* initialize base registers */
+ ret_code = ice_cfg_rq_regs(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_rings;
+
+ /* success! */
+ cq->rq.count = cq->num_rq_entries;
+ goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+ ice_free_ctrlq_rq_ring(hw, cq);
+
+init_ctrlq_exit:
+ return ret_code;
+}
+
+/**
+ * ice_shutdown_sq - shutdown the Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Transmit Queue
+ */
+static enum ice_status
+ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ enum ice_status ret_code = 0;
+
+ mutex_lock(&cq->sq_lock);
+
+ if (!cq->sq.count) {
+ ret_code = ICE_ERR_NOT_READY;
+ goto shutdown_sq_out;
+ }
+
+ /* Stop firmware AdminQ processing */
+ wr32(hw, cq->sq.head, 0);
+ wr32(hw, cq->sq.tail, 0);
+ wr32(hw, cq->sq.len, 0);
+ wr32(hw, cq->sq.bal, 0);
+ wr32(hw, cq->sq.bah, 0);
+
+ cq->sq.count = 0; /* to indicate uninitialized queue */
+
+ /* free ring buffers and the ring itself */
+ ice_free_sq_bufs(hw, cq);
+ ice_free_ctrlq_sq_ring(hw, cq);
+
+shutdown_sq_out:
+ mutex_unlock(&cq->sq_lock);
+ return ret_code;
+}
+
+/**
+ * ice_aq_ver_check - Check the reported AQ API version.
+ * @fw_branch: The "branch" of FW, typically describes the device type
+ * @fw_major: The major version of the FW API
+ * @fw_minor: The minor version increment of the FW API
+ *
+ * Checks if the driver should load on a given AQ API version.
+ *
+ * Return: true if the driver should attempt to load, false otherwise.
+ */
+static bool ice_aq_ver_check(u8 fw_branch, u8 fw_major, u8 fw_minor)
+{
+ if (fw_branch != EXP_FW_API_VER_BRANCH)
+ return false;
+ if (fw_major != EXP_FW_API_VER_MAJOR)
+ return false;
+ if (fw_minor != EXP_FW_API_VER_MINOR)
+ return false;
+ return true;
+}
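
With the EXP_FW_API_VER_* constants defined in ice_controlq.h below (0x00, 0x00, 0x01), this exact-match check pins the driver to FW API version 0.0.1; any other reported branch, major or minor version makes ice_init_check_adminq() fail with ICE_ERR_FW_API_VER.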
+
+/**
+ * ice_shutdown_rq - shutdown Control ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Receive Queue
+ */
+static enum ice_status
+ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ enum ice_status ret_code = 0;
+
+ mutex_lock(&cq->rq_lock);
+
+ if (!cq->rq.count) {
+ ret_code = ICE_ERR_NOT_READY;
+ goto shutdown_rq_out;
+ }
+
+ /* Stop Control Queue processing */
+ wr32(hw, cq->rq.head, 0);
+ wr32(hw, cq->rq.tail, 0);
+ wr32(hw, cq->rq.len, 0);
+ wr32(hw, cq->rq.bal, 0);
+ wr32(hw, cq->rq.bah, 0);
+
+ /* set rq.count to 0 to indicate uninitialized queue */
+ cq->rq.count = 0;
+
+ /* free ring buffers and the ring itself */
+ ice_free_rq_bufs(hw, cq);
+ ice_free_ctrlq_rq_ring(hw, cq);
+
+shutdown_rq_out:
+ mutex_unlock(&cq->rq_lock);
+ return ret_code;
+}
+
+/**
+ * ice_init_check_adminq - Check version for Admin Queue to know if it's alive
+ * @hw: pointer to the hardware structure
+ */
+static enum ice_status ice_init_check_adminq(struct ice_hw *hw)
+{
+ struct ice_ctl_q_info *cq = &hw->adminq;
+ enum ice_status status;
+
+ status = ice_aq_get_fw_ver(hw, NULL);
+ if (status)
+ goto init_ctrlq_free_rq;
+
+ if (!ice_aq_ver_check(hw->api_branch, hw->api_maj_ver,
+ hw->api_min_ver)) {
+ status = ICE_ERR_FW_API_VER;
+ goto init_ctrlq_free_rq;
+ }
+
+ return 0;
+
+init_ctrlq_free_rq:
+ ice_shutdown_rq(hw, cq);
+ ice_shutdown_sq(hw, cq);
+ mutex_destroy(&cq->sq_lock);
+ mutex_destroy(&cq->rq_lock);
+ return status;
+}
+
+/**
+ * ice_init_ctrlq - main initialization routine for any control Queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the cq->structure:
+ * - cq->num_sq_entries
+ * - cq->num_rq_entries
+ * - cq->rq_buf_size
+ * - cq->sq_buf_size
+ */
+static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+ struct ice_ctl_q_info *cq;
+ enum ice_status ret_code;
+
+ switch (q_type) {
+ case ICE_CTL_Q_ADMIN:
+ ice_adminq_init_regs(hw);
+ cq = &hw->adminq;
+ break;
+ default:
+ return ICE_ERR_PARAM;
+ }
+ cq->qtype = q_type;
+
+ /* verify input for valid configuration */
+ if (!cq->num_rq_entries || !cq->num_sq_entries ||
+ !cq->rq_buf_size || !cq->sq_buf_size) {
+ return ICE_ERR_CFG;
+ }
+ mutex_init(&cq->sq_lock);
+ mutex_init(&cq->rq_lock);
+
+ /* setup SQ command write back timeout */
+ cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT;
+
+ /* allocate the ATQ */
+ ret_code = ice_init_sq(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_destroy_locks;
+
+ /* allocate the ARQ */
+ ret_code = ice_init_rq(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_sq;
+
+ /* success! */
+ return 0;
+
+init_ctrlq_free_sq:
+ ice_shutdown_sq(hw, cq);
+init_ctrlq_destroy_locks:
+ mutex_destroy(&cq->sq_lock);
+ mutex_destroy(&cq->rq_lock);
+ return ret_code;
+}
+
+/**
+ * ice_init_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the cq->structure for all control queues:
+ * - cq->num_sq_entries
+ * - cq->num_rq_entries
+ * - cq->rq_buf_size
+ * - cq->sq_buf_size
+ */
+enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
+{
+ enum ice_status ret_code;
+
+ /* Init FW admin queue */
+ ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+ if (ret_code)
+ return ret_code;
+
+ return ice_init_check_adminq(hw);
+}
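
A minimal caller-side sketch of the contract stated in the kernel-doc above. The ring-depth constant is a placeholder; ICE_AQ_MAX_BUF_LEN comes from ice_controlq.h below.

	#define EXAMPLE_AQ_LEN	64	/* placeholder ring depth */

	hw->adminq.num_sq_entries = EXAMPLE_AQ_LEN;
	hw->adminq.num_rq_entries = EXAMPLE_AQ_LEN;
	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;

	status = ice_init_all_ctrlq(hw);
	if (status)
		return status;	/* control queues unusable */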
+
+/**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+ struct ice_ctl_q_info *cq;
+
+ switch (q_type) {
+ case ICE_CTL_Q_ADMIN:
+ cq = &hw->adminq;
+ if (ice_check_sq_alive(hw, cq))
+ ice_aq_q_shutdown(hw, true);
+ break;
+ default:
+ return;
+ }
+
+ ice_shutdown_sq(hw, cq);
+ ice_shutdown_rq(hw, cq);
+ mutex_destroy(&cq->sq_lock);
+ mutex_destroy(&cq->rq_lock);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+ /* Shutdown FW admin queue */
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+}
+
+/**
+ * ice_clean_sq - cleans Admin send queue (ATQ)
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns the number of free descriptors
+ */
+static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ struct ice_ctl_q_ring *sq = &cq->sq;
+ u16 ntc = sq->next_to_clean;
+ struct ice_sq_cd *details;
+ struct ice_aq_desc *desc;
+
+ desc = ICE_CTL_Q_DESC(*sq, ntc);
+ details = ICE_CTL_Q_DETAILS(*sq, ntc);
+
+ while (rd32(hw, cq->sq.head) != ntc) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head));
+ memset(desc, 0, sizeof(*desc));
+ memset(details, 0, sizeof(*details));
+ ntc++;
+ if (ntc == sq->count)
+ ntc = 0;
+ desc = ICE_CTL_Q_DESC(*sq, ntc);
+ details = ICE_CTL_Q_DETAILS(*sq, ntc);
+ }
+
+ sq->next_to_clean = ntc;
+
+ return ICE_CTL_Q_DESC_UNUSED(sq);
+}
+
+/**
+ * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
+ * @hw: pointer to the hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if the firmware has processed all descriptors on the
+ * admin send queue. Returns false if there are still requests pending.
+ */
+static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	/* AQ designers suggest using the head register rather than the DD
+	 * bit for better timing reliability
+	 */
+ return rd32(hw, cq->sq.head) == cq->sq.next_to_use;
+}
+
+/**
+ * ice_sq_send_cmd - send command to Control Queue (ATQ)
+ * @hw: pointer to the hw struct
+ * @cq: pointer to the specific Control queue
+ * @desc: prefilled descriptor describing the command (non DMA mem)
+ * @buf: buffer to use for indirect commands (or NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * This is the main send command routine for the ATQ. It posts the
+ * descriptor (and any indirect buffer), waits for firmware to process
+ * it and cleans the queue of completed descriptors.
+ */
+enum ice_status
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_aq_desc *desc, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_dma_mem *dma_buf = NULL;
+ struct ice_aq_desc *desc_on_ring;
+ bool cmd_completed = false;
+ enum ice_status status = 0;
+ struct ice_sq_cd *details;
+ u32 total_delay = 0;
+ u16 retval = 0;
+ u32 val = 0;
+
+ mutex_lock(&cq->sq_lock);
+
+ cq->sq_last_status = ICE_AQ_RC_OK;
+
+ if (!cq->sq.count) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "Control Send queue not initialized.\n");
+ status = ICE_ERR_AQ_EMPTY;
+ goto sq_send_command_error;
+ }
+
+ if ((buf && !buf_size) || (!buf && buf_size)) {
+ status = ICE_ERR_PARAM;
+ goto sq_send_command_error;
+ }
+
+ if (buf) {
+ if (buf_size > cq->sq_buf_size) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "Invalid buffer size for Control Send queue: %d.\n",
+ buf_size);
+ status = ICE_ERR_INVAL_SIZE;
+ goto sq_send_command_error;
+ }
+
+ desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+ if (buf_size > ICE_AQ_LG_BUF)
+ desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+ }
+
+ val = rd32(hw, cq->sq.head);
+ if (val >= cq->num_sq_entries) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "head overrun at %d in the Control Send Queue ring\n",
+ val);
+ status = ICE_ERR_AQ_EMPTY;
+ goto sq_send_command_error;
+ }
+
+ details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use);
+ if (cd)
+ memcpy(details, cd, sizeof(*details));
+ else
+ memset(details, 0, sizeof(*details));
+
+ /* Call clean and check queue available function to reclaim the
+ * descriptors that were processed by FW/MBX; the function returns the
+ * number of desc available. The clean function called here could be
+ * called in a separate thread in case of asynchronous completions.
+ */
+ if (ice_clean_sq(hw, cq) == 0) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "Error: Control Send Queue is full.\n");
+ status = ICE_ERR_AQ_FULL;
+ goto sq_send_command_error;
+ }
+
+ /* initialize the temp desc pointer with the right desc */
+ desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use);
+
+ /* if the desc is available copy the temp desc to the right place */
+ memcpy(desc_on_ring, desc, sizeof(*desc_on_ring));
+
+ /* if buf is not NULL assume indirect command */
+ if (buf) {
+ dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use];
+ /* copy the user buf into the respective DMA buf */
+ memcpy(dma_buf->va, buf, buf_size);
+ desc_on_ring->datalen = cpu_to_le16(buf_size);
+
+ /* Update the address values in the desc with the pa value
+ * for respective buffer
+ */
+ desc_on_ring->params.generic.addr_high =
+ cpu_to_le32(upper_32_bits(dma_buf->pa));
+ desc_on_ring->params.generic.addr_low =
+ cpu_to_le32(lower_32_bits(dma_buf->pa));
+ }
+
+ /* Debug desc and buffer */
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "ATQ: Control Send queue desc and buffer:\n");
+
+ ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size);
+
+ (cq->sq.next_to_use)++;
+ if (cq->sq.next_to_use == cq->sq.count)
+ cq->sq.next_to_use = 0;
+ wr32(hw, cq->sq.tail, cq->sq.next_to_use);
+
+ do {
+ if (ice_sq_done(hw, cq))
+ break;
+
+ mdelay(1);
+ total_delay++;
+ } while (total_delay < cq->sq_cmd_timeout);
+
+ /* if ready, copy the desc back to temp */
+ if (ice_sq_done(hw, cq)) {
+ memcpy(desc, desc_on_ring, sizeof(*desc));
+ if (buf) {
+ /* get returned length to copy */
+ u16 copy_size = le16_to_cpu(desc->datalen);
+
+ if (copy_size > buf_size) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "Return len %d > than buf len %d\n",
+ copy_size, buf_size);
+ status = ICE_ERR_AQ_ERROR;
+ } else {
+ memcpy(buf, dma_buf->va, copy_size);
+ }
+ }
+ retval = le16_to_cpu(desc->retval);
+ if (retval) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "Control Send Queue command completed with error 0x%x\n",
+ retval);
+
+ /* strip off FW internal code */
+ retval &= 0xff;
+ }
+ cmd_completed = true;
+ if (!status && retval != ICE_AQ_RC_OK)
+ status = ICE_ERR_AQ_ERROR;
+ cq->sq_last_status = (enum ice_aq_err)retval;
+ }
+
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "ATQ: desc and buffer writeback:\n");
+
+ ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size);
+
+ /* save writeback AQ if requested */
+ if (details->wb_desc)
+ memcpy(details->wb_desc, desc_on_ring,
+ sizeof(*details->wb_desc));
+
+ /* update the error if time out occurred */
+ if (!cmd_completed) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "Control Send Queue Writeback timeout.\n");
+ status = ICE_ERR_AQ_TIMEOUT;
+ }
+
+sq_send_command_error:
+ mutex_unlock(&cq->sq_lock);
+ return status;
+}
+
+/**
+ * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function
+ * @desc: pointer to the temp descriptor (non DMA mem)
+ * @opcode: the opcode can be used to decide which flags to turn off or on
+ *
+ * Fill the desc with default values
+ */
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode)
+{
+ /* zero out the desc */
+ memset(desc, 0, sizeof(*desc));
+ desc->opcode = cpu_to_le16(opcode);
+ desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI);
+}
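
Tying the two routines above together, a direct (buffer-less) command can be issued as sketched here; the opcode value is a placeholder, not a real admin queue opcode.

	struct ice_aq_desc desc;
	enum ice_status status;

	ice_fill_dflt_direct_cmd_desc(&desc, 0x0001 /* placeholder opcode */);
	status = ice_sq_send_cmd(hw, &hw->adminq, &desc, NULL, 0, NULL);
	if (status)
		ice_debug(hw, ICE_DBG_AQ_MSG, "cmd failed, status %d\n",
			  status);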
+
+/**
+ * ice_clean_rq_elem
+ * @hw: pointer to the hw struct
+ * @cq: pointer to the specific Control queue
+ * @e: event info from the receive descriptor, includes any buffers
+ * @pending: number of events that could be left to process
+ *
+ * This function cleans one Admin Receive Queue element and returns
+ * the contents through e. It can also return how many events are
+ * left to process through 'pending'.
+ */
+enum ice_status
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_rq_event_info *e, u16 *pending)
+{
+ u16 ntc = cq->rq.next_to_clean;
+ enum ice_status ret_code = 0;
+ struct ice_aq_desc *desc;
+ struct ice_dma_mem *bi;
+ u16 desc_idx;
+ u16 datalen;
+ u16 flags;
+ u16 ntu;
+
+ /* pre-clean the event info */
+ memset(&e->desc, 0, sizeof(e->desc));
+
+ /* take the lock before we start messing with the ring */
+ mutex_lock(&cq->rq_lock);
+
+ if (!cq->rq.count) {
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "Control Receive queue not initialized.\n");
+ ret_code = ICE_ERR_AQ_EMPTY;
+ goto clean_rq_elem_err;
+ }
+
+ /* set next_to_use to head */
+ ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+
+ if (ntu == ntc) {
+ /* nothing to do - shouldn't need to update ring's values */
+ ret_code = ICE_ERR_AQ_NO_WORK;
+ goto clean_rq_elem_out;
+ }
+
+ /* now clean the next descriptor */
+ desc = ICE_CTL_Q_DESC(cq->rq, ntc);
+ desc_idx = ntc;
+
+ flags = le16_to_cpu(desc->flags);
+ if (flags & ICE_AQ_FLAG_ERR) {
+ ret_code = ICE_ERR_AQ_ERROR;
+ cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
+ ice_debug(hw, ICE_DBG_AQ_MSG,
+ "Control Receive Queue Event received with error 0x%x\n",
+ cq->rq_last_status);
+ }
+ memcpy(&e->desc, desc, sizeof(e->desc));
+ datalen = le16_to_cpu(desc->datalen);
+ e->msg_len = min(datalen, e->buf_len);
+ if (e->msg_buf && e->msg_len)
+ memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
+
+ ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n");
+
+ ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf,
+ cq->rq_buf_size);
+
+ /* Restore the original datalen and buffer address in the desc,
+ * FW updates datalen to indicate the event message size
+ */
+ bi = &cq->rq.r.rq_bi[ntc];
+ memset(desc, 0, sizeof(*desc));
+
+ desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+ if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+ desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+ desc->datalen = cpu_to_le16(bi->size);
+ desc->params.generic.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
+ desc->params.generic.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
+
+ /* set tail = the last cleaned desc index. */
+ wr32(hw, cq->rq.tail, ntc);
+ /* ntc is updated to tail + 1 */
+ ntc++;
+ if (ntc == cq->num_rq_entries)
+ ntc = 0;
+ cq->rq.next_to_clean = ntc;
+ cq->rq.next_to_use = ntu;
+
+clean_rq_elem_out:
+ /* Set pending if needed, unlock and return */
+ if (pending)
+ *pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc));
+clean_rq_elem_err:
+ mutex_unlock(&cq->rq_lock);
+
+ return ret_code;
+}
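
On the consumer side, a service loop might drain the ARQ as sketched below, passing a message buffer of at least rq_buf_size bytes and looping until pending reaches zero. The allocation strategy is illustrative only.

	struct ice_rq_event_info event = { };
	u16 pending = 0;

	event.buf_len = cq->rq_buf_size;
	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
	if (!event.msg_buf)
		return;

	do {
		if (ice_clean_rq_elem(hw, cq, &event, &pending))
			break;	/* ICE_ERR_AQ_NO_WORK once drained */
		/* dispatch on le16_to_cpu(event.desc.opcode) here */
	} while (pending);

	kfree(event.msg_buf);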
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
new file mode 100644
index 000000000000..ea02b89243e2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_CONTROLQ_H_
+#define _ICE_CONTROLQ_H_
+
+#include "ice_adminq_cmd.h"
+
+/* Maximum buffer lengths for all control queue types */
+#define ICE_AQ_MAX_BUF_LEN 4096
+
+#define ICE_CTL_Q_DESC(R, i) \
+ (&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
+
+#define ICE_CTL_Q_DESC_UNUSED(R) \
+ (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+ (R)->next_to_clean - (R)->next_to_use - 1)
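
Worked example for ICE_CTL_Q_DESC_UNUSED: with count = 4, next_to_clean = 1 and next_to_use = 3, the conditional adds count (since 1 > 3 is false), giving 4 + 1 - 3 - 1 = 1 unused descriptor. The trailing "- 1" keeps one slot permanently empty so a full ring can be told apart from an empty one.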
+
+/* Defines that help manage the driver vs FW API checks.
+ * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
+ */
+#define EXP_FW_API_VER_BRANCH 0x00
+#define EXP_FW_API_VER_MAJOR 0x00
+#define EXP_FW_API_VER_MINOR 0x01
+
+/* Different control queue types: These are mainly for SW consumption. */
+enum ice_ctl_q {
+ ICE_CTL_Q_UNKNOWN = 0,
+ ICE_CTL_Q_ADMIN,
+};
+
+/* Control Queue default settings */
+#define ICE_CTL_Q_SQ_CMD_TIMEOUT 250 /* msecs */
+
+struct ice_ctl_q_ring {
+ void *dma_head; /* Virtual address to dma head */
+ struct ice_dma_mem desc_buf; /* descriptor ring memory */
+ void *cmd_buf; /* command buffer memory */
+
+ union {
+ struct ice_dma_mem *sq_bi;
+ struct ice_dma_mem *rq_bi;
+ } r;
+
+ u16 count; /* Number of descriptors */
+
+ /* used for interrupt processing */
+ u16 next_to_use;
+ u16 next_to_clean;
+
+ /* used for queue tracking */
+ u32 head;
+ u32 tail;
+ u32 len;
+ u32 bah;
+ u32 bal;
+ u32 len_mask;
+ u32 len_ena_mask;
+ u32 head_mask;
+};
+
+/* sq transaction details */
+struct ice_sq_cd {
+ struct ice_aq_desc *wb_desc;
+};
+
+#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i]))
+
+/* rq event information */
+struct ice_rq_event_info {
+ struct ice_aq_desc desc;
+ u16 msg_len;
+ u16 buf_len;
+ u8 *msg_buf;
+};
+
+/* Control Queue information */
+struct ice_ctl_q_info {
+ enum ice_ctl_q qtype;
+ struct ice_ctl_q_ring rq; /* receive queue */
+ struct ice_ctl_q_ring sq; /* send queue */
+ u32 sq_cmd_timeout; /* send queue cmd write back timeout */
+ u16 num_rq_entries; /* receive queue depth */
+ u16 num_sq_entries; /* send queue depth */
+ u16 rq_buf_size; /* receive queue buffer size */
+ u16 sq_buf_size; /* send queue buffer size */
+ struct mutex sq_lock; /* Send queue lock */
+ struct mutex rq_lock; /* Receive queue lock */
+ enum ice_aq_err sq_last_status; /* last status on send queue */
+ enum ice_aq_err rq_last_status; /* last status on receive queue */
+};
+
+#endif /* _ICE_CONTROLQ_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
new file mode 100644
index 000000000000..0e14d7215a6e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_DEVIDS_H_
+#define _ICE_DEVIDS_H_
+
+/* Device IDs */
+/* Intel(R) Ethernet Controller C810 for backplane */
+#define ICE_DEV_ID_C810_BACKPLANE 0x1591
+/* Intel(R) Ethernet Controller C810 for QSFP */
+#define ICE_DEV_ID_C810_QSFP 0x1592
+/* Intel(R) Ethernet Controller C810 for SFP */
+#define ICE_DEV_ID_C810_SFP 0x1593
+/* Intel(R) Ethernet Controller C810/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_C810_10G_BASE_T 0x1594
+/* Intel(R) Ethernet Controller C810 1GbE */
+#define ICE_DEV_ID_C810_SGMII 0x1595
+
+#endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
new file mode 100644
index 000000000000..186764a5c263
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -0,0 +1,940 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* ethtool support for ice */
+
+#include "ice.h"
+
+struct ice_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int stat_offset;
+};
+
+#define ICE_STAT(_type, _name, _stat) { \
+ .stat_string = _name, \
+ .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+ .stat_offset = offsetof(_type, _stat) \
+}
+
+#define ICE_VSI_STAT(_name, _stat) \
+ ICE_STAT(struct ice_vsi, _name, _stat)
+#define ICE_PF_STAT(_name, _stat) \
+ ICE_STAT(struct ice_pf, _name, _stat)
+
+static int ice_q_stats_len(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ return ((np->vsi->num_txq + np->vsi->num_rxq) *
+ (sizeof(struct ice_q_stats) / sizeof(u64)));
+}
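
Each queue thus contributes sizeof(struct ice_q_stats) / sizeof(u64) entries, i.e. two u64 counters per queue (pkts and bytes, as read back in ice_get_ethtool_stats() below).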
+
+#define ICE_PF_STATS_LEN ARRAY_SIZE(ice_gstrings_pf_stats)
+#define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats)
+
+#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_VSI_STATS_LEN + \
+ ice_q_stats_len(n))
+
+static const struct ice_stats ice_gstrings_vsi_stats[] = {
+ ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
+ ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
+ ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+ ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
+ ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+ ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
+ ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+ ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
+ ICE_VSI_STAT("rx_discards", eth_stats.rx_discards),
+ ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
+ ICE_VSI_STAT("tx_linearize", tx_linearize),
+ ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
+ ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
+ ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+};
+
+/* These PF_STATs might look like duplicates of some NETDEV_STATs,
+ * but they aren't. This device is capable of supporting multiple
+ * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
+ * netdevs whereas the PF_STATs are for the physical function that's
+ * hosting these netdevs.
+ *
+ * The PF_STATs are appended to the netdev stats only when ethtool -S
+ * is queried on the base PF netdev.
+ */
+static struct ice_stats ice_gstrings_pf_stats[] = {
+ ICE_PF_STAT("tx_bytes", stats.eth.tx_bytes),
+ ICE_PF_STAT("rx_bytes", stats.eth.rx_bytes),
+ ICE_PF_STAT("tx_unicast", stats.eth.tx_unicast),
+ ICE_PF_STAT("rx_unicast", stats.eth.rx_unicast),
+ ICE_PF_STAT("tx_multicast", stats.eth.tx_multicast),
+ ICE_PF_STAT("rx_multicast", stats.eth.rx_multicast),
+ ICE_PF_STAT("tx_broadcast", stats.eth.tx_broadcast),
+ ICE_PF_STAT("rx_broadcast", stats.eth.rx_broadcast),
+ ICE_PF_STAT("tx_errors", stats.eth.tx_errors),
+ ICE_PF_STAT("tx_size_64", stats.tx_size_64),
+ ICE_PF_STAT("rx_size_64", stats.rx_size_64),
+ ICE_PF_STAT("tx_size_127", stats.tx_size_127),
+ ICE_PF_STAT("rx_size_127", stats.rx_size_127),
+ ICE_PF_STAT("tx_size_255", stats.tx_size_255),
+ ICE_PF_STAT("rx_size_255", stats.rx_size_255),
+ ICE_PF_STAT("tx_size_511", stats.tx_size_511),
+ ICE_PF_STAT("rx_size_511", stats.rx_size_511),
+ ICE_PF_STAT("tx_size_1023", stats.tx_size_1023),
+ ICE_PF_STAT("rx_size_1023", stats.rx_size_1023),
+ ICE_PF_STAT("tx_size_1522", stats.tx_size_1522),
+ ICE_PF_STAT("rx_size_1522", stats.rx_size_1522),
+ ICE_PF_STAT("tx_size_big", stats.tx_size_big),
+ ICE_PF_STAT("rx_size_big", stats.rx_size_big),
+ ICE_PF_STAT("link_xon_tx", stats.link_xon_tx),
+ ICE_PF_STAT("link_xon_rx", stats.link_xon_rx),
+ ICE_PF_STAT("link_xoff_tx", stats.link_xoff_tx),
+ ICE_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
+ ICE_PF_STAT("tx_dropped_link_down", stats.tx_dropped_link_down),
+ ICE_PF_STAT("rx_undersize", stats.rx_undersize),
+ ICE_PF_STAT("rx_fragments", stats.rx_fragments),
+ ICE_PF_STAT("rx_oversize", stats.rx_oversize),
+ ICE_PF_STAT("rx_jabber", stats.rx_jabber),
+ ICE_PF_STAT("rx_csum_bad", hw_csum_rx_error),
+ ICE_PF_STAT("rx_length_errors", stats.rx_len_errors),
+ ICE_PF_STAT("rx_dropped", stats.eth.rx_discards),
+ ICE_PF_STAT("rx_crc_errors", stats.crc_errors),
+ ICE_PF_STAT("illegal_bytes", stats.illegal_bytes),
+ ICE_PF_STAT("mac_local_faults", stats.mac_local_faults),
+ ICE_PF_STAT("mac_remote_faults", stats.mac_remote_faults),
+};
+
+static u32 ice_regs_dump_list[] = {
+ PFGEN_STATE,
+ PRTGEN_STATUS,
+ QRX_CTRL(0),
+ QINT_TQCTL(0),
+ QINT_RQCTL(0),
+ PFINT_OICR_ENA,
+ QRX_ITR(0),
+};
+
+/**
+ * ice_nvm_version_str - format the NVM version string
+ * @hw: ptr to the hardware info
+ */
+static char *ice_nvm_version_str(struct ice_hw *hw)
+{
+ static char buf[ICE_ETHTOOL_FWVER_LEN];
+ u8 ver, patch;
+ u32 full_ver;
+ u16 build;
+
+ full_ver = hw->nvm.oem_ver;
+ ver = (u8)((full_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT);
+ build = (u16)((full_ver & ICE_OEM_VER_BUILD_MASK) >>
+ ICE_OEM_VER_BUILD_SHIFT);
+ patch = (u8)(full_ver & ICE_OEM_VER_PATCH_MASK);
+
+ snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d",
+ (hw->nvm.ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT,
+ (hw->nvm.ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT,
+ hw->nvm.eetrack, ver, build, patch);
+
+ return buf;
+}
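
For orientation, the format string above yields something like "1.20 0x80001a4f 1.5.4": NVM major.minor version, the EETRACK id, then the OEM version, build and patch (the concrete values here are invented). Note that the returned pointer refers to a static buffer, so the string must be consumed before the next call.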
+
+static void
+ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+
+ strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version));
+ strlcpy(drvinfo->fw_version, ice_nvm_version_str(&pf->hw),
+ sizeof(drvinfo->fw_version));
+ strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
+ sizeof(drvinfo->bus_info));
+}
+
+static int ice_get_regs_len(struct net_device __always_unused *netdev)
+{
+	/* ethtool expects the dump size in bytes, not in registers */
+	return sizeof(ice_regs_dump_list);
+}
+
+static void
+ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 *regs_buf = (u32 *)p;
+ int i;
+
+ regs->version = 1;
+
+	for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list); ++i)
+ regs_buf[i] = rd32(hw, ice_regs_dump_list[i]);
+}
+
+static u32 ice_get_msglevel(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+ if (pf->hw.debug_mask)
+ netdev_info(netdev, "hw debug_mask: 0x%llX\n",
+ pf->hw.debug_mask);
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+ return pf->msg_enable;
+}
+
+static void ice_set_msglevel(struct net_device *netdev, u32 data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+ if (ICE_DBG_USER & data)
+ pf->hw.debug_mask = data;
+ else
+ pf->msg_enable = data;
+#else
+ pf->msg_enable = data;
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+}
+
+static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ char *p = (char *)data;
+ unsigned int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < ICE_VSI_STATS_LEN; i++) {
+ snprintf(p, ETH_GSTRING_LEN, "%s",
+ ice_gstrings_vsi_stats[i].stat_string);
+ p += ETH_GSTRING_LEN;
+ }
+
+ ice_for_each_txq(vsi, i) {
+ snprintf(p, ETH_GSTRING_LEN,
+ "tx-queue-%u.tx_packets", i);
+ p += ETH_GSTRING_LEN;
+ snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
+
+ ice_for_each_rxq(vsi, i) {
+ snprintf(p, ETH_GSTRING_LEN,
+ "rx-queue-%u.rx_packets", i);
+ p += ETH_GSTRING_LEN;
+ snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
+
+ if (vsi->type != ICE_VSI_PF)
+ return;
+
+ for (i = 0; i < ICE_PF_STATS_LEN; i++) {
+ snprintf(p, ETH_GSTRING_LEN, "port.%s",
+ ice_gstrings_pf_stats[i].stat_string);
+ p += ETH_GSTRING_LEN;
+ }
+
+ break;
+ default:
+ break;
+ }
+}
+
+static int ice_get_sset_count(struct net_device *netdev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ICE_ALL_STATS_LEN(netdev);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void
+ice_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats, u64 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_ring *ring;
+ unsigned int j = 0;
+ int i = 0;
+ char *p;
+
+ for (j = 0; j < ICE_VSI_STATS_LEN; j++) {
+ p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset;
+ data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+
+ /* populate per queue stats */
+ rcu_read_lock();
+
+	ice_for_each_txq(vsi, j) {
+		ring = READ_ONCE(vsi->tx_rings[j]);
+		if (!ring) {
+			/* emit zeros to stay aligned with the stat strings */
+			data[i++] = 0;
+			data[i++] = 0;
+			continue;
+		}
+		data[i++] = ring->stats.pkts;
+		data[i++] = ring->stats.bytes;
+	}
+
+	ice_for_each_rxq(vsi, j) {
+		ring = READ_ONCE(vsi->rx_rings[j]);
+		if (!ring) {
+			data[i++] = 0;
+			data[i++] = 0;
+			continue;
+		}
+		data[i++] = ring->stats.pkts;
+		data[i++] = ring->stats.bytes;
+	}
+
+ rcu_read_unlock();
+
+ if (vsi->type != ICE_VSI_PF)
+ return;
+
+ for (j = 0; j < ICE_PF_STATS_LEN; j++) {
+ p = (char *)pf + ice_gstrings_pf_stats[j].stat_offset;
+ data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+}
+
+static int
+ice_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *ks)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_link_status *hw_link_info;
+ struct ice_vsi *vsi = np->vsi;
+ bool link_up;
+
+ hw_link_info = &vsi->port_info->phy.link_info;
+ link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
+
+ /* set speed and duplex */
+ if (link_up) {
+ switch (hw_link_info->link_speed) {
+ case ICE_AQ_LINK_SPEED_100MB:
+ ks->base.speed = SPEED_100;
+ break;
+ case ICE_AQ_LINK_SPEED_2500MB:
+ ks->base.speed = SPEED_2500;
+ break;
+ case ICE_AQ_LINK_SPEED_5GB:
+ ks->base.speed = SPEED_5000;
+ break;
+ case ICE_AQ_LINK_SPEED_10GB:
+ ks->base.speed = SPEED_10000;
+ break;
+ case ICE_AQ_LINK_SPEED_25GB:
+ ks->base.speed = SPEED_25000;
+ break;
+ case ICE_AQ_LINK_SPEED_40GB:
+ ks->base.speed = SPEED_40000;
+ break;
+ default:
+ ks->base.speed = SPEED_UNKNOWN;
+ break;
+ }
+
+ ks->base.duplex = DUPLEX_FULL;
+ } else {
+ ks->base.speed = SPEED_UNKNOWN;
+ ks->base.duplex = DUPLEX_UNKNOWN;
+ }
+
+ /* set autoneg settings */
+ ks->base.autoneg = ((hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+ /* set media type settings */
+ switch (vsi->port_info->phy.media_type) {
+ case ICE_MEDIA_FIBER:
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ks->base.port = PORT_FIBRE;
+ break;
+ case ICE_MEDIA_BASET:
+ ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+ ks->base.port = PORT_TP;
+ break;
+ case ICE_MEDIA_BACKPLANE:
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Backplane);
+ ks->base.port = PORT_NONE;
+ break;
+ case ICE_MEDIA_DA:
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+ ks->base.port = PORT_DA;
+ break;
+ default:
+ ks->base.port = PORT_OTHER;
+ break;
+ }
+
+ /* flow control is symmetric and always supported */
+ ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+
+ switch (vsi->port_info->fc.req_mode) {
+ case ICE_FC_FULL:
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+ break;
+ case ICE_FC_TX_PAUSE:
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Asym_Pause);
+ break;
+ case ICE_FC_RX_PAUSE:
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Asym_Pause);
+ break;
+ case ICE_FC_PFC:
+ default:
+ ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_del_link_mode(ks, advertising,
+ Asym_Pause);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: buffer to return Rx flow classification rules
+ *
+ * Returns 0 if the command is supported, -EOPNOTSUPP otherwise.
+ */
+static int ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+ u32 __always_unused *rule_locs)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = vsi->rss_size;
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void
+ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ ring->rx_max_pending = ICE_MAX_NUM_DESC;
+ ring->tx_max_pending = ICE_MAX_NUM_DESC;
+ ring->rx_pending = vsi->rx_rings[0]->count;
+ ring->tx_pending = vsi->tx_rings[0]->count;
+ ring->rx_mini_pending = ICE_MIN_NUM_DESC;
+ ring->rx_mini_max_pending = 0;
+ ring->rx_jumbo_max_pending = 0;
+ ring->rx_jumbo_pending = 0;
+}
+
+static int
+ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
+{
+ struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int i, timeout = 50, err = 0;
+ u32 new_rx_cnt, new_tx_cnt;
+
+ if (ring->tx_pending > ICE_MAX_NUM_DESC ||
+ ring->tx_pending < ICE_MIN_NUM_DESC ||
+ ring->rx_pending > ICE_MAX_NUM_DESC ||
+ ring->rx_pending < ICE_MIN_NUM_DESC) {
+ netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d]\n",
+ ring->tx_pending, ring->rx_pending,
+ ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC);
+ return -EINVAL;
+ }
+
+ new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
+ new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE);
+
+ /* if nothing to do return success */
+ if (new_tx_cnt == vsi->tx_rings[0]->count &&
+ new_rx_cnt == vsi->rx_rings[0]->count) {
+ netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
+ return 0;
+ }
+
+ while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+
+ /* set for the next time the netdev is started */
+ if (!netif_running(vsi->netdev)) {
+ for (i = 0; i < vsi->alloc_txq; i++)
+ vsi->tx_rings[i]->count = new_tx_cnt;
+ for (i = 0; i < vsi->alloc_rxq; i++)
+ vsi->rx_rings[i]->count = new_rx_cnt;
+ netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
+ goto done;
+ }
+
+ if (new_tx_cnt == vsi->tx_rings[0]->count)
+ goto process_rx;
+
+ /* alloc updated Tx resources */
+ netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
+ vsi->tx_rings[0]->count, new_tx_cnt);
+
+ tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+ sizeof(struct ice_ring), GFP_KERNEL);
+ if (!tx_rings) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = 0; i < vsi->num_txq; i++) {
+ /* clone ring and setup updated count */
+ tx_rings[i] = *vsi->tx_rings[i];
+ tx_rings[i].count = new_tx_cnt;
+ tx_rings[i].desc = NULL;
+ tx_rings[i].tx_buf = NULL;
+ err = ice_setup_tx_ring(&tx_rings[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ice_clean_tx_ring(&tx_rings[i]);
+ }
+ devm_kfree(&pf->pdev->dev, tx_rings);
+ goto done;
+ }
+ }
+
+process_rx:
+ if (new_rx_cnt == vsi->rx_rings[0]->count)
+ goto process_link;
+
+ /* alloc updated Rx resources */
+ netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
+ vsi->rx_rings[0]->count, new_rx_cnt);
+
+ rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+ sizeof(struct ice_ring), GFP_KERNEL);
+ if (!rx_rings) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = 0; i < vsi->num_rxq; i++) {
+ /* clone ring and setup updated count */
+ rx_rings[i] = *vsi->rx_rings[i];
+ rx_rings[i].count = new_rx_cnt;
+ rx_rings[i].desc = NULL;
+ rx_rings[i].rx_buf = NULL;
+ /* this is to allow wr32 to have something to write to
+ * during early allocation of Rx buffers
+ */
+ rx_rings[i].tail = vsi->back->hw.hw_addr + PRTGEN_STATUS;
+
+ err = ice_setup_rx_ring(&rx_rings[i]);
+ if (err)
+ goto rx_unwind;
+
+ /* allocate Rx buffers */
+ err = ice_alloc_rx_bufs(&rx_rings[i],
+ ICE_DESC_UNUSED(&rx_rings[i]));
+rx_unwind:
+ if (err) {
+ while (i) {
+ i--;
+ ice_free_rx_ring(&rx_rings[i]);
+ }
+ devm_kfree(&pf->pdev->dev, rx_rings);
+ err = -ENOMEM;
+ goto free_tx;
+ }
+ }
+
+process_link:
+	/* Bring interface down, copy in the new ring info, then restore the
+	 * interface. If the VSI is up, bring it down and then back up.
+	 */
+ if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
+ ice_down(vsi);
+
+ if (tx_rings) {
+ for (i = 0; i < vsi->alloc_txq; i++) {
+ ice_free_tx_ring(vsi->tx_rings[i]);
+ *vsi->tx_rings[i] = tx_rings[i];
+ }
+ devm_kfree(&pf->pdev->dev, tx_rings);
+ }
+
+ if (rx_rings) {
+ for (i = 0; i < vsi->alloc_rxq; i++) {
+ ice_free_rx_ring(vsi->rx_rings[i]);
+ /* copy the real tail offset */
+ rx_rings[i].tail = vsi->rx_rings[i]->tail;
+ /* this is to fake out the allocation routine
+ * into thinking it has to realloc everything
+ * but the recycling logic will let us re-use
+ * the buffers allocated above
+ */
+ rx_rings[i].next_to_use = 0;
+ rx_rings[i].next_to_clean = 0;
+ rx_rings[i].next_to_alloc = 0;
+ *vsi->rx_rings[i] = rx_rings[i];
+ }
+ devm_kfree(&pf->pdev->dev, rx_rings);
+ }
+
+ ice_up(vsi);
+ }
+ goto done;
+
+free_tx:
+ /* error cleanup if the Rx allocations failed after getting Tx */
+ if (tx_rings) {
+ for (i = 0; i < vsi->alloc_txq; i++)
+ ice_free_tx_ring(&tx_rings[i]);
+ devm_kfree(&pf->pdev->dev, tx_rings);
+ }
+
+done:
+ clear_bit(__ICE_CFG_BUSY, pf->state);
+ return err;
+}
+
+static int ice_nway_reset(struct net_device *netdev)
+{
+ /* restart autonegotiation */
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_link_status *hw_link_info;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_port_info *pi;
+ enum ice_status status;
+ bool link_up;
+
+ pi = vsi->port_info;
+ hw_link_info = &pi->phy.link_info;
+ link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+ status = ice_aq_set_link_restart_an(pi, link_up, NULL);
+ if (status) {
+ netdev_info(netdev, "link restart failed, err %d aq_err %d\n",
+ status, pi->hw->adminq.sq_last_status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_get_pauseparam - Get Flow Control status
+ * @netdev: network interface device structure
+ * @pause: ethernet pause (flow control) parameters
+ */
+static void
+ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_port_info *pi;
+
+ pi = np->vsi->port_info;
+ pause->autoneg =
+ ((pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+ if (pi->fc.current_mode == ICE_FC_RX_PAUSE) {
+ pause->rx_pause = 1;
+ } else if (pi->fc.current_mode == ICE_FC_TX_PAUSE) {
+ pause->tx_pause = 1;
+ } else if (pi->fc.current_mode == ICE_FC_FULL) {
+ pause->rx_pause = 1;
+ pause->tx_pause = 1;
+ }
+}
+
+/**
+ * ice_set_pauseparam - Set Flow Control parameter
+ * @netdev: network interface device structure
+ * @pause: requested Tx/Rx flow control (pause) parameters
+ */
+static int
+ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_link_status *hw_link_info;
+ struct ice_pf *pf = np->vsi->back;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_port_info *pi;
+ enum ice_status status;
+ u8 aq_failures;
+ bool link_up;
+ int err = 0;
+
+ pi = vsi->port_info;
+ hw_link_info = &pi->phy.link_info;
+ link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+ /* Changing the port's flow control is not supported if this isn't the
+ * PF VSI
+ */
+ if (vsi->type != ICE_VSI_PF) {
+ netdev_info(netdev, "Changing flow control parameters only supported for PF VSI\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (pause->autoneg != (hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+ netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* If we have link and don't have autoneg */
+ if (!test_bit(__ICE_DOWN, pf->state) &&
+ !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+ /* Send a message that it might not necessarily work */
+ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
+ }
+
+ if (pause->rx_pause && pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_FULL;
+ else if (pause->rx_pause && !pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_RX_PAUSE;
+ else if (!pause->rx_pause && pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_TX_PAUSE;
+ else if (!pause->rx_pause && !pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_NONE;
+ else
+ return -EINVAL;
+
+ /* Tell the OS the link is going down; the link will go back up when
+ * FW says it is ready asynchronously
+ */
+ ice_print_link_msg(vsi, false);
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+
+ /* Set the FC mode and only restart AN if link is up */
+ status = ice_set_fc(pi, &aq_failures, link_up);
+
+ if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) {
+ netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ err = -EAGAIN;
+ } else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) {
+ netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ err = -EAGAIN;
+ } else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) {
+ netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ err = -EAGAIN;
+ }
+
+ if (!test_bit(__ICE_DOWN, pf->state)) {
+ /* Give it a little more time to try to come back */
+ msleep(75);
+ if (!test_bit(__ICE_DOWN, pf->state))
+ return ice_nway_reset(netdev);
+ }
+
+ return err;
+}
+
+/**
+ * ice_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the RSS hash key size.
+ */
+static u32 ice_get_rxfh_key_size(struct net_device __always_unused *netdev)
+{
+ return ICE_VSIQF_HKEY_ARRAY_SIZE;
+}
+
+/**
+ * ice_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ */
+static u32 ice_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ return np->vsi->rss_table_size;
+}
+
+/**
+ * ice_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware.
+ */
+static int
+ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int ret = 0, i;
+ u8 *lut;
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+
+ if (!indir)
+ return 0;
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ /* RSS not supported; return an error */
+ netdev_warn(netdev, "RSS is not configured on this VSI!\n");
+ return -EIO;
+ }
+
+ lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ for (i = 0; i < vsi->rss_table_size; i++)
+ indir[i] = (u32)(lut[i]);
+
+out:
+ devm_kfree(&pf->pdev->dev, lut);
+ return ret;
+}
+
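+/* From user space, the two RSS callbacks above and below can be exercised
+ * with the ethtool utility: "ethtool -x <dev>" reads the hash key and
+ * indirection table via ice_get_rxfh(), and "ethtool -X <dev>" reprograms
+ * the table via ice_set_rxfh() (illustrative usage note).
+ */
+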
+/**
+ * ice_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * returns 0 after programming the table.
+ */
+static int ice_set_rxfh(struct net_device *netdev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u8 *seed = NULL;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ /* RSS not supported; return an error */
+ netdev_warn(netdev, "RSS is not configured on this VSI!\n");
+ return -EIO;
+ }
+
+ if (key) {
+ if (!vsi->rss_hkey_user) {
+ vsi->rss_hkey_user =
+ devm_kzalloc(&pf->pdev->dev,
+ ICE_VSIQF_HKEY_ARRAY_SIZE,
+ GFP_KERNEL);
+ if (!vsi->rss_hkey_user)
+ return -ENOMEM;
+ }
+ memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE);
+ seed = vsi->rss_hkey_user;
+ }
+
+ if (!vsi->rss_lut_user) {
+ vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev,
+ vsi->rss_table_size,
+ GFP_KERNEL);
+ if (!vsi->rss_lut_user)
+ return -ENOMEM;
+ }
+
+ /* Each 32-bit element of 'indir' is stored as an 8-bit LUT entry */
+ if (indir) {
+ int i;
+
+ for (i = 0; i < vsi->rss_table_size; i++)
+ vsi->rss_lut_user[i] = (u8)(indir[i]);
+ } else {
+ ice_fill_rss_lut(vsi->rss_lut_user, vsi->rss_table_size,
+ vsi->rss_size);
+ }
+
+ if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size))
+ return -EIO;
+
+ return 0;
+}
+
+static const struct ethtool_ops ice_ethtool_ops = {
+ .get_link_ksettings = ice_get_link_ksettings,
+ .get_drvinfo = ice_get_drvinfo,
+ .get_regs_len = ice_get_regs_len,
+ .get_regs = ice_get_regs,
+ .get_msglevel = ice_get_msglevel,
+ .set_msglevel = ice_set_msglevel,
+ .get_link = ethtool_op_get_link,
+ .get_strings = ice_get_strings,
+ .get_ethtool_stats = ice_get_ethtool_stats,
+ .get_sset_count = ice_get_sset_count,
+ .get_rxnfc = ice_get_rxnfc,
+ .get_ringparam = ice_get_ringparam,
+ .set_ringparam = ice_set_ringparam,
+ .nway_reset = ice_nway_reset,
+ .get_pauseparam = ice_get_pauseparam,
+ .set_pauseparam = ice_set_pauseparam,
+ .get_rxfh_key_size = ice_get_rxfh_key_size,
+ .get_rxfh_indir_size = ice_get_rxfh_indir_size,
+ .get_rxfh = ice_get_rxfh,
+ .set_rxfh = ice_set_rxfh,
+};
+
+/**
+ * ice_set_ethtool_ops - setup netdev ethtool ops
+ * @netdev: network interface device structure
+ *
+ * Sets up the netdev's ethtool ops with ice-specific ops
+ */
+void ice_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &ice_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
new file mode 100644
index 000000000000..1b9e2ef48a9d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Machine-generated file */
+
+#ifndef _ICE_HW_AUTOGEN_H_
+#define _ICE_HW_AUTOGEN_H_
+
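+/* Note: ICE_M(mask, shift) is defined elsewhere in the driver and is assumed
+ * here to expand to ((mask) << (shift)); e.g. GLGEN_RSTAT_RESET_TYPE_M below
+ * would evaluate to ICE_M(0x3, 2) == 0xC, selecting bits 3:2 of GLGEN_RSTAT.
+ */
+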
+#define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4))
+#define PF_FW_ARQBAH 0x00080180
+#define PF_FW_ARQBAL 0x00080080
+#define PF_FW_ARQH 0x00080380
+#define PF_FW_ARQH_ARQH_S 0
+#define PF_FW_ARQH_ARQH_M ICE_M(0x3FF, PF_FW_ARQH_ARQH_S)
+#define PF_FW_ARQLEN 0x00080280
+#define PF_FW_ARQLEN_ARQLEN_S 0
+#define PF_FW_ARQLEN_ARQLEN_M ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S)
+#define PF_FW_ARQLEN_ARQVFE_S 28
+#define PF_FW_ARQLEN_ARQVFE_M BIT(PF_FW_ARQLEN_ARQVFE_S)
+#define PF_FW_ARQLEN_ARQOVFL_S 29
+#define PF_FW_ARQLEN_ARQOVFL_M BIT(PF_FW_ARQLEN_ARQOVFL_S)
+#define PF_FW_ARQLEN_ARQCRIT_S 30
+#define PF_FW_ARQLEN_ARQCRIT_M BIT(PF_FW_ARQLEN_ARQCRIT_S)
+#define PF_FW_ARQLEN_ARQENABLE_S 31
+#define PF_FW_ARQLEN_ARQENABLE_M BIT(PF_FW_ARQLEN_ARQENABLE_S)
+#define PF_FW_ARQT 0x00080480
+#define PF_FW_ATQBAH 0x00080100
+#define PF_FW_ATQBAL 0x00080000
+#define PF_FW_ATQH 0x00080300
+#define PF_FW_ATQH_ATQH_S 0
+#define PF_FW_ATQH_ATQH_M ICE_M(0x3FF, PF_FW_ATQH_ATQH_S)
+#define PF_FW_ATQLEN 0x00080200
+#define PF_FW_ATQLEN_ATQLEN_S 0
+#define PF_FW_ATQLEN_ATQLEN_M ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S)
+#define PF_FW_ATQLEN_ATQVFE_S 28
+#define PF_FW_ATQLEN_ATQVFE_M BIT(PF_FW_ATQLEN_ATQVFE_S)
+#define PF_FW_ATQLEN_ATQOVFL_S 29
+#define PF_FW_ATQLEN_ATQOVFL_M BIT(PF_FW_ATQLEN_ATQOVFL_S)
+#define PF_FW_ATQLEN_ATQCRIT_S 30
+#define PF_FW_ATQLEN_ATQCRIT_M BIT(PF_FW_ATQLEN_ATQCRIT_S)
+#define PF_FW_ATQLEN_ATQENABLE_S 31
+#define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S)
+#define PF_FW_ATQT 0x00080400
+
+#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256))
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S 8
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S 16
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S 24
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S)
+#define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S 30
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_1(_i) (0x0045c900 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S 0
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S 30
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_2(_i) (0x0045ca00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S 0
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S 30
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_3(_i) (0x0045cb00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S 0
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S 30
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S)
+
+#define QRXFLXP_CNTXT(_QRX) (0x00480000 + ((_QRX) * 4))
+#define QRXFLXP_CNTXT_RXDID_IDX_S 0
+#define QRXFLXP_CNTXT_RXDID_IDX_M ICE_M(0x3F, QRXFLXP_CNTXT_RXDID_IDX_S)
+#define QRXFLXP_CNTXT_RXDID_PRIO_S 8
+#define QRXFLXP_CNTXT_RXDID_PRIO_M ICE_M(0x7, QRXFLXP_CNTXT_RXDID_PRIO_S)
+#define QRXFLXP_CNTXT_TS_S 11
+#define QRXFLXP_CNTXT_TS_M BIT(QRXFLXP_CNTXT_TS_S)
+#define GLGEN_RSTAT 0x000B8188
+#define GLGEN_RSTAT_DEVSTATE_S 0
+#define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, GLGEN_RSTAT_DEVSTATE_S)
+#define GLGEN_RSTCTL 0x000B8180
+#define GLGEN_RSTCTL_GRSTDEL_S 0
+#define GLGEN_RSTCTL_GRSTDEL_M ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
+#define GLGEN_RSTAT_RESET_TYPE_S 2
+#define GLGEN_RSTAT_RESET_TYPE_M ICE_M(0x3, GLGEN_RSTAT_RESET_TYPE_S)
+#define GLGEN_RTRIG 0x000B8190
+#define GLGEN_RTRIG_CORER_S 0
+#define GLGEN_RTRIG_CORER_M BIT(GLGEN_RTRIG_CORER_S)
+#define GLGEN_RTRIG_GLOBR_S 1
+#define GLGEN_RTRIG_GLOBR_M BIT(GLGEN_RTRIG_GLOBR_S)
+#define GLGEN_STAT 0x000B612C
+#define PFGEN_CTRL 0x00091000
+#define PFGEN_CTRL_PFSWR_S 0
+#define PFGEN_CTRL_PFSWR_M BIT(PFGEN_CTRL_PFSWR_S)
+#define PFGEN_STATE 0x00088000
+#define PRTGEN_STATUS 0x000B8100
+#define PFHMC_ERRORDATA 0x00520500
+#define PFHMC_ERRORINFO 0x00520400
+#define GLINT_DYN_CTL(_INT) (0x00160000 + ((_INT) * 4))
+#define GLINT_DYN_CTL_INTENA_S 0
+#define GLINT_DYN_CTL_INTENA_M BIT(GLINT_DYN_CTL_INTENA_S)
+#define GLINT_DYN_CTL_CLEARPBA_S 1
+#define GLINT_DYN_CTL_CLEARPBA_M BIT(GLINT_DYN_CTL_CLEARPBA_S)
+#define GLINT_DYN_CTL_SWINT_TRIG_S 2
+#define GLINT_DYN_CTL_SWINT_TRIG_M BIT(GLINT_DYN_CTL_SWINT_TRIG_S)
+#define GLINT_DYN_CTL_ITR_INDX_S 3
+#define GLINT_DYN_CTL_SW_ITR_INDX_S 25
+#define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, GLINT_DYN_CTL_SW_ITR_INDX_S)
+#define GLINT_DYN_CTL_INTENA_MSK_S 31
+#define GLINT_DYN_CTL_INTENA_MSK_M BIT(GLINT_DYN_CTL_INTENA_MSK_S)
+#define GLINT_ITR(_i, _INT) (0x00154000 + ((_i) * 8192 + (_INT) * 4))
+#define PFINT_FW_CTL 0x0016C800
+#define PFINT_FW_CTL_MSIX_INDX_S 0
+#define PFINT_FW_CTL_MSIX_INDX_M ICE_M(0x7FF, PFINT_FW_CTL_MSIX_INDX_S)
+#define PFINT_FW_CTL_ITR_INDX_S 11
+#define PFINT_FW_CTL_ITR_INDX_M ICE_M(0x3, PFINT_FW_CTL_ITR_INDX_S)
+#define PFINT_FW_CTL_CAUSE_ENA_S 30
+#define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S)
+#define PFINT_OICR 0x0016CA00
+#define PFINT_OICR_INTEVENT_S 0
+#define PFINT_OICR_INTEVENT_M BIT(PFINT_OICR_INTEVENT_S)
+#define PFINT_OICR_HLP_RDY_S 14
+#define PFINT_OICR_HLP_RDY_M BIT(PFINT_OICR_HLP_RDY_S)
+#define PFINT_OICR_CPM_RDY_S 15
+#define PFINT_OICR_CPM_RDY_M BIT(PFINT_OICR_CPM_RDY_S)
+#define PFINT_OICR_ECC_ERR_S 16
+#define PFINT_OICR_ECC_ERR_M BIT(PFINT_OICR_ECC_ERR_S)
+#define PFINT_OICR_MAL_DETECT_S 19
+#define PFINT_OICR_MAL_DETECT_M BIT(PFINT_OICR_MAL_DETECT_S)
+#define PFINT_OICR_GRST_S 20
+#define PFINT_OICR_GRST_M BIT(PFINT_OICR_GRST_S)
+#define PFINT_OICR_PCI_EXCEPTION_S 21
+#define PFINT_OICR_PCI_EXCEPTION_M BIT(PFINT_OICR_PCI_EXCEPTION_S)
+#define PFINT_OICR_GPIO_S 22
+#define PFINT_OICR_GPIO_M BIT(PFINT_OICR_GPIO_S)
+#define PFINT_OICR_STORM_DETECT_S 24
+#define PFINT_OICR_STORM_DETECT_M BIT(PFINT_OICR_STORM_DETECT_S)
+#define PFINT_OICR_HMC_ERR_S 26
+#define PFINT_OICR_HMC_ERR_M BIT(PFINT_OICR_HMC_ERR_S)
+#define PFINT_OICR_PE_CRITERR_S 28
+#define PFINT_OICR_PE_CRITERR_M BIT(PFINT_OICR_PE_CRITERR_S)
+#define PFINT_OICR_CTL 0x0016CA80
+#define PFINT_OICR_CTL_MSIX_INDX_S 0
+#define PFINT_OICR_CTL_MSIX_INDX_M ICE_M(0x7FF, PFINT_OICR_CTL_MSIX_INDX_S)
+#define PFINT_OICR_CTL_ITR_INDX_S 11
+#define PFINT_OICR_CTL_ITR_INDX_M ICE_M(0x3, PFINT_OICR_CTL_ITR_INDX_S)
+#define PFINT_OICR_CTL_CAUSE_ENA_S 30
+#define PFINT_OICR_CTL_CAUSE_ENA_M BIT(PFINT_OICR_CTL_CAUSE_ENA_S)
+#define PFINT_OICR_ENA 0x0016C900
+#define QINT_RQCTL(_QRX) (0x00150000 + ((_QRX) * 4))
+#define QINT_RQCTL_MSIX_INDX_S 0
+#define QINT_RQCTL_ITR_INDX_S 11
+#define QINT_RQCTL_CAUSE_ENA_S 30
+#define QINT_RQCTL_CAUSE_ENA_M BIT(QINT_RQCTL_CAUSE_ENA_S)
+#define QINT_TQCTL(_DBQM) (0x00140000 + ((_DBQM) * 4))
+#define QINT_TQCTL_MSIX_INDX_S 0
+#define QINT_TQCTL_ITR_INDX_S 11
+#define QINT_TQCTL_CAUSE_ENA_S 30
+#define QINT_TQCTL_CAUSE_ENA_M BIT(QINT_TQCTL_CAUSE_ENA_S)
+#define GLLAN_RCTL_0 0x002941F8
+#define QRX_CONTEXT(_i, _QRX) (0x00280000 + ((_i) * 8192 + (_QRX) * 4))
+#define QRX_CTRL(_QRX) (0x00120000 + ((_QRX) * 4))
+#define QRX_CTRL_MAX_INDEX 2047
+#define QRX_CTRL_QENA_REQ_S 0
+#define QRX_CTRL_QENA_REQ_M BIT(QRX_CTRL_QENA_REQ_S)
+#define QRX_CTRL_QENA_STAT_S 2
+#define QRX_CTRL_QENA_STAT_M BIT(QRX_CTRL_QENA_STAT_S)
+#define QRX_ITR(_QRX) (0x00292000 + ((_QRX) * 4))
+#define QRX_TAIL(_QRX) (0x00290000 + ((_QRX) * 4))
+#define GLNVM_FLA 0x000B6108
+#define GLNVM_FLA_LOCKED_S 6
+#define GLNVM_FLA_LOCKED_M BIT(GLNVM_FLA_LOCKED_S)
+#define GLNVM_GENS 0x000B6100
+#define GLNVM_GENS_SR_SIZE_S 5
+#define GLNVM_GENS_SR_SIZE_M ICE_M(0x7, GLNVM_GENS_SR_SIZE_S)
+#define GLNVM_ULD 0x000B6008
+#define GLNVM_ULD_CORER_DONE_S 3
+#define GLNVM_ULD_CORER_DONE_M BIT(GLNVM_ULD_CORER_DONE_S)
+#define GLNVM_ULD_GLOBR_DONE_S 4
+#define GLNVM_ULD_GLOBR_DONE_M BIT(GLNVM_ULD_GLOBR_DONE_S)
+#define PF_FUNC_RID 0x0009E880
+#define PF_FUNC_RID_FUNC_NUM_S 0
+#define PF_FUNC_RID_FUNC_NUM_M ICE_M(0x7, PF_FUNC_RID_FUNC_NUM_S)
+#define GLPRT_BPRCH(_i) (0x00381384 + ((_i) * 8))
+#define GLPRT_BPRCL(_i) (0x00381380 + ((_i) * 8))
+#define GLPRT_BPTCH(_i) (0x00381244 + ((_i) * 8))
+#define GLPRT_BPTCL(_i) (0x00381240 + ((_i) * 8))
+#define GLPRT_CRCERRS(_i) (0x00380100 + ((_i) * 8))
+#define GLPRT_GORCH(_i) (0x00380004 + ((_i) * 8))
+#define GLPRT_GORCL(_i) (0x00380000 + ((_i) * 8))
+#define GLPRT_GOTCH(_i) (0x00380B44 + ((_i) * 8))
+#define GLPRT_GOTCL(_i) (0x00380B40 + ((_i) * 8))
+#define GLPRT_ILLERRC(_i) (0x003801C0 + ((_i) * 8))
+#define GLPRT_LXOFFRXC(_i) (0x003802C0 + ((_i) * 8))
+#define GLPRT_LXOFFTXC(_i) (0x00381180 + ((_i) * 8))
+#define GLPRT_LXONRXC(_i) (0x00380280 + ((_i) * 8))
+#define GLPRT_LXONTXC(_i) (0x00381140 + ((_i) * 8))
+#define GLPRT_MLFC(_i) (0x00380040 + ((_i) * 8))
+#define GLPRT_MPRCH(_i) (0x00381344 + ((_i) * 8))
+#define GLPRT_MPRCL(_i) (0x00381340 + ((_i) * 8))
+#define GLPRT_MPTCH(_i) (0x00381204 + ((_i) * 8))
+#define GLPRT_MPTCL(_i) (0x00381200 + ((_i) * 8))
+#define GLPRT_MRFC(_i) (0x00380080 + ((_i) * 8))
+#define GLPRT_PRC1023H(_i) (0x00380A04 + ((_i) * 8))
+#define GLPRT_PRC1023L(_i) (0x00380A00 + ((_i) * 8))
+#define GLPRT_PRC127H(_i) (0x00380944 + ((_i) * 8))
+#define GLPRT_PRC127L(_i) (0x00380940 + ((_i) * 8))
+#define GLPRT_PRC1522H(_i) (0x00380A44 + ((_i) * 8))
+#define GLPRT_PRC1522L(_i) (0x00380A40 + ((_i) * 8))
+#define GLPRT_PRC255H(_i) (0x00380984 + ((_i) * 8))
+#define GLPRT_PRC255L(_i) (0x00380980 + ((_i) * 8))
+#define GLPRT_PRC511H(_i) (0x003809C4 + ((_i) * 8))
+#define GLPRT_PRC511L(_i) (0x003809C0 + ((_i) * 8))
+#define GLPRT_PRC64H(_i) (0x00380904 + ((_i) * 8))
+#define GLPRT_PRC64L(_i) (0x00380900 + ((_i) * 8))
+#define GLPRT_PRC9522H(_i) (0x00380A84 + ((_i) * 8))
+#define GLPRT_PRC9522L(_i) (0x00380A80 + ((_i) * 8))
+#define GLPRT_PTC1023H(_i) (0x00380C84 + ((_i) * 8))
+#define GLPRT_PTC1023L(_i) (0x00380C80 + ((_i) * 8))
+#define GLPRT_PTC127H(_i) (0x00380BC4 + ((_i) * 8))
+#define GLPRT_PTC127L(_i) (0x00380BC0 + ((_i) * 8))
+#define GLPRT_PTC1522H(_i) (0x00380CC4 + ((_i) * 8))
+#define GLPRT_PTC1522L(_i) (0x00380CC0 + ((_i) * 8))
+#define GLPRT_PTC255H(_i) (0x00380C04 + ((_i) * 8))
+#define GLPRT_PTC255L(_i) (0x00380C00 + ((_i) * 8))
+#define GLPRT_PTC511H(_i) (0x00380C44 + ((_i) * 8))
+#define GLPRT_PTC511L(_i) (0x00380C40 + ((_i) * 8))
+#define GLPRT_PTC64H(_i) (0x00380B84 + ((_i) * 8))
+#define GLPRT_PTC64L(_i) (0x00380B80 + ((_i) * 8))
+#define GLPRT_PTC9522H(_i) (0x00380D04 + ((_i) * 8))
+#define GLPRT_PTC9522L(_i) (0x00380D00 + ((_i) * 8))
+#define GLPRT_RFC(_i) (0x00380AC0 + ((_i) * 8))
+#define GLPRT_RJC(_i) (0x00380B00 + ((_i) * 8))
+#define GLPRT_RLEC(_i) (0x00380140 + ((_i) * 8))
+#define GLPRT_ROC(_i) (0x00380240 + ((_i) * 8))
+#define GLPRT_RUC(_i) (0x00380200 + ((_i) * 8))
+#define GLPRT_TDOLD(_i) (0x00381280 + ((_i) * 8))
+#define GLPRT_UPRCH(_i) (0x00381304 + ((_i) * 8))
+#define GLPRT_UPRCL(_i) (0x00381300 + ((_i) * 8))
+#define GLPRT_UPTCH(_i) (0x003811C4 + ((_i) * 8))
+#define GLPRT_UPTCL(_i) (0x003811C0 + ((_i) * 8))
+#define GLV_BPRCH(_i) (0x003B6004 + ((_i) * 8))
+#define GLV_BPRCL(_i) (0x003B6000 + ((_i) * 8))
+#define GLV_BPTCH(_i) (0x0030E004 + ((_i) * 8))
+#define GLV_BPTCL(_i) (0x0030E000 + ((_i) * 8))
+#define GLV_GORCH(_i) (0x003B0004 + ((_i) * 8))
+#define GLV_GORCL(_i) (0x003B0000 + ((_i) * 8))
+#define GLV_GOTCH(_i) (0x00300004 + ((_i) * 8))
+#define GLV_GOTCL(_i) (0x00300000 + ((_i) * 8))
+#define GLV_MPRCH(_i) (0x003B4004 + ((_i) * 8))
+#define GLV_MPRCL(_i) (0x003B4000 + ((_i) * 8))
+#define GLV_MPTCH(_i) (0x0030C004 + ((_i) * 8))
+#define GLV_MPTCL(_i) (0x0030C000 + ((_i) * 8))
+#define GLV_RDPC(_i) (0x00294C04 + ((_i) * 4))
+#define GLV_TEPC(_VSI) (0x00312000 + ((_VSI) * 4))
+#define GLV_UPRCH(_i) (0x003B2004 + ((_i) * 8))
+#define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8))
+#define GLV_UPTCH(_i) (0x0030A004 + ((_i) * 8))
+#define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8))
+#define VSIQF_HKEY_MAX_INDEX 12
+
+#endif /* _ICE_HW_AUTOGEN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
new file mode 100644
index 000000000000..d23a91665b46
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -0,0 +1,473 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LAN_TX_RX_H_
+#define _ICE_LAN_TX_RX_H_
+
+union ice_32byte_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ /* bit 0 of hdr_addr is DD bit */
+ __le64 rsvd1;
+ __le64 rsvd2;
+ } read;
+ struct {
+ struct {
+ struct {
+ __le16 mirroring_status;
+ __le16 l2tag1;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ __le32 fd_id; /* Flow Director filter id */
+ } hi_dword;
+ } qword0;
+ struct {
+ /* status/error/PTYPE/length */
+ __le64 status_error_len;
+ } qword1;
+ struct {
+ __le16 ext_status; /* extended status */
+ __le16 rsvd;
+ __le16 l2tag2_1;
+ __le16 l2tag2_2;
+ } qword2;
+ struct {
+ __le32 reserved;
+ __le32 fd_id;
+ } qword3;
+ } wb; /* writeback */
+};
+
+struct ice_rx_ptype_decoded {
+ u32 ptype:10;
+ u32 known:1;
+ u32 outer_ip:1;
+ u32 outer_ip_ver:2;
+ u32 outer_frag:1;
+ u32 tunnel_type:3;
+ u32 tunnel_end_prot:2;
+ u32 tunnel_end_frag:1;
+ u32 inner_prot:4;
+ u32 payload_layer:3;
+};
+
+enum ice_rx_ptype_outer_ip {
+ ICE_RX_PTYPE_OUTER_L2 = 0,
+ ICE_RX_PTYPE_OUTER_IP = 1,
+};
+
+enum ice_rx_ptype_outer_ip_ver {
+ ICE_RX_PTYPE_OUTER_NONE = 0,
+ ICE_RX_PTYPE_OUTER_IPV4 = 1,
+ ICE_RX_PTYPE_OUTER_IPV6 = 2,
+};
+
+enum ice_rx_ptype_outer_fragmented {
+ ICE_RX_PTYPE_NOT_FRAG = 0,
+ ICE_RX_PTYPE_FRAG = 1,
+};
+
+enum ice_rx_ptype_tunnel_type {
+ ICE_RX_PTYPE_TUNNEL_NONE = 0,
+ ICE_RX_PTYPE_TUNNEL_IP_IP = 1,
+ ICE_RX_PTYPE_TUNNEL_IP_GRENAT = 2,
+ ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3,
+ ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4,
+};
+
+enum ice_rx_ptype_tunnel_end_prot {
+ ICE_RX_PTYPE_TUNNEL_END_NONE = 0,
+ ICE_RX_PTYPE_TUNNEL_END_IPV4 = 1,
+ ICE_RX_PTYPE_TUNNEL_END_IPV6 = 2,
+};
+
+enum ice_rx_ptype_inner_prot {
+ ICE_RX_PTYPE_INNER_PROT_NONE = 0,
+ ICE_RX_PTYPE_INNER_PROT_UDP = 1,
+ ICE_RX_PTYPE_INNER_PROT_TCP = 2,
+ ICE_RX_PTYPE_INNER_PROT_SCTP = 3,
+ ICE_RX_PTYPE_INNER_PROT_ICMP = 4,
+ ICE_RX_PTYPE_INNER_PROT_TIMESYNC = 5,
+};
+
+enum ice_rx_ptype_payload_layer {
+ ICE_RX_PTYPE_PAYLOAD_LAYER_NONE = 0,
+ ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1,
+ ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2,
+ ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3,
+};
+
+/* RX Flex Descriptor
+ * This descriptor is used instead of the legacy version descriptor when
+ * ice_rlan_ctx.adv_desc is set
+ */
+union ice_32b_rx_flex_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ /* bit 0 of hdr_addr is DD bit */
+ __le64 rsvd1;
+ __le64 rsvd2;
+ } read;
+ struct {
+ /* Qword 0 */
+ u8 rxdid; /* descriptor builder profile id */
+ u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+ __le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+ __le16 pkt_len; /* [15:14] are reserved */
+ __le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+ /* sph=[11:11] */
+ /* ff1/ext=[15:12] */
+
+ /* Qword 1 */
+ __le16 status_error0;
+ __le16 l2tag1;
+ __le16 flex_meta0;
+ __le16 flex_meta1;
+
+ /* Qword 2 */
+ __le16 status_error1;
+ u8 flex_flags2;
+ u8 time_stamp_low;
+ __le16 l2tag2_1st;
+ __le16 l2tag2_2nd;
+
+ /* Qword 3 */
+ __le16 flex_meta2;
+ __le16 flex_meta3;
+ union {
+ struct {
+ __le16 flex_meta4;
+ __le16 flex_meta5;
+ } flex;
+ __le32 ts_high;
+ } flex_ts;
+ } wb; /* writeback */
+};
+
+/* Rx Flex Descriptor NIC Profile
+ * This descriptor corresponds to RxDID 2 which contains
+ * metadata fields for RSS, flow id and timestamp info
+ */
+struct ice_32b_rx_flex_desc_nic {
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ __le16 ptype_flexi_flags0;
+ __le16 pkt_len;
+ __le16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ __le16 status_error0;
+ __le16 l2tag1;
+ __le32 rss_hash;
+
+ /* Qword 2 */
+ __le16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ __le16 l2tag2_1st;
+ __le16 l2tag2_2nd;
+
+ /* Qword 3 */
+ __le32 flow_id;
+ union {
+ struct {
+ __le16 vlan_id;
+ __le16 flow_id_ipv6;
+ } flex;
+ __le32 ts_high;
+ } flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy, and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with a specific metadata (profile 7 reserved for HW)
+ */
+enum ice_rxdid {
+ ICE_RXDID_START = 0,
+ ICE_RXDID_LEGACY_0 = ICE_RXDID_START,
+ ICE_RXDID_LEGACY_1,
+ ICE_RXDID_FLX_START,
+ ICE_RXDID_FLEX_NIC = ICE_RXDID_FLX_START,
+ ICE_RXDID_FLX_LAST = 63,
+ ICE_RXDID_LAST = ICE_RXDID_FLX_LAST
+};
+
+/* Receive Flex Descriptor Rx opcode values */
+#define ICE_RX_OPC_MDID 0x01
+
+/* Receive Descriptor MDID values */
+#define ICE_RX_MDID_FLOW_ID_LOWER 5
+#define ICE_RX_MDID_FLOW_ID_HIGH 6
+#define ICE_RX_MDID_HASH_LOW 56
+#define ICE_RX_MDID_HASH_HIGH 57
+
+/* Rx Flag64 packet flag bits */
+enum ice_rx_flg64_bits {
+ ICE_RXFLG_PKT_DSI = 0,
+ ICE_RXFLG_EVLAN_x8100 = 15,
+ ICE_RXFLG_EVLAN_x9100,
+ ICE_RXFLG_VLAN_x8100,
+ ICE_RXFLG_TNL_MAC = 22,
+ ICE_RXFLG_TNL_VLAN,
+ ICE_RXFLG_PKT_FRG,
+ ICE_RXFLG_FIN = 32,
+ ICE_RXFLG_SYN,
+ ICE_RXFLG_RST,
+ ICE_RXFLG_TNL0 = 38,
+ ICE_RXFLG_TNL1,
+ ICE_RXFLG_TNL2,
+ ICE_RXFLG_UDP_GRE,
+ ICE_RXFLG_RSVD = 63
+};
+
+/* for ice_32b_rx_flex_desc.ptype_flex_flags0 member */
+#define ICE_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */
+
+/* for ice_32b_rx_flex_desc.pkt_len member */
+#define ICE_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */
+
+enum ice_rx_flex_desc_status_error_0_bits {
+ /* Note: These are predefined bit offsets */
+ ICE_RX_FLEX_DESC_STATUS0_DD_S = 0,
+ ICE_RX_FLEX_DESC_STATUS0_EOF_S,
+ ICE_RX_FLEX_DESC_STATUS0_HBO_S,
+ ICE_RX_FLEX_DESC_STATUS0_L3L4P_S,
+ ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+ ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+ ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+ ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+ ICE_RX_FLEX_DESC_STATUS0_LPBK_S,
+ ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+ ICE_RX_FLEX_DESC_STATUS0_RXE_S,
+ ICE_RX_FLEX_DESC_STATUS0_CRCP_S,
+ ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+ ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+ ICE_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+ ICE_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+ ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
+#define ICE_RXQ_CTX_SIZE_DWORDS 8
+#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
+
+/* RLAN Rx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_rlan_ctx {
+ u16 head;
+ u16 cpuid; /* bigger than needed, see above for reason */
+ u64 base;
+ u16 qlen;
+#define ICE_RLAN_CTX_DBUF_S 7
+ u16 dbuf; /* bigger than needed, see above for reason */
+#define ICE_RLAN_CTX_HBUF_S 6
+ u16 hbuf; /* bigger than needed, see above for reason */
+ u8 dtype;
+ u8 dsize;
+ u8 crcstrip;
+ u8 l2tsel;
+ u8 hsplit_0;
+ u8 hsplit_1;
+ u8 showiv;
+ u32 rxmax; /* bigger than needed, see above for reason */
+ u8 tphrdesc_ena;
+ u8 tphwdesc_ena;
+ u8 tphdata_ena;
+ u8 tphhead_ena;
+ u16 lrxqthresh; /* bigger than needed, see above for reason */
+};
+
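+/* The ICE_RLAN_CTX_DBUF_S/HBUF_S values above are unit shifts rather than
+ * bit positions: dbuf is programmed in 128-byte (1 << 7) units and hbuf in
+ * 64-byte (1 << 6) units, so a caller would presumably fill the context with
+ * something like rlan_ctx.dbuf = rx_buf_len >> ICE_RLAN_CTX_DBUF_S (a
+ * sketch, assuming rx_buf_len is a multiple of 128 bytes).
+ */
+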
+struct ice_ctx_ele {
+ u16 offset;
+ u16 size_of;
+ u16 width;
+ u16 lsb;
+};
+
+#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \
+ .offset = offsetof(struct _struct, _ele), \
+ .size_of = FIELD_SIZEOF(struct _struct, _ele), \
+ .width = _width, \
+ .lsb = _lsb, \
+}
+
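+/* A context-packing table built from ice_ctx_ele entries might look like the
+ * following (field widths and bit offsets here are hypothetical, for
+ * illustration only):
+ *
+ *	static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
+ *		ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0),
+ *		ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13),
+ *		...
+ *	};
+ *
+ * where each entry records where a field lives in struct ice_rlan_ctx and
+ * which bits it occupies in the packed hardware context.
+ */
+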
+/* for hsplit_0 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_0 {
+ ICE_RLAN_RX_HSPLIT_0_NO_SPLIT = 0,
+ ICE_RLAN_RX_HSPLIT_0_SPLIT_L2 = 1,
+ ICE_RLAN_RX_HSPLIT_0_SPLIT_IP = 2,
+ ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP = 4,
+ ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP = 8,
+};
+
+/* for hsplit_1 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_1 {
+ ICE_RLAN_RX_HSPLIT_1_NO_SPLIT = 0,
+ ICE_RLAN_RX_HSPLIT_1_SPLIT_L2 = 1,
+ ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS = 2,
+};
+
+/* TX Descriptor */
+struct ice_tx_desc {
+ __le64 buf_addr; /* Address of descriptor's data buf */
+ __le64 cmd_type_offset_bsz;
+};
+
+enum ice_tx_desc_dtype_value {
+ ICE_TX_DESC_DTYPE_DATA = 0x0,
+ ICE_TX_DESC_DTYPE_CTX = 0x1,
+ /* DESC_DONE - HW has completed write-back of descriptor */
+ ICE_TX_DESC_DTYPE_DESC_DONE = 0xF,
+};
+
+#define ICE_TXD_QW1_CMD_S 4
+#define ICE_TXD_QW1_CMD_M (0xFFFUL << ICE_TXD_QW1_CMD_S)
+
+enum ice_tx_desc_cmd_bits {
+ ICE_TX_DESC_CMD_EOP = 0x0001,
+ ICE_TX_DESC_CMD_RS = 0x0002,
+ ICE_TX_DESC_CMD_IL2TAG1 = 0x0008,
+ ICE_TX_DESC_CMD_IIPT_IPV6 = 0x0020, /* 2 BITS */
+ ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040, /* 2 BITS */
+ ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, /* 2 BITS */
+ ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, /* 2 BITS */
+ ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, /* 2 BITS */
+};
+
+#define ICE_TXD_QW1_OFFSET_S 16
+#define ICE_TXD_QW1_OFFSET_M (0x3FFFFULL << ICE_TXD_QW1_OFFSET_S)
+
+enum ice_tx_desc_len_fields {
+ /* Note: These are predefined bit offsets */
+ ICE_TX_DESC_LEN_MACLEN_S = 0, /* 7 BITS */
+ ICE_TX_DESC_LEN_IPLEN_S = 7, /* 7 BITS */
+ ICE_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */
+};
+
+#define ICE_TXD_QW1_MACLEN_M (0x7FUL << ICE_TX_DESC_LEN_MACLEN_S)
+#define ICE_TXD_QW1_IPLEN_M (0x7FUL << ICE_TX_DESC_LEN_IPLEN_S)
+#define ICE_TXD_QW1_L4LEN_M (0xFUL << ICE_TX_DESC_LEN_L4_LEN_S)
+
+/* Tx descriptor field limits in bytes */
+#define ICE_TXD_MACLEN_MAX ((ICE_TXD_QW1_MACLEN_M >> \
+ ICE_TX_DESC_LEN_MACLEN_S) * ICE_BYTES_PER_WORD)
+#define ICE_TXD_IPLEN_MAX ((ICE_TXD_QW1_IPLEN_M >> \
+ ICE_TX_DESC_LEN_IPLEN_S) * ICE_BYTES_PER_DWORD)
+#define ICE_TXD_L4LEN_MAX ((ICE_TXD_QW1_L4LEN_M >> \
+ ICE_TX_DESC_LEN_L4_LEN_S) * ICE_BYTES_PER_DWORD)
+
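+/* Assuming ICE_BYTES_PER_WORD is 2 and ICE_BYTES_PER_DWORD is 4 (both
+ * defined elsewhere in the driver), the limits above work out to MACLEN up
+ * to 0x7F * 2 = 254 bytes, IPLEN up to 0x7F * 4 = 508 bytes and L4LEN up to
+ * 0xF * 4 = 60 bytes, since the descriptor fields count words/dwords rather
+ * than bytes.
+ */
+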
+#define ICE_TXD_QW1_TX_BUF_SZ_S 34
+#define ICE_TXD_QW1_L2TAG1_S 48
+
+/* Context descriptors */
+struct ice_tx_ctx_desc {
+ __le32 tunneling_params;
+ __le16 l2tag2;
+ __le16 rsvd;
+ __le64 qw1;
+};
+
+#define ICE_TXD_CTX_QW1_CMD_S 4
+#define ICE_TXD_CTX_QW1_CMD_M (0x7FUL << ICE_TXD_CTX_QW1_CMD_S)
+
+#define ICE_TXD_CTX_QW1_TSO_LEN_S 30
+#define ICE_TXD_CTX_QW1_TSO_LEN_M \
+ (0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S)
+
+#define ICE_TXD_CTX_QW1_MSS_S 50
+
+enum ice_tx_ctx_desc_cmd_bits {
+ ICE_TX_CTX_DESC_TSO = 0x01,
+ ICE_TX_CTX_DESC_TSYN = 0x02,
+ ICE_TX_CTX_DESC_IL2TAG2 = 0x04,
+ ICE_TX_CTX_DESC_IL2TAG2_IL2H = 0x08,
+ ICE_TX_CTX_DESC_SWTCH_NOTAG = 0x00,
+ ICE_TX_CTX_DESC_SWTCH_UPLINK = 0x10,
+ ICE_TX_CTX_DESC_SWTCH_LOCAL = 0x20,
+ ICE_TX_CTX_DESC_SWTCH_VSI = 0x30,
+ ICE_TX_CTX_DESC_RESERVED = 0x40
+};
+
+#define ICE_LAN_TXQ_MAX_QGRPS 127
+#define ICE_LAN_TXQ_MAX_QDIS 1023
+
+/* Tx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_tlan_ctx {
+#define ICE_TLAN_CTX_BASE_S 7
+ u64 base; /* base is defined in 128-byte units */
+ u8 port_num;
+ u16 cgd_num; /* bigger than needed, see above for reason */
+ u8 pf_num;
+ u16 vmvf_num;
+ u8 vmvf_type;
+#define ICE_TLAN_CTX_VMVF_TYPE_VMQ 1
+#define ICE_TLAN_CTX_VMVF_TYPE_PF 2
+ u16 src_vsi;
+ u8 tsyn_ena;
+ u8 alt_vlan;
+ u16 cpuid; /* bigger than needed, see above for reason */
+ u8 wb_mode;
+ u8 tphrd_desc;
+ u8 tphrd;
+ u8 tphwr_desc;
+ u16 cmpq_id;
+ u16 qnum_in_func;
+ u8 itr_notification_mode;
+ u8 adjust_prof_id;
+ u32 qlen; /* bigger than needed, see above for reason */
+ u8 quanta_prof_idx;
+ u8 tso_ena;
+ u16 tso_qnum;
+ u8 legacy_int;
+ u8 drop_ena;
+ u8 cache_prof_idx;
+ u8 pkt_shaper_prof_idx;
+ u8 int_q_state; /* width not needed - internal, do not write */
+};
+
+/* macro to make the table lines short */
+#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+ { PTYPE, \
+ 1, \
+ ICE_RX_PTYPE_OUTER_##OUTER_IP, \
+ ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+ ICE_RX_PTYPE_##OUTER_FRAG, \
+ ICE_RX_PTYPE_TUNNEL_##T, \
+ ICE_RX_PTYPE_TUNNEL_END_##TE, \
+ ICE_RX_PTYPE_##TEF, \
+ ICE_RX_PTYPE_INNER_PROT_##I, \
+ ICE_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define ICE_PTT_UNUSED_ENTRY(PTYPE) { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros make the table fit but are terse */
+#define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = {
+ /* L2 Packet types */
+ ICE_PTT_UNUSED_ENTRY(0),
+ ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ ICE_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+};
+
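+/* For example, the ICE_PTT(1, ...) entry above expands to:
+ *
+ *	{ 1, 1, ICE_RX_PTYPE_OUTER_L2, ICE_RX_PTYPE_OUTER_NONE,
+ *	  ICE_RX_PTYPE_NOT_FRAG, ICE_RX_PTYPE_TUNNEL_NONE,
+ *	  ICE_RX_PTYPE_TUNNEL_END_NONE, ICE_RX_PTYPE_NOT_FRAG,
+ *	  ICE_RX_PTYPE_INNER_PROT_NONE, ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 }
+ *
+ * i.e. a known L2 packet with no outer IP header, no tunnel and a PAY2
+ * payload.
+ */
+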
+static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype)
+{
+ return ice_ptype_lkup[ptype];
+}
+#endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
new file mode 100644
index 000000000000..210b7910f1cd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -0,0 +1,5495 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Intel(R) Ethernet Connection E800 Series Linux Driver */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "ice.h"
+
+#define DRV_VERSION "ice-0.7.0-k"
+#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
+const char ice_drv_ver[] = DRV_VERSION;
+static const char ice_driver_string[] = DRV_SUMMARY;
+static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
+
+MODULE_AUTHOR("Intel Corporation, <[email protected]>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static int debug = -1;
+module_param(debug, int, 0644);
+#ifndef CONFIG_DYNAMIC_DEBUG
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
+#else
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
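+/* Per the parameter description above, a debug value with bit 31 set (e.g.
+ * "modprobe ice debug=0x80000020") would be treated as a hardware debug_mask
+ * rather than a netif message level (illustrative example; the individual
+ * mask bits are hardware/driver defined).
+ */
+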
+static struct workqueue_struct *ice_wq;
+static const struct net_device_ops ice_netdev_ops;
+
+static void ice_pf_dis_all_vsi(struct ice_pf *pf);
+static void ice_rebuild(struct ice_pf *pf);
+static int ice_vsi_release(struct ice_vsi *vsi);
+static void ice_update_vsi_stats(struct ice_vsi *vsi);
+static void ice_update_pf_stats(struct ice_pf *pf);
+
+/**
+ * ice_get_free_slot - get the next free (NULL) location index in array
+ * @array: array to search
+ * @size: size of the array
+ * @curr: last known occupied index to be used as a search hint
+ *
+ * void * is being used to keep the functionality generic. This lets us use this
+ * function on any array of pointers.
+ */
+static int ice_get_free_slot(void *array, int size, int curr)
+{
+ int **tmp_array = (int **)array;
+ int next;
+
+ if (curr < (size - 1) && !tmp_array[curr + 1]) {
+ next = curr + 1;
+ } else {
+ int i = 0;
+
+ while ((i < size) && (tmp_array[i]))
+ i++;
+ if (i == size)
+ next = ICE_NO_VSI;
+ else
+ next = i;
+ }
+ return next;
+}
+
+/**
+ * ice_search_res - Search the tracker for a block of resources
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ * Returns the base item index of the block, or -ENOMEM for error
+ */
+static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
+{
+ int start = res->search_hint;
+ int end = start;
+
+ id |= ICE_RES_VALID_BIT;
+
+ do {
+ /* skip already allocated entries */
+ if (res->list[end++] & ICE_RES_VALID_BIT) {
+ start = end;
+ if ((start + needed) > res->num_entries)
+ break;
+ }
+
+ if (end == (start + needed)) {
+ int i = start;
+
+ /* there was enough, so assign it to the requestor */
+ while (i != end)
+ res->list[i++] = id;
+
+ if (end == res->num_entries)
+ end = 0;
+
+ res->search_hint = end;
+ return start;
+ }
+ } while (1);
+
+ return -ENOMEM;
+}
+
+/**
+ * ice_get_res - get a block of resources
+ * @pf: board private structure
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ *
+ * Returns the base item index of the block, or -ENOMEM for error
+ * The search_hint trick and lack of advanced fit-finding only work
+ * because we're highly likely to have all the same sized requests.
+ * Linear search time and any fragmentation should be minimal.
+ */
+static int
+ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
+{
+ int ret;
+
+ if (!res || !pf)
+ return -EINVAL;
+
+ if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
+ dev_err(&pf->pdev->dev,
+ "param err: needed=%d, num_entries = %d id=0x%04x\n",
+ needed, res->num_entries, id);
+ return -EINVAL;
+ }
+
+ /* search based on search_hint */
+ ret = ice_search_res(res, needed, id);
+
+ if (ret < 0) {
+ /* previous search failed. Reset search hint and try again */
+ res->search_hint = 0;
+ ret = ice_search_res(res, needed, id);
+ }
+
+ return ret;
+}
+
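+/* Worked example (hypothetical tracker state): with num_entries = 8,
+ * search_hint = 5 and list[5..7] already valid, ice_get_res(pf, res, 2, id)
+ * first fails the hinted scan (not enough room past index 5), resets the
+ * hint and retries from 0, marking list[0] and list[1] with
+ * (id | ICE_RES_VALID_BIT), leaving search_hint = 2 and returning base
+ * index 0.
+ */
+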
+/**
+ * ice_free_res - free a block of resources
+ * @res: pointer to the resource
+ * @index: starting index previously returned by ice_get_res
+ * @id: identifier to track owner
+ * Returns number of resources freed
+ */
+static int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
+{
+ int count = 0;
+ int i;
+
+ if (!res || index >= res->num_entries)
+ return -EINVAL;
+
+ id |= ICE_RES_VALID_BIT;
+ for (i = index; i < res->num_entries && res->list[i] == id; i++) {
+ res->list[i] = 0;
+ count++;
+ }
+
+ return count;
+}
+
+/**
+ * ice_add_mac_to_list - Add a MAC address filter entry to the list
+ * @vsi: the VSI to be forwarded to
+ * @add_list: pointer to the list which contains MAC filter entries
+ * @macaddr: the MAC address to be added
+ *
+ * Adds a MAC address filter entry to the temp list
+ *
+ * Returns 0 on success or -ENOMEM on failure.
+ */
+static int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
+ const u8 *macaddr)
+{
+ struct ice_fltr_list_entry *tmp;
+ struct ice_pf *pf = vsi->back;
+
+ tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC);
+ if (!tmp)
+ return -ENOMEM;
+
+ tmp->fltr_info.flag = ICE_FLTR_TX;
+ tmp->fltr_info.src = vsi->vsi_num;
+ tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
+ tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
+ ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr);
+
+ INIT_LIST_HEAD(&tmp->list_entry);
+ list_add(&tmp->list_entry, add_list);
+
+ return 0;
+}
+
+/**
+ * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
+ * @netdev: the net device on which the sync is happening
+ * @addr: MAC address to sync
+ *
+ * This is a callback function which is called by the in-kernel device sync
+ * functions (like __dev_uc_sync, __dev_mc_sync, etc.). This function only
+ * populates the tmp_sync_list, which is later used by ice_add_mac to add the
+ * MAC filters to the hardware.
+ */
+static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (ice_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
+ * @netdev: the net device on which the unsync is happening
+ * @addr: MAC address to unsync
+ *
+ * This is a callback function which is called by the in-kernel device unsync
+ * functions (like __dev_uc_unsync, __dev_mc_unsync, etc.). This function only
+ * populates the tmp_unsync_list, which is later used by ice_remove_mac to
+ * delete the MAC filters from the hardware.
+ */
+static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (ice_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr))
+ return -EINVAL;
+
+ return 0;
+}
+
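+/* These sync/unsync callbacks are handed to the kernel's address list
+ * helpers; in this driver that happens in ice_vsi_sync_fltr() below, e.g.
+ *
+ *	__dev_uc_sync(netdev, ice_add_mac_to_sync_list,
+ *		      ice_add_mac_to_unsync_list);
+ */
+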
+/**
+ * ice_free_fltr_list - free filter lists helper
+ * @dev: pointer to the device struct
+ * @h: pointer to the list head to be freed
+ *
+ * Helper function to free filter lists previously created using
+ * ice_add_mac_to_list
+ */
+static void ice_free_fltr_list(struct device *dev, struct list_head *h)
+{
+ struct ice_fltr_list_entry *e, *tmp;
+
+ list_for_each_entry_safe(e, tmp, h, list_entry) {
+ list_del(&e->list_entry);
+ devm_kfree(dev, e);
+ }
+}
+
+/**
+ * ice_vsi_fltr_changed - check if filter state changed
+ * @vsi: VSI to be checked
+ *
+ * returns true if filter state has changed, false otherwise.
+ */
+static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
+{
+ return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) ||
+ test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) ||
+ test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+}
+
+/**
+ * ice_vsi_sync_fltr - Update the VSI filter list to the HW
+ * @vsi: ptr to the VSI
+ *
+ * Push any outstanding VSI filter changes through the AdminQ.
+ */
+static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
+{
+ struct device *dev = &vsi->back->pdev->dev;
+ struct net_device *netdev = vsi->netdev;
+ bool promisc_forced_on = false;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status = 0;
+ u32 changed_flags = 0;
+ int err = 0;
+
+ if (!vsi->netdev)
+ return -EINVAL;
+
+ while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state))
+ usleep_range(1000, 2000);
+
+ changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
+ vsi->current_netdev_flags = vsi->netdev->flags;
+
+ INIT_LIST_HEAD(&vsi->tmp_sync_list);
+ INIT_LIST_HEAD(&vsi->tmp_unsync_list);
+
+ if (ice_vsi_fltr_changed(vsi)) {
+ clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+ clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+ clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+
+ /* grab the netdev's addr_list_lock */
+ netif_addr_lock_bh(netdev);
+ __dev_uc_sync(netdev, ice_add_mac_to_sync_list,
+ ice_add_mac_to_unsync_list);
+ __dev_mc_sync(netdev, ice_add_mac_to_sync_list,
+ ice_add_mac_to_unsync_list);
+ /* our temp lists are populated. release lock */
+ netif_addr_unlock_bh(netdev);
+ }
+
+ /* Remove MAC addresses in the unsync list */
+ status = ice_remove_mac(hw, &vsi->tmp_unsync_list);
+ ice_free_fltr_list(dev, &vsi->tmp_unsync_list);
+ if (status) {
+ netdev_err(netdev, "Failed to delete MAC filters\n");
+ /* if we failed because of alloc failures, just bail */
+ if (status == ICE_ERR_NO_MEMORY) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* Add MAC addresses in the sync list */
+ status = ice_add_mac(hw, &vsi->tmp_sync_list);
+ ice_free_fltr_list(dev, &vsi->tmp_sync_list);
+ if (status) {
+ netdev_err(netdev, "Failed to add MAC filters\n");
+ /* If there is no more space for new unicast MAC filters, the
+ * VSI should go into promiscuous mode. There should be some
+ * space reserved for promiscuous filters.
+ */
+ if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
+ !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC,
+ vsi->state)) {
+ promisc_forced_on = true;
+ netdev_warn(netdev,
+ "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
+ vsi->vsi_num);
+ } else {
+ err = -EIO;
+ goto out;
+ }
+ }
+ /* check for changes in promiscuous modes */
+ if (changed_flags & IFF_ALLMULTI)
+ netdev_warn(netdev, "Unsupported configuration\n");
+
+ if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
+ test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
+ clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+ if (vsi->current_netdev_flags & IFF_PROMISC) {
+ /* Apply TX filter rule to get traffic from VMs */
+ status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true,
+ ICE_FLTR_TX);
+ if (status) {
+ netdev_err(netdev, "Error setting default VSI %i tx rule\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags &= ~IFF_PROMISC;
+ err = -EIO;
+ goto out_promisc;
+ }
+ /* Apply RX filter rule to get traffic from wire */
+ status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true,
+ ICE_FLTR_RX);
+ if (status) {
+ netdev_err(netdev, "Error setting default VSI %i rx rule\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags &= ~IFF_PROMISC;
+ err = -EIO;
+ goto out_promisc;
+ }
+ } else {
+ /* Clear TX filter rule to stop traffic from VMs */
+ status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false,
+ ICE_FLTR_TX);
+ if (status) {
+ netdev_err(netdev, "Error clearing default VSI %i tx rule\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags |= IFF_PROMISC;
+ err = -EIO;
+ goto out_promisc;
+ }
+ /* Clear filter RX to remove traffic from wire */
+ status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false,
+ ICE_FLTR_RX);
+ if (status) {
+ netdev_err(netdev, "Error clearing default VSI %i rx rule\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags |= IFF_PROMISC;
+ err = -EIO;
+ goto out_promisc;
+ }
+ }
+ }
+ goto exit;
+
+out_promisc:
+ set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+ goto exit;
+out:
+ /* if something went wrong then set the changed flag so we try again */
+ set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+ set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+exit:
+ clear_bit(__ICE_CFG_BUSY, vsi->state);
+ return err;
+}
+
+/**
+ * ice_sync_fltr_subtask - Sync the VSI filter list with HW
+ * @pf: board private structure
+ */
+static void ice_sync_fltr_subtask(struct ice_pf *pf)
+{
+ int v;
+
+ if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
+ return;
+
+ clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+
+ for (v = 0; v < pf->num_alloc_vsi; v++)
+ if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
+ ice_vsi_sync_fltr(pf->vsi[v])) {
+ /* come back and try again later */
+ set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+ break;
+ }
+}
+
+/**
+ * ice_is_reset_recovery_pending - check if reset recovery is pending
+ * @state: PF state field
+ */
+static bool ice_is_reset_recovery_pending(unsigned long int *state)
+{
+ return test_bit(__ICE_RESET_RECOVERY_PENDING, state);
+}
+
+/**
+ * ice_prepare_for_reset - prep for the core to reset
+ * @pf: board private structure
+ *
+ * Inform or close all dependent features in prep for reset.
+ */
+static void
+ice_prepare_for_reset(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u32 v;
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ ice_remove_vsi_fltr(hw, pf->vsi[v]->vsi_num);
+
+ dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
+
+ /* disable the VSIs and their queues that are not already DOWN */
+ /* ice_pf_dis_all_vsi modifies netdev structures - rtnl_lock needed */
+ ice_pf_dis_all_vsi(pf);
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ pf->vsi[v]->vsi_num = 0;
+
+ ice_shutdown_all_ctrlq(hw);
+}
+
+/**
+ * ice_do_reset - Initiate one of many types of resets
+ * @pf: board private structure
+ * @reset_type: reset type requested
+ */
+static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+ struct device *dev = &pf->pdev->dev;
+ struct ice_hw *hw = &pf->hw;
+
+ dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
+ WARN_ON(in_interrupt());
+
+ /* PFR is a bit of a special case because it doesn't result in an OICR
+ * interrupt. So for PFR, we prepare for reset, issue the reset and
+ * rebuild sequentially.
+ */
+ if (reset_type == ICE_RESET_PFR) {
+ set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ ice_prepare_for_reset(pf);
+ }
+
+ /* trigger the reset */
+ if (ice_reset(hw, reset_type)) {
+ dev_err(dev, "reset %d failed\n", reset_type);
+ set_bit(__ICE_RESET_FAILED, pf->state);
+ clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ return;
+ }
+
+ if (reset_type == ICE_RESET_PFR) {
+ pf->pfr_count++;
+ ice_rebuild(pf);
+ clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ }
+}
+
+/**
+ * ice_reset_subtask - Set up for resetting the device and driver
+ * @pf: board private structure
+ */
+static void ice_reset_subtask(struct ice_pf *pf)
+{
+ enum ice_reset_req reset_type;
+
+ rtnl_lock();
+
+ /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
+ * OICR interrupt. The OICR handler (ice_misc_intr) determines what
+ * type of reset happened and sets __ICE_RESET_RECOVERY_PENDING bit in
+ * pf->state. So if reset/recovery is pending (as indicated by this bit)
+ * we do a rebuild and return.
+ */
+ if (ice_is_reset_recovery_pending(pf->state)) {
+ clear_bit(__ICE_GLOBR_RECV, pf->state);
+ clear_bit(__ICE_CORER_RECV, pf->state);
+ ice_prepare_for_reset(pf);
+
+ /* make sure we are ready to rebuild */
+ if (ice_check_reset(&pf->hw))
+ set_bit(__ICE_RESET_FAILED, pf->state);
+ else
+ ice_rebuild(pf);
+ clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ goto unlock;
+ }
+
+ /* No pending resets to finish processing. Check for new resets */
+ if (test_and_clear_bit(__ICE_GLOBR_REQ, pf->state))
+ reset_type = ICE_RESET_GLOBR;
+ else if (test_and_clear_bit(__ICE_CORER_REQ, pf->state))
+ reset_type = ICE_RESET_CORER;
+ else if (test_and_clear_bit(__ICE_PFR_REQ, pf->state))
+ reset_type = ICE_RESET_PFR;
+ else
+ goto unlock;
+
+ /* reset if not already down or resetting */
+ if (!test_bit(__ICE_DOWN, pf->state) &&
+ !test_bit(__ICE_CFG_BUSY, pf->state))
+ ice_do_reset(pf, reset_type);
+
+unlock:
+ rtnl_unlock();
+}
+
+/**
+ * ice_watchdog_subtask - periodic tasks not using event driven scheduling
+ * @pf: board private structure
+ */
+static void ice_watchdog_subtask(struct ice_pf *pf)
+{
+ int i;
+
+ /* if interface is down do nothing */
+ if (test_bit(__ICE_DOWN, pf->state) ||
+ test_bit(__ICE_CFG_BUSY, pf->state))
+ return;
+
+ /* make sure we don't do these things too often */
+ if (time_before(jiffies,
+ pf->serv_tmr_prev + pf->serv_tmr_period))
+ return;
+
+ pf->serv_tmr_prev = jiffies;
+
+ /* Update the stats for active netdevs so the network stack
+ * can look at updated numbers whenever it cares to
+ */
+ ice_update_pf_stats(pf);
+ for (i = 0; i < pf->num_alloc_vsi; i++)
+ if (pf->vsi[i] && pf->vsi[i]->netdev)
+ ice_update_vsi_stats(pf->vsi[i]);
+}
+
+/**
+ * ice_print_link_msg - print link up or down message
+ * @vsi: the VSI whose link status is being queried
+ * @isup: boolean for if the link is now up or down
+ */
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
+{
+ const char *speed;
+ const char *fc;
+
+ if (vsi->current_isup == isup)
+ return;
+
+ vsi->current_isup = isup;
+
+ if (!isup) {
+ netdev_info(vsi->netdev, "NIC Link is Down\n");
+ return;
+ }
+
+ switch (vsi->port_info->phy.link_info.link_speed) {
+ case ICE_AQ_LINK_SPEED_40GB:
+ speed = "40 G";
+ break;
+ case ICE_AQ_LINK_SPEED_25GB:
+ speed = "25 G";
+ break;
+ case ICE_AQ_LINK_SPEED_20GB:
+ speed = "20 G";
+ break;
+ case ICE_AQ_LINK_SPEED_10GB:
+ speed = "10 G";
+ break;
+ case ICE_AQ_LINK_SPEED_5GB:
+ speed = "5 G";
+ break;
+ case ICE_AQ_LINK_SPEED_2500MB:
+ speed = "2.5 G";
+ break;
+ case ICE_AQ_LINK_SPEED_1000MB:
+ speed = "1 G";
+ break;
+ case ICE_AQ_LINK_SPEED_100MB:
+ speed = "100 M";
+ break;
+ default:
+ speed = "Unknown";
+ break;
+ }
+
+ switch (vsi->port_info->fc.current_mode) {
+ case ICE_FC_FULL:
+ fc = "RX/TX";
+ break;
+ case ICE_FC_TX_PAUSE:
+ fc = "TX";
+ break;
+ case ICE_FC_RX_PAUSE:
+ fc = "RX";
+ break;
+ default:
+ fc = "Unknown";
+ break;
+ }
+
+ netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n",
+ speed, fc);
+}
+
+/**
+ * ice_init_link_events - enable/initialize link events
+ * @pi: pointer to the port_info instance
+ *
+ * Returns -EIO on failure, 0 on success
+ */
+static int ice_init_link_events(struct ice_port_info *pi)
+{
+ u16 mask;
+
+ mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
+ ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
+
+ if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
+ dev_dbg(ice_hw_to_dev(pi->hw),
+ "Failed to set link event mask for port %d\n",
+ pi->lport);
+ return -EIO;
+ }
+
+ if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
+ dev_dbg(ice_hw_to_dev(pi->hw),
+ "Failed to enable link events for port %d\n",
+ pi->lport);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vsi_link_event - update the VSI's netdev
+ * @vsi: the VSI on which the link event occurred
+ * @link_up: whether or not the link is up
+ */
+static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
+{
+ if (!vsi || test_bit(__ICE_DOWN, vsi->state))
+ return;
+
+ if (vsi->type == ICE_VSI_PF) {
+ if (!vsi->netdev) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "vsi->netdev is not initialized!\n");
+ return;
+ }
+ if (link_up) {
+ netif_carrier_on(vsi->netdev);
+ netif_tx_wake_all_queues(vsi->netdev);
+ } else {
+ netif_carrier_off(vsi->netdev);
+ netif_tx_stop_all_queues(vsi->netdev);
+ }
+ }
+}
+
+/**
+ * ice_link_event - process the link event
+ * @pf: pf that the link event is associated with
+ * @pi: port_info for the port that the link event is associated with
+ *
+ * Returns -EIO if ice_get_link_status() fails
+ * Returns 0 on success
+ */
+static int
+ice_link_event(struct ice_pf *pf, struct ice_port_info *pi)
+{
+ u8 new_link_speed, old_link_speed;
+ struct ice_phy_info *phy_info;
+ bool new_link_same_as_old;
+ bool new_link, old_link;
+ u8 lport;
+ u16 v;
+
+ phy_info = &pi->phy;
+ phy_info->link_info_old = phy_info->link_info;
+ /* Force ice_get_link_status() to update link info */
+ phy_info->get_link_info = true;
+
+ old_link = (phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
+ old_link_speed = phy_info->link_info_old.link_speed;
+
+ lport = pi->lport;
+ if (ice_get_link_status(pi, &new_link)) {
+ dev_dbg(&pf->pdev->dev,
+ "Could not get link status for port %d\n", lport);
+ return -EIO;
+ }
+
+ new_link_speed = phy_info->link_info.link_speed;
+
+ new_link_same_as_old = (new_link == old_link &&
+ new_link_speed == old_link_speed);
+
+ ice_for_each_vsi(pf, v) {
+ struct ice_vsi *vsi = pf->vsi[v];
+
+ if (!vsi || !vsi->port_info)
+ continue;
+
+ if (new_link_same_as_old &&
+ (test_bit(__ICE_DOWN, vsi->state) ||
+ new_link == netif_carrier_ok(vsi->netdev)))
+ continue;
+
+ if (vsi->port_info->lport == lport) {
+ ice_print_link_msg(vsi, new_link);
+ ice_vsi_link_event(vsi, new_link);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_handle_link_event - handle link event via ARQ
+ * @pf: pf that the link event is associated with
+ *
+ * Returns -EINVAL if port_info is NULL
+ * Returns status on success
+ */
+static int ice_handle_link_event(struct ice_pf *pf)
+{
+ struct ice_port_info *port_info;
+ int status;
+
+ port_info = pf->hw.port_info;
+ if (!port_info)
+ return -EINVAL;
+
+ status = ice_link_event(pf, port_info);
+ if (status)
+ dev_dbg(&pf->pdev->dev,
+ "Could not process link event, error %d\n", status);
+
+ return status;
+}
+
+/**
+ * __ice_clean_ctrlq - helper function to clean controlq rings
+ * @pf: ptr to struct ice_pf
+ * @q_type: specific Control queue type
+ *
+ * Returns non-zero when events are still pending after the work budget
+ * (ICE_DFLT_IRQ_WORK) has been exhausted, zero otherwise.
+ */
+static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
+{
+ struct ice_rq_event_info event;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_ctl_q_info *cq;
+ u16 pending, i = 0;
+ const char *qtype;
+ u32 oldval, val;
+
+ /* Do not clean control queue if/when PF reset fails */
+ if (test_bit(__ICE_RESET_FAILED, pf->state))
+ return 0;
+
+ switch (q_type) {
+ case ICE_CTL_Q_ADMIN:
+ cq = &hw->adminq;
+ qtype = "Admin";
+ break;
+ default:
+ dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n",
+ q_type);
+ return 0;
+ }
+
+ /* check for error indications - PF_xx_AxQLEN register layout for
+ * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
+ */
+ val = rd32(hw, cq->rq.len);
+ if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+ PF_FW_ARQLEN_ARQCRIT_M)) {
+ oldval = val;
+ if (val & PF_FW_ARQLEN_ARQVFE_M)
+ dev_dbg(&pf->pdev->dev,
+ "%s Receive Queue VF Error detected\n", qtype);
+ if (val & PF_FW_ARQLEN_ARQOVFL_M) {
+ dev_dbg(&pf->pdev->dev,
+ "%s Receive Queue Overflow Error detected\n",
+ qtype);
+ }
+ if (val & PF_FW_ARQLEN_ARQCRIT_M)
+ dev_dbg(&pf->pdev->dev,
+ "%s Receive Queue Critical Error detected\n",
+ qtype);
+ val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+ PF_FW_ARQLEN_ARQCRIT_M);
+ if (oldval != val)
+ wr32(hw, cq->rq.len, val);
+ }
+
+ val = rd32(hw, cq->sq.len);
+ if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+ PF_FW_ATQLEN_ATQCRIT_M)) {
+ oldval = val;
+ if (val & PF_FW_ATQLEN_ATQVFE_M)
+ dev_dbg(&pf->pdev->dev,
+ "%s Send Queue VF Error detected\n", qtype);
+ if (val & PF_FW_ATQLEN_ATQOVFL_M) {
+ dev_dbg(&pf->pdev->dev,
+ "%s Send Queue Overflow Error detected\n",
+ qtype);
+ }
+ if (val & PF_FW_ATQLEN_ATQCRIT_M)
+ dev_dbg(&pf->pdev->dev,
+ "%s Send Queue Critical Error detected\n",
+ qtype);
+ val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+ PF_FW_ATQLEN_ATQCRIT_M);
+ if (oldval != val)
+ wr32(hw, cq->sq.len, val);
+ }
+
+ event.buf_len = cq->rq_buf_size;
+ event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len,
+ GFP_KERNEL);
+ if (!event.msg_buf)
+ return 0;
+
+ do {
+ enum ice_status ret;
+ u16 opcode;
+
+ ret = ice_clean_rq_elem(hw, cq, &event, &pending);
+ if (ret == ICE_ERR_AQ_NO_WORK)
+ break;
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "%s Receive Queue event error %d\n", qtype,
+ ret);
+ break;
+ }
+
+ opcode = le16_to_cpu(event.desc.opcode);
+
+ switch (opcode) {
+ case ice_aqc_opc_get_link_status:
+ if (ice_handle_link_event(pf))
+ dev_err(&pf->pdev->dev,
+ "Could not handle link event");
+ break;
+ default:
+ dev_dbg(&pf->pdev->dev,
+ "%s Receive Queue unknown event 0x%04x ignored\n",
+ qtype, opcode);
+ break;
+ }
+ } while (pending && (i++ < ICE_DFLT_IRQ_WORK));
+
+ devm_kfree(&pf->pdev->dev, event.msg_buf);
+
+ return pending && (i == ICE_DFLT_IRQ_WORK);
+}
+
+/**
+ * ice_clean_adminq_subtask - clean the AdminQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_adminq_subtask(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
+ return;
+
+ if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
+ return;
+
+ clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
+
+ /* re-enable Admin queue interrupt causes */
+ val = rd32(hw, PFINT_FW_CTL);
+ wr32(hw, PFINT_FW_CTL, (val | PFINT_FW_CTL_CAUSE_ENA_M));
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_service_task_schedule - schedule the service task to wake up
+ * @pf: board private structure
+ *
+ * If not already scheduled, this puts the task into the work queue.
+ */
+static void ice_service_task_schedule(struct ice_pf *pf)
+{
+ if (!test_bit(__ICE_DOWN, pf->state) &&
+ !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state))
+ queue_work(ice_wq, &pf->serv_task);
+}
+
+/**
+ * ice_service_task_complete - finish up the service task
+ * @pf: board private structure
+ */
+static void ice_service_task_complete(struct ice_pf *pf)
+{
+ WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state));
+
+ /* force memory (pf->state) to sync before next service task */
+ smp_mb__before_atomic();
+ clear_bit(__ICE_SERVICE_SCHED, pf->state);
+}
+
+/**
+ * ice_service_timer - timer callback to schedule service task
+ * @t: pointer to timer_list
+ */
+static void ice_service_timer(struct timer_list *t)
+{
+ struct ice_pf *pf = from_timer(pf, t, serv_tmr);
+
+ mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
+ ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_service_task - manage and run subtasks
+ * @work: pointer to work_struct contained by the PF struct
+ */
+static void ice_service_task(struct work_struct *work)
+{
+ struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
+ unsigned long start_time = jiffies;
+
+ /* subtasks */
+
+ /* process reset requests first */
+ ice_reset_subtask(pf);
+
+ /* bail if a reset/recovery cycle is pending */
+ if (ice_is_reset_recovery_pending(pf->state) ||
+ test_bit(__ICE_SUSPENDED, pf->state)) {
+ ice_service_task_complete(pf);
+ return;
+ }
+
+ ice_sync_fltr_subtask(pf);
+ ice_watchdog_subtask(pf);
+ ice_clean_adminq_subtask(pf);
+
+ /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
+ ice_service_task_complete(pf);
+
+ /* If the tasks have taken longer than one service timer period
+ * or there is more work to be done, reset the service timer to
+ * schedule the service task now.
+ */
+ if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
+ test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
+ mod_timer(&pf->serv_tmr, jiffies);
+}
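The timer re-arm check above relies on the kernel's wraparound-safe jiffies comparison. A minimal standalone sketch of that check, with a local copy of the signed-difference trick behind time_after() (for illustration only):

#include <stdio.h>

/* local copy of the kernel's signed-difference comparison */
#define time_after(a, b) ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long period = 250;			/* hypothetical period */
	unsigned long start = (unsigned long)-100;	/* counter about to wrap */
	unsigned long now = start + 300;		/* wraps past zero */

	/* prints 1: a full period has elapsed, so re-arm immediately */
	printf("%d\n", time_after(now, start + period));
	return 0;
}

Even though the counter wrapped between start and now, the signed cast keeps the comparison correct.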
+
+/**
+ * ice_set_ctrlq_len - helper function to set controlq length
+ * @hw: pointer to the hw instance
+ */
+static void ice_set_ctrlq_len(struct ice_hw *hw)
+{
+ hw->adminq.num_rq_entries = ICE_AQ_LEN;
+ hw->adminq.num_sq_entries = ICE_AQ_LEN;
+ hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
+ hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
+}
+
+/**
+ * ice_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ */
+static void ice_irq_affinity_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct ice_q_vector *q_vector =
+ container_of(notify, struct ice_q_vector, affinity_notify);
+
+ cpumask_copy(&q_vector->affinity_mask, mask);
+}
+
+/**
+ * ice_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ */
+static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
+
+/**
+ * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ */
+static void ice_vsi_dis_irq(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int base = vsi->base_vector;
+ u32 val;
+ int i;
+
+ /* disable interrupt causation from each queue */
+ if (vsi->tx_rings) {
+ ice_for_each_txq(vsi, i) {
+ if (vsi->tx_rings[i]) {
+ u16 reg;
+
+ reg = vsi->tx_rings[i]->reg_idx;
+ val = rd32(hw, QINT_TQCTL(reg));
+ val &= ~QINT_TQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_TQCTL(reg), val);
+ }
+ }
+ }
+
+ if (vsi->rx_rings) {
+ ice_for_each_rxq(vsi, i) {
+ if (vsi->rx_rings[i]) {
+ u16 reg;
+
+ reg = vsi->rx_rings[i]->reg_idx;
+ val = rd32(hw, QINT_RQCTL(reg));
+ val &= ~QINT_RQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_RQCTL(reg), val);
+ }
+ }
+ }
+
+ /* disable each interrupt */
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+ for (i = vsi->base_vector;
+ i < (vsi->num_q_vectors + vsi->base_vector); i++)
+ wr32(hw, GLINT_DYN_CTL(i), 0);
+
+ ice_flush(hw);
+ for (i = 0; i < vsi->num_q_vectors; i++)
+ synchronize_irq(pf->msix_entries[i + base].vector);
+ }
+}
+
+/**
+ * ice_vsi_ena_irq - Enable IRQ for the given VSI
+ * @vsi: the VSI being configured
+ */
+static int ice_vsi_ena_irq(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+ int i;
+
+ for (i = 0; i < vsi->num_q_vectors; i++)
+ ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
+ }
+
+ ice_flush(hw);
+ return 0;
+}
+
+/**
+ * ice_vsi_delete - delete a VSI from the switch
+ * @vsi: pointer to VSI being removed
+ */
+static void ice_vsi_delete(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_vsi_ctx ctxt;
+ enum ice_status status;
+
+ ctxt.vsi_num = vsi->vsi_num;
+
+ memcpy(&ctxt.info, &vsi->info, sizeof(struct ice_aqc_vsi_props));
+
+ status = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL);
+ if (status)
+ dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n",
+ vsi->vsi_num);
+}
+
+/**
+ * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ */
+static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
+{
+ int q_vectors = vsi->num_q_vectors;
+ struct ice_pf *pf = vsi->back;
+ int base = vsi->base_vector;
+ int rx_int_idx = 0;
+ int tx_int_idx = 0;
+ int vector, err;
+ int irq_num;
+
+ for (vector = 0; vector < q_vectors; vector++) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[vector];
+
+ irq_num = pf->msix_entries[base + vector].vector;
+
+ if (q_vector->tx.ring && q_vector->rx.ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-%s-%d", basename, "TxRx", rx_int_idx++);
+ tx_int_idx++;
+ } else if (q_vector->rx.ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-%s-%d", basename, "rx", rx_int_idx++);
+ } else if (q_vector->tx.ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-%s-%d", basename, "tx", tx_int_idx++);
+ } else {
+ /* skip this unused q_vector */
+ continue;
+ }
+ err = devm_request_irq(&pf->pdev->dev,
+ pf->msix_entries[base + vector].vector,
+ vsi->irq_handler, 0, q_vector->name,
+ q_vector);
+ if (err) {
+ netdev_err(vsi->netdev,
+ "MSIX request_irq failed, error: %d\n", err);
+ goto free_q_irqs;
+ }
+
+ /* register for affinity change notifications */
+ q_vector->affinity_notify.notify = ice_irq_affinity_notify;
+ q_vector->affinity_notify.release = ice_irq_affinity_release;
+ irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
+
+ /* assign the mask for this irq */
+ irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+ }
+
+ vsi->irqs_ready = true;
+ return 0;
+
+free_q_irqs:
+ while (vector) {
+ vector--;
+ irq_num = pf->msix_entries[base + vector].vector;
+ irq_set_affinity_notifier(irq_num, NULL);
+ irq_set_affinity_hint(irq_num, NULL);
+ devm_free_irq(&pf->pdev->dev, irq_num, vsi->q_vectors[vector]);
+ }
+ return err;
+}
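For reference, the snprintf() naming scheme above produces the per-vector names that later show up in /proc/interrupts. A standalone rendering with a hypothetical basename:

#include <stdio.h>

int main(void)
{
	char name[32];
	int rx_int_idx = 0;

	/* "ice-ens1f0" is a hypothetical basename; the format string
	 * mirrors the driver's snprintf above
	 */
	snprintf(name, sizeof(name) - 1, "%s-%s-%d",
		 "ice-ens1f0", "TxRx", rx_int_idx++);
	printf("%s\n", name);	/* ice-ens1f0-TxRx-0 */
	return 0;
}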
+
+/**
+ * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
+{
+ struct ice_hw_common_caps *cap;
+ struct ice_pf *pf = vsi->back;
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ vsi->rss_size = 1;
+ return;
+ }
+
+ cap = &pf->hw.func_caps.common_cap;
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ /* PF VSI will inherit RSS instance of PF */
+ vsi->rss_table_size = cap->rss_table_size;
+ vsi->rss_size = min_t(int, num_online_cpus(),
+ BIT(cap->rss_table_entry_width));
+ vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
+ break;
+ default:
+ dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+ break;
+ }
+}
+
+/**
+ * ice_vsi_setup_q_map - Setup a VSI queue map
+ * @vsi: the VSI being configured
+ * @ctxt: VSI context structure
+ */
+static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+ u16 offset = 0, qmap = 0, numq_tc;
+ u16 pow = 0, max_rss = 0, qcount;
+ u16 qcount_tx = vsi->alloc_txq;
+ u16 qcount_rx = vsi->alloc_rxq;
+ bool ena_tc0 = false;
+ int i;
+
+ /* at least TC0 should be enabled by default */
+ if (vsi->tc_cfg.numtc) {
+ if (!(vsi->tc_cfg.ena_tc & BIT(0)))
+ ena_tc0 = true;
+ } else {
+ ena_tc0 = true;
+ }
+
+ if (ena_tc0) {
+ vsi->tc_cfg.numtc++;
+ vsi->tc_cfg.ena_tc |= 1;
+ }
+
+ numq_tc = qcount_rx / vsi->tc_cfg.numtc;
+
+ /* TC mapping is a function of the number of Rx queues assigned to the
+ * VSI for each traffic class and the offset of these queues.
+ * The first 10 bits hold the queue offset for TC0; the next 4 bits hold
+ * the number of queues allocated to TC0, expressed as a power of 2.
+ *
+ * If a TC is not enabled, its queue offset is set to 0 and one queue is
+ * allocated, so that traffic for the given TC is sent to the default
+ * queue.
+ *
+ * Set up the number and offset of Rx queues for all TCs of the VSI.
+ */
+
+ /* qcount will change if RSS is enabled */
+ if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) {
+ if (vsi->type == ICE_VSI_PF)
+ max_rss = ICE_MAX_LG_RSS_QS;
+ else
+ max_rss = ICE_MAX_SMALL_RSS_QS;
+
+ qcount = min_t(int, numq_tc, max_rss);
+ qcount = min_t(int, qcount, vsi->rss_size);
+ } else {
+ qcount = numq_tc;
+ }
+
+ /* find the exponent of the smallest power of 2 >= qcount */
+ pow = ilog2(qcount);
+
+ if (!is_power_of_2(qcount))
+ pow++;
+
+ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+ if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+ /* TC is not enabled */
+ vsi->tc_cfg.tc_info[i].qoffset = 0;
+ vsi->tc_cfg.tc_info[i].qcount = 1;
+ ctxt->info.tc_mapping[i] = 0;
+ continue;
+ }
+
+ /* TC is enabled */
+ vsi->tc_cfg.tc_info[i].qoffset = offset;
+ vsi->tc_cfg.tc_info[i].qcount = qcount;
+
+ qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+ ICE_AQ_VSI_TC_Q_OFFSET_M) |
+ ((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+ ICE_AQ_VSI_TC_Q_NUM_M);
+ offset += qcount;
+ ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+ }
+
+ vsi->num_txq = qcount_tx;
+ vsi->num_rxq = offset;
+
+ /* Rx queue mapping */
+ ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+ /* q_mapping buffer holds the info for the first queue allocated for
+ * this VSI in the PF space and also the number of queues associated
+ * with this VSI.
+ */
+ ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+ ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
+}
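As a worked example of the qmap layout described above, the sketch below packs a queue offset and a power-of-2 queue count into one 16-bit word. The field positions are taken from the comment (a 10-bit offset in the low bits, then a 4-bit exponent) purely for illustration; the driver's authoritative ICE_AQ_VSI_TC_Q_* shifts and masks may differ.

#include <stdio.h>
#include <stdint.h>

/* assumed field layout per the comment above, not the real definitions */
#define Q_OFFSET_S 0
#define Q_OFFSET_M (0x3FF << Q_OFFSET_S)
#define Q_NUM_S    10
#define Q_NUM_M    (0xF << Q_NUM_S)

int main(void)
{
	uint16_t offset = 16;	/* first Rx queue for this TC */
	uint16_t pow = 3;	/* 2^3 = 8 queues */
	uint16_t qmap = ((offset << Q_OFFSET_S) & Q_OFFSET_M) |
			((pow << Q_NUM_S) & Q_NUM_M);

	printf("qmap = 0x%04x\n", qmap);	/* 0x0c10 */
	return 0;
}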
+
+/**
+ * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ *
+ * This initializes a default VSI context for all sections except the Queues.
+ */
+static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
+{
+ u32 table = 0;
+
+ memset(&ctxt->info, 0, sizeof(ctxt->info));
+ /* VSIs should be allocated from the shared pool */
+ ctxt->alloc_from_pool = true;
+ /* Src pruning enabled by default */
+ ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
+ /* Traffic from VSI can be sent to LAN */
+ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
+ /* Allow all packets untagged/tagged */
+ ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL &
+ ICE_AQ_VSI_PVLAN_MODE_M) >>
+ ICE_AQ_VSI_PVLAN_MODE_S);
+ /* Show VLAN/UP from packets in Rx descriptors */
+ ctxt->info.port_vlan_flags |= ((ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH &
+ ICE_AQ_VSI_PVLAN_EMOD_M) >>
+ ICE_AQ_VSI_PVLAN_EMOD_S);
+ /* Have 1:1 UP mapping for both ingress/egress tables */
+ table |= ICE_UP_TABLE_TRANSLATE(0, 0);
+ table |= ICE_UP_TABLE_TRANSLATE(1, 1);
+ table |= ICE_UP_TABLE_TRANSLATE(2, 2);
+ table |= ICE_UP_TABLE_TRANSLATE(3, 3);
+ table |= ICE_UP_TABLE_TRANSLATE(4, 4);
+ table |= ICE_UP_TABLE_TRANSLATE(5, 5);
+ table |= ICE_UP_TABLE_TRANSLATE(6, 6);
+ table |= ICE_UP_TABLE_TRANSLATE(7, 7);
+ ctxt->info.ingress_table = cpu_to_le32(table);
+ ctxt->info.egress_table = cpu_to_le32(table);
+ /* Have 1:1 UP mapping for outer to inner UP table */
+ ctxt->info.outer_up_table = cpu_to_le32(table);
+ /* No outer tag support; outer_tag_flags remains zero */
+}
+
+/**
+ * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+ u8 lut_type, hash_type;
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ /* PF VSI will inherit RSS instance of PF */
+ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
+ hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+ break;
+ default:
+ dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
+ vsi->type);
+ return;
+ }
+
+ ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+ ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+ ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
+ ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+}
+
+/**
+ * ice_vsi_add - Create a new VSI or fetch preallocated VSI
+ * @vsi: the VSI being configured
+ *
+ * This initializes a VSI context depending on the VSI type to be added and
+ * passes it down to the add_vsi aq command to create a new VSI.
+ */
+static int ice_vsi_add(struct ice_vsi *vsi)
+{
+ struct ice_vsi_ctx ctxt = { 0 };
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int ret = 0;
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ ctxt.flags = ICE_AQ_VSI_TYPE_PF;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ ice_set_dflt_vsi_ctx(&ctxt);
+ /* if the switch is in VEB mode, allow VSI loopback */
+ if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
+ ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+
+ /* Set LUT type and HASH type if RSS is enabled */
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ ice_set_rss_vsi_ctx(&ctxt, vsi);
+
+ ctxt.info.sw_id = vsi->port_info->sw_id;
+ ice_vsi_setup_q_map(vsi, &ctxt);
+
+ ret = ice_aq_add_vsi(hw, &ctxt, NULL);
+ if (ret) {
+ dev_err(&vsi->back->pdev->dev,
+ "Add VSI AQ call failed, err %d\n", ret);
+ return -EIO;
+ }
+ vsi->info = ctxt.info;
+ vsi->vsi_num = ctxt.vsi_num;
+
+ return ret;
+}
+
+/**
+ * ice_vsi_release_msix - Clear the queue-to-interrupt mapping in HW
+ * @vsi: the VSI being cleaned up
+ */
+static void ice_vsi_release_msix(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ u16 vector = vsi->base_vector;
+ struct ice_hw *hw = &pf->hw;
+ u32 txq = 0;
+ u32 rxq = 0;
+ int i, q;
+
+ for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+ wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), 0);
+ wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), 0);
+ for (q = 0; q < q_vector->num_ring_tx; q++) {
+ wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+ txq++;
+ }
+
+ for (q = 0; q < q_vector->num_ring_rx; q++) {
+ wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
+ rxq++;
+ }
+ }
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
+ * @vsi: the VSI having rings deallocated
+ */
+static void ice_vsi_clear_rings(struct ice_vsi *vsi)
+{
+ int i;
+
+ if (vsi->tx_rings) {
+ for (i = 0; i < vsi->alloc_txq; i++) {
+ if (vsi->tx_rings[i]) {
+ kfree_rcu(vsi->tx_rings[i], rcu);
+ vsi->tx_rings[i] = NULL;
+ }
+ }
+ }
+ if (vsi->rx_rings) {
+ for (i = 0; i < vsi->alloc_rxq; i++) {
+ if (vsi->rx_rings[i]) {
+ kfree_rcu(vsi->rx_rings[i], rcu);
+ vsi->rx_rings[i] = NULL;
+ }
+ }
+ }
+}
+
+/**
+ * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI
+ * @vsi: VSI which is having rings allocated
+ */
+static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int i;
+
+ /* Allocate tx_rings */
+ for (i = 0; i < vsi->alloc_txq; i++) {
+ struct ice_ring *ring;
+
+ /* allocate with kzalloc(), free with kfree_rcu() */
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+
+ if (!ring)
+ goto err_out;
+
+ ring->q_index = i;
+ ring->reg_idx = vsi->txq_map[i];
+ ring->ring_active = false;
+ ring->vsi = vsi;
+ ring->netdev = vsi->netdev;
+ ring->dev = &pf->pdev->dev;
+ ring->count = vsi->num_desc;
+
+ vsi->tx_rings[i] = ring;
+ }
+
+ /* Allocate rx_rings */
+ for (i = 0; i < vsi->alloc_rxq; i++) {
+ struct ice_ring *ring;
+
+ /* allocate with kzalloc(), free with kfree_rcu() */
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring)
+ goto err_out;
+
+ ring->q_index = i;
+ ring->reg_idx = vsi->rxq_map[i];
+ ring->ring_active = false;
+ ring->vsi = vsi;
+ ring->netdev = vsi->netdev;
+ ring->dev = &pf->pdev->dev;
+ ring->count = vsi->num_desc;
+ vsi->rx_rings[i] = ring;
+ }
+
+ return 0;
+
+err_out:
+ ice_vsi_clear_rings(vsi);
+ return -ENOMEM;
+}
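The "allocate with kzalloc(), free with kfree_rcu()" pairing noted in the comments lets readers that traverse a ring inside an RCU read-side section stay safe across teardown. A minimal kernel-style sketch of the pattern, assuming (as the real struct presumably does) an embedded struct rcu_head named rcu; the struct name here is a hypothetical stand-in:

#include <linux/slab.h>
#include <linux/rcupdate.h>

/* hypothetical stand-in for struct ice_ring; only the rcu_head matters */
struct ring_like {
	struct rcu_head rcu;	/* embedded head required by kfree_rcu() */
	int q_index;
};

static struct ring_like *ring_alloc_example(void)
{
	/* zeroed allocation, exactly as above */
	return kzalloc(sizeof(struct ring_like), GFP_KERNEL);
}

static void ring_free_example(struct ring_like *ring)
{
	/* readers still inside rcu_read_lock() sections may hold a
	 * reference; the memory is freed only after a grace period
	 */
	kfree_rcu(ring, rcu);
}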
+
+/**
+ * ice_vsi_free_irq - Free the irq association with the OS
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_free_irq(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int base = vsi->base_vector;
+
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+ int i;
+
+ if (!vsi->q_vectors || !vsi->irqs_ready)
+ return;
+
+ vsi->irqs_ready = false;
+ for (i = 0; i < vsi->num_q_vectors; i++) {
+ u16 vector = i + base;
+ int irq_num;
+
+ irq_num = pf->msix_entries[vector].vector;
+
+ /* free only the irqs that were actually requested */
+ if (!vsi->q_vectors[i] ||
+ !(vsi->q_vectors[i]->num_ring_tx ||
+ vsi->q_vectors[i]->num_ring_rx))
+ continue;
+
+ /* clear the affinity notifier in the IRQ descriptor */
+ irq_set_affinity_notifier(irq_num, NULL);
+
+ /* clear the affinity_mask in the IRQ descriptor */
+ irq_set_affinity_hint(irq_num, NULL);
+ synchronize_irq(irq_num);
+ devm_free_irq(&pf->pdev->dev, irq_num,
+ vsi->q_vectors[i]);
+ }
+ ice_vsi_release_msix(vsi);
+ }
+}
+
+/**
+ * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_cfg_msix(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ u16 vector = vsi->base_vector;
+ struct ice_hw *hw = &pf->hw;
+ u32 txq = 0, rxq = 0;
+ int i, q, itr;
+ u8 itr_gran;
+
+ for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+ itr_gran = hw->itr_gran_200;
+
+ if (q_vector->num_ring_rx) {
+ q_vector->rx.itr =
+ ITR_TO_REG(vsi->rx_rings[rxq]->rx_itr_setting,
+ itr_gran);
+ q_vector->rx.latency_range = ICE_LOW_LATENCY;
+ }
+
+ if (q_vector->num_ring_tx) {
+ q_vector->tx.itr =
+ ITR_TO_REG(vsi->tx_rings[txq]->tx_itr_setting,
+ itr_gran);
+ q_vector->tx.latency_range = ICE_LOW_LATENCY;
+ }
+ wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), q_vector->rx.itr);
+ wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), q_vector->tx.itr);
+
+ /* Both the Transmit Queue Interrupt Cause Control register
+ * and the Receive Queue Interrupt Cause Control register
+ * expect the MSIX_INDX field to be the vector index within
+ * the function space, not the absolute vector index across
+ * the PF or across the device.
+ * For SR-IOV VF VSIs the queue vector index always starts
+ * at 1, since the first vector index (0) is used for the
+ * OICR in VF space. Since VMDq and other PF VSIs are within
+ * the PF function space, use the vector index that is
+ * tracked for this PF.
+ */
+ for (q = 0; q < q_vector->num_ring_tx; q++) {
+ u32 val;
+
+ itr = ICE_TX_ITR;
+ val = QINT_TQCTL_CAUSE_ENA_M |
+ (itr << QINT_TQCTL_ITR_INDX_S) |
+ (vector << QINT_TQCTL_MSIX_INDX_S);
+ wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+ txq++;
+ }
+
+ for (q = 0; q < q_vector->num_ring_rx; q++) {
+ u32 val;
+
+ itr = ICE_RX_ITR;
+ val = QINT_RQCTL_CAUSE_ENA_M |
+ (itr << QINT_RQCTL_ITR_INDX_S) |
+ (vector << QINT_RQCTL_MSIX_INDX_S);
+ wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+ rxq++;
+ }
+ }
+
+ ice_flush(hw);
+}
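To make the QINT register writes above concrete, here is a standalone sketch composing one cause-control value. The bit positions are assumptions for illustration (a single cause-enable bit high in the word, the ITR index above the MSI-X index in the low bits); they are not the hardware's real QINT_*QCTL masks.

#include <stdio.h>
#include <stdint.h>

/* assumed field layout for illustration; not the hardware definition */
#define CAUSE_ENA	(1u << 30)
#define ITR_INDX_S	11
#define MSIX_INDX_S	0

int main(void)
{
	uint32_t itr = 1;	/* e.g. a Tx ITR index */
	uint32_t vector = 3;	/* vector index within this PF's space */
	uint32_t val = CAUSE_ENA | (itr << ITR_INDX_S) |
		       (vector << MSIX_INDX_S);

	printf("QINT_xQCTL = 0x%08x\n", val);	/* 0x40000803 */
	return 0;
}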
+
+/**
+ * ice_ena_misc_vector - enable the non-queue interrupts
+ * @pf: board private structure
+ */
+static void ice_ena_misc_vector(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ /* clear things first */
+ wr32(hw, PFINT_OICR_ENA, 0); /* disable all */
+ rd32(hw, PFINT_OICR); /* read to clear */
+
+ val = (PFINT_OICR_HLP_RDY_M |
+ PFINT_OICR_CPM_RDY_M |
+ PFINT_OICR_ECC_ERR_M |
+ PFINT_OICR_MAL_DETECT_M |
+ PFINT_OICR_GRST_M |
+ PFINT_OICR_PCI_EXCEPTION_M |
+ PFINT_OICR_GPIO_M |
+ PFINT_OICR_STORM_DETECT_M |
+ PFINT_OICR_HMC_ERR_M);
+
+ wr32(hw, PFINT_OICR_ENA, val);
+
+ /* SW_ITR_IDX = 0, but don't change INTENA */
+ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+ GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
+}
+
+/**
+ * ice_misc_intr - misc interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
+{
+ struct ice_pf *pf = (struct ice_pf *)data;
+ struct ice_hw *hw = &pf->hw;
+ irqreturn_t ret = IRQ_NONE;
+ u32 oicr, ena_mask;
+
+ set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
+
+ oicr = rd32(hw, PFINT_OICR);
+ ena_mask = rd32(hw, PFINT_OICR_ENA);
+
+ if (!(oicr & PFINT_OICR_INTEVENT_M))
+ goto ena_intr;
+
+ if (oicr & PFINT_OICR_GRST_M) {
+ u32 reset;
+ /* we have a reset warning */
+ ena_mask &= ~PFINT_OICR_GRST_M;
+ reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
+ GLGEN_RSTAT_RESET_TYPE_S;
+
+ if (reset == ICE_RESET_CORER)
+ pf->corer_count++;
+ else if (reset == ICE_RESET_GLOBR)
+ pf->globr_count++;
+ else
+ pf->empr_count++;
+
+ /* If a reset cycle isn't already in progress, we set a bit in
+ * pf->state so that the service task can start a reset/rebuild.
+ * We also make note of which reset happened so that peer
+ * devices/drivers can be informed.
+ */
+ if (!test_bit(__ICE_RESET_RECOVERY_PENDING, pf->state)) {
+ if (reset == ICE_RESET_CORER)
+ set_bit(__ICE_CORER_RECV, pf->state);
+ else if (reset == ICE_RESET_GLOBR)
+ set_bit(__ICE_GLOBR_RECV, pf->state);
+ else
+ set_bit(__ICE_EMPR_RECV, pf->state);
+
+ set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ }
+ }
+
+ if (oicr & PFINT_OICR_HMC_ERR_M) {
+ ena_mask &= ~PFINT_OICR_HMC_ERR_M;
+ dev_dbg(&pf->pdev->dev,
+ "HMC Error interrupt - info 0x%x, data 0x%x\n",
+ rd32(hw, PFHMC_ERRORINFO),
+ rd32(hw, PFHMC_ERRORDATA));
+ }
+
+ /* Report and mask off any remaining unexpected interrupts */
+ oicr &= ena_mask;
+ if (oicr) {
+ dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n",
+ oicr);
+ /* If a critical error is pending there is no choice but to
+ * reset the device.
+ */
+ if (oicr & (PFINT_OICR_PE_CRITERR_M |
+ PFINT_OICR_PCI_EXCEPTION_M |
+ PFINT_OICR_ECC_ERR_M)) {
+ set_bit(__ICE_PFR_REQ, pf->state);
+ ice_service_task_schedule(pf);
+ }
+ ena_mask &= ~oicr;
+ }
+ ret = IRQ_HANDLED;
+
+ena_intr:
+ /* re-enable interrupt causes that are not handled during this pass */
+ wr32(hw, PFINT_OICR_ENA, ena_mask);
+ if (!test_bit(__ICE_DOWN, pf->state)) {
+ ice_service_task_schedule(pf);
+ ice_irq_dynamic_ena(hw, NULL, NULL);
+ }
+
+ return ret;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+ int q_vectors = vsi->num_q_vectors;
+ int tx_rings_rem, rx_rings_rem;
+ int v_id;
+
+ /* initialize the remaining ring counts from the VSI's queue counts */
+ tx_rings_rem = vsi->num_txq;
+ rx_rings_rem = vsi->num_rxq;
+
+ for (v_id = 0; v_id < q_vectors; v_id++) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+ int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+
+ /* Tx rings mapping to vector */
+ tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+ q_vector->num_ring_tx = tx_rings_per_v;
+ q_vector->tx.ring = NULL;
+ q_base = vsi->num_txq - tx_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+ struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+ tx_ring->q_vector = q_vector;
+ tx_ring->next = q_vector->tx.ring;
+ q_vector->tx.ring = tx_ring;
+ }
+ tx_rings_rem -= tx_rings_per_v;
+
+ /* Rx rings mapping to vector */
+ rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+ q_vector->num_ring_rx = rx_rings_per_v;
+ q_vector->rx.ring = NULL;
+ q_base = vsi->num_rxq - rx_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+ struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+ rx_ring->q_vector = q_vector;
+ rx_ring->next = q_vector->rx.ring;
+ q_vector->rx.ring = rx_ring;
+ }
+ rx_rings_rem -= rx_rings_per_v;
+ }
+}
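The DIV_ROUND_UP() loop above spreads rings as evenly as possible when rings outnumber vectors: the front vectors take the rounded-up share, the tail absorbs the remainder. A quick standalone check of the arithmetic for 8 rings on 3 vectors:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int rings_rem = 8, q_vectors = 3, v_id;

	for (v_id = 0; v_id < q_vectors; v_id++) {
		int per_v = DIV_ROUND_UP(rings_rem, q_vectors - v_id);

		printf("vector %d: %d rings\n", v_id, per_v); /* 3, 3, 2 */
		rings_rem -= per_v;
	}
	return 0;
}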
+
+/**
+ * ice_vsi_set_num_qs - Set num queues, descriptors and vectors for a VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_num_qs(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ vsi->alloc_txq = pf->num_lan_tx;
+ vsi->alloc_rxq = pf->num_lan_rx;
+ vsi->num_desc = ALIGN(ICE_DFLT_NUM_DESC, ICE_REQ_DESC_MULTIPLE);
+ vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx);
+ break;
+ default:
+ dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
+ vsi->type);
+ break;
+ }
+}
+
+/**
+ * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
+ * @vsi: VSI pointer
+ * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
+ *
+ * On error: returns error code (negative)
+ * On success: returns 0
+ */
+static int ice_vsi_alloc_arrays(struct ice_vsi *vsi, bool alloc_qvectors)
+{
+ struct ice_pf *pf = vsi->back;
+
+ /* allocate memory for both Tx and Rx ring pointers */
+ vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+ sizeof(struct ice_ring *), GFP_KERNEL);
+ if (!vsi->tx_rings)
+ goto err_txrings;
+
+ vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+ sizeof(struct ice_ring *), GFP_KERNEL);
+ if (!vsi->rx_rings)
+ goto err_rxrings;
+
+ if (alloc_qvectors) {
+ /* allocate memory for q_vector pointers */
+ vsi->q_vectors = devm_kcalloc(&pf->pdev->dev,
+ vsi->num_q_vectors,
+ sizeof(struct ice_q_vector *),
+ GFP_KERNEL);
+ if (!vsi->q_vectors)
+ goto err_vectors;
+ }
+
+ return 0;
+
+err_vectors:
+ devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+err_rxrings:
+ devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+err_txrings:
+ return -ENOMEM;
+}
+
+/**
+ * ice_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
+{
+ struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+ if (!q_vector->tx.ring && !q_vector->rx.ring)
+ return IRQ_HANDLED;
+
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ice_vsi_alloc - Allocates the next available struct ice_vsi in the PF
+ * @pf: board private structure
+ * @type: type of VSI
+ *
+ * returns a pointer to a VSI on success, NULL on failure.
+ */
+static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type)
+{
+ struct ice_vsi *vsi = NULL;
+
+ /* Need to protect the allocation of the VSIs at the PF level */
+ mutex_lock(&pf->sw_mutex);
+
+ /* If we have already allocated our maximum number of VSIs,
+ * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index
+ * is available to be populated
+ */
+ if (pf->next_vsi == ICE_NO_VSI) {
+ dev_dbg(&pf->pdev->dev, "out of VSI slots!\n");
+ goto unlock_pf;
+ }
+
+ vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL);
+ if (!vsi)
+ goto unlock_pf;
+
+ vsi->type = type;
+ vsi->back = pf;
+ set_bit(__ICE_DOWN, vsi->state);
+ vsi->idx = pf->next_vsi;
+ vsi->work_lmt = ICE_DFLT_IRQ_WORK;
+
+ ice_vsi_set_num_qs(vsi);
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ if (ice_vsi_alloc_arrays(vsi, true))
+ goto err_rings;
+
+ /* Setup default MSIX irq handler for VSI */
+ vsi->irq_handler = ice_msix_clean_rings;
+ break;
+ default:
+ dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+ goto unlock_pf;
+ }
+
+ /* fill VSI slot in the PF struct */
+ pf->vsi[pf->next_vsi] = vsi;
+
+ /* prepare pf->next_vsi for next use */
+ pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
+ pf->next_vsi);
+ goto unlock_pf;
+
+err_rings:
+ devm_kfree(&pf->pdev->dev, vsi);
+ vsi = NULL;
+unlock_pf:
+ mutex_unlock(&pf->sw_mutex);
+ return vsi;
+}
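ice_get_free_slot() itself is not part of this hunk. A plausible sketch consistent with how pf->next_vsi is used above would prefer the slot right after the one just filled and otherwise scan the table from the start; this is an assumption about its behavior, not the actual implementation:

#include <stdio.h>

#define NO_VSI (-1)	/* hypothetical stand-in for ICE_NO_VSI */

static int get_free_slot(void **array, int size, int curr)
{
	int i;

	/* fast path: the very next slot is free */
	if (curr + 1 < size && !array[curr + 1])
		return curr + 1;

	/* otherwise scan the whole table for a hole */
	for (i = 0; i < size; i++)
		if (!array[i])
			return i;

	return NO_VSI;
}

int main(void)
{
	void *vsi[4] = { (void *)1, 0, (void *)1, 0 };

	printf("%d\n", get_free_slot(vsi, 4, 0));	/* 1 */
	return 0;
}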
+
+/**
+ * ice_free_irq_msix_misc - Unroll misc vector setup
+ * @pf: board private structure
+ */
+static void ice_free_irq_msix_misc(struct ice_pf *pf)
+{
+ /* disable OICR interrupt */
+ wr32(&pf->hw, PFINT_OICR_ENA, 0);
+ ice_flush(&pf->hw);
+
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) {
+ synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
+ devm_free_irq(&pf->pdev->dev,
+ pf->msix_entries[pf->oicr_idx].vector, pf);
+ }
+
+ ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
+}
+
+/**
+ * ice_req_irq_msix_misc - Setup the misc vector to handle non-queue events
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0, which is used to manage the
+ * non-queue interrupts, e.g. AdminQ and errors. This is not used
+ * when in MSI or Legacy interrupt mode.
+ */
+static int ice_req_irq_msix_misc(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ int oicr_idx, err = 0;
+ u8 itr_gran;
+ u32 val;
+
+ if (!pf->int_name[0])
+ snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
+ dev_driver_string(&pf->pdev->dev),
+ dev_name(&pf->pdev->dev));
+
+ /* Do not request IRQ but do enable OICR interrupt since settings are
+ * lost during reset. Note that this function is called only during
+ * rebuild path and not while reset is in progress.
+ */
+ if (ice_is_reset_recovery_pending(pf->state))
+ goto skip_req_irq;
+
+ /* reserve one vector in irq_tracker for misc interrupts */
+ oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+ if (oicr_idx < 0)
+ return oicr_idx;
+
+ pf->oicr_idx = oicr_idx;
+
+ err = devm_request_irq(&pf->pdev->dev,
+ pf->msix_entries[pf->oicr_idx].vector,
+ ice_misc_intr, 0, pf->int_name, pf);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "devm_request_irq for %s failed: %d\n",
+ pf->int_name, err);
+ ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+ return err;
+ }
+
+skip_req_irq:
+ ice_ena_misc_vector(pf);
+
+ val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
+ (ICE_RX_ITR & PFINT_OICR_CTL_ITR_INDX_M) |
+ PFINT_OICR_CTL_CAUSE_ENA_M;
+ wr32(hw, PFINT_OICR_CTL, val);
+
+ /* This enables Admin queue Interrupt causes */
+ val = (pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) |
+ (ICE_RX_ITR & PFINT_FW_CTL_ITR_INDX_M) |
+ PFINT_FW_CTL_CAUSE_ENA_M;
+ wr32(hw, PFINT_FW_CTL, val);
+
+ itr_gran = hw->itr_gran_200;
+
+ wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
+ ITR_TO_REG(ICE_ITR_8K, itr_gran));
+
+ ice_flush(hw);
+ ice_irq_dynamic_ena(hw, NULL, NULL);
+
+ return 0;
+}
+
+/**
+ * ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @vsi: the VSI getting queues
+ *
+ * Return 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs_contig(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int offset, ret = 0;
+
+ mutex_lock(&pf->avail_q_mutex);
+ /* look for contiguous block of queues for tx */
+ offset = bitmap_find_next_zero_area(pf->avail_txqs, ICE_MAX_TXQS,
+ 0, vsi->alloc_txq, 0);
+ if (offset < ICE_MAX_TXQS) {
+ int i;
+
+ bitmap_set(pf->avail_txqs, offset, vsi->alloc_txq);
+ for (i = 0; i < vsi->alloc_txq; i++)
+ vsi->txq_map[i] = i + offset;
+ } else {
+ ret = -ENOMEM;
+ vsi->tx_mapping_mode = ICE_VSI_MAP_SCATTER;
+ }
+
+ /* look for contiguous block of queues for rx */
+ offset = bitmap_find_next_zero_area(pf->avail_rxqs, ICE_MAX_RXQS,
+ 0, vsi->alloc_rxq, 0);
+ if (offset < ICE_MAX_RXQS) {
+ int i;
+
+ bitmap_set(pf->avail_rxqs, offset, vsi->alloc_rxq);
+ for (i = 0; i < vsi->alloc_rxq; i++)
+ vsi->rxq_map[i] = i + offset;
+ } else {
+ ret = -ENOMEM;
+ vsi->rx_mapping_mode = ICE_VSI_MAP_SCATTER;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ return ret;
+}
+
+/**
+ * ice_vsi_get_qs_scatter - Assign scattered queues to a VSI
+ * @vsi: the VSI getting queues
+ *
+ * Return 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs_scatter(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int i, index = 0;
+
+ mutex_lock(&pf->avail_q_mutex);
+
+ if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) {
+ for (i = 0; i < vsi->alloc_txq; i++) {
+ index = find_next_zero_bit(pf->avail_txqs,
+ ICE_MAX_TXQS, index);
+ if (index < ICE_MAX_TXQS) {
+ set_bit(index, pf->avail_txqs);
+ vsi->txq_map[i] = index;
+ } else {
+ goto err_scatter_tx;
+ }
+ }
+ }
+
+ if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) {
+ for (i = 0; i < vsi->alloc_rxq; i++) {
+ index = find_next_zero_bit(pf->avail_rxqs,
+ ICE_MAX_RXQS, index);
+ if (index < ICE_MAX_RXQS) {
+ set_bit(index, pf->avail_rxqs);
+ vsi->rxq_map[i] = index;
+ } else {
+ goto err_scatter_rx;
+ }
+ }
+ }
+
+ mutex_unlock(&pf->avail_q_mutex);
+ return 0;
+
+err_scatter_rx:
+ /* unflag any queues we have grabbed (i is failed position) */
+ for (index = 0; index < i; index++) {
+ clear_bit(vsi->rxq_map[index], pf->avail_rxqs);
+ vsi->rxq_map[index] = 0;
+ }
+ i = vsi->alloc_txq;
+err_scatter_tx:
+ /* i is either position of failed attempt or vsi->alloc_txq */
+ for (index = 0; index < i; index++) {
+ clear_bit(vsi->txq_map[index], pf->avail_txqs);
+ vsi->txq_map[index] = 0;
+ }
+
+ mutex_unlock(&pf->avail_q_mutex);
+ return -ENOMEM;
+}
+
+/**
+ * ice_vsi_get_qs - Assign queues from PF to VSI
+ * @vsi: the VSI to assign queues to
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs(struct ice_vsi *vsi)
+{
+ int ret = 0;
+
+ vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG;
+ vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG;
+
+ /* NOTE: ice_vsi_get_qs_contig() will set the rx/tx mapping
+ * modes individually to scatter if assigning contiguous queues
+ * to rx or tx fails
+ */
+ ret = ice_vsi_get_qs_contig(vsi);
+ if (ret < 0) {
+ if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER)
+ vsi->alloc_txq = max_t(u16, vsi->alloc_txq,
+ ICE_MAX_SCATTER_TXQS);
+ if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER)
+ vsi->alloc_rxq = max_t(u16, vsi->alloc_rxq,
+ ICE_MAX_SCATTER_RXQS);
+ ret = ice_vsi_get_qs_scatter(vsi);
+ }
+
+ return ret;
+}
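A toy model of the contig-then-scatter fallback just described: look for a long-enough zero run in the in-use mask, and fall back to claiming individual free bits when the map is fragmented. All values below are hypothetical; the real driver works on pf->avail_txqs/avail_rxqs bitmaps.

#include <stdio.h>

/* bit i set in 'used' means queue i is already taken */
static int find_zero_run(unsigned int used, int len, int nbits)
{
	int off;

	for (off = 0; off + len <= nbits; off++) {
		unsigned int mask = ((1u << len) - 1) << off;

		if (!(used & mask))
			return off;
	}
	return -1;
}

int main(void)
{
	unsigned int used = 0xCB;	/* queues 0,1,3,6,7 taken */
	int off = find_zero_run(used, 3, 8);
	int i, got;

	if (off >= 0) {
		printf("contiguous block at queue %d\n", off);
		return 0;
	}

	/* fragmented map: scatter across individual free queues */
	for (i = 0, got = 0; i < 8 && got < 3; i++) {
		if (!(used & (1u << i))) {
			printf("scatter queue %d\n", i);	/* 2, 4, 5 */
			got++;
		}
	}
	return 0;
}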
+
+/**
+ * ice_vsi_put_qs - Release queues from VSI to PF
+ * @vsi: the VSI that is releasing its queues
+ */
+static void ice_vsi_put_qs(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int i;
+
+ mutex_lock(&pf->avail_q_mutex);
+
+ for (i = 0; i < vsi->alloc_txq; i++) {
+ clear_bit(vsi->txq_map[i], pf->avail_txqs);
+ vsi->txq_map[i] = ICE_INVAL_Q_INDEX;
+ }
+
+ for (i = 0; i < vsi->alloc_rxq; i++) {
+ clear_bit(vsi->rxq_map[i], pf->avail_rxqs);
+ vsi->rxq_map[i] = ICE_INVAL_Q_INDEX;
+ }
+
+ mutex_unlock(&pf->avail_q_mutex);
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+ struct ice_q_vector *q_vector;
+ struct ice_ring *ring;
+
+ if (!vsi->q_vectors[v_idx]) {
+ dev_dbg(&vsi->back->pdev->dev, "Queue vector at index %d not found\n",
+ v_idx);
+ return;
+ }
+ q_vector = vsi->q_vectors[v_idx];
+
+ ice_for_each_ring(ring, q_vector->tx)
+ ring->q_vector = NULL;
+ ice_for_each_ring(ring, q_vector->rx)
+ ring->q_vector = NULL;
+
+ /* only VSI with an associated netdev is set up with NAPI */
+ if (vsi->netdev)
+ netif_napi_del(&q_vector->napi);
+
+ devm_kfree(&vsi->back->pdev->dev, q_vector);
+ vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+static void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
+ ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_cfg_netdev - Setup the netdev flags
+ * @vsi: the VSI being configured
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_cfg_netdev(struct ice_vsi *vsi)
+{
+ netdev_features_t csumo_features;
+ netdev_features_t vlano_features;
+ netdev_features_t dflt_features;
+ netdev_features_t tso_features;
+ struct ice_netdev_priv *np;
+ struct net_device *netdev;
+ u8 mac_addr[ETH_ALEN];
+
+ netdev = alloc_etherdev_mqs(sizeof(struct ice_netdev_priv),
+ vsi->alloc_txq, vsi->alloc_rxq);
+ if (!netdev)
+ return -ENOMEM;
+
+ vsi->netdev = netdev;
+ np = netdev_priv(netdev);
+ np->vsi = vsi;
+
+ dflt_features = NETIF_F_SG |
+ NETIF_F_HIGHDMA |
+ NETIF_F_RXHASH;
+
+ csumo_features = NETIF_F_RXCSUM |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM;
+
+ vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX;
+
+ tso_features = NETIF_F_TSO;
+
+ /* set features that user can change */
+ netdev->hw_features = dflt_features | csumo_features |
+ vlano_features | tso_features;
+
+ /* enable features */
+ netdev->features |= netdev->hw_features;
+ /* encap and VLAN devices inherit default, csumo and tso features */
+ netdev->hw_enc_features |= dflt_features | csumo_features |
+ tso_features;
+ netdev->vlan_features |= dflt_features | csumo_features |
+ tso_features;
+
+ if (vsi->type == ICE_VSI_PF) {
+ SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev);
+ ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+
+ ether_addr_copy(netdev->dev_addr, mac_addr);
+ ether_addr_copy(netdev->perm_addr, mac_addr);
+ }
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ /* assign netdev_ops */
+ netdev->netdev_ops = &ice_netdev_ops;
+
+ /* setup watchdog timeout value to be 5 second */
+ netdev->watchdog_timeo = 5 * HZ;
+
+ ice_set_ethtool_ops(netdev);
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ netdev->max_mtu = ICE_MAX_MTU;
+
+ return 0;
+}
+
+/**
+ * ice_vsi_free_arrays - clean up VSI resources
+ * @vsi: pointer to VSI being cleared
+ * @free_qvectors: bool to specify if q_vectors should be deallocated
+ */
+static void ice_vsi_free_arrays(struct ice_vsi *vsi, bool free_qvectors)
+{
+ struct ice_pf *pf = vsi->back;
+
+ /* free the ring and vector containers */
+ if (free_qvectors && vsi->q_vectors) {
+ devm_kfree(&pf->pdev->dev, vsi->q_vectors);
+ vsi->q_vectors = NULL;
+ }
+ if (vsi->tx_rings) {
+ devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+ vsi->tx_rings = NULL;
+ }
+ if (vsi->rx_rings) {
+ devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+ vsi->rx_rings = NULL;
+ }
+}
+
+/**
+ * ice_vsi_clear - clean up and deallocate the provided VSI
+ * @vsi: pointer to VSI being cleared
+ *
+ * This deallocates the VSI's queue resources, removes it from the PF's
+ * VSI array if necessary, and deallocates the VSI
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_vsi_clear(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = NULL;
+
+ if (!vsi)
+ return 0;
+
+ if (!vsi->back)
+ return -EINVAL;
+
+ pf = vsi->back;
+
+ if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
+ dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n",
+ vsi->idx);
+ return -EINVAL;
+ }
+
+ mutex_lock(&pf->sw_mutex);
+ /* updates the PF for this cleared vsi */
+
+ pf->vsi[vsi->idx] = NULL;
+ if (vsi->idx < pf->next_vsi)
+ pf->next_vsi = vsi->idx;
+
+ ice_vsi_free_arrays(vsi, true);
+ mutex_unlock(&pf->sw_mutex);
+ devm_kfree(&pf->pdev->dev, vsi);
+
+ return 0;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_q_vector *q_vector;
+
+ /* allocate q_vector */
+ q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ q_vector->vsi = vsi;
+ q_vector->v_idx = v_idx;
+ /* only set affinity_mask if the CPU is online */
+ if (cpu_online(v_idx))
+ cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+ if (vsi->netdev)
+ netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+ NAPI_POLL_WEIGHT);
+ /* tie q_vector and vsi together */
+ vsi->q_vectors[v_idx] = q_vector;
+
+ return 0;
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int v_idx = 0, num_q_vectors;
+ int err;
+
+ if (vsi->q_vectors[0]) {
+ dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
+ vsi->vsi_num);
+ return -EEXIST;
+ }
+
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+ num_q_vectors = vsi->num_q_vectors;
+ } else {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+ err = ice_vsi_alloc_q_vector(vsi, v_idx);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ while (v_idx--)
+ ice_free_q_vector(vsi, v_idx);
+
+ dev_err(&pf->pdev->dev,
+ "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+ vsi->num_q_vectors, vsi->vsi_num, err);
+ vsi->num_q_vectors = 0;
+ return err;
+}
+
+/**
+ * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
+ * @vsi: ptr to the VSI
+ *
+ * This should only be called after ice_vsi_alloc() which allocates the
+ * corresponding SW VSI structure and initializes the queue counts for the
+ * newly allocated VSI.
+ *
+ * Returns 0 on success or negative on failure
+ */
+static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int num_q_vectors = 0;
+
+ if (vsi->base_vector) {
+ dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
+ vsi->vsi_num, vsi->base_vector);
+ return -EEXIST;
+ }
+
+ if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+ return -ENOENT;
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ num_q_vectors = vsi->num_q_vectors;
+ break;
+ default:
+ dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
+ vsi->type);
+ break;
+ }
+
+ if (num_q_vectors)
+ vsi->base_vector = ice_get_res(pf, pf->irq_tracker,
+ num_q_vectors, vsi->idx);
+
+ if (vsi->base_vector < 0) {
+ dev_err(&pf->pdev->dev,
+ "Failed to get tracking for %d vectors for VSI %d, err=%d\n",
+ num_q_vectors, vsi->vsi_num, vsi->base_vector);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_fill_rss_lut - Fill the RSS lookup table with default values
+ * @lut: Lookup table
+ * @rss_table_size: Lookup table size
+ * @rss_size: Range of queue number for hashing
+ */
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
+{
+ u16 i;
+
+ for (i = 0; i < rss_table_size; i++)
+ lut[i] = i % rss_size;
+}
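Usage-wise, ice_fill_rss_lut() simply round-robins queue indices across the lookup table, so hash results land evenly on the RSS queues. A standalone check with a hypothetical 16-entry table and rss_size of 4:

#include <stdio.h>

typedef unsigned char u8;

/* local copy of the trivial fill logic above, for a runnable demo */
static void fill_rss_lut(u8 *lut, unsigned short rss_table_size,
			 unsigned short rss_size)
{
	unsigned short i;

	for (i = 0; i < rss_table_size; i++)
		lut[i] = i % rss_size;
}

int main(void)
{
	u8 lut[16];
	int i;

	fill_rss_lut(lut, 16, 4);
	for (i = 0; i < 16; i++)
		printf("%d ", lut[i]);	/* 0 1 2 3 0 1 2 3 ... */
	printf("\n");
	return 0;
}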
+
+/**
+ * ice_vsi_cfg_rss - Configure RSS params for a VSI
+ * @vsi: VSI to be configured
+ */
+static int ice_vsi_cfg_rss(struct ice_vsi *vsi)
+{
+ u8 seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
+ struct ice_aqc_get_set_rss_keys *key;
+ struct ice_pf *pf = vsi->back;
+ enum ice_status status;
+ int err = 0;
+ u8 *lut;
+
+ vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq);
+
+ lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ if (vsi->rss_lut_user)
+ memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+ else
+ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+
+ status = ice_aq_set_rss_lut(&pf->hw, vsi->vsi_num, vsi->rss_lut_type,
+ lut, vsi->rss_table_size);
+
+ if (status) {
+ dev_err(&vsi->back->pdev->dev,
+ "set_rss_lut failed, error %d\n", status);
+ err = -EIO;
+ goto ice_vsi_cfg_rss_exit;
+ }
+
+ key = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*key), GFP_KERNEL);
+ if (!key) {
+ err = -ENOMEM;
+ goto ice_vsi_cfg_rss_exit;
+ }
+
+ if (vsi->rss_hkey_user)
+ memcpy(seed, vsi->rss_hkey_user,
+ ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
+ else
+ netdev_rss_key_fill((void *)seed,
+ ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
+ memcpy(&key->standard_rss_key, seed,
+ ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
+
+ status = ice_aq_set_rss_key(&pf->hw, vsi->vsi_num, key);
+
+ if (status) {
+ dev_err(&vsi->back->pdev->dev, "set_rss_key failed, error %d\n",
+ status);
+ err = -EIO;
+ }
+
+ devm_kfree(&pf->pdev->dev, key);
+ice_vsi_cfg_rss_exit:
+ devm_kfree(&pf->pdev->dev, lut);
+ return err;
+}
+
+/**
+ * ice_vsi_reinit_setup - release and reallocate resources for a VSI
+ * @vsi: pointer to the ice_vsi
+ *
+ * This reallocates the VSI's queue resources
+ *
+ * Returns 0 on success and negative value on failure
+ */
+static int ice_vsi_reinit_setup(struct ice_vsi *vsi)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ int ret, i;
+
+ if (!vsi)
+ return -EINVAL;
+
+ ice_vsi_free_q_vectors(vsi);
+ ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx);
+ vsi->base_vector = 0;
+ ice_vsi_clear_rings(vsi);
+ ice_vsi_free_arrays(vsi, false);
+ ice_vsi_set_num_qs(vsi);
+
+ /* Initialize VSI struct elements and create VSI in FW */
+ ret = ice_vsi_add(vsi);
+ if (ret < 0)
+ goto err_vsi;
+
+ ret = ice_vsi_alloc_arrays(vsi, false);
+ if (ret < 0)
+ goto err_vsi;
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ if (!vsi->netdev) {
+ ret = ice_cfg_netdev(vsi);
+ if (ret)
+ goto err_rings;
+
+ ret = register_netdev(vsi->netdev);
+ if (ret)
+ goto err_rings;
+
+ netif_carrier_off(vsi->netdev);
+ netif_tx_stop_all_queues(vsi->netdev);
+ }
+
+ ret = ice_vsi_alloc_q_vectors(vsi);
+ if (ret)
+ goto err_rings;
+
+ ret = ice_vsi_setup_vector_base(vsi);
+ if (ret)
+ goto err_vectors;
+
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto err_vectors;
+
+ ice_vsi_map_rings_to_vectors(vsi);
+ break;
+ default:
+ break;
+ }
+
+ ice_vsi_set_tc_cfg(vsi);
+
+ /* configure VSI nodes based on the number of queues and TCs */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq;
+
+ ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num,
+ vsi->tc_cfg.ena_tc, max_txqs);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Failed VSI lan queue config\n");
+ goto err_vectors;
+ }
+ return 0;
+
+err_vectors:
+ ice_vsi_free_q_vectors(vsi);
+err_rings:
+ if (vsi->netdev) {
+ vsi->current_netdev_flags = 0;
+ unregister_netdev(vsi->netdev);
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ }
+err_vsi:
+ ice_vsi_clear(vsi);
+ set_bit(__ICE_RESET_FAILED, vsi->back->state);
+ return ret;
+}
+
+/**
+ * ice_vsi_setup - Set up a VSI by a given type
+ * @pf: board private structure
+ * @type: VSI type
+ * @pi: pointer to the port_info instance
+ *
+ * This allocates the sw VSI structure and its queue resources.
+ *
+ * Returns a pointer to the successfully allocated and configured VSI SW
+ * struct on success, otherwise NULL on failure.
+ */
+static struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type,
+ struct ice_port_info *pi)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct device *dev = &pf->pdev->dev;
+ struct ice_vsi_ctx ctxt = { 0 };
+ struct ice_vsi *vsi;
+ int ret, i;
+
+ vsi = ice_vsi_alloc(pf, type);
+ if (!vsi) {
+ dev_err(dev, "could not allocate VSI\n");
+ return NULL;
+ }
+
+ vsi->port_info = pi;
+ vsi->vsw = pf->first_sw;
+
+ if (ice_vsi_get_qs(vsi)) {
+ dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
+ vsi->idx);
+ goto err_get_qs;
+ }
+
+ /* set RSS capabilities */
+ ice_vsi_set_rss_params(vsi);
+
+ /* create the VSI */
+ ret = ice_vsi_add(vsi);
+ if (ret)
+ goto err_vsi;
+
+ ctxt.vsi_num = vsi->vsi_num;
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ ret = ice_cfg_netdev(vsi);
+ if (ret)
+ goto err_cfg_netdev;
+
+ ret = register_netdev(vsi->netdev);
+ if (ret)
+ goto err_register_netdev;
+
+ netif_carrier_off(vsi->netdev);
+
+ /* make sure transmit queues start off as stopped */
+ netif_tx_stop_all_queues(vsi->netdev);
+ ret = ice_vsi_alloc_q_vectors(vsi);
+ if (ret)
+ goto err_msix;
+
+ ret = ice_vsi_setup_vector_base(vsi);
+ if (ret)
+ goto err_rings;
+
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto err_rings;
+
+ ice_vsi_map_rings_to_vectors(vsi);
+
+ /* Do not exit if configuring RSS had an issue; at least
+ * traffic can still be received on the first queue. Hence
+ * there is no need to capture the return value.
+ */
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ ice_vsi_cfg_rss(vsi);
+ break;
+ default:
+ /* if the VSI type is not recognized, clean up the
+ * resources and exit
+ */
+ goto err_rings;
+ }
+
+ ice_vsi_set_tc_cfg(vsi);
+
+ /* configure VSI nodes based on the number of queues and TCs */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq;
+
+ ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num,
+ vsi->tc_cfg.ena_tc, max_txqs);
+ if (ret) {
+ dev_info(&pf->pdev->dev, "Failed VSI lan queue config\n");
+ goto err_rings;
+ }
+
+ return vsi;
+
+err_rings:
+ ice_vsi_free_q_vectors(vsi);
+err_msix:
+ if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(vsi->netdev);
+err_register_netdev:
+ if (vsi->netdev) {
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ }
+err_cfg_netdev:
+ ret = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL);
+ if (ret)
+ dev_err(&vsi->back->pdev->dev,
+ "Free VSI AQ call failed, err %d\n", ret);
+err_vsi:
+ ice_vsi_put_qs(vsi);
+err_get_qs:
+ pf->q_left_tx += vsi->alloc_txq;
+ pf->q_left_rx += vsi->alloc_rxq;
+ ice_vsi_clear(vsi);
+
+ return NULL;
+}
+
+/**
+ * ice_vsi_add_vlan - Add VSI membership for a given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN ID to be added
+ */
+static int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
+{
+ struct ice_fltr_list_entry *tmp;
+ struct ice_pf *pf = vsi->back;
+ LIST_HEAD(tmp_add_list);
+ enum ice_status status;
+ int err = 0;
+
+ tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
+ tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ tmp->fltr_info.flag = ICE_FLTR_TX;
+ tmp->fltr_info.src = vsi->vsi_num;
+ tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
+ tmp->fltr_info.l_data.vlan.vlan_id = vid;
+
+ INIT_LIST_HEAD(&tmp->list_entry);
+ list_add(&tmp->list_entry, &tmp_add_list);
+
+ status = ice_add_vlan(&pf->hw, &tmp_add_list);
+ if (status) {
+ err = -ENODEV;
+ dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n",
+ vid, vsi->vsi_num);
+ }
+
+ ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+ return err;
+}
+
+/**
+ * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol
+ * @vid: VLAN ID to be added
+ *
+ * net_device_ops implementation for adding VLAN IDs
+ */
+static int ice_vlan_rx_add_vid(struct net_device *netdev,
+ __always_unused __be16 proto, u16 vid)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ int ret = 0;
+
+ if (vid >= VLAN_N_VID) {
+ netdev_err(netdev, "VLAN id requested %d is out of range %d\n",
+ vid, VLAN_N_VID);
+ return -EINVAL;
+ }
+
+ if (vsi->info.pvid)
+ return -EINVAL;
+
+ /* Add all VLAN IDs including 0 to the switch filter. VLAN ID 0 is
+ * needed to continue allowing all untagged packets, since the VLAN
+ * prune list is applied to all packets by the switch
+ */
+ ret = ice_vsi_add_vlan(vsi, vid);
+
+ if (!ret)
+ set_bit(vid, vsi->active_vlans);
+
+ return ret;
+}
+
+/**
+ * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN id to be removed
+ */
+static void ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
+{
+ struct ice_fltr_list_entry *list;
+ struct ice_pf *pf = vsi->back;
+ LIST_HEAD(tmp_add_list);
+
+ list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+ if (!list)
+ return;
+
+ list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
+ list->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
+ list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ list->fltr_info.l_data.vlan.vlan_id = vid;
+ list->fltr_info.flag = ICE_FLTR_TX;
+ list->fltr_info.src = vsi->vsi_num;
+
+ INIT_LIST_HEAD(&list->list_entry);
+ list_add(&list->list_entry, &tmp_add_list);
+
+ if (ice_remove_vlan(&pf->hw, &tmp_add_list))
+ dev_err(&pf->pdev->dev, "Error removing VLAN %d on vsi %i\n",
+ vid, vsi->vsi_num);
+
+ ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+}
+
+/**
+ * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol
+ * @vid: VLAN ID to be removed
+ *
+ * net_device_ops implementation for removing VLAN IDs
+ */
+static int ice_vlan_rx_kill_vid(struct net_device *netdev,
+ __always_unused __be16 proto, u16 vid)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (vsi->info.pvid)
+ return -EINVAL;
+
+ /* the return code is ignored, as there is nothing a user can do
+ * about a failure to remove, and a log message has already been
+ * printed by ice_vsi_kill_vlan()
+ */
+ ice_vsi_kill_vlan(vsi, vid);
+
+ clear_bit(vid, vsi->active_vlans);
+
+ return 0;
+}
+
+/**
+ * ice_setup_pf_sw - Setup the HW switch on startup or after reset
+ * @pf: board private structure
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_setup_pf_sw(struct ice_pf *pf)
+{
+ LIST_HEAD(tmp_add_list);
+ u8 broadcast[ETH_ALEN];
+ struct ice_vsi *vsi;
+ int status = 0;
+
+ if (!ice_is_reset_recovery_pending(pf->state)) {
+ vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info);
+ if (!vsi) {
+ status = -ENOMEM;
+ goto error_exit;
+ }
+ } else {
+ vsi = pf->vsi[0];
+ status = ice_vsi_reinit_setup(vsi);
+ if (status < 0)
+ return -EIO;
+ }
+
+ /* tmp_add_list contains a list of MAC addresses for which MAC
+ * filters need to be programmed. Add the VSI's unicast MAC to
+ * this list
+ */
+ status = ice_add_mac_to_list(vsi, &tmp_add_list,
+ vsi->port_info->mac.perm_addr);
+ if (status)
+ goto error_exit;
+
+ /* VSI needs to receive broadcast traffic, so add the broadcast
+ * MAC address to the list.
+ */
+ eth_broadcast_addr(broadcast);
+ status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
+ if (status)
+ goto error_exit;
+
+ /* program MAC filters for entries in tmp_add_list */
+ status = ice_add_mac(&pf->hw, &tmp_add_list);
+ if (status) {
+ dev_err(&pf->pdev->dev, "Could not add MAC filters\n");
+ status = -ENOMEM;
+ goto error_exit;
+ }
+
+ ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+ return status;
+
+error_exit:
+ ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+
+ if (vsi) {
+ ice_vsi_free_q_vectors(vsi);
+ if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(vsi->netdev);
+ if (vsi->netdev) {
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ }
+
+ ice_vsi_delete(vsi);
+ ice_vsi_put_qs(vsi);
+ pf->q_left_tx += vsi->alloc_txq;
+ pf->q_left_rx += vsi->alloc_rxq;
+ ice_vsi_clear(vsi);
+ }
+ return status;
+}
+
+/**
+ * ice_determine_q_usage - Calculate queue distribution
+ * @pf: board private structure
+ *
+ * Determine how many queues the PF will use for LAN traffic and track
+ * what is left over for later use
+ */
+static void ice_determine_q_usage(struct ice_pf *pf)
+{
+ u16 q_left_tx, q_left_rx;
+
+ q_left_tx = pf->hw.func_caps.common_cap.num_txq;
+ q_left_rx = pf->hw.func_caps.common_cap.num_rxq;
+
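+ /* e.g. (illustrative numbers) with 16 HW Tx queues and 8 online CPUs,
+ * num_lan_tx becomes 8 and q_left_tx keeps the remaining 8
+ */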
+ pf->num_lan_tx = min_t(int, q_left_tx, num_online_cpus());
+
+ /* only 1 rx queue unless RSS is enabled */
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ pf->num_lan_rx = 1;
+ else
+ pf->num_lan_rx = min_t(int, q_left_rx, num_online_cpus());
+
+ pf->q_left_tx = q_left_tx - pf->num_lan_tx;
+ pf->q_left_rx = q_left_rx - pf->num_lan_rx;
+}
+
+/**
+ * ice_deinit_pf - Unrolls initializations done by ice_init_pf
+ * @pf: board private structure to initialize
+ */
+static void ice_deinit_pf(struct ice_pf *pf)
+{
+ if (pf->serv_tmr.function)
+ del_timer_sync(&pf->serv_tmr);
+ if (pf->serv_task.func)
+ cancel_work_sync(&pf->serv_task);
+ mutex_destroy(&pf->sw_mutex);
+ mutex_destroy(&pf->avail_q_mutex);
+}
+
+/**
+ * ice_init_pf - Initialize general software structures (struct ice_pf)
+ * @pf: board private structure to initialize
+ */
+static void ice_init_pf(struct ice_pf *pf)
+{
+ bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS);
+ set_bit(ICE_FLAG_MSIX_ENA, pf->flags);
+
+ mutex_init(&pf->sw_mutex);
+ mutex_init(&pf->avail_q_mutex);
+
+ /* Clear avail_[t|r]x_qs bitmaps (set all to avail) */
+ mutex_lock(&pf->avail_q_mutex);
+ bitmap_zero(pf->avail_txqs, ICE_MAX_TXQS);
+ bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS);
+ mutex_unlock(&pf->avail_q_mutex);
+
+ if (pf->hw.func_caps.common_cap.rss_table_size)
+ set_bit(ICE_FLAG_RSS_ENA, pf->flags);
+
+ /* setup service timer and periodic service task */
+ timer_setup(&pf->serv_tmr, ice_service_timer, 0);
+ pf->serv_tmr_period = HZ;
+ INIT_WORK(&pf->serv_task, ice_service_task);
+ clear_bit(__ICE_SERVICE_SCHED, pf->state);
+}
+
+/**
+ * ice_ena_msix_range - Request a range of MSIX vectors from the OS
+ * @pf: board private structure
+ *
+ * Compute the number of MSI-X vectors required (v_budget) and request that
+ * many from the OS. Returns the number of vectors reserved or negative on
+ * failure
+ */
+static int ice_ena_msix_range(struct ice_pf *pf)
+{
+ int v_left, v_actual, v_budget = 0;
+ int needed, err, i;
+
+ v_left = pf->hw.func_caps.common_cap.num_msix_vectors;
+
+ /* reserve one vector for miscellaneous handler */
+ needed = 1;
+ v_budget += needed;
+ v_left -= needed;
+
+ /* reserve vectors for LAN traffic */
+ pf->num_lan_msix = min_t(int, num_online_cpus(), v_left);
+ v_budget += pf->num_lan_msix;
+
+ pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget,
+ sizeof(struct msix_entry), GFP_KERNEL);
+
+ if (!pf->msix_entries) {
+ err = -ENOMEM;
+ goto exit_err;
+ }
+
+ for (i = 0; i < v_budget; i++)
+ pf->msix_entries[i].entry = i;
+
+ /* actually reserve the vectors */
+ v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
+ ICE_MIN_MSIX, v_budget);
+
+ if (v_actual < 0) {
+ dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n");
+ err = v_actual;
+ goto msix_err;
+ }
+
+ if (v_actual < v_budget) {
+ dev_warn(&pf->pdev->dev,
+ "not enough vectors. requested = %d, obtained = %d\n",
+ v_budget, v_actual);
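+ /* Degrade gracefully: keep the requested LAN vectors if they
+ * still fit alongside the misc vector; otherwise fall back to a
+ * single LAN vector; with fewer than two vectors, MSI-X cannot
+ * be used at all
+ */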
+ if (v_actual >= (pf->num_lan_msix + 1)) {
+ pf->num_avail_msix = v_actual - (pf->num_lan_msix + 1);
+ } else if (v_actual >= 2) {
+ pf->num_lan_msix = 1;
+ pf->num_avail_msix = v_actual - 2;
+ } else {
+ pci_disable_msix(pf->pdev);
+ err = -ERANGE;
+ goto msix_err;
+ }
+ }
+
+ return v_actual;
+
+msix_err:
+ devm_kfree(&pf->pdev->dev, pf->msix_entries);
+ goto exit_err;
+
+exit_err:
+ pf->num_lan_msix = 0;
+ clear_bit(ICE_FLAG_MSIX_ENA, pf->flags);
+ return err;
+}
+
+/**
+ * ice_dis_msix - Disable MSI-X interrupt setup in OS
+ * @pf: board private structure
+ */
+static void ice_dis_msix(struct ice_pf *pf)
+{
+ pci_disable_msix(pf->pdev);
+ devm_kfree(&pf->pdev->dev, pf->msix_entries);
+ pf->msix_entries = NULL;
+ clear_bit(ICE_FLAG_MSIX_ENA, pf->flags);
+}
+
+/**
+ * ice_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ */
+static int ice_init_interrupt_scheme(struct ice_pf *pf)
+{
+ int vectors = 0;
+ ssize_t size;
+
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+ vectors = ice_ena_msix_range(pf);
+ else
+ return -ENODEV;
+
+ if (vectors < 0)
+ return vectors;
+
+ /* set up vector assignment tracking */
+ size = sizeof(struct ice_res_tracker) + (sizeof(u16) * vectors);
+
+ pf->irq_tracker = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
+ if (!pf->irq_tracker) {
+ ice_dis_msix(pf);
+ return -ENOMEM;
+ }
+
+ pf->irq_tracker->num_entries = vectors;
+
+ return 0;
+}
+
+/**
+ * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
+ * @pf: board private structure
+ */
+static void ice_clear_interrupt_scheme(struct ice_pf *pf)
+{
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+ ice_dis_msix(pf);
+
+ devm_kfree(&pf->pdev->dev, pf->irq_tracker);
+ pf->irq_tracker = NULL;
+}
+
+/**
+ * ice_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ice_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_probe(struct pci_dev *pdev,
+ const struct pci_device_id __always_unused *ent)
+{
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int err;
+
+ /* this driver uses devres, see Documentation/driver-model/devres.txt */
+ err = pcim_enable_device(pdev);
+ if (err)
+ return err;
+
+ err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev));
+ if (err) {
+ dev_err(&pdev->dev, "I/O map error %d\n", err);
+ return err;
+ }
+
+ pf = devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL);
+ if (!pf)
+ return -ENOMEM;
+
+ /* set up for 64-bit DMA first; fall back to 32-bit DMA if that fails */
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err)
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err);
+ return err;
+ }
+
+ pci_enable_pcie_error_reporting(pdev);
+ pci_set_master(pdev);
+
+ pf->pdev = pdev;
+ pci_set_drvdata(pdev, pf);
+ set_bit(__ICE_DOWN, pf->state);
+
+ hw = &pf->hw;
+ hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+ hw->back = pf;
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+ hw->subsystem_vendor_id = pdev->subsystem_vendor;
+ hw->subsystem_device_id = pdev->subsystem_device;
+ hw->bus.device = PCI_SLOT(pdev->devfn);
+ hw->bus.func = PCI_FUNC(pdev->devfn);
+ ice_set_ctrlq_len(hw);
+
+ pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+ if (debug < -1)
+ hw->debug_mask = debug;
+#endif
+
+ err = ice_init_hw(hw);
+ if (err) {
+ dev_err(&pdev->dev, "ice_init_hw failed: %d\n", err);
+ err = -EIO;
+ goto err_exit_unroll;
+ }
+
+ dev_info(&pdev->dev, "firmware %d.%d.%05d api %d.%d\n",
+ hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
+ hw->api_maj_ver, hw->api_min_ver);
+
+ ice_init_pf(pf);
+
+ ice_determine_q_usage(pf);
+
+ pf->num_alloc_vsi = min_t(u16, ICE_MAX_VSI_ALLOC,
+ hw->func_caps.guaranteed_num_vsi);
+ if (!pf->num_alloc_vsi) {
+ err = -EIO;
+ goto err_init_pf_unroll;
+ }
+
+ pf->vsi = devm_kcalloc(&pdev->dev, pf->num_alloc_vsi,
+ sizeof(struct ice_vsi *), GFP_KERNEL);
+ if (!pf->vsi) {
+ err = -ENOMEM;
+ goto err_init_pf_unroll;
+ }
+
+ err = ice_init_interrupt_scheme(pf);
+ if (err) {
+ dev_err(&pdev->dev,
+ "ice_init_interrupt_scheme failed: %d\n", err);
+ err = -EIO;
+ goto err_init_interrupt_unroll;
+ }
+
+ /* In case of MSIX we are going to setup the misc vector right here
+ * to handle admin queue events etc. In case of legacy and MSI
+ * the misc functionality and queue processing is combined in
+ * the same vector and that gets setup at open.
+ */
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+ err = ice_req_irq_msix_misc(pf);
+ if (err) {
+ dev_err(&pdev->dev,
+ "setup of misc vector failed: %d\n", err);
+ goto err_init_interrupt_unroll;
+ }
+ }
+
+ /* create switch struct for the switch element created by FW on boot */
+ pf->first_sw = devm_kzalloc(&pdev->dev, sizeof(struct ice_sw),
+ GFP_KERNEL);
+ if (!pf->first_sw) {
+ err = -ENOMEM;
+ goto err_msix_misc_unroll;
+ }
+
+ pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
+ pf->first_sw->pf = pf;
+
+ /* record the sw_id available for later use */
+ pf->first_sw->sw_id = hw->port_info->sw_id;
+
+ err = ice_setup_pf_sw(pf);
+ if (err) {
+ dev_err(&pdev->dev,
+ "probe failed due to setup pf switch:%d\n", err);
+ goto err_alloc_sw_unroll;
+ }
+
+ /* Driver is mostly up */
+ clear_bit(__ICE_DOWN, pf->state);
+
+ /* since everything is good, start the service timer */
+ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
+ err = ice_init_link_events(pf->hw.port_info);
+ if (err) {
+ dev_err(&pdev->dev, "ice_init_link_events failed: %d\n", err);
+ goto err_alloc_sw_unroll;
+ }
+
+ return 0;
+
+err_alloc_sw_unroll:
+ set_bit(__ICE_DOWN, pf->state);
+ devm_kfree(&pf->pdev->dev, pf->first_sw);
+err_msix_misc_unroll:
+ ice_free_irq_msix_misc(pf);
+err_init_interrupt_unroll:
+ ice_clear_interrupt_scheme(pf);
+ devm_kfree(&pdev->dev, pf->vsi);
+err_init_pf_unroll:
+ ice_deinit_pf(pf);
+ ice_deinit_hw(hw);
+err_exit_unroll:
+ pci_disable_pcie_error_reporting(pdev);
+ return err;
+}
+
+/**
+ * ice_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void ice_remove(struct pci_dev *pdev)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+ int i = 0;
+ int err;
+
+ if (!pf)
+ return;
+
+ set_bit(__ICE_DOWN, pf->state);
+
+ for (i = 0; i < pf->num_alloc_vsi; i++) {
+ if (!pf->vsi[i])
+ continue;
+
+ err = ice_vsi_release(pf->vsi[i]);
+ if (err)
+ dev_dbg(&pf->pdev->dev, "Failed to release VSI index %d (err %d)\n",
+ i, err);
+ }
+
+ ice_free_irq_msix_misc(pf);
+ ice_clear_interrupt_scheme(pf);
+ ice_deinit_pf(pf);
+ ice_deinit_hw(&pf->hw);
+ pci_disable_pcie_error_reporting(pdev);
+}
+
+/* ice_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ * Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id ice_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_10G_BASE_T), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SGMII), 0 },
+ /* required last entry */
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
+
+static struct pci_driver ice_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = ice_pci_tbl,
+ .probe = ice_probe,
+ .remove = ice_remove,
+};
+
+/**
+ * ice_module_init - Driver registration routine
+ *
+ * ice_module_init is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init ice_module_init(void)
+{
+ int status;
+
+ pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver);
+ pr_info("%s\n", ice_copyright);
+
+ ice_wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, KBUILD_MODNAME);
+ if (!ice_wq) {
+ pr_err("Failed to create workqueue\n");
+ return -ENOMEM;
+ }
+
+ status = pci_register_driver(&ice_driver);
+ if (status) {
+ pr_err("failed to register pci driver, err %d\n", status);
+ destroy_workqueue(ice_wq);
+ }
+
+ return status;
+}
+module_init(ice_module_init);
+
+/**
+ * ice_module_exit - Driver exit cleanup routine
+ *
+ * ice_module_exit is called just before the driver is removed
+ * from memory.
+ */
+static void __exit ice_module_exit(void)
+{
+ pci_unregister_driver(&ice_driver);
+ destroy_workqueue(ice_wq);
+ pr_info("module unloaded\n");
+}
+module_exit(ice_module_exit);
+
+/**
+ * ice_set_mac_address - NDO callback to set mac address
+ * @netdev: network interface device structure
+ * @pi: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_set_mac_address(struct net_device *netdev, void *pi)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct sockaddr *addr = pi;
+ enum ice_status status;
+ LIST_HEAD(a_mac_list);
+ LIST_HEAD(r_mac_list);
+ u8 flags = 0;
+ int err;
+ u8 *mac;
+
+ mac = (u8 *)addr->sa_data;
+
+ if (!is_valid_ether_addr(mac))
+ return -EADDRNOTAVAIL;
+
+ if (ether_addr_equal(netdev->dev_addr, mac)) {
+ netdev_warn(netdev, "already using mac %pM\n", mac);
+ return 0;
+ }
+
+ if (test_bit(__ICE_DOWN, pf->state) ||
+ ice_is_reset_recovery_pending(pf->state)) {
+ netdev_err(netdev, "can't set mac %pM. device not ready\n",
+ mac);
+ return -EBUSY;
+ }
+
+ /* Changing the MAC address also requires updating the MAC-address-based
+ * filter rules that were created for the old address. So first remove the
+ * old filter rule using ice_remove_mac and then create a new one using
+ * ice_add_mac. Note that both operations take a "list" of MAC addresses
+ * (even though only one MAC address is added/removed here), and that list
+ * is built using ice_add_mac_to_list.
+ */
+ err = ice_add_mac_to_list(vsi, &r_mac_list, netdev->dev_addr);
+ if (err) {
+ err = -EADDRNOTAVAIL;
+ goto free_lists;
+ }
+
+ status = ice_remove_mac(hw, &r_mac_list);
+ if (status) {
+ err = -EADDRNOTAVAIL;
+ goto free_lists;
+ }
+
+ err = ice_add_mac_to_list(vsi, &a_mac_list, mac);
+ if (err) {
+ err = -EADDRNOTAVAIL;
+ goto free_lists;
+ }
+
+ status = ice_add_mac(hw, &a_mac_list);
+ if (status) {
+ err = -EADDRNOTAVAIL;
+ goto free_lists;
+ }
+
+free_lists:
+ /* free list entries */
+ ice_free_fltr_list(&pf->pdev->dev, &r_mac_list);
+ ice_free_fltr_list(&pf->pdev->dev, &a_mac_list);
+
+ if (err) {
+ netdev_err(netdev, "can't set mac %pM. filter update failed\n",
+ mac);
+ return err;
+ }
+
+ /* change the netdev's mac address */
+ memcpy(netdev->dev_addr, mac, netdev->addr_len);
+ netdev_dbg(vsi->netdev, "updated mac address to %pM\n",
+ netdev->dev_addr);
+
+ /* write new mac address to the firmware */
+ flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
+ status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
+ if (status) {
+ netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n",
+ mac);
+ }
+ return 0;
+}
+
+/**
+ * ice_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ */
+static void ice_set_rx_mode(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (!vsi)
+ return;
+
+ /* Set the flags to synchronize filters
+ * ndo_set_rx_mode may be triggered even without a change in netdev
+ * flags
+ */
+ set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+ set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+ set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
+
+ /* schedule our worker thread which will take care of
+ * applying the new filter changes
+ */
+ ice_service_task_schedule(vsi->back);
+}
+
+/**
+ * ice_fdb_add - add an entry to the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN id
+ * @flags: instructions from stack about fdb operation
+ */
+static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
+ struct net_device *dev, const unsigned char *addr,
+ u16 vid, u16 flags)
+{
+ int err;
+
+ if (vid) {
+ netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
+ return -EINVAL;
+ }
+ if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+ netdev_err(dev, "FDB only supports static addresses\n");
+ return -EINVAL;
+ }
+
+ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+ err = dev_uc_add_excl(dev, addr);
+ else if (is_multicast_ether_addr(addr))
+ err = dev_mc_add_excl(dev, addr);
+ else
+ err = -EINVAL;
+
+ /* Only return duplicate errors if NLM_F_EXCL is set */
+ if (err == -EEXIST && !(flags & NLM_F_EXCL))
+ err = 0;
+
+ return err;
+}
+
+/**
+ * ice_fdb_del - delete an entry from the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being removed
+ * @vid: VLAN id
+ */
+static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
+ struct net_device *dev, const unsigned char *addr,
+ __always_unused u16 vid)
+{
+ int err;
+
+ if (ndm->ndm_state & NUD_PERMANENT) {
+ netdev_err(dev, "FDB only supports static addresses\n");
+ return -EINVAL;
+ }
+
+ if (is_unicast_ether_addr(addr))
+ err = dev_uc_del(dev, addr);
+ else if (is_multicast_ether_addr(addr))
+ err = dev_mc_del(dev, addr);
+ else
+ err = -EINVAL;
+
+ return err;
+}
+
+/**
+ * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
+ * @vsi: the vsi being changed
+ */
+static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
+{
+ struct device *dev = &vsi->back->pdev->dev;
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx ctxt = { 0 };
+ enum ice_status status;
+
+ /* Here we are configuring the VSI to let the driver add VLAN tags by
+ * setting port_vlan_flags to ICE_AQ_VSI_PVLAN_MODE_ALL. The actual VLAN
+ * tag insertion happens in the Tx hot path, in ice_tx_map.
+ */
+ ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_MODE_ALL;
+
+ ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+ ctxt.vsi_num = vsi->vsi_num;
+
+ status = ice_aq_update_vsi(hw, &ctxt, NULL);
+ if (status) {
+ dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ return -EIO;
+ }
+
+ vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags;
+ return 0;
+}
+
+/**
+ * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
+ * @vsi: the vsi being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ */
+static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
+{
+ struct device *dev = &vsi->back->pdev->dev;
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx ctxt = { 0 };
+ enum ice_status status;
+
+ /* Here we are configuring what the VSI should do with the VLAN tag in
+ * the Rx packet. We can either leave the tag in the packet or put it in
+ * the Rx descriptor.
+ */
+ if (ena) {
+ /* Strip VLAN tag from Rx packet and put it in the desc */
+ ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH;
+ } else {
+ /* Disable stripping. Leave tag in packet */
+ ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_NOTHING;
+ }
+
+ ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+ ctxt.vsi_num = vsi->vsi_num;
+
+ status = ice_aq_update_vsi(hw, &ctxt, NULL);
+ if (status) {
+ dev_err(dev, "update VSI for VALN strip failed, ena = %d err %d aq_err %d\n",
+ ena, status, hw->adminq.sq_last_status);
+ return -EIO;
+ }
+
+ vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags;
+ return 0;
+}
+
+/**
+ * ice_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ */
+static int ice_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ int ret = 0;
+
+ if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
+ ret = ice_vsi_manage_vlan_stripping(vsi, true);
+ else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ (netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
+ ret = ice_vsi_manage_vlan_stripping(vsi, false);
+ else if ((features & NETIF_F_HW_VLAN_CTAG_TX) &&
+ !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
+ ret = ice_vsi_manage_vlan_insertion(vsi);
+ else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) &&
+ (netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
+ ret = ice_vsi_manage_vlan_insertion(vsi);
+
+ return ret;
+}
+
+/**
+ * ice_vsi_vlan_setup - Setup vlan offload properties on a VSI
+ * @vsi: VSI to setup vlan properties for
+ */
+static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
+{
+ int ret = 0;
+
+ if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+ ret = ice_vsi_manage_vlan_stripping(vsi, true);
+ if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)
+ ret = ice_vsi_manage_vlan_insertion(vsi);
+
+ return ret;
+}
+
+/**
+ * ice_restore_vlan - Reinstate VLANs when vsi/netdev comes back up
+ * @vsi: the VSI being brought back up
+ */
+static int ice_restore_vlan(struct ice_vsi *vsi)
+{
+ int err;
+ u16 vid;
+
+ if (!vsi->netdev)
+ return -EINVAL;
+
+ err = ice_vsi_vlan_setup(vsi);
+ if (err)
+ return err;
+
+ for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) {
+ err = ice_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), vid);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+ struct ice_vsi *vsi = ring->vsi;
+ struct ice_hw *hw = &vsi->back->hw;
+
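+ /* HW consumes the Tx ring base address in ICE_TLAN_CTX_BASE_S-shifted
+ * units (assuming a shift of 7, i.e. 128 byte granularity)
+ */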
+ tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+ tlan_ctx->port_num = vsi->port_info->lport;
+
+ /* Transmit Queue Length */
+ tlan_ctx->qlen = ring->count;
+
+ /* PF number */
+ tlan_ctx->pf_num = hw->pf_id;
+
+ /* queue belongs to a specific VSI type
+ * VF / VM index should be programmed per vmvf_type setting:
+ * for vmvf_type = VF, it is the VF number in the range 0-256
+ * for vmvf_type = VM, it is the VM number in the range 0-767
+ * for PF or EMP this field should be set to zero
+ */
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+ break;
+ default:
+ return;
+ }
+
+ /* make sure the context is associated with the right VSI */
+ tlan_ctx->src_vsi = vsi->vsi_num;
+
+ tlan_ctx->tso_ena = ICE_TX_LEGACY;
+ tlan_ctx->tso_qnum = pf_q;
+
+ /* Legacy or Advanced Host Interface:
+ * 0: Advanced Host Interface
+ * 1: Legacy Host Interface
+ */
+ tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_vsi_cfg_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ *
+ * Configure the Tx VSI for operation.
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_cfg_txqs(struct ice_vsi *vsi)
+{
+ struct ice_aqc_add_tx_qgrp *qg_buf;
+ struct ice_aqc_add_txqs_perq *txq;
+ struct ice_pf *pf = vsi->back;
+ enum ice_status status;
+ u16 buf_len, i, pf_q;
+ int err = 0, tc = 0;
+ u8 num_q_grps;
+
+ buf_len = sizeof(struct ice_aqc_add_tx_qgrp);
+ qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL);
+ if (!qg_buf)
+ return -ENOMEM;
+
+ if (vsi->num_txq > ICE_MAX_TXQ_PER_TXQG) {
+ err = -EINVAL;
+ goto err_cfg_txqs;
+ }
+ qg_buf->num_txqs = 1;
+ num_q_grps = 1;
+
+ /* set up and configure the tx queues */
+ ice_for_each_txq(vsi, i) {
+ struct ice_tlan_ctx tlan_ctx = { 0 };
+
+ pf_q = vsi->txq_map[i];
+ ice_setup_tx_ctx(vsi->tx_rings[i], &tlan_ctx, pf_q);
+ /* copy context contents into the qg_buf */
+ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+ ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+ ice_tlan_ctx_info);
+
+ /* init queue specific tail reg. It is referred to as
+ * transmit comm scheduler queue doorbell.
+ */
+ vsi->tx_rings[i]->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+ status = ice_ena_vsi_txq(vsi->port_info, vsi->vsi_num, tc,
+ num_q_grps, qg_buf, buf_len, NULL);
+ if (status) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed to set LAN Tx queue context, error: %d\n",
+ status);
+ err = -ENODEV;
+ goto err_cfg_txqs;
+ }
+
+ /* Add Tx Queue TEID into the VSI Tx ring from the response.
+ * This will complete configuring and enabling the queue.
+ */
+ txq = &qg_buf->txqs[0];
+ if (pf_q == le16_to_cpu(txq->txq_id))
+ vsi->tx_rings[i]->txq_teid =
+ le32_to_cpu(txq->q_teid);
+ }
+err_cfg_txqs:
+ devm_kfree(&pf->pdev->dev, qg_buf);
+ return err;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+static int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+ struct ice_vsi *vsi = ring->vsi;
+ struct ice_hw *hw = &vsi->back->hw;
+ u32 rxdid = ICE_RXDID_FLEX_NIC;
+ struct ice_rlan_ctx rlan_ctx;
+ u32 regval;
+ u16 pf_q;
+ int err;
+
+ /* Rx queue number within the global space of 2K Rx queues */
+ pf_q = vsi->rxq_map[ring->q_index];
+
+ /* clear the context structure first */
+ memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
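+ /* ring base address is handed to HW right-shifted by 7, i.e. in
+ * 128 byte units
+ */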
+ rlan_ctx.base = ring->dma >> 7;
+
+ rlan_ctx.qlen = ring->count;
+
+ /* Receive Packet Data Buffer Size.
+ * The Packet Data Buffer Size is defined in 128 byte units.
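+ * (e.g. the 2048 byte rx_buf_len used by this driver is programmed
+ * as 2048 >> 7 = 16 units)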
+ */
+ rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+
+ /* use 32 byte descriptors */
+ rlan_ctx.dsize = 1;
+
+ /* Strip the Ethernet CRC bytes before the packet is posted to host
+ * memory.
+ */
+ rlan_ctx.crcstrip = 1;
+
+ /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+ rlan_ctx.l2tsel = 1;
+
+ rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+ rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+ rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+ /* This controls whether VLAN is stripped from inner headers.
+ * If enabled by this flag, the VLAN in the inner L2 header is
+ * stripped into the receive descriptor.
+ */
+ rlan_ctx.showiv = 0;
+
+ /* Max packet size for this queue - must not be set to a larger value
+ * than 5 x DBUF
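+ * (e.g. with the 2048 byte buffers used below this caps a chained
+ * receive at 5 * 2048 = 10240 bytes)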
+ */
+ rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+ ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
+
+ /* Rx queue threshold in units of 64 */
+ rlan_ctx.lrxqthresh = 1;
+
+ /* Enable Flexible Descriptors in the queue context, which
+ * allows this driver to select a specific receive descriptor format
+ */
+ regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+ regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+ QRXFLXP_CNTXT_RXDID_IDX_M;
+
+ /* increasing context priority to pick up profile id;
+ * default is 0x01; setting to 0x03 ensures the profile
+ * is programmed even if the previous context has the same priority
+ */
+ regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+ QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+ wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+
+ /* Absolute queue number out of 2K needs to be passed */
+ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+ if (err) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+ pf_q, err);
+ return -EIO;
+ }
+
+ /* init queue specific tail register */
+ ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+ writel(0, ring->tail);
+ ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+
+ return 0;
+}
+
+/**
+ * ice_vsi_cfg_rxqs - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Configure the Rx VSI for operation.
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
+{
+ int err = 0;
+ u16 i;
+
+ if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
+ vsi->max_frame = vsi->netdev->mtu +
+ ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ else
+ vsi->max_frame = ICE_RXBUF_2048;
+
+ vsi->rx_buf_len = ICE_RXBUF_2048;
+ /* set up individual rings */
+ for (i = 0; i < vsi->num_rxq && !err; i++)
+ err = ice_setup_rx_ctx(vsi->rx_rings[i]);
+
+ if (err) {
+ dev_err(&vsi->back->pdev->dev, "ice_setup_rx_ctx failed\n");
+ return -EIO;
+ }
+ return err;
+}
+
+/**
+ * ice_vsi_cfg - Setup the VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_vsi_cfg(struct ice_vsi *vsi)
+{
+ int err;
+
+ ice_set_rx_mode(vsi->netdev);
+
+ err = ice_restore_vlan(vsi);
+ if (err)
+ return err;
+
+ err = ice_vsi_cfg_txqs(vsi);
+ if (!err)
+ err = ice_vsi_cfg_rxqs(vsi);
+
+ return err;
+}
+
+/**
+ * ice_vsi_stop_tx_rings - Disable Tx rings
+ * @vsi: the VSI being configured
+ */
+static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+ u32 *q_teids, val;
+ u16 *q_ids, i;
+ int err = 0;
+
+ if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+ return -EINVAL;
+
+ q_teids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_teids),
+ GFP_KERNEL);
+ if (!q_teids)
+ return -ENOMEM;
+
+ q_ids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_ids),
+ GFP_KERNEL);
+ if (!q_ids) {
+ err = -ENOMEM;
+ goto err_alloc_q_ids;
+ }
+
+ /* set up the tx queue list to be disabled */
+ ice_for_each_txq(vsi, i) {
+ u16 v_idx;
+
+ if (!vsi->tx_rings || !vsi->tx_rings[i]) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ q_ids[i] = vsi->txq_map[i];
+ q_teids[i] = vsi->tx_rings[i]->txq_teid;
+
+ /* clear cause_ena bit for disabled queues */
+ val = rd32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx));
+ val &= ~QINT_TQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val);
+
+ /* software is expected to wait for 100 ns */
+ ndelay(100);
+
+ /* trigger a software interrupt for the vector associated with
+ * the queue to schedule the NAPI handler
+ */
+ v_idx = vsi->tx_rings[i]->q_vector->v_idx;
+ wr32(hw, GLINT_DYN_CTL(vsi->base_vector + v_idx),
+ GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
+ }
+ status = ice_dis_vsi_txq(vsi->port_info, vsi->num_txq, q_ids, q_teids,
+ NULL);
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Failed to disable LAN Tx queues, error: %d\n",
+ status);
+ err = -ENODEV;
+ }
+
+err_out:
+ devm_kfree(&pf->pdev->dev, q_ids);
+
+err_alloc_q_ids:
+ devm_kfree(&pf->pdev->dev, q_teids);
+
+ return err;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT if the queue fails to reach the requested state after
+ * multiple retries; otherwise returns 0.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+ int i;
+
+ for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
+ u32 rx_reg = rd32(&pf->hw, QRX_CTRL(pf_q));
+
+ if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+ break;
+
+ usleep_range(10, 20);
+ }
+ if (i >= ICE_Q_WAIT_RETRY_LIMIT)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+/**
+ * ice_vsi_ctrl_rx_rings - Start or stop a VSI's rx rings
+ * @vsi: the VSI being configured
+ * @ena: start or stop the rx rings
+ */
+static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int i, j, ret = 0;
+
+ for (i = 0; i < vsi->num_rxq; i++) {
+ int pf_q = vsi->rxq_map[i];
+ u32 rx_reg;
+
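+ /* wait until the enable-request and enable-status bits agree,
+ * i.e. any in-flight transition from a previous request has
+ * completed
+ */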
+ for (j = 0; j < ICE_Q_WAIT_MAX_RETRY; j++) {
+ rx_reg = rd32(hw, QRX_CTRL(pf_q));
+ if (((rx_reg >> QRX_CTRL_QENA_REQ_S) & 1) ==
+ ((rx_reg >> QRX_CTRL_QENA_STAT_S) & 1))
+ break;
+ usleep_range(1000, 2000);
+ }
+
+ /* Skip if the queue is already in the requested state */
+ if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+ continue;
+
+ /* turn on/off the queue */
+ if (ena)
+ rx_reg |= QRX_CTRL_QENA_REQ_M;
+ else
+ rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+ wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+ /* wait for the change to finish */
+ ret = ice_pf_rxq_wait(pf, pf_q, ena);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "VSI idx %d Rx ring %d %sable timeout\n",
+ vsi->idx, pf_q, (ena ? "en" : "dis"));
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * ice_vsi_start_rx_rings - start VSI's rx rings
+ * @vsi: the VSI whose rings are to be started
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_start_rx_rings(struct ice_vsi *vsi)
+{
+ return ice_vsi_ctrl_rx_rings(vsi, true);
+}
+
+/**
+ * ice_vsi_stop_rx_rings - stop VSI's rx rings
+ * @vsi: the VSI
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
+{
+ return ice_vsi_ctrl_rx_rings(vsi, false);
+}
+
+/**
+ * ice_vsi_stop_tx_rx_rings - stop VSI's tx and rx rings
+ * @vsi: the VSI
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi)
+{
+ int err_tx, err_rx;
+
+ err_tx = ice_vsi_stop_tx_rings(vsi);
+ if (err_tx)
+ dev_dbg(&vsi->back->pdev->dev, "Failed to disable Tx rings\n");
+
+ err_rx = ice_vsi_stop_rx_rings(vsi);
+ if (err_rx)
+ dev_dbg(&vsi->back->pdev->dev, "Failed to disable Rx rings\n");
+
+ if (err_tx || err_rx)
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_napi_enable_all(struct ice_vsi *vsi)
+{
+ int q_idx;
+
+ if (!vsi->netdev)
+ return;
+
+ for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
+ napi_enable(&vsi->q_vectors[q_idx]->napi);
+}
+
+/**
+ * ice_up_complete - Finish the last steps of bringing up a connection
+ * @vsi: The VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_up_complete(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+ ice_vsi_cfg_msix(vsi);
+ else
+ return -ENOTSUPP;
+
+ /* Enable only Rx rings, Tx rings were enabled by the FW when the
+ * Tx queue group list was configured and the context bits were
+ * programmed using ice_vsi_cfg_txqs
+ */
+ err = ice_vsi_start_rx_rings(vsi);
+ if (err)
+ return err;
+
+ clear_bit(__ICE_DOWN, vsi->state);
+ ice_napi_enable_all(vsi);
+ ice_vsi_ena_irq(vsi);
+
+ if (vsi->port_info &&
+ (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
+ vsi->netdev) {
+ ice_print_link_msg(vsi, true);
+ netif_tx_start_all_queues(vsi->netdev);
+ netif_carrier_on(vsi->netdev);
+ }
+
+ ice_service_task_schedule(pf);
+
+ return err;
+}
+
+/**
+ * ice_up - Bring the connection back up after being down
+ * @vsi: VSI being configured
+ */
+int ice_up(struct ice_vsi *vsi)
+{
+ int err;
+
+ err = ice_vsi_cfg(vsi);
+ if (!err)
+ err = ice_up_complete(vsi);
+
+ return err;
+}
+
+/**
+ * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
+ * @ring: Tx or Rx ring to read stats from
+ * @pkts: packets stats counter
+ * @bytes: bytes stats counter
+ *
+ * This function fetches stats from the ring considering the atomic operations
+ * that needs to be performed to read u64 values in 32 bit machine.
+ */
+static void ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts,
+ u64 *bytes)
+{
+ unsigned int start;
+ *pkts = 0;
+ *bytes = 0;
+
+ if (!ring)
+ return;
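+ /* u64_stats retry loop: re-read if the writer side updated the
+ * counters mid-read (required for consistent u64 reads on 32-bit)
+ */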
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ *pkts = ring->stats.pkts;
+ *bytes = ring->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+}
+
+/**
+ * ice_stat_update40 - read 40 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @hireg: high 32 bit HW register to read from
+ * @loreg: low 32 bit HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+static void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg,
+ bool prev_stat_loaded, u64 *prev_stat,
+ u64 *cur_stat)
+{
+ u64 new_data;
+
+ new_data = rd32(hw, loreg);
+ new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32;
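+ /* only 40 bits are valid; the explicit mask at the end of this
+ * function truncates any extra bits taken from the high register
+ */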
+
+ /* device stats are not reset at PFR, so they likely will not be zeroed
+ * when the driver starts. Save the first values read and use them as
+ * offsets to be subtracted from the raw values in order to report stats
+ * that count from zero.
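+ * For example (illustrative values): a first read of 500 latches
+ * prev_stat = 500, and a later raw read of 650 is reported as
+ * cur_stat = 150.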
+ */
+ if (!prev_stat_loaded)
+ *prev_stat = new_data;
+ if (likely(new_data >= *prev_stat))
+ *cur_stat = new_data - *prev_stat;
+ else
+ /* to manage the potential roll-over */
+ *cur_stat = (new_data + BIT_ULL(40)) - *prev_stat;
+ *cur_stat &= 0xFFFFFFFFFFULL;
+}
+
+/**
+ * ice_stat_update32 - read 32 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+static void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+ u64 *prev_stat, u64 *cur_stat)
+{
+ u32 new_data;
+
+ new_data = rd32(hw, reg);
+
+ /* device stats are not reset at PFR, so they likely will not be zeroed
+ * when the driver starts. Save the first values read and use them as
+ * offsets to be subtracted from the raw values in order to report stats
+ * that count from zero.
+ */
+ if (!prev_stat_loaded)
+ *prev_stat = new_data;
+ if (likely(new_data >= *prev_stat))
+ *cur_stat = new_data - *prev_stat;
+ else
+ /* to manage the potential roll-over */
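+ /* e.g. (illustrative) prev_stat = 0xFFFFFFF0 and a wrapped raw
+ * read of 0x10 yields (0x10 + 2^32) - 0xFFFFFFF0 = 0x20
+ */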
+ *cur_stat = (new_data + BIT_ULL(32)) - *prev_stat;
+}
+
+/**
+ * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_eth_stats(struct ice_vsi *vsi)
+{
+ struct ice_eth_stats *prev_es, *cur_es;
+ struct ice_hw *hw = &vsi->back->hw;
+ u16 vsi_num = vsi->vsi_num; /* HW absolute index of a VSI */
+
+ prev_es = &vsi->eth_stats_prev;
+ cur_es = &vsi->eth_stats;
+
+ ice_stat_update40(hw, GLV_GORCH(vsi_num), GLV_GORCL(vsi_num),
+ vsi->stat_offsets_loaded, &prev_es->rx_bytes,
+ &cur_es->rx_bytes);
+
+ ice_stat_update40(hw, GLV_UPRCH(vsi_num), GLV_UPRCL(vsi_num),
+ vsi->stat_offsets_loaded, &prev_es->rx_unicast,
+ &cur_es->rx_unicast);
+
+ ice_stat_update40(hw, GLV_MPRCH(vsi_num), GLV_MPRCL(vsi_num),
+ vsi->stat_offsets_loaded, &prev_es->rx_multicast,
+ &cur_es->rx_multicast);
+
+ ice_stat_update40(hw, GLV_BPRCH(vsi_num), GLV_BPRCL(vsi_num),
+ vsi->stat_offsets_loaded, &prev_es->rx_broadcast,
+ &cur_es->rx_broadcast);
+
+ ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->rx_discards, &cur_es->rx_discards);
+
+ ice_stat_update40(hw, GLV_GOTCH(vsi_num), GLV_GOTCL(vsi_num),
+ vsi->stat_offsets_loaded, &prev_es->tx_bytes,
+ &cur_es->tx_bytes);
+
+ ice_stat_update40(hw, GLV_UPTCH(vsi_num), GLV_UPTCL(vsi_num),
+ vsi->stat_offsets_loaded, &prev_es->tx_unicast,
+ &cur_es->tx_unicast);
+
+ ice_stat_update40(hw, GLV_MPTCH(vsi_num), GLV_MPTCL(vsi_num),
+ vsi->stat_offsets_loaded, &prev_es->tx_multicast,
+ &cur_es->tx_multicast);
+
+ ice_stat_update40(hw, GLV_BPTCH(vsi_num), GLV_BPTCL(vsi_num),
+ vsi->stat_offsets_loaded, &prev_es->tx_broadcast,
+ &cur_es->tx_broadcast);
+
+ ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->tx_errors, &cur_es->tx_errors);
+
+ vsi->stat_offsets_loaded = true;
+}
+
+/**
+ * ice_update_vsi_ring_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
+{
+ struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats;
+ struct ice_ring *ring;
+ u64 pkts, bytes;
+ int i;
+
+ /* reset netdev stats */
+ vsi_stats->tx_packets = 0;
+ vsi_stats->tx_bytes = 0;
+ vsi_stats->rx_packets = 0;
+ vsi_stats->rx_bytes = 0;
+
+ /* reset non-netdev (extended) stats */
+ vsi->tx_restart = 0;
+ vsi->tx_busy = 0;
+ vsi->tx_linearize = 0;
+ vsi->rx_buf_failed = 0;
+ vsi->rx_page_failed = 0;
+
+ rcu_read_lock();
+
+ /* update Tx rings counters */
+ ice_for_each_txq(vsi, i) {
+ ring = READ_ONCE(vsi->tx_rings[i]);
+ ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
+ vsi_stats->tx_packets += pkts;
+ vsi_stats->tx_bytes += bytes;
+ vsi->tx_restart += ring->tx_stats.restart_q;
+ vsi->tx_busy += ring->tx_stats.tx_busy;
+ vsi->tx_linearize += ring->tx_stats.tx_linearize;
+ }
+
+ /* update Rx rings counters */
+ ice_for_each_rxq(vsi, i) {
+ ring = READ_ONCE(vsi->rx_rings[i]);
+ ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
+ vsi_stats->rx_packets += pkts;
+ vsi_stats->rx_bytes += bytes;
+ vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
+ vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
+ }
+
+ rcu_read_unlock();
+}
+
+/**
+ * ice_update_vsi_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_vsi_stats(struct ice_vsi *vsi)
+{
+ struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
+ struct ice_eth_stats *cur_es = &vsi->eth_stats;
+ struct ice_pf *pf = vsi->back;
+
+ if (test_bit(__ICE_DOWN, vsi->state) ||
+ test_bit(__ICE_CFG_BUSY, pf->state))
+ return;
+
+ /* get stats as recorded by Tx/Rx rings */
+ ice_update_vsi_ring_stats(vsi);
+
+ /* get VSI stats as recorded by the hardware */
+ ice_update_eth_stats(vsi);
+
+ cur_ns->tx_errors = cur_es->tx_errors;
+ cur_ns->rx_dropped = cur_es->rx_discards;
+ cur_ns->tx_dropped = cur_es->tx_discards;
+ cur_ns->multicast = cur_es->rx_multicast;
+
+ /* update some more netdev stats if this is main VSI */
+ if (vsi->type == ICE_VSI_PF) {
+ cur_ns->rx_crc_errors = pf->stats.crc_errors;
+ cur_ns->rx_errors = pf->stats.crc_errors +
+ pf->stats.illegal_bytes;
+ cur_ns->rx_length_errors = pf->stats.rx_len_errors;
+ }
+}
+
+/**
+ * ice_update_pf_stats - Update PF port stats counters
+ * @pf: PF whose stats need to be updated
+ */
+static void ice_update_pf_stats(struct ice_pf *pf)
+{
+ struct ice_hw_port_stats *prev_ps, *cur_ps;
+ struct ice_hw *hw = &pf->hw;
+ u8 pf_id;
+
+ prev_ps = &pf->stats_prev;
+ cur_ps = &pf->stats;
+ pf_id = hw->pf_id;
+
+ ice_stat_update40(hw, GLPRT_GORCH(pf_id), GLPRT_GORCL(pf_id),
+ pf->stat_prev_loaded, &prev_ps->eth.rx_bytes,
+ &cur_ps->eth.rx_bytes);
+
+ ice_stat_update40(hw, GLPRT_UPRCH(pf_id), GLPRT_UPRCL(pf_id),
+ pf->stat_prev_loaded, &prev_ps->eth.rx_unicast,
+ &cur_ps->eth.rx_unicast);
+
+ ice_stat_update40(hw, GLPRT_MPRCH(pf_id), GLPRT_MPRCL(pf_id),
+ pf->stat_prev_loaded, &prev_ps->eth.rx_multicast,
+ &cur_ps->eth.rx_multicast);
+
+ ice_stat_update40(hw, GLPRT_BPRCH(pf_id), GLPRT_BPRCL(pf_id),
+ pf->stat_prev_loaded, &prev_ps->eth.rx_broadcast,
+ &cur_ps->eth.rx_broadcast);
+
+ ice_stat_update40(hw, GLPRT_GOTCH(pf_id), GLPRT_GOTCL(pf_id),
+ pf->stat_prev_loaded, &prev_ps->eth.tx_bytes,
+ &cur_ps->eth.tx_bytes);
+
+ ice_stat_update40(hw, GLPRT_UPTCH(pf_id), GLPRT_UPTCL(pf_id),
+ pf->stat_prev_loaded, &prev_ps->eth.tx_unicast,
+ &cur_ps->eth.tx_unicast);
+
+ ice_stat_update40(hw, GLPRT_MPTCH(pf_id), GLPRT_MPTCL(pf_id),
+ pf->stat_prev_loaded, &prev_ps->eth.tx_multicast,
+ &cur_ps->eth.tx_multicast);
+
+ ice_stat_update40(hw, GLPRT_BPTCH(pf_id), GLPRT_BPTCL(pf_id),
+ pf->stat_prev_loaded, &prev_ps->eth.tx_broadcast,
+ &cur_ps->eth.tx_broadcast);
+
+ ice_stat_update32(hw, GLPRT_TDOLD(pf_id), pf->stat_prev_loaded,
+ &prev_ps->tx_dropped_link_down,
+ &cur_ps->tx_dropped_link_down);
+
+ ice_stat_update40(hw, GLPRT_PRC64H(pf_id), GLPRT_PRC64L(pf_id),
+ pf->stat_prev_loaded, &prev_ps->rx_size_64,
+ &cur_ps->rx_size_64);
+
+ ice_stat_update40(hw, GLPRT_PRC127H(pf_id), GLPRT_PRC127L(pf_id),
+ pf->stat_prev_loaded, &prev_ps->rx_size_127,
+ &cur_ps->rx_size_127);
+
+ ice_stat_update40(hw, GLPRT_PRC255H(pf_id), GLPRT_PRC255L(pf_id),
+ pf->stat_prev_loaded, &prev_ps->rx_size_255,
+ &cur_ps->rx_size_255);
+
+ ice_stat_update40(hw, GLPRT_PRC511H(pf_id), GLPRT_PRC511L(pf_id),
+ pf->stat_prev_loaded, &prev_ps->rx_size_511,
+ &cur_ps->rx_size_511);
+
+ ice_stat_update40(hw, GLPRT_PRC1023H(pf_id),
+ GLPRT_PRC1023L(pf_id), pf->stat_prev_loaded,
+ &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
+
+ ice_stat_update40(hw, GLPRT_PRC1522H(pf_id),
+ GLPRT_PRC1522L(pf_id), pf->stat_prev_loaded,
+ &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
+
+ ice_stat_update40(hw, GLPRT_PRC9522H(pf_id),
+ GLPRT_PRC9522L(pf_id), pf->stat_prev_loaded,
+ &prev_ps->rx_size_big, &cur_ps->rx_size_big);
+
+ ice_stat_update40(hw, GLPRT_PTC64H(pf_id), GLPRT_PTC64L(pf_id),
+ pf->stat_prev_loaded, &prev_ps->tx_size_64,
+ &cur_ps->tx_size_64);
+
+ ice_stat_update40(hw, GLPRT_PTC127H(pf_id), GLPRT_PTC127L(pf_id),
+ pf->stat_prev_loaded, &prev_ps->tx_size_127,
+ &cur_ps->tx_size_127);
+
+ ice_stat_update40(hw, GLPRT_PTC255H(pf_id), GLPRT_PTC255L(pf_id),
+ pf->stat_prev_loaded, &prev_ps->tx_size_255,
+ &cur_ps->tx_size_255);
+
+ ice_stat_update40(hw, GLPRT_PTC511H(pf_id), GLPRT_PTC511L(pf_id),
+ pf->stat_prev_loaded, &prev_ps->tx_size_511,
+ &cur_ps->tx_size_511);
+
+ ice_stat_update40(hw, GLPRT_PTC1023H(pf_id),
+ GLPRT_PTC1023L(pf_id), pf->stat_prev_loaded,
+ &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
+
+ ice_stat_update40(hw, GLPRT_PTC1522H(pf_id),
+ GLPRT_PTC1522L(pf_id), pf->stat_prev_loaded,
+ &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
+
+ ice_stat_update40(hw, GLPRT_PTC9522H(pf_id),
+ GLPRT_PTC9522L(pf_id), pf->stat_prev_loaded,
+ &prev_ps->tx_size_big, &cur_ps->tx_size_big);
+
+ ice_stat_update32(hw, GLPRT_LXONRXC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
+
+ ice_stat_update32(hw, GLPRT_LXOFFRXC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
+
+ ice_stat_update32(hw, GLPRT_LXONTXC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
+
+ ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
+
+ ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded,
+ &prev_ps->crc_errors, &cur_ps->crc_errors);
+
+ ice_stat_update32(hw, GLPRT_ILLERRC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
+
+ ice_stat_update32(hw, GLPRT_MLFC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->mac_local_faults,
+ &cur_ps->mac_local_faults);
+
+ ice_stat_update32(hw, GLPRT_MRFC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->mac_remote_faults,
+ &cur_ps->mac_remote_faults);
+
+ ice_stat_update32(hw, GLPRT_RLEC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
+
+ ice_stat_update32(hw, GLPRT_RUC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->rx_undersize, &cur_ps->rx_undersize);
+
+ ice_stat_update32(hw, GLPRT_RFC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->rx_fragments, &cur_ps->rx_fragments);
+
+ ice_stat_update32(hw, GLPRT_ROC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->rx_oversize, &cur_ps->rx_oversize);
+
+ ice_stat_update32(hw, GLPRT_RJC(pf_id), pf->stat_prev_loaded,
+ &prev_ps->rx_jabber, &cur_ps->rx_jabber);
+
+ pf->stat_prev_loaded = true;
+}
+
+/**
+ * ice_get_stats64 - get statistics for network device structure
+ * @netdev: network interface device structure
+ * @stats: main device statistics structure
+ */
+static
+void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct rtnl_link_stats64 *vsi_stats;
+ struct ice_vsi *vsi = np->vsi;
+
+ vsi_stats = &vsi->net_stats;
+
+ if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq)
+ return;
+ /* netdev packet/byte stats are obtained by summing up the per-ring
+ * counters (done by ice_update_vsi_ring_stats).
+ */
+ ice_update_vsi_ring_stats(vsi);
+ stats->tx_packets = vsi_stats->tx_packets;
+ stats->tx_bytes = vsi_stats->tx_bytes;
+ stats->rx_packets = vsi_stats->rx_packets;
+ stats->rx_bytes = vsi_stats->rx_bytes;
+
+ /* The rest of the stats can be read from the hardware but instead we
+ * just return values that the watchdog task has already obtained from
+ * the hardware.
+ */
+ stats->multicast = vsi_stats->multicast;
+ stats->tx_errors = vsi_stats->tx_errors;
+ stats->tx_dropped = vsi_stats->tx_dropped;
+ stats->rx_errors = vsi_stats->rx_errors;
+ stats->rx_dropped = vsi_stats->rx_dropped;
+ stats->rx_crc_errors = vsi_stats->rx_crc_errors;
+ stats->rx_length_errors = vsi_stats->rx_length_errors;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/**
+ * ice_netpoll - polling "interrupt" handler
+ * @netdev: network interface device structure
+ *
+ * Used by netconsole to send skbs without having to re-enable interrupts.
+ * This is not called in the normal interrupt path.
+ */
+static void ice_netpoll(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int i;
+
+ if (test_bit(__ICE_DOWN, vsi->state) ||
+ !test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+ return;
+
+ for (i = 0; i < vsi->num_q_vectors; i++)
+ ice_msix_clean_rings(0, vsi->q_vectors[i]);
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
+/**
+ * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
+ * @vsi: VSI having NAPI disabled
+ */
+static void ice_napi_disable_all(struct ice_vsi *vsi)
+{
+ int q_idx;
+
+ if (!vsi->netdev)
+ return;
+
+ for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
+ napi_disable(&vsi->q_vectors[q_idx]->napi);
+}
+
+/**
+ * ice_down - Shutdown the connection
+ * @vsi: The VSI being stopped
+ */
+int ice_down(struct ice_vsi *vsi)
+{
+ int i, err;
+
+ /* Caller of this function is expected to set the
+ * vsi->state __ICE_DOWN bit
+ */
+ if (vsi->netdev) {
+ netif_carrier_off(vsi->netdev);
+ netif_tx_disable(vsi->netdev);
+ }
+
+ ice_vsi_dis_irq(vsi);
+ err = ice_vsi_stop_tx_rx_rings(vsi);
+ ice_napi_disable_all(vsi);
+
+ ice_for_each_txq(vsi, i)
+ ice_clean_tx_ring(vsi->tx_rings[i]);
+
+ ice_for_each_rxq(vsi, i)
+ ice_clean_rx_ring(vsi->rx_rings[i]);
+
+ if (err)
+ netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
+ vsi->vsi_num, vsi->vsw->sw_id);
+ return err;
+}
+
+/**
+ * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
+{
+ int i, err;
+
+ if (!vsi->num_txq) {
+ dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n",
+ vsi->vsi_num);
+ return -EINVAL;
+ }
+
+ ice_for_each_txq(vsi, i) {
+ err = ice_setup_tx_ring(vsi->tx_rings[i]);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
+{
+ int i, err;
+
+ if (!vsi->num_rxq) {
+ dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n",
+ vsi->vsi_num);
+ return -EINVAL;
+ }
+
+ ice_for_each_rxq(vsi, i) {
+ err = ice_setup_rx_ring(vsi->rx_rings[i]);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * ice_vsi_req_irq - Request IRQ from the OS
+ * @vsi: The VSI IRQ is being requested for
+ * @basename: name for the vector
+ *
+ * Return 0 on success and a negative value on error
+ */
+static int ice_vsi_req_irq(struct ice_vsi *vsi, char *basename)
+{
+ struct ice_pf *pf = vsi->back;
+ int err = -EINVAL;
+
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+ err = ice_vsi_req_irq_msix(vsi, basename);
+
+ return err;
+}
+
+/**
+ * ice_vsi_free_tx_rings - Free Tx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+static void ice_vsi_free_tx_rings(struct ice_vsi *vsi)
+{
+ int i;
+
+ if (!vsi->tx_rings)
+ return;
+
+ ice_for_each_txq(vsi, i)
+ if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+ ice_free_tx_ring(vsi->tx_rings[i]);
+}
+
+/**
+ * ice_vsi_free_rx_rings - Free Rx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+static void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
+{
+ int i;
+
+ if (!vsi->rx_rings)
+ return;
+
+ ice_for_each_rxq(vsi, i)
+ if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
+ ice_free_rx_ring(vsi->rx_rings[i]);
+}
+
+/**
+ * ice_vsi_open - Called when a network interface is made active
+ * @vsi: the VSI to open
+ *
+ * Initialization of the VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+static int ice_vsi_open(struct ice_vsi *vsi)
+{
+ char int_name[ICE_INT_NAME_STR_LEN];
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ /* allocate descriptors */
+ err = ice_vsi_setup_tx_rings(vsi);
+ if (err)
+ goto err_setup_tx;
+
+ err = ice_vsi_setup_rx_rings(vsi);
+ if (err)
+ goto err_setup_rx;
+
+ err = ice_vsi_cfg(vsi);
+ if (err)
+ goto err_setup_rx;
+
+ snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
+ dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
+ err = ice_vsi_req_irq(vsi, int_name);
+ if (err)
+ goto err_setup_rx;
+
+ /* Notify the stack of the actual queue counts. */
+ err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq);
+ if (err)
+ goto err_set_qs;
+
+ err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
+ if (err)
+ goto err_set_qs;
+
+ err = ice_up_complete(vsi);
+ if (err)
+ goto err_up_complete;
+
+ return 0;
+
+err_up_complete:
+ ice_down(vsi);
+err_set_qs:
+ ice_vsi_free_irq(vsi);
+err_setup_rx:
+ ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+ ice_vsi_free_tx_rings(vsi);
+
+ return err;
+}
+
+/**
+ * ice_vsi_close - Shut down a VSI
+ * @vsi: the VSI being shut down
+ */
+static void ice_vsi_close(struct ice_vsi *vsi)
+{
+ if (!test_and_set_bit(__ICE_DOWN, vsi->state))
+ ice_down(vsi);
+
+ ice_vsi_free_irq(vsi);
+ ice_vsi_free_tx_rings(vsi);
+ ice_vsi_free_rx_rings(vsi);
+}
+
+/**
+ * ice_rss_clean - Delete RSS related VSI structures that hold user inputs
+ * @vsi: the VSI being removed
+ */
+static void ice_rss_clean(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf;
+
+ pf = vsi->back;
+
+ if (vsi->rss_hkey_user)
+ devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user);
+ if (vsi->rss_lut_user)
+ devm_kfree(&pf->pdev->dev, vsi->rss_lut_user);
+}
+
+/**
+ * ice_vsi_release - Delete a VSI and free its resources
+ * @vsi: the VSI being removed
+ *
+ * Returns 0 on success or < 0 on error
+ */
+static int ice_vsi_release(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf;
+
+ if (!vsi->back)
+ return -ENODEV;
+ pf = vsi->back;
+
+ if (vsi->netdev) {
+ unregister_netdev(vsi->netdev);
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ }
+
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ ice_rss_clean(vsi);
+
+ /* Disable VSI and free resources */
+ ice_vsi_dis_irq(vsi);
+ ice_vsi_close(vsi);
+
+ /* reclaim interrupt vectors back to PF */
+ ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx);
+ pf->num_avail_msix += vsi->num_q_vectors;
+
+ ice_remove_vsi_fltr(&pf->hw, vsi->vsi_num);
+ ice_vsi_delete(vsi);
+ ice_vsi_free_q_vectors(vsi);
+ ice_vsi_clear_rings(vsi);
+
+ ice_vsi_put_qs(vsi);
+ pf->q_left_tx += vsi->alloc_txq;
+ pf->q_left_rx += vsi->alloc_rxq;
+
+ ice_vsi_clear(vsi);
+
+ return 0;
+}
+
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ */
+static void ice_dis_vsi(struct ice_vsi *vsi)
+{
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return;
+
+ set_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+ if (vsi->netdev && netif_running(vsi->netdev) &&
+ vsi->type == ICE_VSI_PF)
+ vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
+
+ ice_vsi_close(vsi);
+}
+
+/**
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resumed
+ */
+static void ice_ena_vsi(struct ice_vsi *vsi)
+{
+ if (!test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state))
+ return;
+
+ if (vsi->netdev && netif_running(vsi->netdev))
+ vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
+ else if (ice_vsi_open(vsi))
+ /* this clears the DOWN bit */
+ dev_dbg(&vsi->back->pdev->dev, "Failed open VSI 0x%04X on switch 0x%04X\n",
+ vsi->vsi_num, vsi->vsw->sw_id);
+}
+
+/**
+ * ice_pf_dis_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ */
+static void ice_pf_dis_all_vsi(struct ice_pf *pf)
+{
+ int v;
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ ice_dis_vsi(pf->vsi[v]);
+}
+
+/**
+ * ice_pf_ena_all_vsi - Resume all VSIs on a PF
+ * @pf: the PF
+ */
+static void ice_pf_ena_all_vsi(struct ice_pf *pf)
+{
+ int v;
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ ice_ena_vsi(pf->vsi[v]);
+}
+
+/**
+ * ice_rebuild - rebuild after reset
+ * @pf: pf to rebuild
+ */
+static void ice_rebuild(struct ice_pf *pf)
+{
+ struct device *dev = &pf->pdev->dev;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status ret;
+ int err;
+
+ if (test_bit(__ICE_DOWN, pf->state))
+ goto clear_recovery;
+
+ dev_dbg(dev, "rebuilding pf\n");
+
+ ret = ice_init_all_ctrlq(hw);
+ if (ret) {
+ dev_err(dev, "control queues init failed %d\n", ret);
+ goto fail_reset;
+ }
+
+ ret = ice_clear_pf_cfg(hw);
+ if (ret) {
+ dev_err(dev, "clear PF configuration failed %d\n", ret);
+ goto fail_reset;
+ }
+
+ ice_clear_pxe_mode(hw);
+
+ ret = ice_get_caps(hw);
+ if (ret) {
+ dev_err(dev, "ice_get_caps failed %d\n", ret);
+ goto fail_reset;
+ }
+
+ /* basic nic switch setup */
+ err = ice_setup_pf_sw(pf);
+ if (err) {
+ dev_err(dev, "ice_setup_pf_sw failed\n");
+ goto fail_reset;
+ }
+
+ /* start misc vector */
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+ err = ice_req_irq_msix_misc(pf);
+ if (err) {
+ dev_err(dev, "misc vector setup failed: %d\n", err);
+ goto fail_reset;
+ }
+ }
+
+ /* restart the VSIs that were rebuilt and running before the reset */
+ ice_pf_ena_all_vsi(pf);
+
+ return;
+
+fail_reset:
+ ice_shutdown_all_ctrlq(hw);
+ set_bit(__ICE_RESET_FAILED, pf->state);
+clear_recovery:
+ set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+}
+
+/**
+ * ice_change_mtu - NDO callback to change the MTU
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u8 count = 0;
+
+ if (new_mtu == netdev->mtu) {
+ netdev_warn(netdev, "mtu is already %d\n", netdev->mtu);
+ return 0;
+ }
+
+ if (new_mtu < netdev->min_mtu) {
+ netdev_err(netdev, "new mtu invalid. min_mtu is %d\n",
+ netdev->min_mtu);
+ return -EINVAL;
+ } else if (new_mtu > netdev->max_mtu) {
+ netdev_err(netdev, "new mtu invalid. max_mtu is %d\n",
+ netdev->max_mtu);
+ return -EINVAL;
+ }
+ /* if a reset is in progress, wait for some time for it to complete */
+ do {
+ if (ice_is_reset_recovery_pending(pf->state)) {
+ count++;
+ usleep_range(1000, 2000);
+ } else {
+ break;
+ }
+
+ } while (count < 100);
+
+ if (count == 100) {
+ netdev_err(netdev, "can't change mtu. Device is busy\n");
+ return -EBUSY;
+ }
+
+ netdev->mtu = new_mtu;
+
+ /* if VSI is up, bring it down and then back up */
+ if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
+ int err;
+
+ err = ice_down(vsi);
+ if (err) {
+ netdev_err(netdev, "change mtu if_up err %d\n", err);
+ return err;
+ }
+
+ err = ice_up(vsi);
+ if (err) {
+ netdev_err(netdev, "change mtu if_up err %d\n", err);
+ return err;
+ }
+ }
+
+ netdev_dbg(netdev, "changed mtu to %d\n", new_mtu);
+ return 0;
+}
+
+/**
+ * ice_set_rss - Set RSS keys and lut
+ * @vsi: Pointer to VSI structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+
+ if (seed) {
+ struct ice_aqc_get_set_rss_keys *buf =
+ (struct ice_aqc_get_set_rss_keys *)seed;
+
+ status = ice_aq_set_rss_key(hw, vsi->vsi_num, buf);
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Cannot set RSS key, err %d aq_err %d\n",
+ status, hw->adminq.rq_last_status);
+ return -EIO;
+ }
+ }
+
+ if (lut) {
+ status = ice_aq_set_rss_lut(hw, vsi->vsi_num,
+ vsi->rss_lut_type, lut, lut_size);
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Cannot set RSS lut, err %d aq_err %d\n",
+ status, hw->adminq.rq_last_status);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
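+
+/* A minimal usage sketch of ice_set_rss() (illustrative only; it assumes
+ * the caller has key and LUT buffers of the sizes the device expects --
+ * the array sizes below are placeholders, not driver constants):
+ *
+ *	u8 key[52], lut[64];
+ *
+ *	netdev_rss_key_fill(key, sizeof(key));
+ *	memset(lut, 0, sizeof(lut));	// steer everything to queue 0
+ *	if (ice_set_rss(vsi, key, lut, sizeof(lut)))
+ *		dev_warn(&vsi->back->pdev->dev, "RSS config failed\n");
+ */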
+
+/**
+ * ice_get_rss - Get RSS keys and lut
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the keys
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+
+ if (seed) {
+ struct ice_aqc_get_set_rss_keys *buf =
+ (struct ice_aqc_get_set_rss_keys *)seed;
+
+ status = ice_aq_get_rss_key(hw, vsi->vsi_num, buf);
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Cannot get RSS key, err %d aq_err %d\n",
+ status, hw->adminq.rq_last_status);
+ return -EIO;
+ }
+ }
+
+ if (lut) {
+ status = ice_aq_get_rss_lut(hw, vsi->vsi_num,
+ vsi->rss_lut_type, lut, lut_size);
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Cannot get RSS lut, err %d aq_err %d\n",
+ status, hw->adminq.rq_last_status);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_open - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog is enabled,
+ * and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_open(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ int err;
+
+ netif_carrier_off(netdev);
+
+ err = ice_vsi_open(vsi);
+ if (err)
+ netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
+ vsi->vsi_num, vsi->vsw->sw_id);
+ return err;
+}
+
+/**
+ * ice_stop - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when an interface is de-activated by the OS,
+ * and the netdevice enters the DOWN state. The hardware is still under the
+ * driver's control, but the netdev interface is disabled.
+ *
+ * Returns success only - not allowed to fail
+ */
+static int ice_stop(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ ice_vsi_close(vsi);
+
+ return 0;
+}
+
+/**
+ * ice_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buffer
+ * @netdev: This port's netdev
+ * @features: Offload features that the stack believes apply
+ */
+static netdev_features_t
+ice_features_check(struct sk_buff *skb,
+ struct net_device __always_unused *netdev,
+ netdev_features_t features)
+{
+ size_t len;
+
+ /* No point in doing any of this if neither checksum nor GSO are
+ * being requested for this frame. We can rule out both by just
+ * checking for CHECKSUM_PARTIAL
+ */
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return features;
+
+ /* We cannot support GSO if the MSS is going to be less than
+ * 64 bytes. If it is, then we need to drop support for GSO.
+ */
+ if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+ features &= ~NETIF_F_GSO_MASK;
+
+ len = skb_network_header(skb) - skb->data;
+ if (len & ~(ICE_TXD_MACLEN_MAX))
+ goto out_rm_features;
+
+ len = skb_transport_header(skb) - skb_network_header(skb);
+ if (len & ~(ICE_TXD_IPLEN_MAX))
+ goto out_rm_features;
+
+ if (skb->encapsulation) {
+ len = skb_inner_network_header(skb) - skb_transport_header(skb);
+ if (len & ~(ICE_TXD_L4LEN_MAX))
+ goto out_rm_features;
+
+ len = skb_inner_transport_header(skb) -
+ skb_inner_network_header(skb);
+ if (len & ~(ICE_TXD_IPLEN_MAX))
+ goto out_rm_features;
+ }
+
+ return features;
+out_rm_features:
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
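+
+/* The length checks above work because the ICE_TXD_*LEN_MAX values are
+ * contiguous low-order bit masks, so "len & ~(MASK)" is non-zero exactly
+ * when len needs more bits than the Tx descriptor field provides. A
+ * worked example, assuming ICE_TXD_MACLEN_MAX is 0x7F (127):
+ *
+ *	len = 14:  14 & ~0x7F == 0x00  -> header fits, keep offloads
+ *	len = 130: 130 & ~0x7F == 0x80 -> too long, strip CSUM/GSO
+ */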
+
+static const struct net_device_ops ice_netdev_ops = {
+ .ndo_open = ice_open,
+ .ndo_stop = ice_stop,
+ .ndo_start_xmit = ice_start_xmit,
+ .ndo_features_check = ice_features_check,
+ .ndo_set_rx_mode = ice_set_rx_mode,
+ .ndo_set_mac_address = ice_set_mac_address,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ice_change_mtu,
+ .ndo_get_stats64 = ice_get_stats64,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = ice_netpoll,
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+ .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
+ .ndo_set_features = ice_set_features,
+ .ndo_fdb_add = ice_fdb_add,
+ .ndo_fdb_del = ice_fdb_del,
+};
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
new file mode 100644
index 000000000000..fa7a69ac92b0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_aq_read_nvm
+ * @hw: pointer to the hw struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be read (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read the NVM using the admin queue commands (0x0701)
+ */
+static enum ice_status
+ice_aq_read_nvm(struct ice_hw *hw, u8 module_typeid, u32 offset, u16 length,
+ void *data, bool last_command, struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+ struct ice_aqc_nvm *cmd;
+
+ cmd = &desc.params.nvm;
+
+ /* The highest byte of the offset must be zero. */
+ if (offset & 0xFF000000)
+ return ICE_ERR_PARAM;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read);
+
+ /* If this is the last command in a series, set the proper flag. */
+ if (last_command)
+ cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
+ cmd->module_typeid = module_typeid;
+ cmd->offset = cpu_to_le32(offset);
+ cmd->length = cpu_to_le16(length);
+
+ return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_check_sr_access_params - verify params for Shadow RAM R/W operations.
+ * @hw: pointer to the HW structure
+ * @offset: offset in words from module start
+ * @words: number of words to access
+ */
+static enum ice_status
+ice_check_sr_access_params(struct ice_hw *hw, u32 offset, u16 words)
+{
+ if ((offset + words) > hw->nvm.sr_words) {
+ ice_debug(hw, ICE_DBG_NVM,
+ "NVM error: offset beyond SR lmt.\n");
+ return ICE_ERR_PARAM;
+ }
+
+ if (words > ICE_SR_SECTOR_SIZE_IN_WORDS) {
+ /* We can access only up to 4KB (one sector), in one AQ write */
+ ice_debug(hw, ICE_DBG_NVM,
+ "NVM error: tried to access %d words, limit is %d.\n",
+ words, ICE_SR_SECTOR_SIZE_IN_WORDS);
+ return ICE_ERR_PARAM;
+ }
+
+ if (((offset + (words - 1)) / ICE_SR_SECTOR_SIZE_IN_WORDS) !=
+ (offset / ICE_SR_SECTOR_SIZE_IN_WORDS)) {
+ /* A single access cannot spread over two sectors */
+ ice_debug(hw, ICE_DBG_NVM,
+ "NVM error: cannot spread over two sectors.\n");
+ return ICE_ERR_PARAM;
+ }
+
+ return 0;
+}
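+
+/* A worked example of the sector-boundary check above, assuming
+ * ICE_SR_SECTOR_SIZE_IN_WORDS is 2048 (a 4KB sector of 16-bit words):
+ *
+ *	offset = 2040, words = 16:
+ *	(2040 + 15) / 2048 = 1 but 2040 / 2048 = 0
+ *	-> the access would span two sectors and is rejected
+ *
+ *	offset = 2032, words = 16:
+ *	(2032 + 15) / 2048 = 0 == 2032 / 2048
+ *	-> the access stays within one sector and is allowed
+ */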
+
+/**
+ * ice_read_sr_aq - Read Shadow RAM.
+ * @hw: pointer to the HW structure
+ * @offset: offset in words from module start
+ * @words: number of words to read
+ * @data: buffer for words reads from Shadow RAM
+ * @last_command: tells the AdminQ that this is the last command
+ *
+ * Reads 16-bit word buffers from the Shadow RAM using the admin command.
+ */
+static enum ice_status
+ice_read_sr_aq(struct ice_hw *hw, u32 offset, u16 words, u16 *data,
+ bool last_command)
+{
+ enum ice_status status;
+
+ status = ice_check_sr_access_params(hw, offset, words);
+
+ /* values in "offset" and "words" parameters are sized as words
+ * (16 bits) but ice_aq_read_nvm expects these values in bytes.
+ * So do this conversion while calling ice_aq_read_nvm.
+ */
+ if (!status)
+ status = ice_aq_read_nvm(hw, 0, 2 * offset, 2 * words, data,
+ last_command, NULL);
+
+ return status;
+}
+
+/**
+ * ice_read_sr_word_aq - Reads Shadow RAM via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_aq method.
+ */
+static enum ice_status
+ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
+{
+ enum ice_status status;
+
+ status = ice_read_sr_aq(hw, offset, 1, data, true);
+ if (!status)
+ *data = le16_to_cpu(*(__le16 *)data);
+
+ return status;
+}
+
+/**
+ * ice_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
+ *
+ * This function will request NVM ownership.
+ */
+static enum ice_status
+ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access)
+{
+ if (hw->nvm.blank_nvm_mode)
+ return 0;
+
+ return ice_acquire_res(hw, ICE_NVM_RES_ID, access);
+}
+
+/**
+ * ice_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
+ *
+ * This function will release NVM ownership.
+ */
+static void ice_release_nvm(struct ice_hw *hw)
+{
+ if (hw->nvm.blank_nvm_mode)
+ return;
+
+ ice_release_res(hw, ICE_NVM_RES_ID);
+}
+
+/**
+ * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
+ */
+static enum ice_status
+ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+{
+ enum ice_status status;
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (!status) {
+ status = ice_read_sr_word_aq(hw, offset, data);
+ ice_release_nvm(hw);
+ }
+
+ return status;
+}
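+
+/* A minimal usage sketch (illustrative; ICE_SR_NVM_DEV_STARTER_VER is
+ * the same Shadow RAM pointer that ice_init_nvm() reads below):
+ *
+ *	u16 ver;
+ *
+ *	if (!ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &ver))
+ *		ice_debug(hw, ICE_DBG_NVM, "starter ver 0x%04x\n", ver);
+ */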
+
+/**
+ * ice_init_nvm - initializes NVM setting
+ * @hw: pointer to the hw struct
+ *
+ * This function reads and populates NVM settings such as Shadow RAM size,
+ * max_timeout, and blank_nvm_mode
+ */
+enum ice_status ice_init_nvm(struct ice_hw *hw)
+{
+ struct ice_nvm_info *nvm = &hw->nvm;
+ u16 eetrack_lo, eetrack_hi;
+ enum ice_status status = 0;
+ u32 fla, gens_stat;
+ u8 sr_size;
+
+ /* The SR size is stored regardless of the nvm programming mode
+ * as the blank mode may be used in the factory line.
+ */
+ gens_stat = rd32(hw, GLNVM_GENS);
+ sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S;
+
+ /* Switching to words (sr_size contains power of 2) */
+ nvm->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB;
+
+ /* Check if we are in the normal or blank NVM programming mode */
+ fla = rd32(hw, GLNVM_FLA);
+ if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */
+ nvm->blank_nvm_mode = false;
+ } else { /* Blank programming mode */
+ nvm->blank_nvm_mode = true;
+ status = ICE_ERR_NVM_BLANK_MODE;
+ ice_debug(hw, ICE_DBG_NVM,
+ "NVM init error: unsupported blank mode.\n");
+ return status;
+ }
+
+ status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Failed to read DEV starter version.\n");
+ return status;
+ }
+
+ status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK lo.\n");
+ return status;
+ }
+ status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_HI, &eetrack_hi);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK hi.\n");
+ return status;
+ }
+
+ hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
+
+ return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
new file mode 100644
index 000000000000..f57c414bc0a9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_OSDEP_H_
+#define _ICE_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/io.h>
+#ifndef CONFIG_64BIT
+#include <linux/io-64-nonatomic-lo-hi.h>
+#endif
+
+#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg) readl((a)->hw_addr + (reg))
+#define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg) readq((a)->hw_addr + (reg))
+
+#define ice_flush(a) rd32((a), GLGEN_STAT)
+#define ICE_M(m, s) ((m) << (s))
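+
+/* For example, ICE_M(0x7, 3) expands to (0x7 << 3) == 0x38, a 3-bit
+ * field mask positioned at bit 3; register mask macros such as
+ * GLNVM_GENS_SR_SIZE_M are typically built this way.
+ */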
+
+struct ice_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ size_t size;
+};
+
+#define ice_hw_to_dev(ptr) \
+ (&(container_of((ptr), struct ice_pf, hw))->pdev->dev)
+
+#ifdef CONFIG_DYNAMIC_DEBUG
+#define ice_debug(hw, type, fmt, args...) \
+ dev_dbg(ice_hw_to_dev(hw), fmt, ##args)
+
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+ print_hex_dump_debug(KBUILD_MODNAME " ", \
+ DUMP_PREFIX_OFFSET, rowsize, \
+ groupsize, buf, len, false)
+#else
+#define ice_debug(hw, type, fmt, args...) \
+do { \
+ if ((type) & (hw)->debug_mask) \
+ dev_info(ice_hw_to_dev(hw), fmt, ##args); \
+} while (0)
+
+#ifdef DEBUG
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do { \
+ if ((type) & (hw)->debug_mask) \
+ print_hex_dump_debug(KBUILD_MODNAME, \
+ DUMP_PREFIX_OFFSET, \
+ rowsize, groupsize, buf, \
+ len, false); \
+} while (0)
+#else
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do { \
+ struct ice_hw *hw_l = hw; \
+ if ((type) & (hw_l)->debug_mask) { \
+ u16 len_l = len; \
+ u8 *buf_l = buf; \
+ int i; \
+ for (i = 0; i < (len_l - 16); i += 16) \
+ ice_debug(hw_l, type, "0x%04X %16ph\n",\
+ i, ((buf_l) + i)); \
+ if (i < len_l) \
+ ice_debug(hw_l, type, "0x%04X %*ph\n", \
+ i, ((len_l) - i), ((buf_l) + i));\
+ } \
+} while (0)
+#endif /* DEBUG */
+#endif /* CONFIG_DYNAMIC_DEBUG */
+
+#endif /* _ICE_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
new file mode 100644
index 000000000000..f16ff3e4a840
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -0,0 +1,1659 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_sched.h"
+
+/**
+ * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
+ * @pi: port information structure
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts the root node of the scheduling tree topology
+ * to the SW DB.
+ */
+static enum ice_status
+ice_sched_add_root_node(struct ice_port_info *pi,
+ struct ice_aqc_txsched_elem_data *info)
+{
+ struct ice_sched_node *root;
+ struct ice_hw *hw;
+ u16 max_children;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+
+ hw = pi->hw;
+
+ root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
+ if (!root)
+ return ICE_ERR_NO_MEMORY;
+
+ max_children = le16_to_cpu(hw->layer_info[0].max_children);
+ root->children = devm_kcalloc(ice_hw_to_dev(hw), max_children,
+ sizeof(*root), GFP_KERNEL);
+ if (!root->children) {
+ devm_kfree(ice_hw_to_dev(hw), root);
+ return ICE_ERR_NO_MEMORY;
+ }
+
+ memcpy(&root->info, info, sizeof(*info));
+ pi->root = root;
+ return 0;
+}
+
+/**
+ * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
+ * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
+ * @teid: node teid to search
+ *
+ * This function searches for a node matching the teid in the scheduling tree
+ * from the SW DB. The search is recursive and is restricted by the number of
+ * layers it has searched through; stopping at the max supported layer.
+ *
+ * This function needs to be called when holding the port_info->sched_lock
+ */
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
+{
+ u16 i;
+
+ /* The TEID is the same as that of the start_node */
+ if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
+ return start_node;
+
+ /* The node has no children or is at the max layer */
+ if (!start_node->num_children ||
+ start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
+ start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
+ return NULL;
+
+ /* Check if teid matches to any of the children nodes */
+ for (i = 0; i < start_node->num_children; i++)
+ if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
+ return start_node->children[i];
+
+ /* Search within each child's sub-tree */
+ for (i = 0; i < start_node->num_children; i++) {
+ struct ice_sched_node *tmp;
+
+ tmp = ice_sched_find_node_by_teid(start_node->children[i],
+ teid);
+ if (tmp)
+ return tmp;
+ }
+
+ return NULL;
+}
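+
+/* A minimal usage sketch (illustrative; per the kernel-doc above, the
+ * caller must hold port_info->sched_lock):
+ *
+ *	struct ice_sched_node *node;
+ *
+ *	mutex_lock(&pi->sched_lock);
+ *	node = ice_sched_find_node_by_teid(pi->root, teid);
+ *	if (node)
+ *		ice_debug(pi->hw, ICE_DBG_SCHED, "found at layer %d\n",
+ *			  node->tx_sched_layer);
+ *	mutex_unlock(&pi->sched_lock);
+ */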
+
+/**
+ * ice_sched_add_node - Insert the Tx scheduler node in SW DB
+ * @pi: port information structure
+ * @layer: Scheduler layer of the node
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts a scheduler node to the SW DB.
+ */
+enum ice_status
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+ struct ice_aqc_txsched_elem_data *info)
+{
+ struct ice_sched_node *parent;
+ struct ice_sched_node *node;
+ struct ice_hw *hw;
+ u16 max_children;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+
+ hw = pi->hw;
+
+ /* A valid parent node should be there */
+ parent = ice_sched_find_node_by_teid(pi->root,
+ le32_to_cpu(info->parent_teid));
+ if (!parent) {
+ ice_debug(hw, ICE_DBG_SCHED,
+ "Parent Node not found for parent_teid=0x%x\n",
+ le32_to_cpu(info->parent_teid));
+ return ICE_ERR_PARAM;
+ }
+
+ node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ICE_ERR_NO_MEMORY;
+ max_children = le16_to_cpu(hw->layer_info[layer].max_children);
+ if (max_children) {
+ node->children = devm_kcalloc(ice_hw_to_dev(hw), max_children,
+ sizeof(*node), GFP_KERNEL);
+ if (!node->children) {
+ devm_kfree(ice_hw_to_dev(hw), node);
+ return ICE_ERR_NO_MEMORY;
+ }
+ }
+
+ node->in_use = true;
+ node->parent = parent;
+ node->tx_sched_layer = layer;
+ parent->children[parent->num_children++] = node;
+ memcpy(&node->info, info, sizeof(*info));
+ return 0;
+}
+
+/**
+ * ice_aq_delete_sched_elems - delete scheduler elements
+ * @hw: pointer to the hw struct
+ * @grps_req: number of groups to delete
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_del: returns total number of elements deleted
+ * @cd: pointer to command details structure or NULL
+ *
+ * Delete scheduling elements (0x040F)
+ */
+static enum ice_status
+ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
+ struct ice_aqc_delete_elem *buf, u16 buf_size,
+ u16 *grps_del, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_move_delete_elem *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.add_move_delete_elem;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ cmd->num_grps_req = cpu_to_le16(grps_req);
+
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status && grps_del)
+ *grps_del = le16_to_cpu(cmd->num_grps_updated);
+
+ return status;
+}
+
+/**
+ * ice_sched_remove_elems - remove nodes from hw
+ * @hw: pointer to the hw struct
+ * @parent: pointer to the parent node
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be deleted
+ *
+ * This function removes nodes from hw
+ */
+static enum ice_status
+ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
+ u16 num_nodes, u32 *node_teids)
+{
+ struct ice_aqc_delete_elem *buf;
+ u16 i, num_groups_removed = 0;
+ enum ice_status status;
+ u16 buf_size;
+
+ buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
+ buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+ if (!buf)
+ return ICE_ERR_NO_MEMORY;
+ buf->hdr.parent_teid = parent->info.node_teid;
+ buf->hdr.num_elems = cpu_to_le16(num_nodes);
+ for (i = 0; i < num_nodes; i++)
+ buf->teid[i] = cpu_to_le32(node_teids[i]);
+ status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
+ &num_groups_removed, NULL);
+ if (status || num_groups_removed != 1)
+ ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n");
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
+
+/**
+ * ice_sched_get_first_node - get the first node of the given layer
+ * @hw: pointer to the hw struct
+ * @parent: pointer to the base node of the subtree
+ * @layer: layer number
+ *
+ * This function retrieves the first node of the given layer from the subtree
+ */
+static struct ice_sched_node *
+ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
+ u8 layer)
+{
+ u8 i;
+
+ if (layer < hw->sw_entry_point_layer)
+ return NULL;
+ for (i = 0; i < parent->num_children; i++) {
+ struct ice_sched_node *node = parent->children[i];
+
+ if (node) {
+ if (node->tx_sched_layer == layer)
+ return node;
+ /* this recursion is intentional, and won't
+ * go more than 9 calls deep
+ */
+ return ice_sched_get_first_node(hw, node, layer);
+ }
+ }
+ return NULL;
+}
+
+/**
+ * ice_sched_get_tc_node - get pointer to TC node
+ * @pi: port information structure
+ * @tc: TC number
+ *
+ * This function returns the TC node pointer
+ */
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
+{
+ u8 i;
+
+ if (!pi)
+ return NULL;
+ for (i = 0; i < pi->root->num_children; i++)
+ if (pi->root->children[i]->tc_num == tc)
+ return pi->root->children[i];
+ return NULL;
+}
+
+/**
+ * ice_free_sched_node - Free a Tx scheduler node from SW DB
+ * @pi: port information structure
+ * @node: pointer to the ice_sched_node struct
+ *
+ * This function frees up a node from SW DB as well as from HW
+ *
+ * This function needs to be called with the port_info->sched_lock held
+ */
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+ struct ice_sched_node *parent;
+ struct ice_hw *hw = pi->hw;
+ u8 i, j;
+
+ /* Free the children before freeing up the parent node
+ * The parent array is updated below and that shifts the nodes
+ * in the array. So always pick the first child if num children > 0
+ */
+ while (node->num_children)
+ ice_free_sched_node(pi, node->children[0]);
+
+ /* Leaf, TC and root nodes can't be deleted by SW */
+ if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
+ u32 teid = le32_to_cpu(node->info.node_teid);
+ enum ice_status status;
+
+ status = ice_sched_remove_elems(hw, node->parent, 1, &teid);
+ if (status)
+ ice_debug(hw, ICE_DBG_SCHED,
+ "remove element failed %d\n", status);
+ }
+ parent = node->parent;
+ /* root has no parent */
+ if (parent) {
+ struct ice_sched_node *p, *tc_node;
+
+ /* update the parent */
+ for (i = 0; i < parent->num_children; i++)
+ if (parent->children[i] == node) {
+ for (j = i + 1; j < parent->num_children; j++)
+ parent->children[j - 1] =
+ parent->children[j];
+ parent->num_children--;
+ break;
+ }
+
+ /* search for previous sibling that points to this node and
+ * remove the reference
+ */
+ tc_node = ice_sched_get_tc_node(pi, node->tc_num);
+ if (!tc_node) {
+ ice_debug(hw, ICE_DBG_SCHED,
+ "Invalid TC number %d\n", node->tc_num);
+ goto err_exit;
+ }
+ p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
+ while (p) {
+ if (p->sibling == node) {
+ p->sibling = node->sibling;
+ break;
+ }
+ p = p->sibling;
+ }
+ }
+err_exit:
+ /* leaf nodes have no children */
+ if (node->children)
+ devm_kfree(ice_hw_to_dev(hw), node->children);
+ devm_kfree(ice_hw_to_dev(hw), node);
+}
+
+/**
+ * ice_aq_get_dflt_topo - gets default scheduler topology
+ * @hw: pointer to the hw struct
+ * @lport: logical port number
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_branches: returns total number of queue to port branches
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get default scheduler topology (0x0400)
+ */
+static enum ice_status
+ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
+ struct ice_aqc_get_topo_elem *buf, u16 buf_size,
+ u8 *num_branches, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_topo *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.get_topo;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
+ cmd->port_num = lport;
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status && num_branches)
+ *num_branches = cmd->num_branches;
+
+ return status;
+}
+
+/**
+ * ice_aq_add_sched_elems - adds scheduling element
+ * @hw: pointer to the hw struct
+ * @grps_req: the number of groups that are requested to be added
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_added: returns total number of groups added
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add scheduling elements (0x0401)
+ */
+static enum ice_status
+ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
+ struct ice_aqc_add_elem *buf, u16 buf_size,
+ u16 *grps_added, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_move_delete_elem *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.add_move_delete_elem;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ cmd->num_grps_req = cpu_to_le16(grps_req);
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status && grps_added)
+ *grps_added = le16_to_cpu(cmd->num_grps_updated);
+
+ return status;
+}
+
+/**
+ * ice_suspend_resume_elems - suspend/resume scheduler elements
+ * @hw: pointer to the hw struct
+ * @elems_req: number of elements to suspend or resume
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements suspended or resumed
+ * @cd: pointer to command details structure or NULL
+ * @cmd_code: command code for suspend or resume
+ *
+ * suspend/resume scheduler elements
+ */
+static enum ice_status
+ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req,
+ struct ice_aqc_suspend_resume_elem *buf, u16 buf_size,
+ u16 *elems_ret, struct ice_sq_cd *cd,
+ enum ice_adminq_opc cmd_code)
+{
+ struct ice_aqc_get_cfg_elem *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.get_update_elem;
+ ice_fill_dflt_direct_cmd_desc(&desc, cmd_code);
+ cmd->num_elem_req = cpu_to_le16(elems_req);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status && elems_ret)
+ *elems_ret = le16_to_cpu(cmd->num_elem_resp);
+ return status;
+}
+
+/**
+ * ice_aq_suspend_sched_elems - suspend scheduler elements
+ * @hw: pointer to the hw struct
+ * @elems_req: number of elements to suspend
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements suspended
+ * @cd: pointer to command details structure or NULL
+ *
+ * Suspend scheduling elements (0x0409)
+ */
+static enum ice_status
+ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
+ struct ice_aqc_suspend_resume_elem *buf,
+ u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+ return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
+ cd, ice_aqc_opc_suspend_sched_elems);
+}
+
+/**
+ * ice_aq_resume_sched_elems - resume scheduler elements
+ * @hw: pointer to the hw struct
+ * @elems_req: number of elements to resume
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements resumed
+ * @cd: pointer to command details structure or NULL
+ *
+ * resume scheduling elements (0x040A)
+ */
+static enum ice_status
+ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
+ struct ice_aqc_suspend_resume_elem *buf,
+ u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+ return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
+ cd, ice_aqc_opc_resume_sched_elems);
+}
+
+/**
+ * ice_aq_query_sched_res - query scheduler resource
+ * @hw: pointer to the hw struct
+ * @buf_size: buffer size in bytes
+ * @buf: pointer to buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduler resource allocation (0x0412)
+ */
+static enum ice_status
+ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
+ struct ice_aqc_query_txsched_res_resp *buf,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
+ return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_sched_suspend_resume_elems - suspend or resume hw nodes
+ * @hw: pointer to the hw struct
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be suspended or resumed
+ * @suspend: true means suspend / false means resume
+ *
+ * This function suspends or resumes hw nodes
+ */
+static enum ice_status
+ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
+ bool suspend)
+{
+ struct ice_aqc_suspend_resume_elem *buf;
+ u16 i, buf_size, num_elem_ret = 0;
+ enum ice_status status;
+
+ buf_size = sizeof(*buf) * num_nodes;
+ buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+ if (!buf)
+ return ICE_ERR_NO_MEMORY;
+
+ for (i = 0; i < num_nodes; i++)
+ buf->teid[i] = cpu_to_le32(node_teids[i]);
+
+ if (suspend)
+ status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
+ buf_size, &num_elem_ret,
+ NULL);
+ else
+ status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
+ buf_size, &num_elem_ret,
+ NULL);
+ if (status || num_elem_ret != num_nodes)
+ ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
+
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
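+
+/* A minimal usage sketch (illustrative; "node" is assumed to be a valid
+ * entry in the SW DB):
+ *
+ *	u32 teid = le32_to_cpu(node->info.node_teid);
+ *
+ *	if (ice_sched_suspend_resume_elems(hw, 1, &teid, true))
+ *		ice_debug(hw, ICE_DBG_SCHED, "suspend of %u failed\n", teid);
+ */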
+
+/**
+ * ice_sched_clear_tx_topo - clears the scheduler tree nodes
+ * @pi: port information structure
+ *
+ * This function removes all the nodes from HW as well as from SW DB.
+ */
+static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
+{
+ struct ice_sched_agg_info *agg_info;
+ struct ice_sched_vsi_info *vsi_elem;
+ struct ice_sched_agg_info *atmp;
+ struct ice_sched_vsi_info *tmp;
+ struct ice_hw *hw;
+
+ if (!pi)
+ return;
+
+ hw = pi->hw;
+
+ list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_vsi_info *vtmp;
+
+ list_for_each_entry_safe(agg_vsi_info, vtmp,
+ &agg_info->agg_vsi_list, list_entry) {
+ list_del(&agg_vsi_info->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
+ }
+ }
+
+ /* remove the vsi list */
+ list_for_each_entry_safe(vsi_elem, tmp, &pi->vsi_info_list,
+ list_entry) {
+ list_del(&vsi_elem->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), vsi_elem);
+ }
+
+ if (pi->root) {
+ ice_free_sched_node(pi, pi->root);
+ pi->root = NULL;
+ }
+}
+
+/**
+ * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
+ * @pi: port information structure
+ *
+ * Cleanup scheduling elements from SW DB
+ */
+static void ice_sched_clear_port(struct ice_port_info *pi)
+{
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return;
+
+ pi->port_state = ICE_SCHED_PORT_STATE_INIT;
+ mutex_lock(&pi->sched_lock);
+ ice_sched_clear_tx_topo(pi);
+ mutex_unlock(&pi->sched_lock);
+ mutex_destroy(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
+ * @hw: pointer to the hw struct
+ *
+ * Cleanup scheduling elements from SW DB for all the ports
+ */
+void ice_sched_cleanup_all(struct ice_hw *hw)
+{
+ if (!hw || !hw->port_info)
+ return;
+
+ if (hw->layer_info)
+ devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
+
+ ice_sched_clear_port(hw->port_info);
+
+ hw->num_tx_sched_layers = 0;
+ hw->num_tx_sched_phys_layers = 0;
+ hw->flattened_layers = 0;
+ hw->max_cgds = 0;
+}
+
+/**
+ * ice_sched_create_vsi_info_entry - create an empty new VSI entry
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ *
+ * This function creates a new VSI entry and adds it to the list
+ */
+static struct ice_sched_vsi_info *
+ice_sched_create_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
+{
+ struct ice_sched_vsi_info *vsi_elem;
+
+ if (!pi)
+ return NULL;
+
+ vsi_elem = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*vsi_elem),
+ GFP_KERNEL);
+ if (!vsi_elem)
+ return NULL;
+
+ list_add(&vsi_elem->list_entry, &pi->vsi_info_list);
+ vsi_elem->vsi_id = vsi_id;
+ return vsi_elem;
+}
+
+/**
+ * ice_sched_add_elems - add nodes to hw and SW DB
+ * @pi: port information structure
+ * @tc_node: pointer to the branch node
+ * @parent: pointer to the parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes
+ * @num_nodes_added: pointer to num nodes added
+ * @first_node_teid: if new nodes are added then return the teid of first node
+ *
+ * This function adds nodes to hw as well as to the SW DB for a given layer
+ */
+static enum ice_status
+ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+ struct ice_sched_node *parent, u8 layer, u16 num_nodes,
+ u16 *num_nodes_added, u32 *first_node_teid)
+{
+ struct ice_sched_node *prev, *new_node;
+ struct ice_aqc_add_elem *buf;
+ u16 i, num_groups_added = 0;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u16 buf_size;
+ u32 teid;
+
+ buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
+ buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+ if (!buf)
+ return ICE_ERR_NO_MEMORY;
+
+ buf->hdr.parent_teid = parent->info.node_teid;
+ buf->hdr.num_elems = cpu_to_le16(num_nodes);
+ for (i = 0; i < num_nodes; i++) {
+ buf->generic[i].parent_teid = parent->info.node_teid;
+ buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
+ buf->generic[i].data.valid_sections =
+ ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+ ICE_AQC_ELEM_VALID_EIR;
+ buf->generic[i].data.generic = 0;
+ buf->generic[i].data.cir_bw.bw_profile_idx =
+ ICE_SCHED_DFLT_RL_PROF_ID;
+ buf->generic[i].data.eir_bw.bw_profile_idx =
+ ICE_SCHED_DFLT_RL_PROF_ID;
+ }
+
+ status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
+ &num_groups_added, NULL);
+ if (status || num_groups_added != 1) {
+ ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n");
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return ICE_ERR_CFG;
+ }
+
+ *num_nodes_added = num_nodes;
+ /* add nodes to the SW DB */
+ for (i = 0; i < num_nodes; i++) {
+ status = ice_sched_add_node(pi, layer, &buf->generic[i]);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SCHED,
+ "add nodes in SW DB failed status =%d\n",
+ status);
+ break;
+ }
+
+ teid = le32_to_cpu(buf->generic[i].node_teid);
+ new_node = ice_sched_find_node_by_teid(parent, teid);
+
+ if (!new_node) {
+ ice_debug(hw, ICE_DBG_SCHED,
+ "Node is missing for teid =%d\n", teid);
+ break;
+ }
+
+ new_node->sibling = NULL;
+ new_node->tc_num = tc_node->tc_num;
+
+ /* add it to previous node sibling pointer */
+ /* Note: siblings are not linked across branches */
+ prev = ice_sched_get_first_node(hw, tc_node, layer);
+
+ if (prev && prev != new_node) {
+ while (prev->sibling)
+ prev = prev->sibling;
+ prev->sibling = new_node;
+ }
+
+ if (i == 0)
+ *first_node_teid = teid;
+ }
+
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
+
+/**
+ * ice_sched_add_nodes_to_layer - Add nodes to a given layer
+ * @pi: port information structure
+ * @tc_node: pointer to TC node
+ * @parent: pointer to parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes to be added
+ * @first_node_teid: pointer to the first node teid
+ * @num_nodes_added: pointer to number of nodes added
+ *
+ * This function adds nodes to a given layer.
+ */
+static enum ice_status
+ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
+ struct ice_sched_node *tc_node,
+ struct ice_sched_node *parent, u8 layer,
+ u16 num_nodes, u32 *first_node_teid,
+ u16 *num_nodes_added)
+{
+ u32 *first_teid_ptr = first_node_teid;
+ u16 new_num_nodes, max_child_nodes;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u16 num_added = 0;
+ u32 temp;
+
+ if (!num_nodes)
+ return status;
+
+ if (!parent || layer < hw->sw_entry_point_layer)
+ return ICE_ERR_PARAM;
+
+ *num_nodes_added = 0;
+
+ /* max children per node per layer */
+ max_child_nodes =
+ le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children);
+
+ /* current number of children + required nodes exceed max children ? */
+ if ((parent->num_children + num_nodes) > max_child_nodes) {
+ /* Fail if the parent is a TC node */
+ if (parent == tc_node)
+ return ICE_ERR_CFG;
+
+ /* use up the remaining child slots if the parent is not full */
+ if (parent->num_children < max_child_nodes) {
+ new_num_nodes = max_child_nodes - parent->num_children;
+ /* this recursion is intentional, and won't
+ * go more than 2 calls deep
+ */
+ status = ice_sched_add_nodes_to_layer(pi, tc_node,
+ parent, layer,
+ new_num_nodes,
+ first_node_teid,
+ &num_added);
+ if (status)
+ return status;
+
+ *num_nodes_added += num_added;
+ }
+ /* Don't modify the first node teid memory if the first node was
+ * added already in the above call. Instead send some temp
+ * memory for all other recursive calls.
+ */
+ if (num_added)
+ first_teid_ptr = &temp;
+
+ new_num_nodes = num_nodes - num_added;
+
+ /* This parent is full, try the next sibling */
+ parent = parent->sibling;
+
+ /* this recursion is intentional: for 1024 queues
+ * per VSI it takes a max of 16 iterations.
+ * 1024 / 8 = 128 layer 8 nodes
+ * 128 / 8 = 16 (add 8 nodes per iteration)
+ */
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
+ layer, new_num_nodes,
+ first_teid_ptr,
+ &num_added);
+ *num_nodes_added += num_added;
+ return status;
+ }
+
+ status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
+ num_nodes_added, first_node_teid);
+ return status;
+}
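+
+/* Working through the bound claimed in the comment above, assuming 8
+ * children per node: 1024 queues need 1024 / 8 = 128 queue-group nodes,
+ * whose parents at the next layer up number 128 / 8 = 16. Each sibling
+ * hop adds at most one parent's worth of nodes (8), so the recursion
+ * visits at most 16 siblings before all nodes are placed.
+ */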
+
+/**
+ * ice_sched_get_qgrp_layer - get the current queue group layer number
+ * @hw: pointer to the hw struct
+ *
+ * This function returns the current queue group layer number
+ */
+static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
+{
+ /* It's always total layers - 1, the array is 0 relative so -2 */
+ return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
+}
+
+/**
+ * ice_sched_get_vsi_layer - get the current VSI layer number
+ * @hw: pointer to the hw struct
+ *
+ * This function returns the current VSI layer number
+ */
+static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
+{
+ /* Num Layers VSI layer
+ * 9 6
+ * 7 4
+ * 5 or less sw_entry_point_layer
+ */
+ /* calculate the vsi layer based on number of layers. */
+ if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
+ u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
+
+ if (layer > hw->sw_entry_point_layer)
+ return layer;
+ }
+ return hw->sw_entry_point_layer;
+}
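+
+/* A worked example of the mapping above, assuming ICE_VSI_LAYER_OFFSET
+ * is 3: with 9 total layers the VSI layer is 9 - 3 = 6, and with 7
+ * layers it is 7 - 3 = 4; with 5 or fewer layers the computed value
+ * falls at or below sw_entry_point_layer, so the entry point is used.
+ */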
+
+/**
+ * ice_sched_get_num_nodes_per_layer - Get the total number of nodes per layer
+ * @pi: pointer to the port info struct
+ * @layer: layer number
+ *
+ * This function calculates the number of nodes present in the scheduler tree
+ * including all the branches for a given layer
+ */
+static u16
+ice_sched_get_num_nodes_per_layer(struct ice_port_info *pi, u8 layer)
+{
+ struct ice_hw *hw;
+ u16 num_nodes = 0;
+ u8 i;
+
+ if (!pi)
+ return num_nodes;
+
+ hw = pi->hw;
+
+ /* Calculate the number of nodes for all TCs */
+ for (i = 0; i < pi->root->num_children; i++) {
+ struct ice_sched_node *tc_node, *node;
+
+ tc_node = pi->root->children[i];
+
+ /* Get the first node */
+ node = ice_sched_get_first_node(hw, tc_node, layer);
+ if (!node)
+ continue;
+
+ /* count the siblings */
+ while (node) {
+ num_nodes++;
+ node = node->sibling;
+ }
+ }
+
+ return num_nodes;
+}
+
+/**
+ * ice_sched_validate_for_max_nodes - check whether the max number of nodes is reached
+ * @pi: port information structure
+ * @new_num_nodes_per_layer: pointer to the new number of nodes array
+ *
+ * This function checks whether the scheduler tree layers have enough space to
+ * add new nodes
+ */
+static enum ice_status
+ice_sched_validate_for_max_nodes(struct ice_port_info *pi,
+ u16 *new_num_nodes_per_layer)
+{
+ struct ice_hw *hw = pi->hw;
+ u8 i, qg_layer;
+ u16 num_nodes;
+
+ qg_layer = ice_sched_get_qgrp_layer(hw);
+
+ /* walk through all the layers from SW entry point to qgroup layer */
+ for (i = hw->sw_entry_point_layer; i <= qg_layer; i++) {
+ num_nodes = ice_sched_get_num_nodes_per_layer(pi, i);
+ if (num_nodes + new_num_nodes_per_layer[i] >
+ le16_to_cpu(hw->layer_info[i].max_pf_nodes)) {
+ ice_debug(hw, ICE_DBG_SCHED,
+ "max nodes reached for layer = %d\n", i);
+ return ICE_ERR_CFG;
+ }
+ }
+ return 0;
+}
+
+/**
+ * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
+ * @pi: port information structure
+ *
+ * This function removes the leaf node that was created by the FW
+ * during initialization
+ */
+static void
+ice_rm_dflt_leaf_node(struct ice_port_info *pi)
+{
+ struct ice_sched_node *node;
+
+ node = pi->root;
+ while (node) {
+ if (!node->num_children)
+ break;
+ node = node->children[0];
+ }
+ if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
+ u32 teid = le32_to_cpu(node->info.node_teid);
+ enum ice_status status;
+
+ /* remove the default leaf node */
+ status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
+ if (!status)
+ ice_free_sched_node(pi, node);
+ }
+}
+
+/**
+ * ice_sched_rm_dflt_nodes - free the default nodes in the tree
+ * @pi: port information structure
+ *
+ * This function frees all the nodes except root and TC that were created by
+ * the FW during initialization
+ */
+static void
+ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
+{
+ struct ice_sched_node *node;
+
+ ice_rm_dflt_leaf_node(pi);
+
+ /* remove the default nodes except TC and root nodes */
+ node = pi->root;
+ while (node) {
+ if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
+ ice_free_sched_node(pi, node);
+ break;
+ }
+
+ if (!node->num_children)
+ break;
+ node = node->children[0];
+ }
+}
+
+/**
+ * ice_sched_init_port - Initialize scheduler by querying information from FW
+ * @pi: port info structure for the tree to cleanup
+ *
+ * This function is the initial call to find the total number of Tx scheduler
+ * resources and the default topology created by firmware, and to store that
+ * information in the SW DB.
+ */
+enum ice_status ice_sched_init_port(struct ice_port_info *pi)
+{
+ struct ice_aqc_get_topo_elem *buf;
+ enum ice_status status;
+ struct ice_hw *hw;
+ u8 num_branches;
+ u16 num_elems;
+ u8 i, j;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+ hw = pi->hw;
+
+ /* Query the Default Topology from FW */
+ buf = devm_kcalloc(ice_hw_to_dev(hw), ICE_TXSCHED_MAX_BRANCHES,
+ sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ return ICE_ERR_NO_MEMORY;
+
+ /* Query default scheduling tree topology */
+ status = ice_aq_get_dflt_topo(hw, pi->lport, buf,
+ sizeof(*buf) * ICE_TXSCHED_MAX_BRANCHES,
+ &num_branches, NULL);
+ if (status)
+ goto err_init_port;
+
+ /* num_branches should be between 1-8 */
+ if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
+ ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
+ num_branches);
+ status = ICE_ERR_PARAM;
+ goto err_init_port;
+ }
+
+ /* get the number of elements on the default/first branch */
+ num_elems = le16_to_cpu(buf[0].hdr.num_elems);
+
+ /* num_elems should always be between 1-9 */
+ if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
+ ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
+ num_elems);
+ status = ICE_ERR_PARAM;
+ goto err_init_port;
+ }
+
+ /* If the last node is a leaf node then the index of the Q group
+ * layer is two less than the number of elements.
+ */
+ if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
+ ICE_AQC_ELEM_TYPE_LEAF)
+ pi->last_node_teid =
+ le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
+ else
+ pi->last_node_teid =
+ le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
+
+ /* Insert the Tx Sched root node */
+ status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
+ if (status)
+ goto err_init_port;
+
+ /* Parse the default tree and cache the information */
+ for (i = 0; i < num_branches; i++) {
+ num_elems = le16_to_cpu(buf[i].hdr.num_elems);
+
+ /* Skip root element as already inserted */
+ for (j = 1; j < num_elems; j++) {
+ /* update the sw entry point */
+ if (buf[0].generic[j].data.elem_type ==
+ ICE_AQC_ELEM_TYPE_ENTRY_POINT)
+ hw->sw_entry_point_layer = j;
+
+ status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
+ if (status)
+ goto err_init_port;
+ }
+ }
+
+ /* Remove the default nodes. */
+ if (pi->root)
+ ice_sched_rm_dflt_nodes(pi);
+
+ /* initialize the port for handling the scheduler tree */
+ pi->port_state = ICE_SCHED_PORT_STATE_READY;
+ mutex_init(&pi->sched_lock);
+ INIT_LIST_HEAD(&pi->agg_list);
+ INIT_LIST_HEAD(&pi->vsi_info_list);
+
+err_init_port:
+ if (status && pi->root) {
+ ice_free_sched_node(pi, pi->root);
+ pi->root = NULL;
+ }
+
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
+
+/**
+ * ice_sched_query_res_alloc - query the FW for num of logical sched layers
+ * @hw: pointer to the HW struct
+ *
+ * query FW for allocated scheduler resources and store in HW struct
+ */
+enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
+{
+ struct ice_aqc_query_txsched_res_resp *buf;
+ enum ice_status status = 0;
+
+ if (hw->layer_info)
+ return status;
+
+ buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ return ICE_ERR_NO_MEMORY;
+
+ status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
+ if (status)
+ goto sched_query_out;
+
+ hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
+ hw->num_tx_sched_phys_layers =
+ le16_to_cpu(buf->sched_props.phys_levels);
+ hw->flattened_layers = buf->sched_props.flattening_bitmap;
+ hw->max_cgds = buf->sched_props.max_pf_cgds;
+
+ hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
+ (hw->num_tx_sched_layers *
+ sizeof(*hw->layer_info)),
+ GFP_KERNEL);
+ if (!hw->layer_info) {
+ status = ICE_ERR_NO_MEMORY;
+ goto sched_query_out;
+ }
+
+sched_query_out:
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
+
+/**
+ * ice_sched_get_vsi_info_entry - Get the VSI info entry for a given vsi_id
+ * @pi: port information structure
+ * @vsi_id: vsi id
+ *
+ * This function retrieves the VSI info entry for the given vsi id
+ */
+static struct ice_sched_vsi_info *
+ice_sched_get_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
+{
+ struct ice_sched_vsi_info *list_elem;
+
+ if (!pi)
+ return NULL;
+
+ list_for_each_entry(list_elem, &pi->vsi_info_list, list_entry)
+ if (list_elem->vsi_id == vsi_id)
+ return list_elem;
+ return NULL;
+}
+
+/**
+ * ice_sched_find_node_in_subtree - Find node in part of base node subtree
+ * @hw: pointer to the hw struct
+ * @base: pointer to the base node
+ * @node: pointer to the node to search
+ *
+ * This function checks whether a given node is part of the base node
+ * subtree or not
+ */
+static bool
+ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
+ struct ice_sched_node *node)
+{
+ u8 i;
+
+ for (i = 0; i < base->num_children; i++) {
+ struct ice_sched_node *child = base->children[i];
+
+ if (node == child)
+ return true;
+
+ if (child->tx_sched_layer > node->tx_sched_layer)
+ return false;
+
+ /* this recursion is intentional, and won't
+ * go more than 8 calls deep
+ */
+ if (ice_sched_find_node_in_subtree(hw, child, node))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * ice_sched_get_free_qparent - Get a free lan or rdma q group node
+ * @pi: port information structure
+ * @vsi_id: vsi id
+ * @tc: branch number
+ * @owner: lan or rdma
+ *
+ * This function retrieves a free lan or rdma q group node
+ */
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
+ u8 owner)
+{
+ struct ice_sched_node *vsi_node, *qgrp_node = NULL;
+ struct ice_sched_vsi_info *list_elem;
+ u16 max_children;
+ u8 qgrp_layer;
+
+ qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
+ max_children = le16_to_cpu(pi->hw->layer_info[qgrp_layer].max_children);
+
+ list_elem = ice_sched_get_vsi_info_entry(pi, vsi_id);
+ if (!list_elem)
+ goto lan_q_exit;
+
+ vsi_node = list_elem->vsi_node[tc];
+
+ /* bail out on an invalid VSI id */
+ if (!vsi_node)
+ goto lan_q_exit;
+
+ /* get the first q group node from VSI sub-tree */
+ qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
+ while (qgrp_node) {
+ /* make sure the qgroup node is part of the VSI subtree */
+ if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
+ if (qgrp_node->num_children < max_children &&
+ qgrp_node->owner == owner)
+ break;
+ qgrp_node = qgrp_node->sibling;
+ }
+
+lan_q_exit:
+ return qgrp_node;
+}
+
+/**
+ * ice_sched_get_vsi_node - Get a VSI node based on VSI id
+ * @hw: pointer to the hw struct
+ * @tc_node: pointer to the TC node
+ * @vsi_id: VSI id
+ *
+ * This function retrieves a VSI node for a given VSI id from a given
+ * TC branch
+ */
+static struct ice_sched_node *
+ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
+ u16 vsi_id)
+{
+ struct ice_sched_node *node;
+ u8 vsi_layer;
+
+ vsi_layer = ice_sched_get_vsi_layer(hw);
+ node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
+
+ /* Check whether it already exists */
+ while (node) {
+ if (node->vsi_id == vsi_id)
+ return node;
+ node = node->sibling;
+ }
+
+ return node;
+}
+
+/**
+ * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
+ * @hw: pointer to the hw struct
+ * @num_qs: number of queues
+ * @num_nodes: num nodes array
+ *
+ * This function calculates the number of VSI child nodes based on the
+ * number of queues.
+ */
+static void
+ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
+{
+ u16 num = num_qs;
+ u8 i, qgl, vsil;
+
+ qgl = ice_sched_get_qgrp_layer(hw);
+ vsil = ice_sched_get_vsi_layer(hw);
+
+ /* calculate num nodes from q group to VSI layer */
+ for (i = qgl; i > vsil; i--) {
+ u16 max_children = le16_to_cpu(hw->layer_info[i].max_children);
+
+ /* round to the next integer if there is a remainder */
+ num = DIV_ROUND_UP(num, max_children);
+
+ /* need at least one node */
+ num_nodes[i] = num ? num : 1;
+ }
+}
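+
+/* A worked example, assuming 8 children per node at every layer and
+ * num_qs = 130: the queue-group layer needs DIV_ROUND_UP(130, 8) = 17
+ * nodes, the layer above it DIV_ROUND_UP(17, 8) = 3, and so on up to,
+ * but not including, the VSI layer.
+ */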
+
+/**
+ * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
+ * @pi: port information structure
+ * @vsi_id: VSI id
+ * @tc_node: pointer to the TC node
+ * @num_nodes: pointer to the num nodes that needs to be added per layer
+ * @owner: node owner (lan or rdma)
+ *
+ * This function adds the VSI child nodes to tree. It gets called for
+ * lan and rdma separately.
+ */
+static enum ice_status
+ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id,
+ struct ice_sched_node *tc_node, u16 *num_nodes,
+ u8 owner)
+{
+ struct ice_sched_node *parent, *node;
+ struct ice_hw *hw = pi->hw;
+ enum ice_status status;
+ u32 first_node_teid;
+ u16 num_added = 0;
+ u8 i, qgl, vsil;
+
+ status = ice_sched_validate_for_max_nodes(pi, num_nodes);
+ if (status)
+ return status;
+
+ qgl = ice_sched_get_qgrp_layer(hw);
+ vsil = ice_sched_get_vsi_layer(hw);
+ parent = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+ for (i = vsil + 1; i <= qgl; i++) {
+ if (!parent)
+ return ICE_ERR_CFG;
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+ num_nodes[i],
+ &first_node_teid,
+ &num_added);
+ if (status || num_nodes[i] != num_added)
+ return ICE_ERR_CFG;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_added) {
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ node = parent;
+ while (node) {
+ node->owner = owner;
+ node = node->sibling;
+ }
+ } else {
+ parent = parent->children[0];
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree
+ * @pi: port information structure
+ * @vsi_node: pointer to the VSI node
+ * @num_nodes: pointer to the num nodes that needs to be removed per layer
+ * @owner: node owner (lan or rdma)
+ *
+ * This function removes the VSI child nodes from the tree. It gets called for
+ * lan and rdma separately.
+ */
+static void
+ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi,
+ struct ice_sched_node *vsi_node, u16 *num_nodes,
+ u8 owner)
+{
+ struct ice_sched_node *node, *next;
+ u8 i, qgl, vsil;
+ u16 num;
+
+ qgl = ice_sched_get_qgrp_layer(pi->hw);
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+
+ for (i = qgl; i > vsil; i--) {
+ num = num_nodes[i];
+ node = ice_sched_get_first_node(pi->hw, vsi_node, i);
+ while (node && num) {
+ next = node->sibling;
+ if (node->owner == owner && !node->num_children) {
+ ice_free_sched_node(pi, node);
+ num--;
+ }
+ node = next;
+ }
+ }
+}
+
+/**
+ * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
+ * @hw: pointer to the hw struct
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function calculates the number of support nodes needed to add this
+ * VSI into the tx tree, including the VSI, its parent and the intermediate
+ * nodes in the layers below
+ */
+static void
+ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
+ struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+ struct ice_sched_node *node;
+ u16 max_child;
+ u8 i, vsil;
+
+ vsil = ice_sched_get_vsi_layer(hw);
+ for (i = vsil; i >= hw->sw_entry_point_layer; i--)
+ /* Add an intermediate node if the TC has no children yet;
+ * the VSI layer always needs at least one node
+ */
+ if (!tc_node->num_children || i == vsil) {
+ num_nodes[i]++;
+ } else {
+			/* If all the intermediate nodes at this layer have
+			 * reached max children then add a new one.
+			 */
+ node = ice_sched_get_first_node(hw, tc_node, i);
+ max_child = le16_to_cpu(hw->layer_info[i].max_children);
+
+ /* scan all the siblings */
+ while (node) {
+ if (node->num_children < max_child)
+ break;
+ node = node->sibling;
+ }
+
+ /* all the nodes are full, allocate a new one */
+ if (!node)
+ num_nodes[i]++;
+ }
+}
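+
+/* In short: one node is always requested at the VSI layer; at each layer
+ * above it a node is requested only when the TC has no children yet or when
+ * every existing node at that layer is already at max_children.
+ */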
+
+/**
+ * ice_sched_add_vsi_support_nodes - add VSI supported nodes into tx tree
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function adds the VSI support nodes into the tx tree, including the
+ * VSI node itself and the intermediate nodes between the switch entry point
+ * and the VSI layer
+ */
+static enum ice_status
+ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_id,
+ struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+ struct ice_sched_node *parent = tc_node;
+ enum ice_status status;
+ u32 first_node_teid;
+ u16 num_added = 0;
+ u8 i, vsil;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+
+ status = ice_sched_validate_for_max_nodes(pi, num_nodes);
+ if (status)
+ return status;
+
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+ for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
+ i, num_nodes[i],
+ &first_node_teid,
+ &num_added);
+ if (status || num_nodes[i] != num_added)
+ return ICE_ERR_CFG;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_added)
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ else
+ parent = parent->children[0];
+
+ if (!parent)
+ return ICE_ERR_CFG;
+
+ if (i == vsil)
+ parent->vsi_id = vsi_id;
+ }
+ return 0;
+}
+
+/**
+ * ice_sched_add_vsi_to_topo - add a new VSI into tree
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc: TC number
+ *
+ * This function adds a new VSI into the scheduler tree
+ */
+static enum ice_status
+ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_id, u8 tc)
+{
+ u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ struct ice_sched_node *tc_node;
+ struct ice_hw *hw = pi->hw;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_PARAM;
+
+ /* calculate number of supported nodes needed for this VSI */
+ ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
+
+ /* add vsi supported nodes to tc subtree */
+ return ice_sched_add_vsi_support_nodes(pi, vsi_id, tc_node, num_nodes);
+}
+
+/**
+ * ice_sched_update_vsi_child_nodes - update VSI child nodes
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc: TC number
+ * @new_numqs: new number of max queues
+ * @owner: owner of this subtree
+ *
+ * This function updates the VSI child nodes based on the number of queues
+ */
+static enum ice_status
+ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc,
+ u16 new_numqs, u8 owner)
+{
+ u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ struct ice_sched_node *vsi_node;
+ struct ice_sched_node *tc_node;
+ struct ice_sched_vsi_info *vsi;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u16 prev_numqs;
+ u8 i;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_CFG;
+
+ vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+ if (!vsi_node)
+ return ICE_ERR_CFG;
+
+ vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
+ if (!vsi)
+ return ICE_ERR_CFG;
+
+ if (owner == ICE_SCHED_NODE_OWNER_LAN)
+ prev_numqs = vsi->max_lanq[tc];
+ else
+ return ICE_ERR_PARAM;
+
+	/* number of queues is unchanged */
+ if (prev_numqs == new_numqs)
+ return status;
+
+ /* calculate number of nodes based on prev/new number of qs */
+ if (prev_numqs)
+ ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes);
+
+ if (new_numqs)
+ ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
+
+ if (prev_numqs > new_numqs) {
+ for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+ new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i];
+
+ ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes,
+ owner);
+ } else {
+ for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+ new_num_nodes[i] -= prev_num_nodes[i];
+
+ status = ice_sched_add_vsi_child_nodes(pi, vsi_id, tc_node,
+ new_num_nodes, owner);
+ if (status)
+ return status;
+ }
+
+ if (owner == ICE_SCHED_NODE_OWNER_LAN)
+ vsi->max_lanq[tc] = new_numqs;
+
+ return status;
+}
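+
+/* Worked example with assumed limits: growing a VSI from 8 to 24 queues with
+ * max_children = 8 at the queue group layer yields prev_num_nodes = 1 and
+ * new_num_nodes = 3 there, so 3 - 1 = 2 queue group nodes are added;
+ * shrinking instead removes the prev - new surplus via
+ * ice_sched_rm_vsi_child_nodes().
+ */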
+
+/**
+ * ice_sched_cfg_vsi - configure the new/existing VSI
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc: TC number
+ * @maxqs: max number of queues
+ * @owner: lan or rdma
+ * @enable: TC enabled or disabled
+ *
+ * This function adds/updates VSI nodes based on the number of queues. If the
+ * TC is enabled and the VSI is in the suspended state, it resumes the VSI; if
+ * the TC is disabled, it suspends the VSI if not already suspended.
+ */
+enum ice_status
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs,
+ u8 owner, bool enable)
+{
+ struct ice_sched_node *vsi_node, *tc_node;
+ struct ice_sched_vsi_info *vsi;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_PARAM;
+
+ vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
+ if (!vsi)
+ vsi = ice_sched_create_vsi_info_entry(pi, vsi_id);
+ if (!vsi)
+ return ICE_ERR_NO_MEMORY;
+
+ vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+
+ /* suspend the VSI if tc is not enabled */
+ if (!enable) {
+ if (vsi_node && vsi_node->in_use) {
+ u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+ status = ice_sched_suspend_resume_elems(hw, 1, &teid,
+ true);
+ if (!status)
+ vsi_node->in_use = false;
+ }
+ return status;
+ }
+
+ /* TC is enabled, if it is a new VSI then add it to the tree */
+ if (!vsi_node) {
+ status = ice_sched_add_vsi_to_topo(pi, vsi_id, tc);
+ if (status)
+ return status;
+ vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+ if (!vsi_node)
+ return ICE_ERR_CFG;
+ vsi->vsi_node[tc] = vsi_node;
+ vsi_node->in_use = true;
+ }
+
+ /* update the VSI child nodes */
+ status = ice_sched_update_vsi_child_nodes(pi, vsi_id, tc, maxqs, owner);
+ if (status)
+ return status;
+
+ /* TC is enabled, resume the VSI if it is in the suspend state */
+ if (!vsi_node->in_use) {
+ u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+ status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
+ if (!status)
+ vsi_node->in_use = true;
+ }
+
+ return status;
+}
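+
+/* Minimal caller sketch (illustrative only; assumes an initialized port,
+ * a valid VSI id and queue count, error handling trimmed):
+ *
+ *	status = ice_sched_cfg_vsi(pi, vsi_id, tc, num_txq,
+ *				   ICE_SCHED_NODE_OWNER_LAN, true);
+ *	if (status)
+ *		goto err_cfg;
+ */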
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
new file mode 100644
index 000000000000..badadcc120d3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SCHED_H_
+#define _ICE_SCHED_H_
+
+#include "ice_common.h"
+
+#define ICE_QGRP_LAYER_OFFSET 2
+#define ICE_VSI_LAYER_OFFSET 4
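+/* These offsets are relative to the total number of tx scheduler layers;
+ * the queue group layer is presumably derived as
+ * hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET (an assumption based on
+ * the naming; the actual helpers live in ice_sched.c).
+ */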
+
+struct ice_sched_agg_vsi_info {
+ struct list_head list_entry;
+ DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ u16 vsi_id;
+};
+
+struct ice_sched_agg_info {
+ struct list_head agg_vsi_list;
+ struct list_head list_entry;
+ DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ u32 agg_id;
+ enum ice_agg_type agg_type;
+};
+
+/* FW AQ command calls */
+enum ice_status ice_sched_init_port(struct ice_port_info *pi);
+enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
+void ice_sched_cleanup_all(struct ice_hw *hw);
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid);
+enum ice_status
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+ struct ice_aqc_txsched_elem_data *info);
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node);
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
+ u8 owner);
+enum ice_status
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs,
+ u8 owner, bool enable);
+#endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
new file mode 100644
index 000000000000..9a95c4ffd7d7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_STATUS_H_
+#define _ICE_STATUS_H_
+
+/* Error Codes */
+enum ice_status {
+ ICE_ERR_PARAM = -1,
+ ICE_ERR_NOT_IMPL = -2,
+ ICE_ERR_NOT_READY = -3,
+ ICE_ERR_BAD_PTR = -5,
+ ICE_ERR_INVAL_SIZE = -6,
+ ICE_ERR_DEVICE_NOT_SUPPORTED = -8,
+ ICE_ERR_RESET_FAILED = -9,
+ ICE_ERR_FW_API_VER = -10,
+ ICE_ERR_NO_MEMORY = -11,
+ ICE_ERR_CFG = -12,
+ ICE_ERR_OUT_OF_RANGE = -13,
+ ICE_ERR_ALREADY_EXISTS = -14,
+ ICE_ERR_DOES_NOT_EXIST = -15,
+ ICE_ERR_MAX_LIMIT = -17,
+ ICE_ERR_BUF_TOO_SHORT = -52,
+ ICE_ERR_NVM_BLANK_MODE = -53,
+ ICE_ERR_AQ_ERROR = -100,
+ ICE_ERR_AQ_TIMEOUT = -101,
+ ICE_ERR_AQ_FULL = -102,
+ ICE_ERR_AQ_NO_WORK = -103,
+ ICE_ERR_AQ_EMPTY = -104,
+};
+
+#endif /* _ICE_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
new file mode 100644
index 000000000000..723d15f1e90b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -0,0 +1,1883 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_switch.h"
+
+#define ICE_ETH_DA_OFFSET 0
+#define ICE_ETH_ETHTYPE_OFFSET 12
+#define ICE_ETH_VLAN_TCI_OFFSET 14
+#define ICE_MAX_VLAN_ID 0xFFF
+
+/* Dummy ethernet header needed in the ice_aqc_sw_rules_elem
+ * struct to configure any switch filter rules.
+ * {DA (6 bytes), SA (6 bytes),
+ * Ether type (2 bytes for header without VLAN tag) OR
+ * VLAN tag (4 bytes for header with VLAN tag) }
+ *
+ * A note on the hardcoded values:
+ * byte 0 = 0x2: to identify it as a locally administered DA MAC
+ * byte 6 = 0x2: to identify it as a locally administered SA MAC
+ * byte 12 = 0x81 & byte 13 = 0x00:
+ *	In case of a VLAN filter the first two bytes define the ether type
+ *	(0x8100) and the remaining two bytes are a placeholder for programming
+ *	a given VLAN id. In case of an Ether type filter it is treated as a
+ *	header without a VLAN tag and bytes 12 and 13 are used to program a
+ *	given Ether type instead.
+ */
+#define DUMMY_ETH_HDR_LEN 16
+static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0,
+ 0x2, 0, 0, 0, 0, 0,
+ 0x81, 0, 0, 0};
+
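+/* Example of how the dummy header gets patched at rule-fill time (values
+ * illustrative): a VLAN filter for vlan_id = 5 leaves bytes 12-13 as
+ * 0x81 0x00 (ether type 0x8100) and writes 0x00 0x05 into bytes 14-15,
+ * while an Ether type filter for 0x0800 rewrites bytes 12-13 to 0x08 0x00
+ * instead.
+ */
+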
+#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \
+ (sizeof(struct ice_aqc_sw_rules_elem) - \
+ sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \
+ sizeof(struct ice_sw_rule_lkup_rx_tx) + DUMMY_ETH_HDR_LEN - 1)
+#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE \
+ (sizeof(struct ice_aqc_sw_rules_elem) - \
+ sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \
+ sizeof(struct ice_sw_rule_lkup_rx_tx) - 1)
+#define ICE_SW_RULE_LG_ACT_SIZE(n) \
+ (sizeof(struct ice_aqc_sw_rules_elem) - \
+ sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \
+ sizeof(struct ice_sw_rule_lg_act) - \
+ sizeof(((struct ice_sw_rule_lg_act *)0)->act) + \
+ ((n) * sizeof(((struct ice_sw_rule_lg_act *)0)->act)))
+#define ICE_SW_RULE_VSI_LIST_SIZE(n) \
+ (sizeof(struct ice_aqc_sw_rules_elem) - \
+ sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \
+ sizeof(struct ice_sw_rule_vsi_list) - \
+ sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi) + \
+ ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi)))
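+
+/* Each size above starts from the fixed part of ice_aqc_sw_rules_elem (the
+ * struct minus its pdata member) and adds the exact payload: the lookup rule
+ * sizes account for the one-byte flexible hdr member (hence the "- 1"), and
+ * the large action/VSI list sizes scale with n act/vsi entries.
+ */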
+
+/**
+ * ice_aq_alloc_free_res - command to allocate/free resources
+ * @hw: pointer to the hw struct
+ * @num_entries: number of resource entries in buffer
+ * @buf: Indirect buffer to hold data parameters and response
+ * @buf_size: size of buffer for indirect commands
+ * @opc: pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Helper function to allocate/free resources using the admin queue commands
+ */
+static enum ice_status
+ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries,
+ struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
+ enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_alloc_free_res_cmd *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.sw_res_ctrl;
+
+ if (!buf)
+ return ICE_ERR_PARAM;
+
+ if (buf_size < (num_entries * sizeof(buf->elem[0])))
+ return ICE_ERR_PARAM;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ cmd->num_entries = cpu_to_le16(num_entries);
+
+ return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_aq_get_sw_cfg - get switch configuration
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the result buffer
+ * @buf_size: length of the buffer available for response
+ * @req_desc: pointer to requested descriptor
+ * @num_elems: pointer to number of elements
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get switch configuration (0x0200) to be placed in 'buf'.
+ * This admin command returns information such as initial VSI/port number
+ * and switch ID it belongs to.
+ *
+ * NOTE: *req_desc is both an input/output parameter.
+ * The caller of this function first calls this function with *req_desc set
+ * to 0. If the response from f/w has *req_desc set to 0, all the switch
+ * configuration information has been returned; if non-zero (meaning not all
+ * the information was returned), the caller should call this function again
+ * with *req_desc set to the previous value returned by f/w to get the
+ * next block of switch configuration information.
+ *
+ * *num_elems is an output-only parameter. It reflects the number of elements
+ * in the response buffer. The caller should use *num_elems when parsing the
+ * response buffer.
+ */
+static enum ice_status
+ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp *buf,
+ u16 buf_size, u16 *req_desc, u16 *num_elems,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_sw_cfg *cmd;
+ enum ice_status status;
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sw_cfg);
+ cmd = &desc.params.get_sw_conf;
+ cmd->element = cpu_to_le16(*req_desc);
+
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status) {
+ *req_desc = le16_to_cpu(cmd->element);
+ *num_elems = le16_to_cpu(cmd->num_elems);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_add_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware (0x0210)
+ */
+enum ice_status
+ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_update_free_vsi_resp *res;
+ struct ice_aqc_add_get_update_free_vsi *cmd;
+ enum ice_status status;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.vsi_cmd;
+ res = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi);
+
+ if (!vsi_ctx->alloc_from_pool)
+ cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num |
+ ICE_AQ_VSI_IS_VALID);
+
+ cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info), cd);
+
+ if (!status) {
+ vsi_ctx->vsi_num = le16_to_cpu(res->vsi_num) & ICE_AQ_VSI_NUM_M;
+ vsi_ctx->vsis_allocd = le16_to_cpu(res->vsi_used);
+ vsi_ctx->vsis_unallocated = le16_to_cpu(res->vsi_free);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_update_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware (0x0211)
+ */
+enum ice_status
+ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_update_free_vsi_resp *resp;
+ struct ice_aqc_add_get_update_free_vsi *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.vsi_cmd;
+ resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi);
+
+ cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info), cd);
+
+ if (!status) {
+ vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+ vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_free_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Free VSI context info from hardware (0x0213)
+ */
+enum ice_status
+ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_update_free_vsi_resp *resp;
+ struct ice_aqc_add_get_update_free_vsi *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.vsi_cmd;
+ resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi);
+
+ cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+ if (keep_vsi_alloc)
+ cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC);
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ if (!status) {
+ vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+ vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_alloc_free_vsi_list
+ * @hw: pointer to the hw struct
+ * @vsi_list_id: VSI list id returned or used for lookup
+ * @lkup_type: switch rule filter lookup type
+ * @opc: switch rules population command type - pass in the command opcode
+ *
+ * Allocates or frees a VSI list resource
+ */
+static enum ice_status
+ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id,
+ enum ice_sw_lkup_type lkup_type,
+ enum ice_adminq_opc opc)
+{
+ struct ice_aqc_alloc_free_res_elem *sw_buf;
+ struct ice_aqc_res_elem *vsi_ele;
+ enum ice_status status;
+ u16 buf_len;
+
+ buf_len = sizeof(*sw_buf);
+ sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL);
+ if (!sw_buf)
+ return ICE_ERR_NO_MEMORY;
+ sw_buf->num_elems = cpu_to_le16(1);
+
+ if (lkup_type == ICE_SW_LKUP_MAC ||
+ lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+ lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+ lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+ lkup_type == ICE_SW_LKUP_PROMISC ||
+ lkup_type == ICE_SW_LKUP_PROMISC_VLAN) {
+ sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP);
+ } else if (lkup_type == ICE_SW_LKUP_VLAN) {
+ sw_buf->res_type =
+ cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE);
+ } else {
+ status = ICE_ERR_PARAM;
+ goto ice_aq_alloc_free_vsi_list_exit;
+ }
+
+ if (opc == ice_aqc_opc_free_res)
+ sw_buf->elem[0].e.sw_resp = cpu_to_le16(*vsi_list_id);
+
+ status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, opc, NULL);
+ if (status)
+ goto ice_aq_alloc_free_vsi_list_exit;
+
+ if (opc == ice_aqc_opc_alloc_res) {
+ vsi_ele = &sw_buf->elem[0];
+ *vsi_list_id = le16_to_cpu(vsi_ele->e.sw_resp);
+ }
+
+ice_aq_alloc_free_vsi_list_exit:
+ devm_kfree(ice_hw_to_dev(hw), sw_buf);
+ return status;
+}
+
+/**
+ * ice_aq_sw_rules - add/update/remove switch rules
+ * @hw: pointer to the hw struct
+ * @rule_list: pointer to switch rule population list
+ * @rule_list_sz: total size of the rule list in bytes
+ * @num_rules: number of switch rules in the rule_list
+ * @opc: switch rules population command type - pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware
+ */
+static enum ice_status
+ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz,
+ u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+
+ if (opc != ice_aqc_opc_add_sw_rules &&
+ opc != ice_aqc_opc_update_sw_rules &&
+ opc != ice_aqc_opc_remove_sw_rules)
+ return ICE_ERR_PARAM;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ desc.params.sw_rules.num_rules_fltr_entry_index =
+ cpu_to_le16(num_rules);
+ return ice_aq_send_cmd(hw, &desc, rule_list, rule_list_sz, cd);
+}
+
+/**
+ * ice_init_port_info - Initialize port_info with switch configuration data
+ * @pi: pointer to port_info
+ * @vsi_port_num: VSI number or port number
+ * @type: Type of switch element (port or VSI)
+ * @swid: switch ID of the switch the element is attached to
+ * @pf_vf_num: PF or VF number
+ * @is_vf: true if the element is a VF, false otherwise
+ */
+static void
+ice_init_port_info(struct ice_port_info *pi, u16 vsi_port_num, u8 type,
+ u16 swid, u16 pf_vf_num, bool is_vf)
+{
+ switch (type) {
+ case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT:
+ pi->lport = (u8)(vsi_port_num & ICE_LPORT_MASK);
+ pi->sw_id = swid;
+ pi->pf_vf_num = pf_vf_num;
+ pi->is_vf = is_vf;
+ pi->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL;
+ pi->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL;
+ break;
+ default:
+ ice_debug(pi->hw, ICE_DBG_SW,
+ "incorrect VSI/port type received\n");
+ break;
+ }
+}
+
+/**
+ * ice_get_initial_sw_cfg - Get initial port and default VSI data
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw)
+{
+ struct ice_aqc_get_sw_cfg_resp *rbuf;
+ enum ice_status status;
+ u16 req_desc = 0;
+ u16 num_elems;
+ u16 i;
+
+ rbuf = devm_kzalloc(ice_hw_to_dev(hw), ICE_SW_CFG_MAX_BUF_LEN,
+ GFP_KERNEL);
+
+ if (!rbuf)
+ return ICE_ERR_NO_MEMORY;
+
+ /* Multiple calls to ice_aq_get_sw_cfg may be required
+ * to get all the switch configuration information. The need
+ * for additional calls is indicated by ice_aq_get_sw_cfg
+ * writing a non-zero value in req_desc
+ */
+ do {
+ status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN,
+ &req_desc, &num_elems, NULL);
+
+ if (status)
+ break;
+
+ for (i = 0; i < num_elems; i++) {
+ struct ice_aqc_get_sw_cfg_resp_elem *ele;
+ u16 pf_vf_num, swid, vsi_port_num;
+ bool is_vf = false;
+ u8 type;
+
+ ele = rbuf[i].elements;
+ vsi_port_num = le16_to_cpu(ele->vsi_port_num) &
+ ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M;
+
+ pf_vf_num = le16_to_cpu(ele->pf_vf_num) &
+ ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M;
+
+ swid = le16_to_cpu(ele->swid);
+
+ if (le16_to_cpu(ele->pf_vf_num) &
+ ICE_AQC_GET_SW_CONF_RESP_IS_VF)
+ is_vf = true;
+
+ type = le16_to_cpu(ele->vsi_port_num) >>
+ ICE_AQC_GET_SW_CONF_RESP_TYPE_S;
+
+ if (type == ICE_AQC_GET_SW_CONF_RESP_VSI) {
+ /* FW VSI is not needed. Just continue. */
+ continue;
+ }
+
+ ice_init_port_info(hw->port_info, vsi_port_num,
+ type, swid, pf_vf_num, is_vf);
+ }
+ } while (req_desc && !status);
+
+	devm_kfree(ice_hw_to_dev(hw), rbuf);
+ return status;
+}
+
+/**
+ * ice_fill_sw_info - Helper function to populate lb_en and lan_en
+ * @hw: pointer to the hardware structure
+ * @f_info: filter info structure to fill/update
+ *
+ * This helper function populates the lb_en and lan_en elements of the provided
+ * ice_fltr_info struct using the switch's type and characteristics of the
+ * switch rule being configured.
+ */
+static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *f_info)
+{
+ f_info->lb_en = false;
+ f_info->lan_en = false;
+ if ((f_info->flag & ICE_FLTR_TX) &&
+ (f_info->fltr_act == ICE_FWD_TO_VSI ||
+ f_info->fltr_act == ICE_FWD_TO_VSI_LIST ||
+ f_info->fltr_act == ICE_FWD_TO_Q ||
+ f_info->fltr_act == ICE_FWD_TO_QGRP)) {
+ f_info->lb_en = true;
+ if (!(hw->evb_veb && f_info->lkup_type == ICE_SW_LKUP_MAC &&
+ is_unicast_ether_addr(f_info->l_data.mac.mac_addr)))
+ f_info->lan_en = true;
+ }
+}
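+
+/* Net effect: Tx-side forwarding rules always loop back into the switch
+ * (lb_en); they are also sent out on the wire (lan_en) unless the switch is
+ * a VEB and the rule matches a unicast MAC, in which case the traffic stays
+ * internal. Rx rules leave both flags clear.
+ */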
+
+/**
+ * ice_fill_sw_rule - Helper function to fill switch rule structure
+ * @hw: pointer to the hardware structure
+ * @f_info: entry containing packet forwarding information
+ * @s_rule: switch rule structure to be filled in based on mac_entry
+ * @opc: switch rules population command type - pass in the command opcode
+ */
+static void
+ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
+ struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc)
+{
+ u16 vlan_id = ICE_MAX_VLAN_ID + 1;
+ u8 eth_hdr[DUMMY_ETH_HDR_LEN];
+ void *daddr = NULL;
+ u32 act = 0;
+ __be16 *off;
+
+ if (opc == ice_aqc_opc_remove_sw_rules) {
+ s_rule->pdata.lkup_tx_rx.act = 0;
+ s_rule->pdata.lkup_tx_rx.index =
+ cpu_to_le16(f_info->fltr_rule_id);
+ s_rule->pdata.lkup_tx_rx.hdr_len = 0;
+ return;
+ }
+
+ /* initialize the ether header with a dummy header */
+ memcpy(eth_hdr, dummy_eth_header, sizeof(dummy_eth_header));
+ ice_fill_sw_info(hw, f_info);
+
+ switch (f_info->fltr_act) {
+ case ICE_FWD_TO_VSI:
+ act |= (f_info->fwd_id.vsi_id << ICE_SINGLE_ACT_VSI_ID_S) &
+ ICE_SINGLE_ACT_VSI_ID_M;
+ if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+ act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+ ICE_SINGLE_ACT_VALID_BIT;
+ break;
+ case ICE_FWD_TO_VSI_LIST:
+ act |= ICE_SINGLE_ACT_VSI_LIST;
+ act |= (f_info->fwd_id.vsi_list_id <<
+ ICE_SINGLE_ACT_VSI_LIST_ID_S) &
+ ICE_SINGLE_ACT_VSI_LIST_ID_M;
+ if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+ act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+ ICE_SINGLE_ACT_VALID_BIT;
+ break;
+ case ICE_FWD_TO_Q:
+ act |= ICE_SINGLE_ACT_TO_Q;
+ act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+ ICE_SINGLE_ACT_Q_INDEX_M;
+ break;
+ case ICE_FWD_TO_QGRP:
+ act |= ICE_SINGLE_ACT_TO_Q;
+ act |= (f_info->qgrp_size << ICE_SINGLE_ACT_Q_REGION_S) &
+ ICE_SINGLE_ACT_Q_REGION_M;
+ break;
+ case ICE_DROP_PACKET:
+ act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP;
+ break;
+ default:
+ return;
+ }
+
+ if (f_info->lb_en)
+ act |= ICE_SINGLE_ACT_LB_ENABLE;
+ if (f_info->lan_en)
+ act |= ICE_SINGLE_ACT_LAN_ENABLE;
+
+ switch (f_info->lkup_type) {
+ case ICE_SW_LKUP_MAC:
+ daddr = f_info->l_data.mac.mac_addr;
+ break;
+ case ICE_SW_LKUP_VLAN:
+ vlan_id = f_info->l_data.vlan.vlan_id;
+ if (f_info->fltr_act == ICE_FWD_TO_VSI ||
+ f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+ act |= ICE_SINGLE_ACT_PRUNE;
+ act |= ICE_SINGLE_ACT_EGRESS | ICE_SINGLE_ACT_INGRESS;
+ }
+ break;
+ case ICE_SW_LKUP_ETHERTYPE_MAC:
+ daddr = f_info->l_data.ethertype_mac.mac_addr;
+ /* fall-through */
+ case ICE_SW_LKUP_ETHERTYPE:
+ off = (__be16 *)&eth_hdr[ICE_ETH_ETHTYPE_OFFSET];
+ *off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
+ break;
+ case ICE_SW_LKUP_MAC_VLAN:
+ daddr = f_info->l_data.mac_vlan.mac_addr;
+ vlan_id = f_info->l_data.mac_vlan.vlan_id;
+ break;
+ case ICE_SW_LKUP_PROMISC_VLAN:
+ vlan_id = f_info->l_data.mac_vlan.vlan_id;
+ /* fall-through */
+ case ICE_SW_LKUP_PROMISC:
+ daddr = f_info->l_data.mac_vlan.mac_addr;
+ break;
+ default:
+ break;
+ }
+
+ s_rule->type = (f_info->flag & ICE_FLTR_RX) ?
+ cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX) :
+ cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
+
+ /* Recipe set depending on lookup type */
+ s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(f_info->lkup_type);
+ s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(f_info->src);
+ s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act);
+
+ if (daddr)
+ ether_addr_copy(&eth_hdr[ICE_ETH_DA_OFFSET], daddr);
+
+	if (vlan_id <= ICE_MAX_VLAN_ID) {
+ off = (__be16 *)&eth_hdr[ICE_ETH_VLAN_TCI_OFFSET];
+ *off = cpu_to_be16(vlan_id);
+ }
+
+ /* Create the switch rule with the final dummy Ethernet header */
+ if (opc != ice_aqc_opc_update_sw_rules)
+ s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(sizeof(eth_hdr));
+
+ memcpy(s_rule->pdata.lkup_tx_rx.hdr, eth_hdr, sizeof(eth_hdr));
+}
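+
+/* Sketch of a filled rule (values illustrative): a unicast MAC
+ * forward-to-VSI filter on the Rx side ends up with type
+ * ICE_AQC_SW_RULES_T_LKUP_RX, recipe_id ICE_SW_LKUP_MAC, an act word of
+ * ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT plus the VSI id
+ * in the ICE_SINGLE_ACT_VSI_ID_M field, and the dummy header's DA bytes
+ * replaced by the filter's MAC address.
+ */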
+
+/**
+ * ice_add_marker_act
+ * @hw: pointer to the hardware structure
+ * @m_ent: the management entry for which sw marker needs to be added
+ * @sw_marker: sw marker to tag the Rx descriptor with
+ * @l_id: large action resource id
+ *
+ * Create a large action to hold software marker and update the switch rule
+ * entry pointed by m_ent with newly created large action
+ */
+static enum ice_status
+ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent,
+ u16 sw_marker, u16 l_id)
+{
+ struct ice_aqc_sw_rules_elem *lg_act, *rx_tx;
+ /* For software marker we need 3 large actions
+ * 1. FWD action: FWD TO VSI or VSI LIST
+ * 2. GENERIC VALUE action to hold the profile id
+ * 3. GENERIC VALUE action to hold the software marker id
+ */
+ const u16 num_lg_acts = 3;
+ enum ice_status status;
+ u16 lg_act_size;
+ u16 rules_size;
+ u16 vsi_info;
+ u32 act;
+
+ if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+ return ICE_ERR_PARAM;
+
+ /* Create two back-to-back switch rules and submit them to the HW using
+ * one memory buffer:
+ * 1. Large Action
+ * 2. Look up tx rx
+ */
+ lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(num_lg_acts);
+ rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE;
+ lg_act = devm_kzalloc(ice_hw_to_dev(hw), rules_size, GFP_KERNEL);
+ if (!lg_act)
+ return ICE_ERR_NO_MEMORY;
+
+ rx_tx = (struct ice_aqc_sw_rules_elem *)((u8 *)lg_act + lg_act_size);
+
+ /* Fill in the first switch rule i.e. large action */
+ lg_act->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT);
+ lg_act->pdata.lg_act.index = cpu_to_le16(l_id);
+ lg_act->pdata.lg_act.size = cpu_to_le16(num_lg_acts);
+
+ /* First action VSI forwarding or VSI list forwarding depending on how
+ * many VSIs
+ */
+ vsi_info = (m_ent->vsi_count > 1) ?
+ m_ent->fltr_info.fwd_id.vsi_list_id :
+ m_ent->fltr_info.fwd_id.vsi_id;
+
+ act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT;
+ act |= (vsi_info << ICE_LG_ACT_VSI_LIST_ID_S) &
+ ICE_LG_ACT_VSI_LIST_ID_M;
+ if (m_ent->vsi_count > 1)
+ act |= ICE_LG_ACT_VSI_LIST;
+ lg_act->pdata.lg_act.act[0] = cpu_to_le32(act);
+
+ /* Second action descriptor type */
+ act = ICE_LG_ACT_GENERIC;
+
+ act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M;
+ lg_act->pdata.lg_act.act[1] = cpu_to_le32(act);
+
+	act = (7 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M;
+
+ /* Third action Marker value */
+ act |= ICE_LG_ACT_GENERIC;
+ act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) &
+ ICE_LG_ACT_GENERIC_VALUE_M;
+
+	act |= (0 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M;
+ lg_act->pdata.lg_act.act[2] = cpu_to_le32(act);
+
+ /* call the fill switch rule to fill the lookup tx rx structure */
+ ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx,
+ ice_aqc_opc_update_sw_rules);
+
+ /* Update the action to point to the large action id */
+ rx_tx->pdata.lkup_tx_rx.act =
+ cpu_to_le32(ICE_SINGLE_ACT_PTR |
+ ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) &
+ ICE_SINGLE_ACT_PTR_VAL_M));
+
+ /* Use the filter rule id of the previously created rule with single
+ * act. Once the update happens, hardware will treat this as large
+ * action
+ */
+ rx_tx->pdata.lkup_tx_rx.index =
+ cpu_to_le16(m_ent->fltr_info.fltr_rule_id);
+
+ status = ice_aq_sw_rules(hw, lg_act, rules_size, 2,
+ ice_aqc_opc_update_sw_rules, NULL);
+ if (!status) {
+ m_ent->lg_act_idx = l_id;
+ m_ent->sw_marker_id = sw_marker;
+ }
+
+ devm_kfree(ice_hw_to_dev(hw), lg_act);
+ return status;
+}
+
+/**
+ * ice_create_vsi_list_map
+ * @hw: pointer to the hardware structure
+ * @vsi_array: array of VSIs to form a VSI list
+ * @num_vsi: num VSI in the array
+ * @vsi_list_id: VSI list id generated as part of allocate resource
+ *
+ * Helper function to create a new entry of VSI list id to VSI mapping
+ * using the given VSI list id
+ */
+static struct ice_vsi_list_map_info *
+ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+ u16 vsi_list_id)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_vsi_list_map_info *v_map;
+ int i;
+
+	v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL);
+ if (!v_map)
+ return NULL;
+
+ v_map->vsi_list_id = vsi_list_id;
+
+ for (i = 0; i < num_vsi; i++)
+ set_bit(vsi_array[i], v_map->vsi_map);
+
+ list_add(&v_map->list_entry, &sw->vsi_list_map_head);
+ return v_map;
+}
+
+/**
+ * ice_update_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_array: array of VSIs to form a VSI list
+ * @num_vsi: num VSI in the array
+ * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @remove: Boolean value to indicate if this is a remove action
+ * @opc: switch rules population command type - pass in the command opcode
+ * @lkup_type: lookup type of the filter
+ *
+ * Call AQ command to add a new switch rule or update existing switch rule
+ * using the given VSI list id
+ */
+static enum ice_status
+ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+ u16 vsi_list_id, bool remove, enum ice_adminq_opc opc,
+ enum ice_sw_lkup_type lkup_type)
+{
+ struct ice_aqc_sw_rules_elem *s_rule;
+ enum ice_status status;
+ u16 s_rule_size;
+ u16 type;
+ int i;
+
+ if (!num_vsi)
+ return ICE_ERR_PARAM;
+
+ if (lkup_type == ICE_SW_LKUP_MAC ||
+ lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+ lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+ lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+ lkup_type == ICE_SW_LKUP_PROMISC ||
+ lkup_type == ICE_SW_LKUP_PROMISC_VLAN)
+ type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR :
+ ICE_AQC_SW_RULES_T_VSI_LIST_SET;
+ else if (lkup_type == ICE_SW_LKUP_VLAN)
+ type = remove ? ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR :
+ ICE_AQC_SW_RULES_T_PRUNE_LIST_SET;
+ else
+ return ICE_ERR_PARAM;
+
+ s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(num_vsi);
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+
+ for (i = 0; i < num_vsi; i++)
+ s_rule->pdata.vsi_list.vsi[i] = cpu_to_le16(vsi_array[i]);
+
+ s_rule->type = cpu_to_le16(type);
+ s_rule->pdata.vsi_list.number_vsi = cpu_to_le16(num_vsi);
+ s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id);
+
+ status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL);
+
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
+
+/**
+ * ice_create_vsi_list_rule - Creates and populates a VSI list rule
+ * @hw: pointer to the hw struct
+ * @vsi_array: array of VSIs to form a VSI list
+ * @num_vsi: number of VSIs in the array
+ * @vsi_list_id: stores the ID of the VSI list to be created
+ * @lkup_type: switch rule filter's lookup type
+ */
+static enum ice_status
+ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+ u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type)
+{
+ enum ice_status status;
+ int i;
+
+ for (i = 0; i < num_vsi; i++)
+ if (vsi_array[i] >= ICE_MAX_VSI)
+ return ICE_ERR_OUT_OF_RANGE;
+
+ status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type,
+ ice_aqc_opc_alloc_res);
+ if (status)
+ return status;
+
+ /* Update the newly created VSI list to include the specified VSIs */
+ return ice_update_vsi_list_rule(hw, vsi_array, num_vsi, *vsi_list_id,
+ false, ice_aqc_opc_add_sw_rules,
+ lkup_type);
+}
+
+/**
+ * ice_create_pkt_fwd_rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: entry containing packet forwarding information
+ *
+ * Create switch rule with given filter information and add an entry
+ * to the corresponding filter management list to track this switch rule
+ * and VSI mapping
+ */
+static enum ice_status
+ice_create_pkt_fwd_rule(struct ice_hw *hw,
+ struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_mgmt_list_entry *fm_entry;
+ struct ice_aqc_sw_rules_elem *s_rule;
+ enum ice_sw_lkup_type l_type;
+ enum ice_status status;
+
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+ ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+ fm_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*fm_entry),
+ GFP_KERNEL);
+ if (!fm_entry) {
+ status = ICE_ERR_NO_MEMORY;
+ goto ice_create_pkt_fwd_rule_exit;
+ }
+
+ fm_entry->fltr_info = f_entry->fltr_info;
+
+ /* Initialize all the fields for the management entry */
+ fm_entry->vsi_count = 1;
+ fm_entry->lg_act_idx = ICE_INVAL_LG_ACT_INDEX;
+ fm_entry->sw_marker_id = ICE_INVAL_SW_MARKER_ID;
+ fm_entry->counter_index = ICE_INVAL_COUNTER_ID;
+
+ ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule,
+ ice_aqc_opc_add_sw_rules);
+
+ status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
+ ice_aqc_opc_add_sw_rules, NULL);
+ if (status) {
+ devm_kfree(ice_hw_to_dev(hw), fm_entry);
+ goto ice_create_pkt_fwd_rule_exit;
+ }
+
+ f_entry->fltr_info.fltr_rule_id =
+ le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+ fm_entry->fltr_info.fltr_rule_id =
+ le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+
+ /* The book keeping entries will get removed when base driver
+ * calls remove filter AQ command
+ */
+ l_type = fm_entry->fltr_info.lkup_type;
+ if (l_type == ICE_SW_LKUP_MAC) {
+ mutex_lock(&sw->mac_list_lock);
+ list_add(&fm_entry->list_entry, &sw->mac_list_head);
+ mutex_unlock(&sw->mac_list_lock);
+ } else if (l_type == ICE_SW_LKUP_VLAN) {
+ mutex_lock(&sw->vlan_list_lock);
+ list_add(&fm_entry->list_entry, &sw->vlan_list_head);
+ mutex_unlock(&sw->vlan_list_lock);
+ } else if (l_type == ICE_SW_LKUP_ETHERTYPE ||
+ l_type == ICE_SW_LKUP_ETHERTYPE_MAC) {
+ mutex_lock(&sw->eth_m_list_lock);
+ list_add(&fm_entry->list_entry, &sw->eth_m_list_head);
+ mutex_unlock(&sw->eth_m_list_lock);
+ } else if (l_type == ICE_SW_LKUP_PROMISC ||
+ l_type == ICE_SW_LKUP_PROMISC_VLAN) {
+ mutex_lock(&sw->promisc_list_lock);
+ list_add(&fm_entry->list_entry, &sw->promisc_list_head);
+ mutex_unlock(&sw->promisc_list_lock);
+ } else if (fm_entry->fltr_info.lkup_type == ICE_SW_LKUP_MAC_VLAN) {
+ mutex_lock(&sw->mac_vlan_list_lock);
+ list_add(&fm_entry->list_entry, &sw->mac_vlan_list_head);
+ mutex_unlock(&sw->mac_vlan_list_lock);
+ } else {
+ status = ICE_ERR_NOT_IMPL;
+ }
+ice_create_pkt_fwd_rule_exit:
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
+
+/**
+ * ice_update_pkt_fwd_rule
+ * @hw: pointer to the hardware structure
+ * @rule_id: rule id of the previously created switch rule to update
+ * @vsi_list_id: VSI list id to be updated with
+ * @f_info: ice_fltr_info to pull other information for switch rule
+ *
+ * Call AQ command to update a previously created switch rule with a
+ * VSI list id
+ */
+static enum ice_status
+ice_update_pkt_fwd_rule(struct ice_hw *hw, u16 rule_id, u16 vsi_list_id,
+ struct ice_fltr_info f_info)
+{
+ struct ice_aqc_sw_rules_elem *s_rule;
+ struct ice_fltr_info tmp_fltr;
+ enum ice_status status;
+
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+ ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+
+ tmp_fltr = f_info;
+ tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+ tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+
+ ice_fill_sw_rule(hw, &tmp_fltr, s_rule,
+ ice_aqc_opc_update_sw_rules);
+
+ s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(rule_id);
+
+ /* Update switch rule with new rule set to forward VSI list */
+ status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
+ ice_aqc_opc_update_sw_rules, NULL);
+
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
+
+/**
+ * ice_handle_vsi_list_mgmt
+ * @hw: pointer to the hardware structure
+ * @m_entry: pointer to current filter management list entry
+ * @cur_fltr: filter information from the book keeping entry
+ * @new_fltr: filter information with the new VSI to be added
+ *
+ * Call AQ command to add or update previously created VSI list with new VSI.
+ *
+ * Helper function to do the book keeping associated with adding filter
+ * information. The book keeping algorithm is as follows:
+ * When a VSI needs to subscribe to a given filter (MAC/VLAN/Ethtype etc.)
+ *	if only one VSI has been added so far
+ *		Allocate a new VSI list and add the two VSIs
+ *		to this list using a switch rule command
+ *		Update the previously created switch rule with the
+ *		newly created VSI list id
+ *	if a VSI list was previously created
+ *		Add the new VSI to the previously created VSI list
+ *		using the update switch rule command
+ */
+static enum ice_status
+ice_handle_vsi_list_mgmt(struct ice_hw *hw,
+ struct ice_fltr_mgmt_list_entry *m_entry,
+ struct ice_fltr_info *cur_fltr,
+ struct ice_fltr_info *new_fltr)
+{
+ enum ice_status status = 0;
+ u16 vsi_list_id = 0;
+
+ if ((cur_fltr->fltr_act == ICE_FWD_TO_Q ||
+ cur_fltr->fltr_act == ICE_FWD_TO_QGRP))
+ return ICE_ERR_NOT_IMPL;
+
+ if ((new_fltr->fltr_act == ICE_FWD_TO_Q ||
+ new_fltr->fltr_act == ICE_FWD_TO_QGRP) &&
+ (cur_fltr->fltr_act == ICE_FWD_TO_VSI ||
+ cur_fltr->fltr_act == ICE_FWD_TO_VSI_LIST))
+ return ICE_ERR_NOT_IMPL;
+
+ if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) {
+ /* Only one entry existed in the mapping and it was not already
+ * a part of a VSI list. So, create a VSI list with the old and
+ * new VSIs.
+ */
+ u16 vsi_id_arr[2];
+ u16 fltr_rule;
+
+ /* A rule already exists with the new VSI being added */
+ if (cur_fltr->fwd_id.vsi_id == new_fltr->fwd_id.vsi_id)
+ return ICE_ERR_ALREADY_EXISTS;
+
+ vsi_id_arr[0] = cur_fltr->fwd_id.vsi_id;
+ vsi_id_arr[1] = new_fltr->fwd_id.vsi_id;
+ status = ice_create_vsi_list_rule(hw, &vsi_id_arr[0], 2,
+ &vsi_list_id,
+ new_fltr->lkup_type);
+ if (status)
+ return status;
+
+ fltr_rule = cur_fltr->fltr_rule_id;
+ /* Update the previous switch rule of "MAC forward to VSI" to
+ * "MAC fwd to VSI list"
+ */
+ status = ice_update_pkt_fwd_rule(hw, fltr_rule, vsi_list_id,
+ *new_fltr);
+ if (status)
+ return status;
+
+ cur_fltr->fwd_id.vsi_list_id = vsi_list_id;
+ cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+ m_entry->vsi_list_info =
+ ice_create_vsi_list_map(hw, &vsi_id_arr[0], 2,
+ vsi_list_id);
+
+ /* If this entry was large action then the large action needs
+ * to be updated to point to FWD to VSI list
+ */
+ if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID)
+ status =
+ ice_add_marker_act(hw, m_entry,
+ m_entry->sw_marker_id,
+ m_entry->lg_act_idx);
+ } else {
+ u16 vsi_id = new_fltr->fwd_id.vsi_id;
+ enum ice_adminq_opc opcode;
+
+ /* A rule already exists with the new VSI being added */
+ if (test_bit(vsi_id, m_entry->vsi_list_info->vsi_map))
+ return 0;
+
+ /* Update the previously created VSI list set with
+ * the new VSI id passed in
+ */
+ vsi_list_id = cur_fltr->fwd_id.vsi_list_id;
+ opcode = ice_aqc_opc_update_sw_rules;
+
+ status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id,
+ false, opcode,
+ new_fltr->lkup_type);
+ /* update VSI list mapping info with new VSI id */
+ if (!status)
+ set_bit(vsi_id, m_entry->vsi_list_info->vsi_map);
+ }
+ if (!status)
+ m_entry->vsi_count++;
+ return status;
+}
+
+/**
+ * ice_find_mac_entry
+ * @hw: pointer to the hardware structure
+ * @mac_addr: MAC address to search for
+ *
+ * Helper function to search for a MAC entry using a given MAC address
+ * Returns pointer to the entry if found.
+ */
+static struct ice_fltr_mgmt_list_entry *
+ice_find_mac_entry(struct ice_hw *hw, u8 *mac_addr)
+{
+ struct ice_fltr_mgmt_list_entry *m_list_itr, *mac_ret = NULL;
+ struct ice_switch_info *sw = hw->switch_info;
+
+ mutex_lock(&sw->mac_list_lock);
+ list_for_each_entry(m_list_itr, &sw->mac_list_head, list_entry) {
+ u8 *buf = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
+
+ if (ether_addr_equal(buf, mac_addr)) {
+ mac_ret = m_list_itr;
+ break;
+ }
+ }
+ mutex_unlock(&sw->mac_list_lock);
+ return mac_ret;
+}
+
+/**
+ * ice_add_shared_mac - Add one MAC shared filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: structure containing MAC forwarding information
+ *
+ * Adds or updates the book keeping list for the MAC addresses
+ */
+static enum ice_status
+ice_add_shared_mac(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_fltr_info *new_fltr, *cur_fltr;
+ struct ice_fltr_mgmt_list_entry *m_entry;
+
+ new_fltr = &f_entry->fltr_info;
+
+ m_entry = ice_find_mac_entry(hw, &new_fltr->l_data.mac.mac_addr[0]);
+ if (!m_entry)
+ return ice_create_pkt_fwd_rule(hw, f_entry);
+
+ cur_fltr = &m_entry->fltr_info;
+
+ return ice_handle_vsi_list_mgmt(hw, m_entry, cur_fltr, new_fltr);
+}
+
+/**
+ * ice_add_mac - Add a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * IMPORTANT: When the ucast_shared flag is set to false and m_list has
+ * multiple unicast addresses, the function assumes that all the
+ * addresses are unique in a given add_mac call. It doesn't
+ * check for duplicates in this case; removing duplicates from a given
+ * list is the responsibility of the caller of this function.
+ */
+enum ice_status
+ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+ struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
+ struct ice_fltr_list_entry *m_list_itr;
+ u16 elem_sent, total_elem_left;
+ enum ice_status status = 0;
+ u16 num_unicast = 0;
+ u16 s_rule_size;
+
+ if (!m_list || !hw)
+ return ICE_ERR_PARAM;
+
+ list_for_each_entry(m_list_itr, m_list, list_entry) {
+ u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
+
+ if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+ return ICE_ERR_PARAM;
+ if (is_zero_ether_addr(add))
+ return ICE_ERR_PARAM;
+ if (is_unicast_ether_addr(add) && !hw->ucast_shared) {
+ /* Don't overwrite the unicast address */
+ if (ice_find_mac_entry(hw, add))
+ return ICE_ERR_ALREADY_EXISTS;
+ num_unicast++;
+ } else if (is_multicast_ether_addr(add) ||
+ (is_unicast_ether_addr(add) && hw->ucast_shared)) {
+ status = ice_add_shared_mac(hw, m_list_itr);
+ if (status) {
+ m_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
+ return status;
+ }
+ m_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+ }
+ }
+
+ /* Exit if no suitable entries were found for adding bulk switch rule */
+ if (!num_unicast)
+ return 0;
+
+ /* Allocate switch rule buffer for the bulk update for unicast */
+ s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE;
+ s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size,
+ GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+
+ r_iter = s_rule;
+ list_for_each_entry(m_list_itr, m_list, list_entry) {
+ struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
+ u8 *addr = &f_info->l_data.mac.mac_addr[0];
+
+ if (is_unicast_ether_addr(addr)) {
+ ice_fill_sw_rule(hw, &m_list_itr->fltr_info,
+ r_iter, ice_aqc_opc_add_sw_rules);
+ r_iter = (struct ice_aqc_sw_rules_elem *)
+ ((u8 *)r_iter + s_rule_size);
+ }
+ }
+
+ /* Call AQ bulk switch rule update for all unicast addresses */
+ r_iter = s_rule;
+	/* Send AQ switch rules in chunks of at most ICE_AQ_MAX_BUF_LEN bytes */
+ for (total_elem_left = num_unicast; total_elem_left > 0;
+ total_elem_left -= elem_sent) {
+ struct ice_aqc_sw_rules_elem *entry = r_iter;
+
+ elem_sent = min(total_elem_left,
+ (u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size));
+ status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size,
+ elem_sent, ice_aqc_opc_add_sw_rules,
+ NULL);
+ if (status)
+ goto ice_add_mac_exit;
+ r_iter = (struct ice_aqc_sw_rules_elem *)
+ ((u8 *)r_iter + (elem_sent * s_rule_size));
+ }
+
+	/* Fill in the rule ids based on the values returned from FW */
+ r_iter = s_rule;
+ list_for_each_entry(m_list_itr, m_list, list_entry) {
+ struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
+ u8 *addr = &f_info->l_data.mac.mac_addr[0];
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_mgmt_list_entry *fm_entry;
+
+ if (is_unicast_ether_addr(addr)) {
+ f_info->fltr_rule_id =
+ le16_to_cpu(r_iter->pdata.lkup_tx_rx.index);
+ f_info->fltr_act = ICE_FWD_TO_VSI;
+ /* Create an entry to track this MAC address */
+ fm_entry = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*fm_entry), GFP_KERNEL);
+ if (!fm_entry) {
+ status = ICE_ERR_NO_MEMORY;
+ goto ice_add_mac_exit;
+ }
+ fm_entry->fltr_info = *f_info;
+ fm_entry->vsi_count = 1;
+ /* The book keeping entries will get removed when
+ * base driver calls remove filter AQ command
+ */
+ mutex_lock(&sw->mac_list_lock);
+ list_add(&fm_entry->list_entry, &sw->mac_list_head);
+ mutex_unlock(&sw->mac_list_lock);
+
+ r_iter = (struct ice_aqc_sw_rules_elem *)
+ ((u8 *)r_iter + s_rule_size);
+ }
+ }
+
+ice_add_mac_exit:
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
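+
+/* Minimal caller sketch (names illustrative, error handling trimmed;
+ * assumes dev, hw, vsi_id and mac are already set up):
+ *
+ *	struct ice_fltr_list_entry *entry;
+ *	LIST_HEAD(mac_list);
+ *
+ *	entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL);
+ *	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
+ *	entry->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ *	entry->fltr_info.fwd_id.vsi_id = vsi_id;
+ *	ether_addr_copy(entry->fltr_info.l_data.mac.mac_addr, mac);
+ *	list_add(&entry->list_entry, &mac_list);
+ *	status = ice_add_mac(hw, &mac_list);
+ */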
+
+/**
+ * ice_find_vlan_entry
+ * @hw: pointer to the hardware structure
+ * @vlan_id: VLAN id to search for
+ *
+ * Helper function to search for a VLAN entry using a given VLAN id
+ * Returns pointer to the entry if found.
+ */
+static struct ice_fltr_mgmt_list_entry *
+ice_find_vlan_entry(struct ice_hw *hw, u16 vlan_id)
+{
+ struct ice_fltr_mgmt_list_entry *vlan_list_itr, *vlan_ret = NULL;
+ struct ice_switch_info *sw = hw->switch_info;
+
+ mutex_lock(&sw->vlan_list_lock);
+ list_for_each_entry(vlan_list_itr, &sw->vlan_list_head, list_entry)
+ if (vlan_list_itr->fltr_info.l_data.vlan.vlan_id == vlan_id) {
+ vlan_ret = vlan_list_itr;
+ break;
+ }
+
+ mutex_unlock(&sw->vlan_list_lock);
+ return vlan_ret;
+}
+
+/**
+ * ice_add_vlan_internal - Add one VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: filter entry containing one VLAN information
+ */
+static enum ice_status
+ice_add_vlan_internal(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_fltr_info *new_fltr, *cur_fltr;
+ struct ice_fltr_mgmt_list_entry *v_list_itr;
+ u16 vlan_id;
+
+ new_fltr = &f_entry->fltr_info;
+ /* VLAN id should only be 12 bits */
+ if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID)
+ return ICE_ERR_PARAM;
+
+ vlan_id = new_fltr->l_data.vlan.vlan_id;
+ v_list_itr = ice_find_vlan_entry(hw, vlan_id);
+ if (!v_list_itr) {
+ u16 vsi_id = ICE_VSI_INVAL_ID;
+ enum ice_status status;
+ u16 vsi_list_id = 0;
+
+ if (new_fltr->fltr_act == ICE_FWD_TO_VSI) {
+ enum ice_sw_lkup_type lkup_type = new_fltr->lkup_type;
+
+ /* All VLAN pruning rules use a VSI list.
+ * Convert the action to forwarding to a VSI list.
+ */
+ vsi_id = new_fltr->fwd_id.vsi_id;
+ status = ice_create_vsi_list_rule(hw, &vsi_id, 1,
+ &vsi_list_id,
+ lkup_type);
+ if (status)
+ return status;
+ new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+ new_fltr->fwd_id.vsi_list_id = vsi_list_id;
+ }
+
+ status = ice_create_pkt_fwd_rule(hw, f_entry);
+ if (!status && vsi_id != ICE_VSI_INVAL_ID) {
+ v_list_itr = ice_find_vlan_entry(hw, vlan_id);
+ if (!v_list_itr)
+ return ICE_ERR_DOES_NOT_EXIST;
+ v_list_itr->vsi_list_info =
+ ice_create_vsi_list_map(hw, &vsi_id, 1,
+ vsi_list_id);
+ }
+
+ return status;
+ }
+
+ cur_fltr = &v_list_itr->fltr_info;
+ return ice_handle_vsi_list_mgmt(hw, v_list_itr, cur_fltr, new_fltr);
+}
+
+/**
+ * ice_add_vlan - Add VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+enum ice_status
+ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+ struct ice_fltr_list_entry *v_list_itr;
+
+ if (!v_list || !hw)
+ return ICE_ERR_PARAM;
+
+ list_for_each_entry(v_list_itr, v_list, list_entry) {
+ enum ice_status status;
+
+ if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN)
+ return ICE_ERR_PARAM;
+
+ status = ice_add_vlan_internal(hw, v_list_itr);
+ if (status) {
+ v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
+ return status;
+ }
+ v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+ }
+ return 0;
+}
+
+/**
+ * ice_remove_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @lkup_type: switch rule filter lookup type
+ */
+static enum ice_status
+ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id,
+ enum ice_sw_lkup_type lkup_type)
+{
+ struct ice_aqc_sw_rules_elem *s_rule;
+ enum ice_status status;
+ u16 s_rule_size;
+
+ s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(0);
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+
+ s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR);
+ s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id);
+	/* FW expects the number of VSIs in the vsi_list resource to be 0 for
+	 * the clear command. Since memory is zeroed out during initialization,
+	 * it's not necessary to explicitly initialize the variable to 0.
+	 */
+
+ status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1,
+ ice_aqc_opc_remove_sw_rules, NULL);
+ if (!status)
+ /* Free the vsi_list resource that we allocated */
+ status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type,
+ ice_aqc_opc_free_res);
+
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
+
+/**
+ * ice_handle_rem_vsi_list_mgmt
+ * @hw: pointer to the hardware structure
+ * @vsi_id: ID of the VSI to remove
+ * @fm_list_itr: filter management entry for which the VSI list management
+ * needs to be done
+ */
+static enum ice_status
+ice_handle_rem_vsi_list_mgmt(struct ice_hw *hw, u16 vsi_id,
+ struct ice_fltr_mgmt_list_entry *fm_list_itr)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ enum ice_status status = 0;
+ enum ice_sw_lkup_type lkup_type;
+ bool is_last_elem = true;
+ bool conv_list = false;
+ bool del_list = false;
+ u16 vsi_list_id;
+
+ lkup_type = fm_list_itr->fltr_info.lkup_type;
+ vsi_list_id = fm_list_itr->fltr_info.fwd_id.vsi_list_id;
+
+ if (fm_list_itr->vsi_count > 1) {
+ status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id,
+ true,
+ ice_aqc_opc_update_sw_rules,
+ lkup_type);
+ if (status)
+ return status;
+ fm_list_itr->vsi_count--;
+ is_last_elem = false;
+ clear_bit(vsi_id, fm_list_itr->vsi_list_info->vsi_map);
+ }
+
+ /* For non-VLAN rules that forward packets to a VSI list, convert them
+ * to forwarding packets to a VSI if there is only one VSI left in the
+ * list. Unused lists are then removed.
+ * VLAN rules need to use VSI lists even with only one VSI.
+ */
+ if (fm_list_itr->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST) {
+ if (lkup_type == ICE_SW_LKUP_VLAN) {
+ del_list = is_last_elem;
+ } else if (fm_list_itr->vsi_count == 1) {
+ conv_list = true;
+ del_list = true;
+ }
+ }
+
+ if (del_list) {
+ /* Remove the VSI list since it is no longer used */
+ struct ice_vsi_list_map_info *vsi_list_info =
+ fm_list_itr->vsi_list_info;
+
+ status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
+ if (status)
+ return status;
+
+ if (conv_list) {
+ u16 rem_vsi_id;
+
+ rem_vsi_id = find_first_bit(vsi_list_info->vsi_map,
+ ICE_MAX_VSI);
+
+ /* Error out when the expected last element is not in
+ * the VSI list map
+ */
+ if (rem_vsi_id == ICE_MAX_VSI)
+ return ICE_ERR_OUT_OF_RANGE;
+
+ /* Change the list entry action from VSI_LIST to VSI */
+ fm_list_itr->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ fm_list_itr->fltr_info.fwd_id.vsi_id = rem_vsi_id;
+ }
+
+ list_del(&vsi_list_info->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
+ fm_list_itr->vsi_list_info = NULL;
+ }
+
+ if (conv_list) {
+ /* Convert the rule's forward action to forwarding packets to
+ * a VSI
+ */
+ struct ice_aqc_sw_rules_elem *s_rule;
+
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+ ICE_SW_RULE_RX_TX_ETH_HDR_SIZE,
+ GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+
+ ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule,
+ ice_aqc_opc_update_sw_rules);
+
+ s_rule->pdata.lkup_tx_rx.index =
+ cpu_to_le16(fm_list_itr->fltr_info.fltr_rule_id);
+
+ status = ice_aq_sw_rules(hw, s_rule,
+ ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
+ ice_aqc_opc_update_sw_rules, NULL);
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ if (status)
+ return status;
+ }
+
+ if (is_last_elem) {
+ /* Remove the lookup rule */
+ struct ice_aqc_sw_rules_elem *s_rule;
+
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+ ICE_SW_RULE_RX_TX_NO_HDR_SIZE,
+ GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+
+ ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule,
+ ice_aqc_opc_remove_sw_rules);
+
+ status = ice_aq_sw_rules(hw, s_rule,
+ ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1,
+ ice_aqc_opc_remove_sw_rules, NULL);
+ if (status)
+ return status;
+
+ /* Remove a book keeping entry from the MAC address list */
+ mutex_lock(&sw->mac_list_lock);
+ list_del(&fm_list_itr->list_entry);
+ mutex_unlock(&sw->mac_list_lock);
+ devm_kfree(ice_hw_to_dev(hw), fm_list_itr);
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ }
+ return status;
+}
+
+/**
+ * ice_remove_mac_entry
+ * @hw: pointer to the hardware structure
+ * @f_entry: structure containing MAC forwarding information
+ */
+static enum ice_status
+ice_remove_mac_entry(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_fltr_mgmt_list_entry *m_entry;
+ u16 vsi_id;
+ u8 *add;
+
+ add = &f_entry->fltr_info.l_data.mac.mac_addr[0];
+
+ m_entry = ice_find_mac_entry(hw, add);
+ if (!m_entry)
+ return ICE_ERR_PARAM;
+
+ vsi_id = f_entry->fltr_info.fwd_id.vsi_id;
+ return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, m_entry);
+}
+
+/**
+ * ice_remove_mac - remove a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * This function removes either a MAC filter rule or a specific VSI from a
+ * VSI list for a multicast MAC address.
+ *
+ * Returns ICE_ERR_DOES_NOT_EXIST if a given entry was not added by
+ * ice_add_mac. Caller should be aware that this call will only work if all
+ * the entries passed into m_list were added previously. It will not attempt to
+ * do a partial remove of entries that were found.
+ */
+enum ice_status
+ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+ struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
+ u8 s_rule_size = ICE_SW_RULE_RX_TX_NO_HDR_SIZE;
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_mgmt_list_entry *m_entry;
+ struct ice_fltr_list_entry *m_list_itr;
+ u16 elem_sent, total_elem_left;
+ enum ice_status status = 0;
+ u16 num_unicast = 0;
+
+ if (!m_list)
+ return ICE_ERR_PARAM;
+
+ list_for_each_entry(m_list_itr, m_list, list_entry) {
+ u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
+
+ if (is_unicast_ether_addr(addr) && !hw->ucast_shared)
+ num_unicast++;
+ else if (is_multicast_ether_addr(addr) ||
+ (is_unicast_ether_addr(addr) && hw->ucast_shared))
+ ice_remove_mac_entry(hw, m_list_itr);
+ }
+
+ /* Exit if no unicast addresses were found. Multicast switch rules
+ * were removed individually above
+ */
+ if (!num_unicast)
+ return 0;
+
+ /* Allocate switch rule buffer for the bulk update for unicast */
+ s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size,
+ GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+
+ r_iter = s_rule;
+ list_for_each_entry(m_list_itr, m_list, list_entry) {
+ u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
+
+ if (is_unicast_ether_addr(addr)) {
+ m_entry = ice_find_mac_entry(hw, addr);
+ if (!m_entry) {
+ status = ICE_ERR_DOES_NOT_EXIST;
+ goto ice_remove_mac_exit;
+ }
+
+ ice_fill_sw_rule(hw, &m_entry->fltr_info,
+ r_iter, ice_aqc_opc_remove_sw_rules);
+ r_iter = (struct ice_aqc_sw_rules_elem *)
+ ((u8 *)r_iter + s_rule_size);
+ }
+ }
+
+ /* Call AQ bulk switch rule update for all unicast addresses */
+ r_iter = s_rule;
+ /* Issue the AQ switch rule update in chunks of at most
+ * ICE_AQ_MAX_BUF_LEN bytes
+ */
+ for (total_elem_left = num_unicast; total_elem_left > 0;
+ total_elem_left -= elem_sent) {
+ struct ice_aqc_sw_rules_elem *entry = r_iter;
+
+ elem_sent = min(total_elem_left,
+ (u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size));
+ status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size,
+ elem_sent, ice_aqc_opc_remove_sw_rules,
+ NULL);
+ if (status)
+ break;
+ r_iter = (struct ice_aqc_sw_rules_elem *)
+ ((u8 *)r_iter + s_rule_size);
+ }
+
+ list_for_each_entry(m_list_itr, m_list, list_entry) {
+ u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
+
+ if (is_unicast_ether_addr(addr)) {
+ m_entry = ice_find_mac_entry(hw, addr);
+ if (!m_entry)
+ return ICE_ERR_OUT_OF_RANGE;
+ mutex_lock(&sw->mac_list_lock);
+ list_del(&m_entry->list_entry);
+ mutex_unlock(&sw->mac_list_lock);
+ devm_kfree(ice_hw_to_dev(hw), m_entry);
+ }
+ }
+
+ice_remove_mac_exit:
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
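+
+/* Usage sketch for ice_remove_mac() (illustrative only; hw, vsi_id, mac and
+ * status are assumed caller state, and the entry is assumed to have been
+ * added earlier through ice_add_mac() with the same MAC and VSI):
+ *
+ *	LIST_HEAD(tmp_list);
+ *	struct ice_fltr_list_entry entry = { 0 };
+ *
+ *	entry.fltr_info.lkup_type = ICE_SW_LKUP_MAC;
+ *	entry.fltr_info.fwd_id.vsi_id = vsi_id;
+ *	ether_addr_copy(entry.fltr_info.l_data.mac.mac_addr, mac);
+ *	list_add(&entry.list_entry, &tmp_list);
+ *	status = ice_remove_mac(hw, &tmp_list);
+ */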
+
+/**
+ * ice_cfg_dflt_vsi - add filter rule to set/unset given VSI as default
+ * VSI for the switch (represented by swid)
+ * @hw: pointer to the hardware structure
+ * @vsi_id: number of VSI to set as default
+ * @set: true to add the above mentioned switch rule, false to remove it
+ * @direction: ICE_FLTR_RX or ICE_FLTR_TX
+ */
+enum ice_status
+ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction)
+{
+ struct ice_aqc_sw_rules_elem *s_rule;
+ struct ice_fltr_info f_info;
+ enum ice_adminq_opc opcode;
+ enum ice_status status;
+ u16 s_rule_size;
+
+ s_rule_size = set ? ICE_SW_RULE_RX_TX_ETH_HDR_SIZE :
+ ICE_SW_RULE_RX_TX_NO_HDR_SIZE;
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+ if (!s_rule)
+ return ICE_ERR_NO_MEMORY;
+
+ memset(&f_info, 0, sizeof(f_info));
+
+ f_info.lkup_type = ICE_SW_LKUP_DFLT;
+ f_info.flag = direction;
+ f_info.fltr_act = ICE_FWD_TO_VSI;
+ f_info.fwd_id.vsi_id = vsi_id;
+
+ if (f_info.flag & ICE_FLTR_RX) {
+ f_info.src = hw->port_info->lport;
+ if (!set)
+ f_info.fltr_rule_id =
+ hw->port_info->dflt_rx_vsi_rule_id;
+ } else if (f_info.flag & ICE_FLTR_TX) {
+ f_info.src = vsi_id;
+ if (!set)
+ f_info.fltr_rule_id =
+ hw->port_info->dflt_tx_vsi_rule_id;
+ }
+
+ if (set)
+ opcode = ice_aqc_opc_add_sw_rules;
+ else
+ opcode = ice_aqc_opc_remove_sw_rules;
+
+ ice_fill_sw_rule(hw, &f_info, s_rule, opcode);
+
+ status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opcode, NULL);
+ if (status || !(f_info.flag & ICE_FLTR_TX_RX))
+ goto out;
+ if (set) {
+ u16 index = le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+
+ if (f_info.flag & ICE_FLTR_TX) {
+ hw->port_info->dflt_tx_vsi_num = vsi_id;
+ hw->port_info->dflt_tx_vsi_rule_id = index;
+ } else if (f_info.flag & ICE_FLTR_RX) {
+ hw->port_info->dflt_rx_vsi_num = vsi_id;
+ hw->port_info->dflt_rx_vsi_rule_id = index;
+ }
+ } else {
+ if (f_info.flag & ICE_FLTR_TX) {
+ hw->port_info->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL;
+ hw->port_info->dflt_tx_vsi_rule_id = ICE_INVAL_ACT;
+ } else if (f_info.flag & ICE_FLTR_RX) {
+ hw->port_info->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL;
+ hw->port_info->dflt_rx_vsi_rule_id = ICE_INVAL_ACT;
+ }
+ }
+
+out:
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
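+
+/* Usage sketch (illustrative; hw and vsi_id are assumed caller state):
+ * make vsi_id the default Rx VSI for the switch, then clear it again later.
+ *
+ *	status = ice_cfg_dflt_vsi(hw, vsi_id, true, ICE_FLTR_RX);
+ *	...
+ *	status = ice_cfg_dflt_vsi(hw, vsi_id, false, ICE_FLTR_RX);
+ */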
+
+/**
+ * ice_remove_vlan_internal - Remove one VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: filter entry containing one VLAN information
+ */
+static enum ice_status
+ice_remove_vlan_internal(struct ice_hw *hw,
+ struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_fltr_info *new_fltr;
+ struct ice_fltr_mgmt_list_entry *v_list_elem;
+ u16 vsi_id;
+
+ new_fltr = &f_entry->fltr_info;
+
+ v_list_elem = ice_find_vlan_entry(hw, new_fltr->l_data.vlan.vlan_id);
+ if (!v_list_elem)
+ return ICE_ERR_PARAM;
+
+ vsi_id = f_entry->fltr_info.fwd_id.vsi_id;
+ return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, v_list_elem);
+}
+
+/**
+ * ice_remove_vlan - Remove VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+enum ice_status
+ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+ struct ice_fltr_list_entry *v_list_itr;
+ enum ice_status status = 0;
+
+ if (!v_list || !hw)
+ return ICE_ERR_PARAM;
+
+ list_for_each_entry(v_list_itr, v_list, list_entry) {
+ status = ice_remove_vlan_internal(hw, v_list_itr);
+ if (status) {
+ v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
+ return status;
+ }
+ v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+ }
+ return status;
+}
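+
+/* Usage sketch (illustrative; mirrors the ice_remove_mac() sketch above,
+ * with vid and vsi_id assumed caller state):
+ *
+ *	entry.fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
+ *	entry.fltr_info.l_data.vlan.vlan_id = vid;
+ *	entry.fltr_info.fwd_id.vsi_id = vsi_id;
+ *	list_add(&entry.list_entry, &v_list);
+ *	status = ice_remove_vlan(hw, &v_list);
+ *
+ * On return, each entry's status field holds ICE_FLTR_STATUS_FW_SUCCESS or
+ * ICE_FLTR_STATUS_FW_FAIL.
+ */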
+
+/**
+ * ice_add_to_vsi_fltr_list - Add VSI filters to the list
+ * @hw: pointer to the hardware structure
+ * @vsi_id: ID of VSI to remove filters from
+ * @lkup_list_head: pointer to the list that has certain lookup type filters
+ * @vsi_list_head: pointer to the list pertaining to VSI with vsi_id
+ */
+static enum ice_status
+ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_id,
+ struct list_head *lkup_list_head,
+ struct list_head *vsi_list_head)
+{
+ struct ice_fltr_mgmt_list_entry *fm_entry;
+
+ /* check to make sure VSI id is valid and within boundary */
+ if (vsi_id >=
+ (sizeof(fm_entry->vsi_list_info->vsi_map) * BITS_PER_BYTE - 1))
+ return ICE_ERR_PARAM;
+
+ list_for_each_entry(fm_entry, lkup_list_head, list_entry) {
+ struct ice_fltr_info *fi;
+
+ fi = &fm_entry->fltr_info;
+ if ((fi->fltr_act == ICE_FWD_TO_VSI &&
+ fi->fwd_id.vsi_id == vsi_id) ||
+ (fi->fltr_act == ICE_FWD_TO_VSI_LIST &&
+ (test_bit(vsi_id, fm_entry->vsi_list_info->vsi_map)))) {
+ struct ice_fltr_list_entry *tmp;
+
+ /* this memory is freed up in the caller function
+ * ice_remove_vsi_lkup_fltr() once filters for
+ * this VSI are removed
+ */
+ tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp),
+ GFP_KERNEL);
+ if (!tmp)
+ return ICE_ERR_NO_MEMORY;
+
+ memcpy(&tmp->fltr_info, fi, sizeof(*fi));
+
+ /* The below fields are expected to be set to ICE_FWD_TO_VSI
+ * and to the particular VSI id, since we are only removing this
+ * one VSI
+ */
+ if (fi->fltr_act == ICE_FWD_TO_VSI_LIST) {
+ tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ tmp->fltr_info.fwd_id.vsi_id = vsi_id;
+ }
+
+ list_add(&tmp->list_entry, vsi_list_head);
+ }
+ }
+ return 0;
+}
+
+/**
+ * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_id: ID of VSI to remove filters from
+ * @lkup: switch rule filter lookup type
+ */
+static void
+ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_id,
+ enum ice_sw_lkup_type lkup)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_list_entry *fm_entry;
+ struct list_head remove_list_head;
+ struct ice_fltr_list_entry *tmp;
+ enum ice_status status;
+
+ INIT_LIST_HEAD(&remove_list_head);
+ switch (lkup) {
+ case ICE_SW_LKUP_MAC:
+ mutex_lock(&sw->mac_list_lock);
+ status = ice_add_to_vsi_fltr_list(hw, vsi_id,
+ &sw->mac_list_head,
+ &remove_list_head);
+ mutex_unlock(&sw->mac_list_lock);
+ if (!status) {
+ ice_remove_mac(hw, &remove_list_head);
+ goto free_fltr_list;
+ }
+ break;
+ case ICE_SW_LKUP_VLAN:
+ mutex_lock(&sw->vlan_list_lock);
+ status = ice_add_to_vsi_fltr_list(hw, vsi_id,
+ &sw->vlan_list_head,
+ &remove_list_head);
+ mutex_unlock(&sw->vlan_list_lock);
+ if (!status) {
+ ice_remove_vlan(hw, &remove_list_head);
+ goto free_fltr_list;
+ }
+ break;
+ case ICE_SW_LKUP_MAC_VLAN:
+ case ICE_SW_LKUP_ETHERTYPE:
+ case ICE_SW_LKUP_ETHERTYPE_MAC:
+ case ICE_SW_LKUP_PROMISC:
+ case ICE_SW_LKUP_PROMISC_VLAN:
+ case ICE_SW_LKUP_DFLT:
+ ice_debug(hw, ICE_DBG_SW,
+ "Remove filters for this lookup type hasn't been implemented yet\n");
+ break;
+ }
+
+ return;
+free_fltr_list:
+ list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+ list_del(&fm_entry->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), fm_entry);
+ }
+}
+
+/**
+ * ice_remove_vsi_fltr - Remove all filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_id: ID of VSI to remove filters from
+ */
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id)
+{
+ ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC);
+ ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC_VLAN);
+ ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC);
+ ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_VLAN);
+ ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_DFLT);
+ ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE);
+ ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE_MAC);
+ ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC_VLAN);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
new file mode 100644
index 000000000000..6f4a0d159dbf
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SWITCH_H_
+#define _ICE_SWITCH_H_
+
+#include "ice_common.h"
+
+#define ICE_SW_CFG_MAX_BUF_LEN 2048
+#define ICE_DFLT_VSI_INVAL 0xff
+#define ICE_VSI_INVAL_ID 0xffff
+
+/* VSI context structure for add/get/update/free operations */
+struct ice_vsi_ctx {
+ u16 vsi_num;
+ u16 vsis_allocd;
+ u16 vsis_unallocated;
+ u16 flags;
+ struct ice_aqc_vsi_props info;
+ bool alloc_from_pool;
+};
+
+enum ice_sw_fwd_act_type {
+ ICE_FWD_TO_VSI = 0,
+ ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */
+ ICE_FWD_TO_Q,
+ ICE_FWD_TO_QGRP,
+ ICE_DROP_PACKET,
+ ICE_INVAL_ACT
+};
+
+/* Switch recipe ID enum values are specific to hardware */
+enum ice_sw_lkup_type {
+ ICE_SW_LKUP_ETHERTYPE = 0,
+ ICE_SW_LKUP_MAC = 1,
+ ICE_SW_LKUP_MAC_VLAN = 2,
+ ICE_SW_LKUP_PROMISC = 3,
+ ICE_SW_LKUP_VLAN = 4,
+ ICE_SW_LKUP_DFLT = 5,
+ ICE_SW_LKUP_ETHERTYPE_MAC = 8,
+ ICE_SW_LKUP_PROMISC_VLAN = 9,
+};
+
+struct ice_fltr_info {
+ /* Look up information: how to look up packet */
+ enum ice_sw_lkup_type lkup_type;
+ /* Forward action: filter action to do after lookup */
+ enum ice_sw_fwd_act_type fltr_act;
+ /* rule ID returned by firmware once filter rule is created */
+ u16 fltr_rule_id;
+ u16 flag;
+#define ICE_FLTR_RX BIT(0)
+#define ICE_FLTR_TX BIT(1)
+#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX)
+
+ /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
+ u16 src;
+
+ union {
+ struct {
+ u8 mac_addr[ETH_ALEN];
+ } mac;
+ struct {
+ u8 mac_addr[ETH_ALEN];
+ u16 vlan_id;
+ } mac_vlan;
+ struct {
+ u16 vlan_id;
+ } vlan;
+ /* Set lkup_type as ICE_SW_LKUP_ETHERTYPE
+ * if just using ethertype as filter. Set lkup_type as
+ * ICE_SW_LKUP_ETHERTYPE_MAC if MAC also needs to be
+ * passed in as filter.
+ */
+ struct {
+ u16 ethertype;
+ u8 mac_addr[ETH_ALEN]; /* optional */
+ } ethertype_mac;
+ } l_data;
+
+ /* Depending on filter action */
+ union {
+ /* queue id in case of ICE_FWD_TO_Q and starting
+ * queue id in case of ICE_FWD_TO_QGRP.
+ */
+ u16 q_id:11;
+ u16 vsi_id:10;
+ u16 vsi_list_id:10;
+ } fwd_id;
+
+ /* Set to num_queues if action is ICE_FWD_TO_QGRP. This field
+ * determines the range of queues the packet needs to be forwarded to
+ */
+ u8 qgrp_size;
+
+ /* Rule creation populates these indicators based on the switch type */
+ bool lb_en; /* Indicate if packet can be looped back */
+ bool lan_en; /* Indicate if packet can be forwarded to the uplink */
+};
+
+/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */
+struct ice_vsi_list_map_info {
+ struct list_head list_entry;
+ DECLARE_BITMAP(vsi_map, ICE_MAX_VSI);
+ u16 vsi_list_id;
+};
+
+enum ice_sw_fltr_status {
+ ICE_FLTR_STATUS_NEW = 0,
+ ICE_FLTR_STATUS_FW_SUCCESS,
+ ICE_FLTR_STATUS_FW_FAIL,
+};
+
+struct ice_fltr_list_entry {
+ struct list_head list_entry;
+ enum ice_sw_fltr_status status;
+ struct ice_fltr_info fltr_info;
+};
+
+/* This defines an entry in the list that maintains MAC or VLAN membership
+ * to HW list mapping, since multiple VSIs can subscribe to the same MAC or
+ * VLAN. As an optimization the VSI list should be created only when a
+ * second VSI becomes a subscriber to the same MAC or VLAN.
+ */
+struct ice_fltr_mgmt_list_entry {
+ /* back pointer to VSI list id to VSI list mapping */
+ struct ice_vsi_list_map_info *vsi_list_info;
+ u16 vsi_count;
+#define ICE_INVAL_LG_ACT_INDEX 0xffff
+ u16 lg_act_idx;
+#define ICE_INVAL_SW_MARKER_ID 0xffff
+ u16 sw_marker_id;
+ struct list_head list_entry;
+ struct ice_fltr_info fltr_info;
+#define ICE_INVAL_COUNTER_ID 0xff
+ u8 counter_index;
+};
+
+/* VSI related commands */
+enum ice_status
+ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ bool keep_vsi_alloc, struct ice_sq_cd *cd);
+
+enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
+
+/* Switch/bridge related commands */
+enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst);
+enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst);
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id);
+enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
+enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list);
+enum ice_status
+ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction);
+
+#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
new file mode 100644
index 000000000000..6481e3d86374
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -0,0 +1,1782 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* The driver transmit and receive code */
+
+#include <linux/prefetch.h>
+#include <linux/mm.h>
+#include "ice.h"
+
+#define ICE_RX_HDR_SIZE 256
+
+/**
+ * ice_unmap_and_free_tx_buf - Release a Tx buffer
+ * @ring: the ring that owns the buffer
+ * @tx_buf: the buffer to free
+ */
+static void
+ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
+{
+ if (tx_buf->skb) {
+ dev_kfree_skb_any(tx_buf->skb);
+ if (dma_unmap_len(tx_buf, len))
+ dma_unmap_single(ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ } else if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_page(ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ }
+
+ tx_buf->next_to_watch = NULL;
+ tx_buf->skb = NULL;
+ dma_unmap_len_set(tx_buf, len, 0);
+ /* tx_buf must be completely set up in the transmit path */
+}
+
+static struct netdev_queue *txring_txq(const struct ice_ring *ring)
+{
+ return netdev_get_tx_queue(ring->netdev, ring->q_index);
+}
+
+/**
+ * ice_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ */
+void ice_clean_tx_ring(struct ice_ring *tx_ring)
+{
+ unsigned long size;
+ u16 i;
+
+ /* ring already cleared, nothing to do */
+ if (!tx_ring->tx_buf)
+ return;
+
+ /* Free all the Tx ring sk_buffs */
+ for (i = 0; i < tx_ring->count; i++)
+ ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
+
+ size = sizeof(struct ice_tx_buf) * tx_ring->count;
+ memset(tx_ring->tx_buf, 0, size);
+
+ /* Zero out the descriptor ring */
+ memset(tx_ring->desc, 0, tx_ring->size);
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+
+ if (!tx_ring->netdev)
+ return;
+
+ /* cleanup Tx queue statistics */
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+}
+
+/**
+ * ice_free_tx_ring - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+void ice_free_tx_ring(struct ice_ring *tx_ring)
+{
+ ice_clean_tx_ring(tx_ring);
+ devm_kfree(tx_ring->dev, tx_ring->tx_buf);
+ tx_ring->tx_buf = NULL;
+
+ if (tx_ring->desc) {
+ dmam_free_coherent(tx_ring->dev, tx_ring->size,
+ tx_ring->desc, tx_ring->dma);
+ tx_ring->desc = NULL;
+ }
+}
+
+/**
+ * ice_clean_tx_irq - Reclaim resources after transmit completes
+ * @vsi: the VSI we care about
+ * @tx_ring: Tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * Returns true if there's any budget left (i.e. the clean is finished)
+ */
+static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring,
+ int napi_budget)
+{
+ unsigned int total_bytes = 0, total_pkts = 0;
+ unsigned int budget = vsi->work_lmt;
+ s16 i = tx_ring->next_to_clean;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+
+ tx_buf = &tx_ring->tx_buf[i];
+ tx_desc = ICE_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
+
+ do {
+ struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+ /* if next_to_watch is not set then there is no work pending */
+ if (!eop_desc)
+ break;
+
+ smp_rmb(); /* prevent any other reads prior to eop_desc */
+
+ /* if the descriptor isn't done, no work yet to do */
+ if (!(eop_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buf->next_to_watch = NULL;
+
+ /* update the statistics for this packet */
+ total_bytes += tx_buf->bytecount;
+ total_pkts += tx_buf->gso_segs;
+
+ /* free the skb */
+ napi_consume_skb(tx_buf->skb, napi_budget);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+
+ /* clear tx_buf data */
+ tx_buf->skb = NULL;
+ dma_unmap_len_set(tx_buf, len, 0);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buf++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buf = tx_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+ }
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buf++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buf = tx_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ }
+
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ budget--;
+ } while (likely(budget));
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->stats.bytes += total_bytes;
+ tx_ring->stats.pkts += total_pkts;
+ u64_stats_update_end(&tx_ring->syncp);
+ tx_ring->q_vector->tx.total_bytes += total_bytes;
+ tx_ring->q_vector->tx.total_pkts += total_pkts;
+
+ netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
+ total_bytes);
+
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
+ if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
+ (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+ if (__netif_subqueue_stopped(tx_ring->netdev,
+ tx_ring->q_index) &&
+ !test_bit(__ICE_DOWN, vsi->state)) {
+ netif_wake_subqueue(tx_ring->netdev,
+ tx_ring->q_index);
+ ++tx_ring->tx_stats.restart_q;
+ }
+ }
+
+ return !!budget;
+}
+
+/**
+ * ice_setup_tx_ring - Allocate the Tx descriptors
+ * @tx_ring: the tx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_tx_ring(struct ice_ring *tx_ring)
+{
+ struct device *dev = tx_ring->dev;
+ int bi_size;
+
+ if (!dev)
+ return -ENOMEM;
+
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(tx_ring->tx_buf);
+ bi_size = sizeof(struct ice_tx_buf) * tx_ring->count;
+ tx_ring->tx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
+ if (!tx_ring->tx_buf)
+ return -ENOMEM;
+
+ /* round up to nearest 4K */
+ tx_ring->size = tx_ring->count * sizeof(struct ice_tx_desc);
+ tx_ring->size = ALIGN(tx_ring->size, 4096);
+ tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma,
+ GFP_KERNEL);
+ if (!tx_ring->desc) {
+ dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+ tx_ring->size);
+ goto err;
+ }
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+ return 0;
+
+err:
+ devm_kfree(dev, tx_ring->tx_buf);
+ tx_ring->tx_buf = NULL;
+ return -ENOMEM;
+}
+
+/**
+ * ice_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ */
+void ice_clean_rx_ring(struct ice_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ unsigned long size;
+ u16 i;
+
+ /* ring already cleared, nothing to do */
+ if (!rx_ring->rx_buf)
+ return;
+
+ /* Free all the Rx ring sk_buffs */
+ for (i = 0; i < rx_ring->count; i++) {
+ struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+ if (rx_buf->skb) {
+ dev_kfree_skb(rx_buf->skb);
+ rx_buf->skb = NULL;
+ }
+ if (!rx_buf->page)
+ continue;
+
+ dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ __free_pages(rx_buf->page, 0);
+
+ rx_buf->page = NULL;
+ rx_buf->page_offset = 0;
+ }
+
+ size = sizeof(struct ice_rx_buf) * rx_ring->count;
+ memset(rx_ring->rx_buf, 0, size);
+
+ /* Zero out the descriptor ring */
+ memset(rx_ring->desc, 0, rx_ring->size);
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+}
+
+/**
+ * ice_free_rx_ring - Free Rx resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ */
+void ice_free_rx_ring(struct ice_ring *rx_ring)
+{
+ ice_clean_rx_ring(rx_ring);
+ devm_kfree(rx_ring->dev, rx_ring->rx_buf);
+ rx_ring->rx_buf = NULL;
+
+ if (rx_ring->desc) {
+ dmam_free_coherent(rx_ring->dev, rx_ring->size,
+ rx_ring->desc, rx_ring->dma);
+ rx_ring->desc = NULL;
+ }
+}
+
+/**
+ * ice_setup_rx_ring - Allocate the Rx descriptors
+ * @rx_ring: the rx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_rx_ring(struct ice_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ int bi_size;
+
+ if (!dev)
+ return -ENOMEM;
+
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(rx_ring->rx_buf);
+ bi_size = sizeof(struct ice_rx_buf) * rx_ring->count;
+ rx_ring->rx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
+ if (!rx_ring->rx_buf)
+ return -ENOMEM;
+
+ /* round up to nearest 4K */
+ rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc);
+ rx_ring->size = ALIGN(rx_ring->size, 4096);
+ rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma,
+ GFP_KERNEL);
+ if (!rx_ring->desc) {
+ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+ rx_ring->size);
+ goto err;
+ }
+
+ rx_ring->next_to_use = 0;
+ rx_ring->next_to_clean = 0;
+ return 0;
+
+err:
+ devm_kfree(dev, rx_ring->rx_buf);
+ rx_ring->rx_buf = NULL;
+ return -ENOMEM;
+}
+
+/**
+ * ice_release_rx_desc - Store the new tail and next_to_use values
+ * @rx_ring: ring to bump
+ * @val: new tail index
+ */
+static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+{
+ rx_ring->next_to_use = val;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = val;
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(val, rx_ring->tail);
+}
+
+/**
+ * ice_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buf struct to modify
+ *
+ * Returns true if the page was successfully allocated or
+ * reused.
+ */
+static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
+ struct ice_rx_buf *bi)
+{
+ struct page *page = bi->page;
+ dma_addr_t dma;
+
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page)) {
+ rx_ring->rx_stats.page_reuse_count++;
+ return true;
+ }
+
+ /* alloc new page for storage */
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ /* map page for use */
+ dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_pages(page, 0);
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ bi->dma = dma;
+ bi->page = page;
+ bi->page_offset = 0;
+
+ return true;
+}
+
+/**
+ * ice_alloc_rx_bufs - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail
+ */
+bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
+{
+ union ice_32b_rx_flex_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+ struct ice_rx_buf *bi;
+
+ /* do nothing if no valid netdev defined */
+ if (!rx_ring->netdev || !cleaned_count)
+ return false;
+
+ /* get the RX descriptor and buffer based on next_to_use */
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+ bi = &rx_ring->rx_buf[ntu];
+
+ do {
+ if (!ice_alloc_mapped_page(rx_ring, bi))
+ goto no_bufs;
+
+ /* Refresh the desc even if buffer_addrs didn't change
+ * because each write-back erases this info.
+ */
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+ rx_desc++;
+ bi++;
+ ntu++;
+ if (unlikely(ntu == rx_ring->count)) {
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_buf;
+ ntu = 0;
+ }
+
+ /* clear the status bits for the next_to_use descriptor */
+ rx_desc->wb.status_error0 = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
+
+ return false;
+
+no_bufs:
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
+
+ /* make sure to come back via polling to try again after
+ * allocation failure
+ */
+ return true;
+}
+
+/**
+ * ice_page_is_reserved - check if reuse is possible
+ * @page: page struct to check
+ */
+static bool ice_page_is_reserved(struct page *page)
+{
+ return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+/**
+ * ice_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_buf: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buf to place the data into
+ *
+ * This function will add the data contained in rx_buf->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ */
+static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb)
+{
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ICE_RXBUF_2048;
+#else
+ unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
+ unsigned int truesize;
+#endif /* PAGE_SIZE < 8192 */
+
+ struct page *page;
+ unsigned int size;
+
+ size = le16_to_cpu(rx_desc->wb.pkt_len) &
+ ICE_RX_FLX_DESC_PKT_LEN_M;
+
+ page = rx_buf->page;
+
+#if (PAGE_SIZE >= 8192)
+ truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif /* PAGE_SIZE >= 8192 */
+
+ /* will the data fit in the skb we allocated? if so, just
+ * copy it as it is pretty small anyway
+ */
+ if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {
+ unsigned char *va = page_address(page) + rx_buf->page_offset;
+
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+ /* page is not reserved, we can reuse buffer as-is */
+ if (likely(!ice_page_is_reserved(page)))
+ return true;
+
+ /* this page cannot be reused so discard it */
+ __free_pages(page, 0);
+ return false;
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rx_buf->page_offset, size, truesize);
+
+ /* avoid re-using remote pages */
+ if (unlikely(ice_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_count(page) != 1))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buf->page_offset ^= truesize;
+#else
+ /* move offset up to the next cache line */
+ rx_buf->page_offset += truesize;
+
+ if (rx_buf->page_offset > last_offset)
+ return false;
+#endif /* PAGE_SIZE < 8192 */
+
+ /* Even if we own the page, we are not allowed to use atomic_set()
+ * This would break get_page_unless_zero() users.
+ */
+ get_page(rx_buf->page);
+
+ return true;
+}
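+
+/* Illustration of the page-flip reuse above (assumes PAGE_SIZE == 4096 and
+ * truesize == ICE_RXBUF_2048): page_offset alternates between 0 and 2048 on
+ * each XOR, so the two halves of a page take turns between being owned by
+ * hardware and being handed back through ice_reuse_rx_page().
+ */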
+
+/**
+ * ice_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buf: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void ice_reuse_rx_page(struct ice_ring *rx_ring,
+ struct ice_rx_buf *old_buf)
+{
+ u16 nta = rx_ring->next_to_alloc;
+ struct ice_rx_buf *new_buf;
+
+ new_buf = &rx_ring->rx_buf[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ *new_buf = *old_buf;
+}
+
+/**
+ * ice_fetch_rx_buf - Allocate skb and populate it
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_desc: descriptor containing info written by hardware
+ *
+ * This function allocates an skb on the fly, and populates it with the page
+ * data from the current receive descriptor, taking care to set up the skb
+ * correctly, as well as handling calling the page recycle function if
+ * necessary.
+ */
+static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc)
+{
+ struct ice_rx_buf *rx_buf;
+ struct sk_buff *skb;
+ struct page *page;
+
+ rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+ page = rx_buf->page;
+ prefetchw(page);
+
+ skb = rx_buf->skb;
+
+ if (likely(!skb)) {
+ u8 *page_addr = page_address(page) + rx_buf->page_offset;
+
+ /* prefetch first cache line of first page */
+ prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+ prefetch((void *)(page_addr + L1_CACHE_BYTES));
+#endif /* L1_CACHE_BYTES < 128 */
+
+ /* allocate a skb to store the frags */
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+ ICE_RX_HDR_SIZE,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb)) {
+ rx_ring->rx_stats.alloc_buf_failed++;
+ return NULL;
+ }
+
+ /* we will be copying header into skb->data in
+ * pskb_may_pull so it is in our interest to prefetch
+ * it now to avoid a possible cache miss
+ */
+ prefetchw(skb->data);
+
+ skb_record_rx_queue(skb, rx_ring->q_index);
+ } else {
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
+ rx_buf->page_offset,
+ ICE_RXBUF_2048,
+ DMA_FROM_DEVICE);
+
+ rx_buf->skb = NULL;
+ }
+
+ /* pull page into skb */
+ if (ice_add_rx_frag(rx_buf, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ ice_reuse_rx_page(rx_ring, rx_buf);
+ rx_ring->rx_stats.page_reuse_count++;
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ }
+
+ /* clear contents of buffer_info */
+ rx_buf->page = NULL;
+
+ return skb;
+}
+
+/**
+ * ice_pull_tail - ice specific version of skb_pull_tail
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an ice specific version of __pskb_pull_tail. The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void ice_pull_tail(struct sk_buff *skb)
+{
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ unsigned int pull_len;
+ unsigned char *va;
+
+ /* it is valid to use page_address instead of kmap since we are
+ * working with pages allocated out of the low-memory pool per
+ * alloc_page(GFP_ATOMIC)
+ */
+ va = skb_frag_address(frag);
+
+ /* we need the header to contain the greater of either ETH_HLEN or
+ * 60 bytes if the skb->len is less than 60 for skb_pad.
+ */
+ pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+ /* update all of the pointers */
+ skb_frag_size_sub(frag, pull_len);
+ frag->page_offset += pull_len;
+ skb->data_len -= pull_len;
+ skb->tail += pull_len;
+}
+
+/**
+ * ice_cleanup_headers - Correct empty headers
+ * @skb: pointer to current skb being fixed
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ */
+static bool ice_cleanup_headers(struct sk_buff *skb)
+{
+ /* place header in linear portion of buffer */
+ if (skb_is_nonlinear(skb))
+ ice_pull_tail(skb);
+
+ /* if eth_skb_pad returns an error the skb was freed */
+ if (eth_skb_pad(skb))
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc,
+ const u16 stat_err_bits)
+{
+ return !!(rx_desc->wb.status_error0 &
+ cpu_to_le16(stat_err_bits));
+}
+
+/**
+ * ice_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ */
+static bool ice_is_non_eop(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ prefetch(ICE_RX_DESC(rx_ring, ntc));
+
+ /* if we are the last buffer then there is nothing else to do */
+#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
+ if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
+ return false;
+
+ /* place skb in next buffer to be received */
+ rx_ring->rx_buf[ntc].skb = skb;
+ rx_ring->rx_stats.non_eop_descs++;
+
+ return true;
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+ return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 rx_ptype)
+{
+ struct ice_32b_rx_flex_desc_nic *nic_mdid;
+ u32 hash;
+
+ if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+ return;
+
+ if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+ return;
+
+ nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+ hash = le32_to_cpu(nic_mdid->rss_hash);
+ skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @vsi: the VSI we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb,
+ union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+ struct ice_rx_ptype_decoded decoded;
+ u32 rx_error, rx_status;
+ bool ipv4, ipv6;
+
+ rx_status = le16_to_cpu(rx_desc->wb.status_error0);
+ rx_error = rx_status;
+
+ decoded = ice_decode_rx_desc_ptype(ptype);
+
+ /* Start with CHECKSUM_NONE and by default csum_level = 0 */
+ skb->ip_summed = CHECKSUM_NONE;
+ skb_checksum_none_assert(skb);
+
+ /* check if Rx checksum is enabled */
+ if (!(vsi->netdev->features & NETIF_F_RXCSUM))
+ return;
+
+ /* check if HW has decoded the packet and checksum */
+ if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+ return;
+
+ if (!(decoded.known && decoded.outer_ip))
+ return;
+
+ ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+ ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+ if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+ BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+ goto checksum_fail;
+ else if (ipv6 && (rx_status &
+ (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+ goto checksum_fail;
+
+ /* check for L4 errors and handle packets that were not able to be
+ * checksummed due to arrival speed
+ */
+ if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+ goto checksum_fail;
+
+ /* Only report checksum unnecessary for TCP, UDP, or SCTP */
+ switch (decoded.inner_prot) {
+ case ICE_RX_PTYPE_INNER_PROT_TCP:
+ case ICE_RX_PTYPE_INNER_PROT_UDP:
+ case ICE_RX_PTYPE_INNER_PROT_SCTP:
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
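+ /* fall through */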
+ default:
+ break;
+ }
+ return;
+
+checksum_fail:
+ vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+static void ice_process_skb_fields(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 ptype)
+{
+ ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+ /* modifies the skb - consumes the enet header */
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+ ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: vlan tag for packet
+ *
+ * This function sends the completed packet (via skb) up the stack using
+ * GRO receive functions (with or without VLAN tag)
+ */
+static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb,
+ u16 vlan_tag)
+{
+ if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ (vlan_tag & VLAN_VID_MASK)) {
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+ }
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ */
+static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+ u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ bool failure = false;
+
+ /* start the loop to process RX packets bounded by 'budget' */
+ while (likely(total_rx_pkts < (unsigned int)budget)) {
+ union ice_32b_rx_flex_desc *rx_desc;
+ struct sk_buff *skb;
+ u16 stat_err_bits;
+ u16 vlan_tag = 0;
+ u8 rx_ptype;
+
+ /* return some buffers to hardware, one at a time is too slow */
+ if (cleaned_count >= ICE_RX_BUF_WRITE) {
+ failure = failure ||
+ ice_alloc_rx_bufs(rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ /* get the RX desc from RX ring based on 'next_to_clean' */
+ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ /* status_error_len will always be zero for unused descriptors
+ * because it's cleared in cleanup, and overlaps with hdr_addr
+ * which is always zero because packet split isn't used; if the
+ * hardware wrote DD then it will be non-zero
+ */
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+ if (!ice_test_staterr(rx_desc, stat_err_bits))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * DD bit is set.
+ */
+ dma_rmb();
+
+ /* allocate (if needed) and populate skb */
+ skb = ice_fetch_rx_buf(rx_ring, rx_desc);
+ if (!skb)
+ break;
+
+ cleaned_count++;
+
+ /* skip if it is NOP desc */
+ if (ice_is_non_eop(rx_ring, rx_desc, skb))
+ continue;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
+ if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+ if (ice_test_staterr(rx_desc, stat_err_bits))
+ vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+ /* correct empty headers and pad skb if needed (to make a valid
+ * Ethernet frame)
+ */
+ if (ice_cleanup_headers(skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ /* probably a little skewed due to removing CRC */
+ total_rx_bytes += skb->len;
+
+ /* populate checksum, VLAN, and protocol */
+ ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+
+ /* send completed skb up the stack */
+ ice_receive_skb(rx_ring, skb, vlan_tag);
+
+ /* update budget accounting */
+ total_rx_pkts++;
+ }
+
+ /* update queue and vector specific stats */
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->stats.pkts += total_rx_pkts;
+ rx_ring->stats.bytes += total_rx_bytes;
+ u64_stats_update_end(&rx_ring->syncp);
+ rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
+ rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+
+ /* guarantee a trip back through this routine if there was a failure */
+ return failure ? budget : (int)total_rx_pkts;
+}
+
+/**
+ * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ */
+int ice_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct ice_q_vector *q_vector =
+ container_of(napi, struct ice_q_vector, napi);
+ struct ice_vsi *vsi = q_vector->vsi;
+ struct ice_pf *pf = vsi->back;
+ bool clean_complete = true;
+ int budget_per_ring = 0;
+ struct ice_ring *ring;
+ int work_done = 0;
+
+ /* Since the actual Tx work is minimal, we can give the Tx a larger
+ * budget and be more aggressive about cleaning up the Tx descriptors.
+ */
+ ice_for_each_ring(ring, q_vector->tx)
+ if (!ice_clean_tx_irq(vsi, ring, budget))
+ clean_complete = false;
+
+ /* Handle case where we are called by netpoll with a budget of 0 */
+ if (budget <= 0)
+ return budget;
+
+ /* We attempt to distribute budget to each Rx queue fairly, but don't
+ * allow the budget to go below 1 because that would exit polling early.
+ */
+ if (q_vector->num_ring_rx)
+ budget_per_ring = max(budget / q_vector->num_ring_rx, 1);
+
+ ice_for_each_ring(ring, q_vector->rx) {
+ int cleaned;
+
+ cleaned = ice_clean_rx_irq(ring, budget_per_ring);
+ work_done += cleaned;
+ /* if we clean as many as budgeted, we must not be done */
+ if (cleaned >= budget_per_ring)
+ clean_complete = false;
+ }
+
+ /* If work not completed, return budget and polling will return */
+ if (!clean_complete)
+ return budget;
+
+ /* Work is done so exit the polling mode and re-enable the interrupt */
+ napi_complete_done(napi, work_done);
+ if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+ ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector);
+ return 0;
+}
+
+/* helper function for building cmd/type/offset */
+static __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+ return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ (td_cmd << ICE_TXD_QW1_CMD_S) |
+ (td_offset << ICE_TXD_QW1_OFFSET_S) |
+ ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ (td_tag << ICE_TXD_QW1_L2TAG1_S));
+}
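+
+/* For example, the last descriptor of a frame is written further below as
+ * build_ctob(td_cmd | ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS, td_offset,
+ * size, td_tag), packing the command, offset, buffer size and L2 tag into
+ * a single little-endian quad word.
+ */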
+
+/**
+ * __ice_maybe_stop_tx - 2nd level check for tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size: the number of descriptors we want to assure is available
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ */
+static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
+{
+ netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index);
+ /* Memory barrier before checking head and tail */
+ smp_mb();
+
+ /* Check again in a case another CPU has just made room available. */
+ if (likely(ICE_DESC_UNUSED(tx_ring) < size))
+ return -EBUSY;
+
+ /* A reprieve! - use start_subqueue because it doesn't call schedule */
+ netif_start_subqueue(tx_ring->netdev, tx_ring->q_index);
+ ++tx_ring->tx_stats.restart_q;
+ return 0;
+}
+
+/**
+ * ice_maybe_stop_tx - 1st level check for tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size: the number of descriptors we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ */
+static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
+{
+ if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
+ return 0;
+ return __ice_maybe_stop_tx(tx_ring, size);
+}
+
+/**
+ * ice_tx_map - Build the Tx descriptor
+ * @tx_ring: ring to send buffer on
+ * @first: first buffer info buffer to use
+ * @off: pointer to struct that holds offload parameters
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit descriptor.
+ */
+static void
+ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
+ struct ice_tx_offload_params *off)
+{
+ u64 td_offset, td_tag, td_cmd;
+ u16 i = tx_ring->next_to_use;
+ struct skb_frag_struct *frag;
+ unsigned int data_len, size;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ struct sk_buff *skb;
+ dma_addr_t dma;
+
+ td_tag = off->td_l2tag1;
+ td_cmd = off->td_cmd;
+ td_offset = off->td_offset;
+ skb = first->skb;
+
+ data_len = skb->data_len;
+ size = skb_headlen(skb);
+
+ tx_desc = ICE_TX_DESC(tx_ring, i);
+
+ if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
+ td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
+ td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
+ ICE_TX_FLAGS_VLAN_S;
+ }
+
+ dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+ tx_buf = first;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+
+ if (dma_mapping_error(tx_ring->dev, dma))
+ goto dma_error;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ /* align size to end of page */
+ max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+
+ /* account for data chunks larger than the hardware
+ * can handle
+ */
+ while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
+ tx_desc->cmd_type_offset_bsz =
+ build_ctob(td_cmd, td_offset, max_data, td_tag);
+
+ tx_desc++;
+ i++;
+
+ if (i == tx_ring->count) {
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+
+ dma += max_data;
+ size -= max_data;
+
+ max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ }
+
+ if (likely(!data_len))
+ break;
+
+ tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
+ size, td_tag);
+
+ tx_desc++;
+ i++;
+
+ if (i == tx_ring->count) {
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+
+ size = skb_frag_size(frag);
+ data_len -= size;
+
+ dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+ DMA_TO_DEVICE);
+
+ tx_buf = &tx_ring->tx_buf[i];
+ }
+
+ /* record bytecount for BQL */
+ netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+ /* record SW timestamp if HW timestamp is not available */
+ skb_tx_timestamp(first->skb);
+
+ i++;
+ if (i == tx_ring->count)
+ i = 0;
+
+ /* write last descriptor with RS and EOP bits */
+ td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+ tx_desc->cmd_type_offset_bsz =
+ build_ctob(td_cmd, td_offset, size, td_tag);
+
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch.
+ *
+ * We also use this memory barrier to make certain all of the
+ * status bits have been updated before next_to_watch is written.
+ */
+ wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ first->next_to_watch = tx_desc;
+
+ tx_ring->next_to_use = i;
+
+ ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ /* notify HW of packet */
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+ writel(i, tx_ring->tail);
+
+ /* we need this if more than one processor can write to our tail
+ * at a time; it synchronizes IO on IA64/Altix systems
+ */
+ mmiowb();
+ }
+
+ return;
+
+dma_error:
+ /* clear dma mappings for failed tx_buf map */
+ for (;;) {
+ tx_buf = &tx_ring->tx_buf[i];
+ ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
+ if (tx_buf == first)
+ break;
+ if (i == 0)
+ i = tx_ring->count;
+ i--;
+ }
+
+ tx_ring->next_to_use = i;
+}
+
+/**
+ * ice_tx_csum - Enable Tx checksum offloads
+ * @first: pointer to the first descriptor
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise.
+ */
+static
+int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+ u32 l4_len = 0, l3_len = 0, l2_len = 0;
+ struct sk_buff *skb = first->skb;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ union {
+ struct tcphdr *tcp;
+ unsigned char *hdr;
+ } l4;
+ __be16 frag_off, protocol;
+ unsigned char *exthdr;
+ u32 offset, cmd = 0;
+ u8 l4_proto = 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+
+ /* compute outer L2 header size */
+ l2_len = ip.hdr - skb->data;
+ offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;
+
+ if (skb->encapsulation)
+ return -1;
+
+ /* Enable IP checksum offloads */
+ protocol = vlan_get_protocol(skb);
+ if (protocol == htons(ETH_P_IP)) {
+ l4_proto = ip.v4->protocol;
+ /* the stack computes the IP header already, the only time we
+ * need the hardware to recompute it is in the case of TSO.
+ */
+ if (first->tx_flags & ICE_TX_FLAGS_TSO)
+ cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ else
+ cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
+
+ } else if (protocol == htons(ETH_P_IPV6)) {
+ cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
+ exthdr = ip.hdr + sizeof(*ip.v6);
+ l4_proto = ip.v6->nexthdr;
+ if (l4.hdr != exthdr)
+ ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto,
+ &frag_off);
+ } else {
+ return -1;
+ }
+
+ /* compute inner L3 header size */
+ l3_len = l4.hdr - ip.hdr;
+ offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S;
+
+ /* Enable L4 checksum offloads */
+ switch (l4_proto) {
+ case IPPROTO_TCP:
+ /* enable checksum offloads */
+ cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+ l4_len = l4.tcp->doff;
+ offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+ break;
+ case IPPROTO_UDP:
+ /* enable UDP checksum offload */
+ cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+ l4_len = (sizeof(struct udphdr) >> 2);
+ offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+ break;
+ case IPPROTO_SCTP:
+ default:
+ if (first->tx_flags & ICE_TX_FLAGS_TSO)
+ return -1;
+ skb_checksum_help(skb);
+ return 0;
+ }
+
+ off->td_cmd |= cmd;
+ off->td_offset |= offset;
+ return 1;
+}
+
+/**
+ * ice_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * Checks the skb and set up correspondingly several generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ *
+ * Returns an error code to indicate the frame should be dropped on error,
+ * otherwise returns 0 to indicate the flags have been set properly.
+ */
+static int
+ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first)
+{
+ struct sk_buff *skb = first->skb;
+ __be16 protocol = skb->protocol;
+
+ if (protocol == htons(ETH_P_8021Q) &&
+ !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
+ /* when HW VLAN acceleration is turned off by the user the
+ * stack sets the protocol to 8021q so that the driver
+ * can take any steps required to support the SW only
+ * VLAN handling. In our case the driver doesn't need
+ * to take any further steps so just set the protocol
+ * to the encapsulated ethertype.
+ */
+ skb->protocol = vlan_get_protocol(skb);
+ goto out;
+ }
+
+ /* if we have a HW VLAN tag being added, default to the HW one */
+ if (skb_vlan_tag_present(skb)) {
+ first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
+ first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+ } else if (protocol == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vhdr, _vhdr;
+
+ /* for SW VLAN, check the next protocol and store the tag */
+ vhdr = (struct vlan_hdr *)skb_header_pointer(skb, ETH_HLEN,
+ sizeof(_vhdr),
+ &_vhdr);
+ if (!vhdr)
+ return -EINVAL;
+
+ first->tx_flags |= ntohs(vhdr->h_vlan_TCI) <<
+ ICE_TX_FLAGS_VLAN_S;
+ first->tx_flags |= ICE_TX_FLAGS_SW_VLAN;
+ }
+
+out:
+ return 0;
+}
+
+/**
+ * ice_tso - computes mss and TSO length to prepare for TSO
+ * @first: pointer to struct ice_tx_buf
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if TSO can't happen, 1 otherwise.
+ */
+static
+int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+ struct sk_buff *skb = first->skb;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ union {
+ struct tcphdr *tcp;
+ unsigned char *hdr;
+ } l4;
+ u64 cd_mss, cd_tso_len;
+ u32 paylen, l4_start;
+ int err;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ if (!skb_is_gso(skb))
+ return 0;
+
+ err = skb_cow_head(skb, 0);
+ if (err < 0)
+ return err;
+
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+
+ /* initialize outer IP header fields */
+ if (ip.v4->version == 4) {
+ ip.v4->tot_len = 0;
+ ip.v4->check = 0;
+ } else {
+ ip.v6->payload_len = 0;
+ }
+
+ /* determine offset of transport header */
+ l4_start = l4.hdr - skb->data;
+
+ /* remove payload length from checksum */
+ paylen = skb->len - l4_start;
+ csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+
+ /* compute length of segmentation header */
+ off->header_len = (l4.tcp->doff * 4) + l4_start;
+
+ /* update gso_segs and bytecount */
+ first->gso_segs = skb_shinfo(skb)->gso_segs;
+ first->bytecount += (first->gso_segs - 1) * off->header_len;
+
+ cd_tso_len = skb->len - off->header_len;
+ cd_mss = skb_shinfo(skb)->gso_size;
+
+ /* record cdesc_qw1 with TSO parameters */
+ off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX |
+ (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+ (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+ (cd_mss << ICE_TXD_CTX_QW1_MSS_S);
+ first->tx_flags |= ICE_TX_FLAGS_TSO;
+ return 1;
+}
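
Aside (not part of the patch): the bytecount update above relies on the usual TSO wire-byte accounting, where the headers are replicated once per segment, so the total is skb->len plus one extra header copy per segment after the first. A minimal standalone sketch with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned int skb_len = 60000;	/* payload plus one copy of the headers */
	unsigned int header_len = 54;	/* Ethernet + IPv4 + TCP, no options */
	unsigned int mss = 1460;
	unsigned int payload = skb_len - header_len;
	unsigned int segs = (payload + mss - 1) / mss;	/* == gso_segs */

	/* every segment after the first carries another header copy */
	printf("gso_segs=%u wire_bytes=%u\n",
	       segs, skb_len + (segs - 1) * header_len);
	return 0;
}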
+
+/**
+ * ice_txd_use_count - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ * To divide by 4K, shift right by 12 bits
+ * To divide by 3, multiply by 85, then divide by 256
+ * (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment. For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ * return (((size >> 12) * 85) >> 8) + 1;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ * return ((size * 85) >> 20) + 1;
+ */
+static unsigned int ice_txd_use_count(unsigned int size)
+{
+ return ((size * 85) >> 20) + 1;
+}
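
Aside (not part of the patch): a standalone check of the multiply-shift approximation described in the comment above. Up to 1M the estimate either matches an exact ceil-divide by 12K or undershoots it by exactly one just past multiples of 12K, which the 4K - 1 per-segment slack absorbs:

#include <assert.h>
#include <stdio.h>

static unsigned int txd_use_count(unsigned int size)
{
	return ((size * 85) >> 20) + 1;
}

int main(void)
{
	unsigned int size;

	for (size = 1; size <= (1u << 20); size++) {
		unsigned int exact = (size + 12287) / 12288; /* ceil(size/12K) */
		unsigned int est = txd_use_count(size);

		assert(est == exact || est == exact - 1);
	}
	printf("approximation holds through 1M\n");
	return 0;
}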
+
+/**
+ * ice_xmit_desc_count - calculate number of tx descriptors needed
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb.
+ */
+static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
+{
+ const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+ unsigned int count = 0, size = skb_headlen(skb);
+
+ for (;;) {
+ count += ice_txd_use_count(size);
+
+ if (!nr_frags--)
+ break;
+
+ size = skb_frag_size(frag++);
+ }
+
+ return count;
+}
+
+/**
+ * __ice_chk_linearize - Check if there are more than 8 buffers per packet
+ * @skb: send buffer
+ *
+ * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
+ */
+static bool __ice_chk_linearize(struct sk_buff *skb)
+{
+ const struct skb_frag_struct *frag, *stale;
+ int nr_frags, sum;
+
+ /* no need to check if number of frags is less than 7 */
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ if (nr_frags < (ICE_MAX_BUF_TXD - 1))
+ return false;
+
+ /* We need to walk through the list and validate that each group
+ * of 6 fragments totals at least gso_size.
+ */
+ nr_frags -= ICE_MAX_BUF_TXD - 2;
+ frag = &skb_shinfo(skb)->frags[0];
+
+ /* Initialize sum to the negative value of gso_size minus 1. We
+ * use this as the worst case scenario in which the frag ahead
+ * of us only provides one byte, which is why we are limited to 6
+ * descriptors for a single transmit, as the header and previous
+ * fragment are already consuming 2 descriptors.
+ */
+ sum = 1 - skb_shinfo(skb)->gso_size;
+
+ /* Add size of frags 0 through 4 to create our initial sum */
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+
+ /* Walk through fragments adding latest fragment, testing it, and
+ * then removing stale fragments from the sum.
+ */
+ stale = &skb_shinfo(skb)->frags[0];
+ for (;;) {
+ sum += skb_frag_size(frag++);
+
+ /* if sum is negative we failed to make sufficient progress */
+ if (sum < 0)
+ return true;
+
+ if (!nr_frags--)
+ break;
+
+ sum -= skb_frag_size(stale++);
+ }
+
+ return false;
+}
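
Aside (not part of the patch): a standalone model of the sliding-window test above, with invented fragment sizes. Any window of fragments that must fit in the 6 data descriptors remaining after the header and first payload descriptor has to supply at least gso_size bytes, otherwise the skb must be linearized:

#include <stdbool.h>
#include <stdio.h>

#define MAX_BUF_TXD	8

static bool needs_linearize(const int *frag, int nr_frags, int gso_size)
{
	int sum, i, stale = 0;

	if (nr_frags < MAX_BUF_TXD - 1)
		return false;

	nr_frags -= MAX_BUF_TXD - 2;

	/* seed with frags 0..4, starting one byte short of gso_size */
	sum = 1 - gso_size;
	for (i = 0; i < 5; i++)
		sum += frag[i];

	for (;;) {
		sum += frag[i++];	/* admit the newest fragment */
		if (sum < 0)
			return true;	/* window fell short of gso_size */
		if (!nr_frags--)
			break;
		sum -= frag[stale++];	/* retire the oldest fragment */
	}
	return false;
}

int main(void)
{
	int frags[9] = { 1, 1, 1, 1, 1, 1, 1, 1, 8000 };

	/* eight tiny frags can't cover one 1500-byte segment: linearize */
	printf("%d\n", needs_linearize(frags, 9, 1500));	/* prints 1 */
	return 0;
}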
+
+/**
+ * ice_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb: send buffer
+ * @count: number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ */
+static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
+{
+ /* Both TSO and single send will work if count is less than 8 */
+ if (likely(count < ICE_MAX_BUF_TXD))
+ return false;
+
+ if (skb_is_gso(skb))
+ return __ice_chk_linearize(skb);
+
+ /* we can support up to 8 data buffers for a single send */
+ return count != ICE_MAX_BUF_TXD;
+}
+
+/**
+ * ice_xmit_frame_ring - Sends buffer on Tx ring
+ * @skb: send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t
+ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
+{
+ struct ice_tx_offload_params offload = { 0 };
+ struct ice_tx_buf *first;
+ unsigned int count;
+ int tso, csum;
+
+ count = ice_xmit_desc_count(skb);
+ if (ice_chk_linearize(skb, count)) {
+ if (__skb_linearize(skb))
+ goto out_drop;
+ count = ice_txd_use_count(skb->len);
+ tx_ring->tx_stats.tx_linearize++;
+ }
+
+ /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
+ * + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
+ * + 4 desc gap to avoid the cache line where head is,
+ * + 1 desc for context descriptor,
+ * otherwise try next time
+ */
+ if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) {
+ tx_ring->tx_stats.tx_busy++;
+ return NETDEV_TX_BUSY;
+ }
+
+ offload.tx_ring = tx_ring;
+
+ /* record the location of the first descriptor for this packet */
+ first = &tx_ring->tx_buf[tx_ring->next_to_use];
+ first->skb = skb;
+ first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+ first->gso_segs = 1;
+ first->tx_flags = 0;
+
+ /* prepare the VLAN tagging flags for Tx */
+ if (ice_tx_prepare_vlan_flags(tx_ring, first))
+ goto out_drop;
+
+ /* set up TSO offload */
+ tso = ice_tso(first, &offload);
+ if (tso < 0)
+ goto out_drop;
+
+ /* always set up Tx checksum offload */
+ csum = ice_tx_csum(first, &offload);
+ if (csum < 0)
+ goto out_drop;
+
+ if (tso || offload.cd_tunnel_params) {
+ struct ice_tx_ctx_desc *cdesc;
+ int i = tx_ring->next_to_use;
+
+ /* grab the next descriptor */
+ cdesc = ICE_TX_CTX_DESC(tx_ring, i);
+ i++;
+ tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+ /* setup context descriptor */
+ cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params);
+ cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2);
+ cdesc->rsvd = cpu_to_le16(0);
+ cdesc->qw1 = cpu_to_le64(offload.cd_qw1);
+ }
+
+ ice_tx_map(tx_ring, first, &offload);
+ return NETDEV_TX_OK;
+
+out_drop:
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+/**
+ * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_ring *tx_ring;
+
+ tx_ring = vsi->tx_rings[skb->queue_mapping];
+
+ /* hardware can't handle really short frames; hardware padding works
+ * beyond this point
+ */
+ if (skb_put_padto(skb, ICE_MIN_TX_LEN))
+ return NETDEV_TX_OK;
+
+ return ice_xmit_frame_ring(skb, tx_ring);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
new file mode 100644
index 000000000000..567067b650c4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TXRX_H_
+#define _ICE_TXRX_H_
+
+#define ICE_DFLT_IRQ_WORK 256
+#define ICE_RXBUF_2048 2048
+#define ICE_MAX_CHAINED_RX_BUFS 5
+#define ICE_MAX_BUF_TXD 8
+#define ICE_MIN_TX_LEN 17
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define ICE_MAX_READ_REQ_SIZE 4096
+#define ICE_MAX_DATA_PER_TXD (16 * 1024 - 1)
+#define ICE_MAX_DATA_PER_TXD_ALIGNED \
+ (~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD)
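
Aside (not part of the patch): evaluating the mask confirms the aligned per-descriptor limit is 12K, the divisor assumed by ice_txd_use_count() in ice_txrx.c:

#include <stdio.h>

#define ICE_MAX_READ_REQ_SIZE	4096
#define ICE_MAX_DATA_PER_TXD	(16 * 1024 - 1)
#define ICE_MAX_DATA_PER_TXD_ALIGNED \
	(~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD)

int main(void)
{
	/* 0x3FFF & 0xFFFFF000 == 0x3000 */
	printf("%d\n", ICE_MAX_DATA_PER_TXD_ALIGNED);	/* prints 12288 */
	return 0;
}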
+
+#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */
+#define ICE_MAX_TXQ_PER_TXQG 128
+
+/* Tx Descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+#define ICE_DESC_UNUSED(R) \
+ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+ (R)->next_to_clean - (R)->next_to_use - 1)
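
Aside (not part of the patch): the unused-count arithmetic wraps around the ring and keeps one slot permanently empty, so next_to_use == next_to_clean unambiguously means an empty ring. With invented values:

#include <stdio.h>

struct ring { int count, next_to_use, next_to_clean; };

#define DESC_UNUSED(R) \
	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
	 (R)->next_to_clean - (R)->next_to_use - 1)

int main(void)
{
	struct ring r = { .count = 256, .next_to_use = 200, .next_to_clean = 10 };

	printf("%d\n", DESC_UNUSED(&r));	/* 256 + 10 - 200 - 1 = 65 */
	return 0;
}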
+
+#define ICE_TX_FLAGS_TSO BIT(0)
+#define ICE_TX_FLAGS_HW_VLAN BIT(1)
+#define ICE_TX_FLAGS_SW_VLAN BIT(2)
+#define ICE_TX_FLAGS_VLAN_M 0xffff0000
+#define ICE_TX_FLAGS_VLAN_S 16
+
+struct ice_tx_buf {
+ struct ice_tx_desc *next_to_watch;
+ struct sk_buff *skb;
+ unsigned int bytecount;
+ unsigned short gso_segs;
+ u32 tx_flags;
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+};
+
+struct ice_tx_offload_params {
+ u8 header_len;
+ u32 td_cmd;
+ u32 td_offset;
+ u32 td_l2tag1;
+ u16 cd_l2tag2;
+ u32 cd_tunnel_params;
+ u64 cd_qw1;
+ struct ice_ring *tx_ring;
+};
+
+struct ice_rx_buf {
+ struct sk_buff *skb;
+ dma_addr_t dma;
+ struct page *page;
+ unsigned int page_offset;
+};
+
+struct ice_q_stats {
+ u64 pkts;
+ u64 bytes;
+};
+
+struct ice_txq_stats {
+ u64 restart_q;
+ u64 tx_busy;
+ u64 tx_linearize;
+};
+
+struct ice_rxq_stats {
+ u64 non_eop_descs;
+ u64 alloc_page_failed;
+ u64 alloc_buf_failed;
+ u64 page_reuse_count;
+};
+
+/* This enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers, or more generally anywhere in the manual
+ * mentioning ITR_INDX. ITR_NONE cannot be used as an index 'n' into any
+ * register; instead it is a special value meaning "don't update" ITR0/1/2.
+ */
+enum ice_dyn_idx_t {
+ ICE_IDX_ITR0 = 0,
+ ICE_IDX_ITR1 = 1,
+ ICE_IDX_ITR2 = 2,
+ ICE_ITR_NONE = 3 /* ITR_NONE must not be used as an index */
+};
+
+/* Header split modes defined by DTYPE field of Rx RLAN context */
+enum ice_rx_dtype {
+ ICE_RX_DTYPE_NO_SPLIT = 0,
+ ICE_RX_DTYPE_HEADER_SPLIT = 1,
+ ICE_RX_DTYPE_SPLIT_ALWAYS = 2,
+};
+
+/* indices into GLINT_ITR registers */
+#define ICE_RX_ITR ICE_IDX_ITR0
+#define ICE_TX_ITR ICE_IDX_ITR1
+#define ICE_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
+#define ICE_ITR_8K 0x003E
+
+/* apply ITR HW granularity translation to program the HW registers */
+#define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran))
+
+/* Legacy or Advanced Mode Queue */
+#define ICE_TX_ADVANCED 0
+#define ICE_TX_LEGACY 1
+
+/* descriptor ring, associated with a VSI */
+struct ice_ring {
+ struct ice_ring *next; /* pointer to next ring in q_vector */
+ void *desc; /* Descriptor ring memory */
+ struct device *dev; /* Used for DMA mapping */
+ struct net_device *netdev; /* netdev ring maps to */
+ struct ice_vsi *vsi; /* Backreference to associated VSI */
+ struct ice_q_vector *q_vector; /* Backreference to associated vector */
+ u8 __iomem *tail;
+ union {
+ struct ice_tx_buf *tx_buf;
+ struct ice_rx_buf *rx_buf;
+ };
+ u16 q_index; /* Queue number of ring */
+ u32 txq_teid; /* Added Tx queue TEID */
+
+ /* high bit set means dynamic; use accessor routines to read/write.
+ * hardware supports 2us/1us resolution for the ITR registers.
+ * these values always store the USER setting, and must be converted
+ * before programming to a register.
+ */
+ u16 rx_itr_setting;
+ u16 tx_itr_setting;
+
+ u16 count; /* Number of descriptors */
+ u16 reg_idx; /* HW register index of the ring */
+
+ /* used in interrupt processing */
+ u16 next_to_use;
+ u16 next_to_clean;
+
+ bool ring_active; /* is ring online or not */
+
+ /* stats structs */
+ struct ice_q_stats stats;
+ struct u64_stats_sync syncp;
+ union {
+ struct ice_txq_stats tx_stats;
+ struct ice_rxq_stats rx_stats;
+ };
+
+ unsigned int size; /* length of descriptor ring in bytes */
+ dma_addr_t dma; /* physical address of ring */
+ struct rcu_head rcu; /* to avoid race on free */
+ u16 next_to_alloc;
+} ____cacheline_internodealigned_in_smp;
+
+enum ice_latency_range {
+ ICE_LOWEST_LATENCY = 0,
+ ICE_LOW_LATENCY = 1,
+ ICE_BULK_LATENCY = 2,
+ ICE_ULTRA_LATENCY = 3,
+};
+
+struct ice_ring_container {
+ /* array of pointers to rings */
+ struct ice_ring *ring;
+ unsigned int total_bytes; /* total bytes processed this int */
+ unsigned int total_pkts; /* total packets processed this int */
+ enum ice_latency_range latency_range;
+ u16 itr;
+};
+
+/* iterator for handling rings in ring container */
+#define ice_for_each_ring(pos, head) \
+ for (pos = (head).ring; pos; pos = pos->next)
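
For illustration (hypothetical snippet, not from the patch): the iterator walks the singly linked list built through ice_ring::next. This assumes the q_vector embeds ice_ring_container members named tx and rx, which is not shown here:

/* hypothetical helper; q_vector layout is an assumption */
static u64 ice_sum_tx_pkts(struct ice_q_vector *q_vector)
{
	struct ice_ring *ring;
	u64 pkts = 0;

	ice_for_each_ring(ring, q_vector->tx)
		pkts += ring->stats.pkts;	/* per-ring ice_q_stats */

	return pkts;
}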
+
+bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+void ice_clean_tx_ring(struct ice_ring *tx_ring);
+void ice_clean_rx_ring(struct ice_ring *rx_ring);
+int ice_setup_tx_ring(struct ice_ring *tx_ring);
+int ice_setup_rx_ring(struct ice_ring *rx_ring);
+void ice_free_tx_ring(struct ice_ring *tx_ring);
+void ice_free_rx_ring(struct ice_ring *rx_ring);
+int ice_napi_poll(struct napi_struct *napi, int budget);
+
+#endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
new file mode 100644
index 000000000000..99c8a9a71b5e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TYPE_H_
+#define _ICE_TYPE_H_
+
+#include "ice_status.h"
+#include "ice_hw_autogen.h"
+#include "ice_osdep.h"
+#include "ice_controlq.h"
+#include "ice_lan_tx_rx.h"
+
+#define ICE_BYTES_PER_WORD 2
+#define ICE_BYTES_PER_DWORD 4
+
+static inline bool ice_is_tc_ena(u8 bitmap, u8 tc)
+{
+ return test_bit(tc, (unsigned long *)&bitmap);
+}
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+#define ICE_DBG_INIT BIT_ULL(1)
+#define ICE_DBG_LINK BIT_ULL(4)
+#define ICE_DBG_QCTX BIT_ULL(6)
+#define ICE_DBG_NVM BIT_ULL(7)
+#define ICE_DBG_LAN BIT_ULL(8)
+#define ICE_DBG_SW BIT_ULL(13)
+#define ICE_DBG_SCHED BIT_ULL(14)
+#define ICE_DBG_RES BIT_ULL(17)
+#define ICE_DBG_AQ_MSG BIT_ULL(24)
+#define ICE_DBG_AQ_CMD BIT_ULL(27)
+#define ICE_DBG_USER BIT_ULL(31)
+
+enum ice_aq_res_ids {
+ ICE_NVM_RES_ID = 1,
+ ICE_SPD_RES_ID,
+ ICE_GLOBAL_CFG_LOCK_RES_ID,
+ ICE_CHANGE_LOCK_RES_ID
+};
+
+enum ice_aq_res_access_type {
+ ICE_RES_READ = 1,
+ ICE_RES_WRITE
+};
+
+enum ice_fc_mode {
+ ICE_FC_NONE = 0,
+ ICE_FC_RX_PAUSE,
+ ICE_FC_TX_PAUSE,
+ ICE_FC_FULL,
+ ICE_FC_PFC,
+ ICE_FC_DFLT
+};
+
+enum ice_set_fc_aq_failures {
+ ICE_SET_FC_AQ_FAIL_NONE = 0,
+ ICE_SET_FC_AQ_FAIL_GET,
+ ICE_SET_FC_AQ_FAIL_SET,
+ ICE_SET_FC_AQ_FAIL_UPDATE
+};
+
+/* Various MAC types */
+enum ice_mac_type {
+ ICE_MAC_UNKNOWN = 0,
+ ICE_MAC_GENERIC,
+};
+
+/* Media Types */
+enum ice_media_type {
+ ICE_MEDIA_UNKNOWN = 0,
+ ICE_MEDIA_FIBER,
+ ICE_MEDIA_BASET,
+ ICE_MEDIA_BACKPLANE,
+ ICE_MEDIA_DA,
+};
+
+enum ice_vsi_type {
+ ICE_VSI_PF = 0,
+};
+
+struct ice_link_status {
+ /* Refer to ice_aq_phy_type for bits definition */
+ u64 phy_type_low;
+ u16 max_frame_size;
+ u16 link_speed;
+ bool lse_ena; /* Link Status Event notification */
+ u8 link_info;
+ u8 an_info;
+ u8 ext_info;
+ u8 pacing;
+ u8 req_speeds;
+ /* Refer to the #defines for module_type[ICE_MODULE_TYPE_TOTAL_BYTE] in the
+ * ice_aqc_get_phy_caps structure
+ */
+ u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+};
+
+/* PHY info such as phy_type, etc... */
+struct ice_phy_info {
+ struct ice_link_status link_info;
+ struct ice_link_status link_info_old;
+ u64 phy_type_low;
+ enum ice_media_type media_type;
+ bool get_link_info;
+};
+
+/* Common HW capabilities for SW use */
+struct ice_hw_common_caps {
+ /* TX/RX queues */
+ u16 num_rxq; /* Number/Total RX queues */
+ u16 rxq_first_id; /* First queue ID for RX queues */
+ u16 num_txq; /* Number/Total TX queues */
+ u16 txq_first_id; /* First queue ID for TX queues */
+
+ /* MSI-X vectors */
+ u16 num_msix_vectors;
+ u16 msix_vector_first_id;
+
+ /* Max MTU for function or device */
+ u16 max_mtu;
+
+ /* RSS related capabilities */
+ u16 rss_table_size; /* 512 for PFs and 64 for VFs */
+ u8 rss_table_entry_width; /* RSS Entry width in bits */
+};
+
+/* Function specific capabilities */
+struct ice_hw_func_caps {
+ struct ice_hw_common_caps common_cap;
+ u32 guaranteed_num_vsi;
+};
+
+/* Device wide capabilities */
+struct ice_hw_dev_caps {
+ struct ice_hw_common_caps common_cap;
+ u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */
+};
+
+/* MAC info */
+struct ice_mac_info {
+ u8 lan_addr[ETH_ALEN];
+ u8 perm_addr[ETH_ALEN];
+};
+
+/* Various RESET requests. These are not tied to HW reset types */
+enum ice_reset_req {
+ ICE_RESET_PFR = 0,
+ ICE_RESET_CORER = 1,
+ ICE_RESET_GLOBR = 2,
+};
+
+/* Bus parameters */
+struct ice_bus_info {
+ u16 device;
+ u8 func;
+};
+
+/* Flow control (FC) parameters */
+struct ice_fc_info {
+ enum ice_fc_mode current_mode; /* FC mode in effect */
+ enum ice_fc_mode req_mode; /* FC mode requested by caller */
+};
+
+/* NVM Information */
+struct ice_nvm_info {
+ u32 eetrack; /* NVM data version */
+ u32 oem_ver; /* OEM version info */
+ u16 sr_words; /* Shadow RAM size in words */
+ u16 ver; /* NVM package version */
+ bool blank_nvm_mode; /* is NVM empty (no FW present) */
+};
+
+/* Max number of port-to-queue branches w.r.t. topology */
+#define ICE_MAX_TRAFFIC_CLASS 8
+#define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS
+
+struct ice_sched_node {
+ struct ice_sched_node *parent;
+ struct ice_sched_node *sibling; /* next sibling in the same layer */
+ struct ice_sched_node **children;
+ struct ice_aqc_txsched_elem_data info;
+ u32 agg_id; /* aggregator group id */
+ u16 vsi_id;
+ bool in_use; /* suspended or in use */
+ u8 tx_sched_layer; /* Logical Layer (1-9) */
+ u8 num_children;
+ u8 tc_num;
+ u8 owner;
+#define ICE_SCHED_NODE_OWNER_LAN 0
+};
+
+/* Access Macros for Tx Sched Elements data */
+#define ICE_TXSCHED_GET_NODE_TEID(x) le32_to_cpu((x)->info.node_teid)
+
+/* The aggregator type determines whether the identifier is for a VSI group,
+ * aggregator group, aggregator of queues, or queue group.
+ */
+enum ice_agg_type {
+ ICE_AGG_TYPE_UNKNOWN = 0,
+ ICE_AGG_TYPE_VSI,
+ ICE_AGG_TYPE_AGG, /* aggregator */
+ ICE_AGG_TYPE_Q,
+ ICE_AGG_TYPE_QG
+};
+
+#define ICE_SCHED_DFLT_RL_PROF_ID 0
+
+/* vsi type list entry to locate corresponding vsi/ag nodes */
+struct ice_sched_vsi_info {
+ struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
+ struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
+ struct list_head list_entry;
+ u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
+ u16 vsi_id;
+};
+
+/* driver defines the policy */
+struct ice_sched_tx_policy {
+ u16 max_num_vsis;
+ u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS];
+ bool rdma_ena;
+};
+
+struct ice_port_info {
+ struct ice_sched_node *root; /* Root Node per Port */
+ struct ice_hw *hw; /* back pointer to hw instance */
+ u32 last_node_teid; /* scheduler last node info */
+ u16 sw_id; /* Initial switch ID belongs to port */
+ u16 pf_vf_num;
+ u8 port_state;
+#define ICE_SCHED_PORT_STATE_INIT 0x0
+#define ICE_SCHED_PORT_STATE_READY 0x1
+ u16 dflt_tx_vsi_rule_id;
+ u16 dflt_tx_vsi_num;
+ u16 dflt_rx_vsi_rule_id;
+ u16 dflt_rx_vsi_num;
+ struct ice_fc_info fc;
+ struct ice_mac_info mac;
+ struct ice_phy_info phy;
+ struct mutex sched_lock; /* protect access to TXSched tree */
+ struct ice_sched_tx_policy sched_policy;
+ struct list_head vsi_info_list;
+ struct list_head agg_list; /* lists all aggregators */
+ u8 lport;
+#define ICE_LPORT_MASK 0xff
+ bool is_vf;
+};
+
+struct ice_switch_info {
+ /* Switch VSI lists to MAC/VLAN translation */
+ struct mutex mac_list_lock; /* protect MAC list */
+ struct list_head mac_list_head;
+ struct mutex vlan_list_lock; /* protect VLAN list */
+ struct list_head vlan_list_head;
+ struct mutex eth_m_list_lock; /* protect ethtype list */
+ struct list_head eth_m_list_head;
+ struct mutex promisc_list_lock; /* protect promisc mode list */
+ struct list_head promisc_list_head;
+ struct mutex mac_vlan_list_lock; /* protect MAC-VLAN list */
+ struct list_head mac_vlan_list_head;
+
+ struct list_head vsi_list_map_head;
+};
+
+/* Port hardware description */
+struct ice_hw {
+ u8 __iomem *hw_addr;
+ void *back;
+ struct ice_aqc_layer_props *layer_info;
+ struct ice_port_info *port_info;
+ u64 debug_mask; /* bitmap for debug mask */
+ enum ice_mac_type mac_type;
+
+ /* pci info */
+ u16 device_id;
+ u16 vendor_id;
+ u16 subsystem_device_id;
+ u16 subsystem_vendor_id;
+ u8 revision_id;
+
+ u8 pf_id; /* device profile info */
+
+ /* TX Scheduler values */
+ u16 num_tx_sched_layers;
+ u16 num_tx_sched_phys_layers;
+ u8 flattened_layers;
+ u8 max_cgds;
+ u8 sw_entry_point_layer;
+
+ bool evb_veb; /* true for VEB, false for VEPA */
+ struct ice_bus_info bus;
+ struct ice_nvm_info nvm;
+ struct ice_hw_dev_caps dev_caps; /* device capabilities */
+ struct ice_hw_func_caps func_caps; /* function capabilities */
+
+ struct ice_switch_info *switch_info; /* switch filter lists */
+
+ /* Control Queue info */
+ struct ice_ctl_q_info adminq;
+
+ u8 api_branch; /* API branch version */
+ u8 api_maj_ver; /* API major version */
+ u8 api_min_ver; /* API minor version */
+ u8 api_patch; /* API patch version */
+ u8 fw_branch; /* firmware branch version */
+ u8 fw_maj_ver; /* firmware major version */
+ u8 fw_min_ver; /* firmware minor version */
+ u8 fw_patch; /* firmware patch version */
+ u32 fw_build; /* firmware build number */
+
+ /* minimum allowed value for different speeds */
+#define ICE_ITR_GRAN_MIN_200 1
+#define ICE_ITR_GRAN_MIN_100 1
+#define ICE_ITR_GRAN_MIN_50 2
+#define ICE_ITR_GRAN_MIN_25 4
+ /* ITR granularity in 1 us */
+ u8 itr_gran_200;
+ u8 itr_gran_100;
+ u8 itr_gran_50;
+ u8 itr_gran_25;
+ bool ucast_shared; /* true if VSIs can share unicast addr */
+
+};
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct ice_eth_stats {
+ u64 rx_bytes; /* gorc */
+ u64 rx_unicast; /* uprc */
+ u64 rx_multicast; /* mprc */
+ u64 rx_broadcast; /* bprc */
+ u64 rx_discards; /* rdpc */
+ u64 rx_unknown_protocol; /* rupp */
+ u64 tx_bytes; /* gotc */
+ u64 tx_unicast; /* uptc */
+ u64 tx_multicast; /* mptc */
+ u64 tx_broadcast; /* bptc */
+ u64 tx_discards; /* tdpc */
+ u64 tx_errors; /* tepc */
+};
+
+/* Statistics collected by the MAC */
+struct ice_hw_port_stats {
+ /* eth stats collected by the port */
+ struct ice_eth_stats eth;
+ /* additional port specific stats */
+ u64 tx_dropped_link_down; /* tdold */
+ u64 crc_errors; /* crcerrs */
+ u64 illegal_bytes; /* illerrc */
+ u64 error_bytes; /* errbc */
+ u64 mac_local_faults; /* mlfc */
+ u64 mac_remote_faults; /* mrfc */
+ u64 rx_len_errors; /* rlec */
+ u64 link_xon_rx; /* lxonrxc */
+ u64 link_xoff_rx; /* lxoffrxc */
+ u64 link_xon_tx; /* lxontxc */
+ u64 link_xoff_tx; /* lxofftxc */
+ u64 rx_size_64; /* prc64 */
+ u64 rx_size_127; /* prc127 */
+ u64 rx_size_255; /* prc255 */
+ u64 rx_size_511; /* prc511 */
+ u64 rx_size_1023; /* prc1023 */
+ u64 rx_size_1522; /* prc1522 */
+ u64 rx_size_big; /* prc9522 */
+ u64 rx_undersize; /* ruc */
+ u64 rx_fragments; /* rfc */
+ u64 rx_oversize; /* roc */
+ u64 rx_jabber; /* rjc */
+ u64 tx_size_64; /* ptc64 */
+ u64 tx_size_127; /* ptc127 */
+ u64 tx_size_255; /* ptc255 */
+ u64 tx_size_511; /* ptc511 */
+ u64 tx_size_1023; /* ptc1023 */
+ u64 tx_size_1522; /* ptc1522 */
+ u64 tx_size_big; /* ptc9522 */
+};
+
+/* Checksum and Shadow RAM pointers */
+#define ICE_SR_NVM_DEV_STARTER_VER 0x18
+#define ICE_SR_NVM_EETRACK_LO 0x2D
+#define ICE_SR_NVM_EETRACK_HI 0x2E
+#define ICE_NVM_VER_LO_SHIFT 0
+#define ICE_NVM_VER_LO_MASK (0xff << ICE_NVM_VER_LO_SHIFT)
+#define ICE_NVM_VER_HI_SHIFT 12
+#define ICE_NVM_VER_HI_MASK (0xf << ICE_NVM_VER_HI_SHIFT)
+#define ICE_OEM_VER_PATCH_SHIFT 0
+#define ICE_OEM_VER_PATCH_MASK (0xff << ICE_OEM_VER_PATCH_SHIFT)
+#define ICE_OEM_VER_BUILD_SHIFT 8
+#define ICE_OEM_VER_BUILD_MASK (0xffff << ICE_OEM_VER_BUILD_SHIFT)
+#define ICE_OEM_VER_SHIFT 24
+#define ICE_OEM_VER_MASK (0xff << ICE_OEM_VER_SHIFT)
+#define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800
+#define ICE_SR_WORDS_IN_1KB 512
+
+#endif /* _ICE_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
index 5bcb2de75933..c48583e98ac1 100644
--- a/drivers/net/ethernet/intel/igb/Makefile
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel 82575 PCI-Express Ethernet Linux driver
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index c37cc8bccf47..dd9b6cac220d 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
index acf06051e111..e53ebe97d709 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.h
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 83cabff1e0ab..98534f765e0e 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h
index 6c9485ab4b57..ff835e1e853d 100644
--- a/drivers/net/ethernet/intel/igb/e1000_hw.h
+++ b/drivers/net/ethernet/intel/igb/e1000_hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
index 07d48f2e3369..6f548247e6d8 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h
index b2964a2a60b1..56f015ccb206 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.h
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index 5eff82678f0b..298afa0d9159 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h
index 90c8893c3eed..04d80c765aee 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c
index bffd58f7b2a1..ef42f1689b3b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h
index a62b08e1572e..4f0ecd28354d 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c
index 3582c5cf8843..e4596f151cd4 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
* This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h
index febc9cdb7391..dde68cd54a53 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.h
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 413025bdcb50..4ec61243da82 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2015 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h
index 9b622b33bb5a..856d2cda0643 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.h
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 568c96842f28..e8fa8c6530e0 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 1c6b8d9176a8..8dbc399b345e 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
@@ -109,6 +110,7 @@ struct vf_data_storage {
u16 pf_qos;
u16 tx_rate;
bool spoofchk_enabled;
+ bool trusted;
};
/* Number of unicast MAC filters reserved for the PF in the RAR registers */
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 606e6761758f..e77ba0d5866d 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c
index 44b6a68f1af7..bebe43b3a836 100644
--- a/drivers/net/ethernet/intel/igb/igb_hwmon.c
+++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
@@ -147,7 +148,7 @@ static int igb_add_hwmon_attr(struct igb_adapter *adapter,
&adapter->hw.mac.thermal_sensor_data.sensor[offset];
igb_attr->hw = &adapter->hw;
igb_attr->dev_attr.store = NULL;
- igb_attr->dev_attr.attr.mode = S_IRUGO;
+ igb_attr->dev_attr.attr.mode = 0444;
igb_attr->dev_attr.attr.name = igb_attr->name;
sysfs_attr_init(&igb_attr->dev_attr.attr);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b88fae785369..c1c0bc30a16d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Gigabit Ethernet Linux driver
* Copyright(c) 2007-2014 Intel Corporation.
*
@@ -190,6 +191,8 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev,
static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
bool setting);
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf,
+ bool setting);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
@@ -774,8 +777,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg)
if (!(~value) && (!reg || !(~readl(hw_addr)))) {
struct net_device *netdev = igb->netdev;
hw->hw_addr = NULL;
- netif_device_detach(netdev);
- netdev_err(netdev, "PCIe link lost, device now detached\n");
+ netdev_err(netdev, "PCIe link lost\n");
}
return value;
@@ -2527,6 +2529,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
.ndo_set_vf_rate = igb_ndo_set_vf_bw,
.ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk,
+ .ndo_set_vf_trust = igb_ndo_set_vf_trust,
.ndo_get_vf_config = igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = igb_netpoll,
@@ -5747,7 +5750,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
- if (adapter->tstamp_config.tx_type & HWTSTAMP_TX_ON &&
+ if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
!test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
&adapter->state)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
@@ -6383,6 +6386,9 @@ static int igb_vf_configure(struct igb_adapter *adapter, int vf)
/* By default spoof check is enabled for all VFs */
adapter->vf_data[vf].spoofchk_enabled = true;
+ /* By default VFs are not trusted */
+ adapter->vf_data[vf].trusted = false;
+
return 0;
}
@@ -6940,13 +6946,13 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf,
}
break;
case E1000_VF_MAC_FILTER_ADD:
- if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) {
+ if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+ !vf_data->trusted) {
dev_warn(&pdev->dev,
"VF %d requested MAC filter but is administratively denied\n",
vf);
return -EINVAL;
}
-
if (!is_valid_ether_addr(addr)) {
dev_warn(&pdev->dev,
"VF %d attempted to set invalid MAC filter\n",
@@ -6998,7 +7004,8 @@ static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
int ret = 0;
if (!info) {
- if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) {
+ if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+ !vf_data->trusted) {
dev_warn(&pdev->dev,
"VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
vf);
@@ -8934,6 +8941,22 @@ static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
return 0;
}
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ if (vf >= adapter->vfs_allocated_count)
+ return -EINVAL;
+ if (adapter->vf_data[vf].trusted == setting)
+ return 0;
+
+ adapter->vf_data[vf].trusted = setting;
+
+ dev_info(&adapter->pdev->dev, "VF %u is %strusted\n",
+ vf, setting ? "" : "not ");
+ return 0;
+}
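
For reference, the new .ndo_set_vf_trust hook is driven from userspace through the standard iproute2 knob, e.g. "ip link set dev <pf> vf 0 trust on" (device name illustrative). With trust off, the PF keeps rejecting VF-requested MAC filters and MAC overrides once an administrative MAC has been set, as the flag checks above show.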
+
static int igb_ndo_get_vf_config(struct net_device *netdev,
int vf, struct ifla_vf_info *ivi)
{
@@ -8947,6 +8970,7 @@ static int igb_ndo_get_vf_config(struct net_device *netdev,
ivi->vlan = adapter->vf_data[vf].pf_vlan;
ivi->qos = adapter->vf_data[vf].pf_qos;
ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
+ ivi->trusted = adapter->vf_data[vf].trusted;
return 0;
}
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 0746b19ec6d3..7454b9895a65 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/* PTP Hardware Clock (PHC) driver for the Intel 82576 and 82580
*
* Copyright (C) 2011 Richard Cochran <[email protected]>
diff --git a/drivers/net/ethernet/intel/igbvf/Makefile b/drivers/net/ethernet/intel/igbvf/Makefile
index 044b0ad5fcb9..efe29dae384a 100644
--- a/drivers/net/ethernet/intel/igbvf/Makefile
+++ b/drivers/net/ethernet/intel/igbvf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/defines.h b/drivers/net/ethernet/intel/igbvf/defines.h
index f1789d192e24..04bcfec0641b 100644
--- a/drivers/net/ethernet/intel/igbvf/defines.h
+++ b/drivers/net/ethernet/intel/igbvf/defines.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index a127688e83e6..ca39e3cccaeb 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h
index bf69f01f8467..f5bf248e22eb 100644
--- a/drivers/net/ethernet/intel/igbvf/igbvf.h
+++ b/drivers/net/ethernet/intel/igbvf/igbvf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c
index c9a441632e9f..9195884096f8 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.c
+++ b/drivers/net/ethernet/intel/igbvf/mbx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.h b/drivers/net/ethernet/intel/igbvf/mbx.h
index 30d58c4a444e..479b062fe9ee 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.h
+++ b/drivers/net/ethernet/intel/igbvf/mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 4214c1519a87..e2b7502f1953 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/regs.h b/drivers/net/ethernet/intel/igbvf/regs.h
index 86a7c120b574..614e52409f11 100644
--- a/drivers/net/ethernet/intel/igbvf/regs.h
+++ b/drivers/net/ethernet/intel/igbvf/regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c
index 9577ccf4b26a..bfe8d8297b2e 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.c
+++ b/drivers/net/ethernet/intel/igbvf/vf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h
index d213eefb6169..193b50026246 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.h
+++ b/drivers/net/ethernet/intel/igbvf/vf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/Makefile b/drivers/net/ethernet/intel/ixgb/Makefile
index 0b20c5e62ffe..1b42dd554dd2 100644
--- a/drivers/net/ethernet/intel/ixgb/Makefile
+++ b/drivers/net/ethernet/intel/ixgb/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb.h b/drivers/net/ethernet/intel/ixgb/ixgb.h
index 1180cd59b570..92022841755f 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
index 5680f64314b8..475297a810fe 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
index 0bd5d72e1af5..19f36d87ef61 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
index 32c1b302d791..24e849902d60 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
index 8fc905192231..b1710379192e 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 8319465eb38d..4cd96c88cb5d 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index c1e3a0039ea5..4f08c712e58e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index a0ebd9ecf243..cb0fe5fedb33 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 4dfc81dbee4b..66a74f4651e8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 61188f343955..633be93f3dbb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 4d4c02366cb3..2b311382167a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
@@ -153,6 +154,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw,
ixgbe_link_speed speed);
+#define IXGBE_FAILED_READ_RETRIES 5
#define IXGBE_FAILED_READ_REG 0xffffffffU
#define IXGBE_FAILED_READ_CFG_DWORD 0xffffffffU
#define IXGBE_FAILED_READ_CFG_WORD 0xffffU
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
index fc0a2dd52499..73b6362d4327 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
index f94c7e82a30b..085130626330 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
index 3164f5453b8f..7edce607f901 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
index 90c370230e20..fa030f0abc18 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 221f15803480..c0e6ab42e0e1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -97,6 +97,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
{"tx_heartbeat_errors", IXGBE_NETDEV_STAT(tx_heartbeat_errors)},
{"tx_timeout_count", IXGBE_STAT(tx_timeout_count)},
{"tx_restart_queue", IXGBE_STAT(restart_queue)},
+ {"rx_length_errors", IXGBE_STAT(stats.rlec)},
{"rx_long_length_errors", IXGBE_STAT(stats.roc)},
{"rx_short_length_errors", IXGBE_STAT(stats.ruc)},
{"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)},
@@ -3059,6 +3060,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
for (i = 0; i < reta_entries; i++)
adapter->rss_indir_tbl[i] = indir[i];
+
+ ixgbe_store_reta(adapter);
}
/* Fill out the rss hash key */
@@ -3067,8 +3070,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
ixgbe_store_key(adapter);
}
- ixgbe_store_reta(adapter);
-
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
index 38385876effb..cf1919901514 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 93eacddb6704..68af127987bc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -423,15 +423,21 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
const char aes_gcm_name[] = "rfc4106(gcm(aes))";
int key_len;
- if (xs->aead) {
- key_data = &xs->aead->alg_key[0];
- key_len = xs->aead->alg_key_len;
- alg_name = xs->aead->alg_name;
- } else {
+ if (!xs->aead) {
netdev_err(dev, "Unsupported IPsec algorithm\n");
return -EINVAL;
}
+ if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+ netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+ IXGBE_IPSEC_AUTH_BITS);
+ return -EINVAL;
+ }
+
+ key_data = &xs->aead->alg_key[0];
+ key_len = xs->aead->alg_key_len;
+ alg_name = xs->aead->alg_name;
+
if (strcmp(alg_name, aes_gcm_name)) {
netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
aes_gcm_name);
@@ -718,23 +724,10 @@ static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
return true;
}
-/**
- * ixgbe_ipsec_free - called by xfrm garbage collections
- * @xs: pointer to transformer state struct
- *
- * We don't have any garbage to collect, so we shouldn't bother
- * implementing this function, but the XFRM code doesn't check for
- * existence before calling the API callback.
- **/
-static void ixgbe_ipsec_free(struct xfrm_state *xs)
-{
-}
-
static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
.xdo_dev_state_add = ixgbe_ipsec_add_sa,
.xdo_dev_state_delete = ixgbe_ipsec_del_sa,
.xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
- .xdo_dev_state_free = ixgbe_ipsec_free,
};
/**
@@ -781,13 +774,40 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
- itd->flags = 0;
if (xs->id.proto == IPPROTO_ESP) {
+
itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
IXGBE_ADVTXD_TUCMD_L4T_TCP;
if (first->protocol == htons(ETH_P_IP))
itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
- itd->trailer_len = xs->props.trailer_len;
+
+ /* The actual trailer length is authlen (16 bytes) plus
+ * 2 bytes for the proto and the padlen values, plus
+ * padlen bytes of padding. This ends up not the same
+ * as the static value found in xs->props.trailer_len (21).
+ *
+ * ... but if we're doing GSO, don't bother as the stack
+ * doesn't add a trailer for those.
+ */
+ if (!skb_is_gso(first->skb)) {
+ /* The "correct" way to get the auth length would be
+ * to use
+ * authlen = crypto_aead_authsize(xs->data);
+ * but since we know we only have one size to worry
+ * about, we can let the compiler use the constant
+ * and save us a few CPU cycles.
+ */
+ const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+ struct sk_buff *skb = first->skb;
+ u8 padlen;
+ int ret;
+
+ ret = skb_copy_bits(skb, skb->len - (authlen + 2),
+ &padlen, 1);
+ if (unlikely(ret))
+ return 0;
+ itd->trailer_len = authlen + 2 + padlen;
+ }
}
if (tsa->encrypt)
itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
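
Aside (not part of the patch; helper name hypothetical): the trailer arithmetic the new comment describes, assuming RFC 4303 padding of the payload plus the 2 trailer bytes to a 4-byte boundary:

#include <stdio.h>

static int esp_trailer_len(int payload_len, int authlen)
{
	/* pad payload + pad-length/next-header bytes to a 4-byte boundary */
	int padlen = (4 - ((payload_len + 2) & 3)) & 3;

	return authlen + 2 + padlen;	/* ICV + 2 trailer bytes + padding */
}

int main(void)
{
	/* 128-bit ICV, matching IXGBE_IPSEC_AUTH_BITS / 8 above */
	printf("%d\n", esp_trailer_len(100, 16));	/* 16 + 2 + 2 = 20 */
	return 0;
}

Note that the driver does not recompute padlen this way; it reads it out of the already-built trailer with skb_copy_bits(), which is why the hunk above copies the byte at skb->len - (authlen + 2).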
@@ -909,8 +929,13 @@ void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
ixgbe_ipsec_clear_hw_tables(adapter);
adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
- adapter->netdev->features |= NETIF_F_HW_ESP;
- adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+#define IXGBE_ESP_FEATURES (NETIF_F_HW_ESP | \
+ NETIF_F_HW_ESP_TX_CSUM | \
+ NETIF_F_GSO_ESP)
+
+ adapter->netdev->features |= IXGBE_ESP_FEATURES;
+ adapter->netdev->hw_enc_features |= IXGBE_ESP_FEATURES;
return;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index da3ce7849e85..4f099f516645 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
@@ -32,6 +33,7 @@
#define IXGBE_IPSEC_MAX_RX_IP_COUNT 128
#define IXGBE_IPSEC_BASE_RX_INDEX 0
#define IXGBE_IPSEC_BASE_TX_INDEX IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS 128
#define IXGBE_RXTXIDX_IPS_EN 0x00000001
#define IXGBE_RXIDX_TBL_SHIFT 1
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 4242f0213e46..ed4cbe94c355 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
return false;
/* start at VMDq register offset for SR-IOV enabled setups */
- pool = 0;
reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
/* If we are greater than indices move to next pool */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0da5aa2c8aba..afadba99f7b8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -353,23 +353,32 @@ static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
ixgbe_service_event_schedule(adapter);
}
-static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
+static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
{
+ u8 __iomem *reg_addr;
u32 value;
+ int i;
- /* The following check not only optimizes a bit by not
- * performing a read on the status register when the
- * register just read was a status register read that
- * returned IXGBE_FAILED_READ_REG. It also blocks any
- * potential recursion.
+ reg_addr = READ_ONCE(hw->hw_addr);
+ if (ixgbe_removed(reg_addr))
+ return IXGBE_FAILED_READ_REG;
+
+ /* Register read of 0xFFFFFFFF can indicate the adapter has been removed,
+ * so perform several status register reads to determine if the adapter
+ * has been removed.
*/
- if (reg == IXGBE_STATUS) {
- ixgbe_remove_adapter(hw);
- return;
+ for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) {
+ value = readl(reg_addr + IXGBE_STATUS);
+ if (value != IXGBE_FAILED_READ_REG)
+ break;
+ mdelay(3);
}
- value = ixgbe_read_reg(hw, IXGBE_STATUS);
+
if (value == IXGBE_FAILED_READ_REG)
ixgbe_remove_adapter(hw);
+ else
+ value = readl(reg_addr + reg);
+ return value;
}
/**
@@ -415,7 +424,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
writes_completed:
value = readl(reg_addr + reg);
if (unlikely(value == IXGBE_FAILED_READ_REG))
- ixgbe_check_remove(hw, reg);
+ value = ixgbe_check_remove(hw, reg);
return value;
}
@@ -1620,7 +1629,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
bi->dma = dma;
bi->page = page;
bi->page_offset = ixgbe_rx_offset(rx_ring);
- bi->pagecnt_bias = 1;
+ page_ref_add(page, USHRT_MAX - 1);
+ bi->pagecnt_bias = USHRT_MAX;
rx_ring->rx_stats.alloc_rx_page++;
return true;
@@ -1888,6 +1898,14 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
ixgbe_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBE_RX_DMA_ATTR);
+ } else if (ring_uses_build_skb(rx_ring)) {
+ unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
+
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ IXGBE_CB(skb)->dma,
+ offset,
+ skb_headlen(skb),
+ DMA_FROM_DEVICE);
} else {
struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
@@ -2022,8 +2040,8 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(!pagecnt_bias)) {
- page_ref_add(page, USHRT_MAX);
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX - 1);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
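Editor's sketch: the two pagecnt_bias hunks above change the reuse accounting so the page refcount is front-loaded with USHRT_MAX - 1 at allocation time and recharged when the bias drains to 1; the hot path then only decrements a local counter instead of touching the atomic refcount per packet, and a page is reusable only while the stack holds at most one reference. A self-contained model with a plain integer standing in for the real page refcount:

#include <stdio.h>
#include <stdbool.h>
#include <limits.h>

struct rx_buffer {
	unsigned int page_refs;      /* stands in for page_ref_count(page) */
	unsigned short pagecnt_bias; /* references still owned by the driver */
};

static void alloc_buffer(struct rx_buffer *b)
{
	b->page_refs = 1;
	b->page_refs += USHRT_MAX - 1;   /* page_ref_add(page, USHRT_MAX - 1) */
	b->pagecnt_bias = USHRT_MAX;
}

static bool can_reuse(struct rx_buffer *b)
{
	/* reusable only if the stack holds at most one outstanding reference */
	if (b->page_refs - b->pagecnt_bias > 1)
		return false;

	if (b->pagecnt_bias == 1) {      /* bias nearly drained: recharge */
		b->page_refs += USHRT_MAX - 1;
		b->pagecnt_bias = USHRT_MAX;
	}
	return true;
}

int main(void)
{
	struct rx_buffer b;

	alloc_buffer(&b);
	b.pagecnt_bias--;                /* driver hands one ref to the stack */
	printf("reusable: %d\n", can_reuse(&b));
	return 0;
}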
@@ -7703,7 +7721,8 @@ static void ixgbe_service_task(struct work_struct *work)
if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
ixgbe_ptp_overflow_check(adapter);
- ixgbe_ptp_rx_hang(adapter);
+ if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+ ixgbe_ptp_rx_hang(adapter);
ixgbe_ptp_tx_hang(adapter);
}
@@ -7712,7 +7731,8 @@ static void ixgbe_service_task(struct work_struct *work)
static int ixgbe_tso(struct ixgbe_ring *tx_ring,
struct ixgbe_tx_buffer *first,
- u8 *hdr_len)
+ u8 *hdr_len,
+ struct ixgbe_ipsec_tx_data *itd)
{
u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
struct sk_buff *skb = first->skb;
@@ -7726,6 +7746,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
unsigned char *hdr;
} l4;
u32 paylen, l4_offset;
+ u32 fceof_saidx = 0;
int err;
if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -7751,13 +7772,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
if (ip.v4->version == 4) {
unsigned char *csum_start = skb_checksum_start(skb);
unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+ int len = csum_start - trans_start;
/* IP header will have to cancel out any data that
- * is not a part of the outer IP header
+ * is not a part of the outer IP header, so set to
+ * a reverse csum if needed, else init check to 0.
*/
- ip.v4->check = csum_fold(csum_partial(trans_start,
- csum_start - trans_start,
- 0));
+ ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
+ csum_fold(csum_partial(trans_start,
+ len, 0)) : 0;
type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
ip.v4->tot_len = 0;
@@ -7788,12 +7811,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT;
mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
+ fceof_saidx |= itd->sa_idx;
+ type_tucmd |= itd->flags | itd->trailer_len;
+
/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
vlan_macip_lens = l4.hdr - ip.hdr;
vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
- ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd,
+ ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
mss_l4len_idx);
return 1;
@@ -7855,10 +7881,8 @@ no_csum:
vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
- if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
- fceof_saidx |= itd->sa_idx;
- type_tucmd |= itd->flags | itd->trailer_len;
- }
+ fceof_saidx |= itd->sa_idx;
+ type_tucmd |= itd->flags | itd->trailer_len;
ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
}
@@ -8486,7 +8510,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
goto out_drop;
#endif
- tso = ixgbe_tso(tx_ring, first, &hdr_len);
+ tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
if (tso < 0)
goto out_drop;
else if (!tso)
@@ -9895,15 +9919,15 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
/* We can only support IPV4 TSO in tunnels if we can mangle the
* inner IP ID field, so strip TSO if MANGLEID is not supported.
+ * IPsec offload sets skb->encapsulation but can still handle
+ * the TSO, so it's the exception.
*/
- if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
- features &= ~NETIF_F_TSO;
-
-#ifdef CONFIG_XFRM_OFFLOAD
- /* IPsec offload doesn't get along well with others *yet* */
- if (skb->sp)
- features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM);
+ if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
+#ifdef CONFIG_XFRM
+ if (!skb->sp)
#endif
+ features &= ~NETIF_F_TSO;
+ }
return features;
}
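Editor's sketch: the rewritten check above keeps TSO available for IPsec-offloaded frames even though they set skb->encapsulation. A hedged model of the narrowing logic; the flag names below are illustrative, not the kernel's netdev feature bits:

#include <stdbool.h>
#include <stdint.h>

#define F_TSO          (1u << 0)
#define F_TSO_MANGLEID (1u << 1)

static uint32_t narrow_features(uint32_t feat, bool encap, bool ipsec)
{
	/* strip TSO for encapsulated frames we cannot mangle, unless the
	 * frame rides an IPsec offload path the hardware can still segment
	 */
	if (encap && !(feat & F_TSO_MANGLEID) && !ipsec)
		feat &= ~F_TSO;
	return feat;
}

int main(void)
{
	/* encapsulated + IPsec: TSO survives; without IPsec it is stripped */
	return (narrow_features(F_TSO, true, true) & F_TSO) &&
	       !(narrow_features(F_TSO, true, false) & F_TSO) ? 0 : 1;
}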
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index 811cb4f64a5b..c4628b663590 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
index 538a1c5475b6..72446644f9fa 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index b0cac961df3b..d6a7e77348c5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 27a70a52f3c9..008aa073a679 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -831,7 +831,11 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg);
/* force drop enable for all VF Rx queues */
- ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE);
+ reg = IXGBE_QDE_ENABLE;
+ if (adapter->vfinfo[vf].pf_vlan)
+ reg |= IXGBE_QDE_HIDE_VLAN;
+
+ ixgbe_write_qde(adapter, vf, reg);
/* enable receive for vf */
reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset));
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index cf67b9b18ed7..e30d1f07e891 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
index ef6df3d6437e..24766e125592 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
@@ -146,7 +146,7 @@ static int ixgbe_add_hwmon_attr(struct ixgbe_adapter *adapter,
&adapter->hw.mac.thermal_sensor_data.sensor[offset];
ixgbe_attr->hw = &adapter->hw;
ixgbe_attr->dev_attr.store = NULL;
- ixgbe_attr->dev_attr.attr.mode = S_IRUGO;
+ ixgbe_attr->dev_attr.attr.mode = 0444;
ixgbe_attr->dev_attr.attr.name = ixgbe_attr->name;
sysfs_attr_init(&ixgbe_attr->dev_attr.attr);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index ca45359686d3..2daa81e6e9b2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
index e21cd48491d3..182d640e9f7a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
*
* Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index f470d0204771..3123267dfba9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1847,9 +1847,9 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
(IXGBE_CS4227_EDC_MODE_SR << 1));
if (setup_linear)
- reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
+ reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
else
- reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
+ reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
ret_val = hw->phy.ops.write_reg(hw, reg_slice,
IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext);
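Editor's sketch: the two-character fix above turns an overwrite into a read-modify-write so bits programmed into reg_phy_ext earlier in the function survive. A trivial demonstration with made-up bit values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t reg_phy_ext = 1u << 12;   /* hypothetical bit set earlier */

	reg_phy_ext |= (0x2u << 1) | 1;    /* OR in EDC mode + enable bit */
	assert(reg_phy_ext & (1u << 12));  /* earlier bit survives */
	assert(reg_phy_ext & 1);
	return 0;
}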
diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
index 4ce4c97ef5ad..bb47814cfa90 100644
--- a/drivers/net/ethernet/intel/ixgbevf/Makefile
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
################################################################################
#
# Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 8617cae2f801..71c828842b11 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 4400e49090b4..8e7d6c6f5c92 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
+ Copyright(c) 1999 - 2018 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -82,6 +82,7 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = {
#define IXGBEVF_QUEUE_STATS_LEN ( \
(((struct ixgbevf_adapter *)netdev_priv(netdev))->num_tx_queues + \
+ ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_xdp_queues + \
((struct ixgbevf_adapter *)netdev_priv(netdev))->num_rx_queues) * \
(sizeof(struct ixgbevf_stats) / sizeof(u64)))
#define IXGBEVF_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbevf_gstrings_stats)
@@ -94,6 +95,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
#define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0)
+ "legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
static int ixgbevf_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
@@ -241,6 +249,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev,
sizeof(drvinfo->version));
strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
sizeof(drvinfo->bus_info));
+
+ drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
}
static void ixgbevf_get_ringparam(struct net_device *netdev,
@@ -260,7 +270,7 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbevf_ring *tx_ring = NULL, *rx_ring = NULL;
u32 new_rx_count, new_tx_count;
- int i, err = 0;
+ int i, j, err = 0;
if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
return -EINVAL;
@@ -284,15 +294,19 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
if (!netif_running(adapter->netdev)) {
for (i = 0; i < adapter->num_tx_queues; i++)
adapter->tx_ring[i]->count = new_tx_count;
+ for (i = 0; i < adapter->num_xdp_queues; i++)
+ adapter->xdp_ring[i]->count = new_tx_count;
for (i = 0; i < adapter->num_rx_queues; i++)
adapter->rx_ring[i]->count = new_rx_count;
adapter->tx_ring_count = new_tx_count;
+ adapter->xdp_ring_count = new_tx_count;
adapter->rx_ring_count = new_rx_count;
goto clear_reset;
}
if (new_tx_count != adapter->tx_ring_count) {
- tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring));
+ tx_ring = vmalloc((adapter->num_tx_queues +
+ adapter->num_xdp_queues) * sizeof(*tx_ring));
if (!tx_ring) {
err = -ENOMEM;
goto clear_reset;
@@ -315,6 +329,24 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
goto clear_reset;
}
}
+
+ for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+ /* clone ring and setup updated count */
+ tx_ring[i] = *adapter->xdp_ring[j];
+ tx_ring[i].count = new_tx_count;
+ err = ixgbevf_setup_tx_resources(&tx_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ixgbevf_free_tx_resources(&tx_ring[i]);
+ }
+
+ vfree(tx_ring);
+ tx_ring = NULL;
+
+ goto clear_reset;
+ }
+ }
}
if (new_rx_count != adapter->rx_ring_count) {
@@ -327,8 +359,13 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
for (i = 0; i < adapter->num_rx_queues; i++) {
/* clone ring and setup updated count */
rx_ring[i] = *adapter->rx_ring[i];
+
+ /* Clear copied XDP RX-queue info */
+ memset(&rx_ring[i].xdp_rxq, 0,
+ sizeof(rx_ring[i].xdp_rxq));
+
rx_ring[i].count = new_rx_count;
- err = ixgbevf_setup_rx_resources(&rx_ring[i]);
+ err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]);
if (err) {
while (i) {
i--;
@@ -354,6 +391,12 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
}
adapter->tx_ring_count = new_tx_count;
+ for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+ ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
+ *adapter->xdp_ring[j] = tx_ring[i];
+ }
+ adapter->xdp_ring_count = new_tx_count;
+
vfree(tx_ring);
tx_ring = NULL;
}
@@ -376,7 +419,8 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
clear_reset:
/* free Tx resources if Rx error is encountered */
if (tx_ring) {
- for (i = 0; i < adapter->num_tx_queues; i++)
+ for (i = 0;
+ i < adapter->num_tx_queues + adapter->num_xdp_queues; i++)
ixgbevf_free_tx_resources(&tx_ring[i]);
vfree(tx_ring);
}
@@ -392,6 +436,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
return IXGBEVF_TEST_LEN;
case ETH_SS_STATS:
return IXGBEVF_STATS_LEN;
+ case ETH_SS_PRIV_FLAGS:
+ return IXGBEVF_PRIV_FLAGS_STR_LEN;
default:
return -EINVAL;
}
@@ -446,6 +492,23 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev,
i += 2;
}
+ /* populate XDP queue data */
+ for (j = 0; j < adapter->num_xdp_queues; j++) {
+ ring = adapter->xdp_ring[j];
+ if (!ring) {
+ data[i++] = 0;
+ data[i++] = 0;
+ continue;
+ }
+
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ data[i] = ring->stats.packets;
+ data[i + 1] = ring->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+ i += 2;
+ }
+
/* populate Rx queue data */
for (j = 0; j < adapter->num_rx_queues; j++) {
ring = adapter->rx_ring[j];
@@ -489,6 +552,12 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
sprintf(p, "tx_queue_%u_bytes", i);
p += ETH_GSTRING_LEN;
}
+ for (i = 0; i < adapter->num_xdp_queues; i++) {
+ sprintf(p, "xdp_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "xdp_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
for (i = 0; i < adapter->num_rx_queues; i++) {
sprintf(p, "rx_queue_%u_packets", i);
p += ETH_GSTRING_LEN;
@@ -496,6 +565,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
p += ETH_GSTRING_LEN;
}
break;
+ case ETH_SS_PRIV_FLAGS:
+ memcpy(data, ixgbevf_priv_flags_strings,
+ IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+ break;
}
}
@@ -888,6 +961,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
return err;
}
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ u32 priv_flags = 0;
+
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+ return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ unsigned int flags = adapter->flags;
+
+ flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+ if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+ flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+ if (flags != adapter->flags) {
+ adapter->flags = flags;
+
+ /* reset interface to repopulate queues */
+ if (netif_running(netdev))
+ ixgbevf_reinit_locked(adapter);
+ }
+
+ return 0;
+}
+
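Editor's sketch: the pair of handlers above wires "legacy-rx" into ethtool's private-flags interface; toggling it via `ethtool --set-priv-flags <dev> legacy-rx on` reinitializes the interface only when the flag word actually changes. A user-space model of the translation between the ethtool bitmap and the adapter's internal flags (the two constants mirror the patch, the rest is scaffolding):

#include <stdbool.h>
#include <stdio.h>

#define PRIV_FLAGS_LEGACY_RX (1u << 0)  /* matches "legacy-rx", index 0 */
#define FLAGS_LEGACY_RX      (1u << 1)  /* internal adapter flag */

struct adapter { unsigned int flags; };

static unsigned int get_priv_flags(const struct adapter *a)
{
	return (a->flags & FLAGS_LEGACY_RX) ? PRIV_FLAGS_LEGACY_RX : 0;
}

static bool set_priv_flags(struct adapter *a, unsigned int priv)
{
	unsigned int flags = a->flags & ~FLAGS_LEGACY_RX;

	if (priv & PRIV_FLAGS_LEGACY_RX)
		flags |= FLAGS_LEGACY_RX;

	if (flags == a->flags)
		return false;            /* nothing changed: no reinit */
	a->flags = flags;
	return true;                     /* caller would reinit the device */
}

int main(void)
{
	struct adapter a = { 0 };

	printf("reinit: %d, flags: %#x\n",
	       set_priv_flags(&a, PRIV_FLAGS_LEGACY_RX), get_priv_flags(&a));
	return 0;
}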
static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_drvinfo = ixgbevf_get_drvinfo,
.get_regs_len = ixgbevf_get_regs_len,
@@ -909,6 +1013,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_rxfh_key_size = ixgbevf_get_rxfh_key_size,
.get_rxfh = ixgbevf_get_rxfh,
.get_link_ksettings = ixgbevf_get_link_ksettings,
+ .get_priv_flags = ixgbevf_get_priv_flags,
+ .set_priv_flags = ixgbevf_set_priv_flags,
};
void ixgbevf_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index f6952425c87d..447ce1d5e0e3 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
+ Copyright(c) 1999 - 2018 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -34,6 +35,7 @@
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/u64_stats_sync.h>
+#include <net/xdp.h>
#include "vf.h"
@@ -50,7 +52,11 @@
struct ixgbevf_tx_buffer {
union ixgbe_adv_tx_desc *next_to_watch;
unsigned long time_stamp;
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ /* XDP uses address ptr on irq_clean */
+ void *data;
+ };
unsigned int bytecount;
unsigned short gso_segs;
__be16 protocol;
@@ -89,20 +95,25 @@ struct ixgbevf_rx_queue_stats {
};
enum ixgbevf_ring_state_t {
+ __IXGBEVF_RX_3K_BUFFER,
+ __IXGBEVF_RX_BUILD_SKB_ENABLED,
__IXGBEVF_TX_DETECT_HANG,
__IXGBEVF_HANG_CHECK_ARMED,
+ __IXGBEVF_TX_XDP_RING,
};
-#define check_for_tx_hang(ring) \
- test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
- set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
- clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define ring_is_xdp(ring) \
+ test_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define set_ring_xdp(ring) \
+ set_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define clear_ring_xdp(ring) \
+ clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
struct ixgbevf_ring {
struct ixgbevf_ring *next;
+ struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */
struct net_device *netdev;
+ struct bpf_prog *xdp_prog;
struct device *dev;
void *desc; /* descriptor ring memory */
dma_addr_t dma; /* phys. address of descriptor ring */
@@ -123,7 +134,7 @@ struct ixgbevf_ring {
struct ixgbevf_tx_queue_stats tx_stats;
struct ixgbevf_rx_queue_stats rx_stats;
};
-
+ struct xdp_rxq_info xdp_rxq;
u64 hw_csum_rx_error;
u8 __iomem *tail;
struct sk_buff *skb;
@@ -133,13 +144,14 @@ struct ixgbevf_ring {
*/
u16 reg_idx;
int queue_index; /* needed for multiqueue queue management */
-};
+} ____cacheline_internodealigned_in_smp;
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define IXGBEVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */
#define MAX_RX_QUEUES IXGBE_VF_MAX_RX_QUEUES
#define MAX_TX_QUEUES IXGBE_VF_MAX_TX_QUEUES
+#define MAX_XDP_QUEUES IXGBE_VF_MAX_TX_QUEUES
#define IXGBEVF_MAX_RSS_QUEUES 2
#define IXGBEVF_82599_RETA_SIZE 128 /* 128 entries */
#define IXGBEVF_X550_VFRETA_SIZE 64 /* 64 entries */
@@ -156,12 +168,20 @@ struct ixgbevf_ring {
/* Supported Rx Buffer Sizes */
#define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */
#define IXGBEVF_RXBUFFER_2048 2048
+#define IXGBEVF_RXBUFFER_3072 3072
#define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256
-#define IXGBEVF_RX_BUFSZ IXGBEVF_RXBUFFER_2048
#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+ (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048
+#endif
+
#define IXGBE_TX_FLAGS_CSUM BIT(0)
#define IXGBE_TX_FLAGS_VLAN BIT(1)
#define IXGBE_TX_FLAGS_TSO BIT(2)
@@ -170,6 +190,50 @@ struct ixgbevf_ring {
#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000
#define IXGBE_TX_FLAGS_VLAN_SHIFT 16
+#define ring_uses_large_buffer(ring) \
+ test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+ set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+ clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+#define ring_uses_build_skb(ring) \
+ test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+ set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+ clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return IXGBEVF_RXBUFFER_3072;
+
+ if (ring_uses_build_skb(ring))
+ return IXGBEVF_MAX_FRAME_BUILD_SKB;
+#endif
+ return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return 1;
+#endif
+ return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
+
+#define check_for_tx_hang(ring) \
+ test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+ set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+ clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
struct ixgbevf_ring_container {
struct ixgbevf_ring *ring; /* pointer to linked list of rings */
unsigned int total_bytes; /* total bytes processed this int */
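Editor's sketch: the new helpers above pick the Rx buffer geometry. On systems with pages smaller than 8 KiB, "large buffer" rings carve 3 KiB slices from order-1 (8 KiB) pages, while the default stays at 2 KiB slices of a single page. A quick sketch of the slice arithmetic, assuming 4 KiB base pages:

#include <stdio.h>

#define PAGE_SZ    4096u
#define RXBUF_2048 2048u
#define RXBUF_3072 3072u

int main(void)
{
	unsigned int pg_size = PAGE_SZ << 1;   /* order-1 backing page */

	printf("large buffers: %u slices of %u bytes per %u-byte page\n",
	       pg_size / RXBUF_3072, RXBUF_3072, pg_size);
	printf("default:       %u slices of %u bytes per %u-byte page\n",
	       PAGE_SZ / RXBUF_2048, RXBUF_2048, PAGE_SZ);
	return 0;
}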
@@ -194,7 +258,11 @@ struct ixgbevf_q_vector {
u16 itr; /* Interrupt throttle rate written to EITR */
struct napi_struct napi;
struct ixgbevf_ring_container rx, tx;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
char name[IFNAMSIZ + 9];
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int state;
#define IXGBEVF_QV_STATE_IDLE 0
@@ -284,6 +352,10 @@ struct ixgbevf_adapter {
u32 eims_enable_mask;
u32 eims_other;
+ /* XDP */
+ int num_xdp_queues;
+ struct ixgbevf_ring *xdp_ring[MAX_XDP_QUEUES];
+
/* TX */
int num_tx_queues;
struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
@@ -304,6 +376,7 @@ struct ixgbevf_adapter {
/* OS defined structs */
struct net_device *netdev;
+ struct bpf_prog *xdp_prog;
struct pci_dev *pdev;
/* structs defined in ixgbe_vf.h */
@@ -317,6 +390,7 @@ struct ixgbevf_adapter {
unsigned long state;
u64 tx_busy;
unsigned int tx_ring_count;
+ unsigned int xdp_ring_count;
unsigned int rx_ring_count;
u8 __iomem *io_addr; /* Mainly for iounmap use */
@@ -331,6 +405,8 @@ struct ixgbevf_adapter {
u32 *rss_key;
u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+ u32 flags;
+#define IXGBEVF_FLAGS_LEGACY_RX BIT(1)
};
enum ixbgevf_state_t {
@@ -388,7 +464,8 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter);
void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter);
void ixgbevf_reset(struct ixgbevf_adapter *adapter);
void ixgbevf_set_ethtool_ops(struct net_device *netdev);
-int ixgbevf_setup_rx_resources(struct ixgbevf_ring *);
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring);
int ixgbevf_setup_tx_resources(struct ixgbevf_ring *);
void ixgbevf_free_rx_resources(struct ixgbevf_ring *);
void ixgbevf_free_tx_resources(struct ixgbevf_ring *);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 9b3d43d28106..3d9033f26eff 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
+ Copyright(c) 1999 - 2018 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -50,6 +50,9 @@
#include <linux/if_vlan.h>
#include <linux/prefetch.h>
#include <net/mpls.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/atomic.h>
#include "ixgbevf.h"
@@ -130,6 +133,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *old_buff);
static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
{
@@ -318,7 +324,10 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
total_packets += tx_buffer->gso_segs;
/* free the skb */
- napi_consume_skb(tx_buffer->skb, napi_budget);
+ if (ring_is_xdp(tx_ring))
+ page_frag_free(tx_buffer->data);
+ else
+ napi_consume_skb(tx_buffer->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -382,7 +391,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
eop_desc = tx_ring->tx_buffer_info[i].next_to_watch;
- pr_err("Detected Tx Unit Hang\n"
+ pr_err("Detected Tx Unit Hang%s\n"
" Tx Queue <%d>\n"
" TDH, TDT <%x>, <%x>\n"
" next_to_use <%x>\n"
@@ -392,6 +401,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
" eop_desc->wb.status <%x>\n"
" time_stamp <%lx>\n"
" jiffies <%lx>\n",
+ ring_is_xdp(tx_ring) ? " XDP" : "",
tx_ring->queue_index,
IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)),
IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)),
@@ -399,7 +409,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
eop_desc, (eop_desc ? eop_desc->wb.status : 0),
tx_ring->tx_buffer_info[i].time_stamp, jiffies);
- netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+ if (!ring_is_xdp(tx_ring))
+ netif_stop_subqueue(tx_ring->netdev,
+ tx_ring->queue_index);
/* schedule immediate reset if we believe we hung */
ixgbevf_tx_timeout_reset(adapter);
@@ -407,6 +419,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
return true;
}
+ if (ring_is_xdp(tx_ring))
+ return !!budget;
+
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
(ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
@@ -527,6 +542,51 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+ const unsigned int size)
+{
+ struct ixgbevf_rx_buffer *rx_buffer;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ prefetchw(rx_buffer->page);
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+
+ rx_buffer->pagecnt_bias--;
+
+ return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ struct sk_buff *skb)
+{
+ if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+ /* hand second half of page back to the ring */
+ ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ if (IS_ERR(skb))
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ ixgbevf_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+ }
+
+ /* clear contents of rx_buffer */
+ rx_buffer->page = NULL;
+}
+
/**
* ixgbevf_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
@@ -554,32 +614,38 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
return true;
}
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+ return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *bi)
{
struct page *page = bi->page;
- dma_addr_t dma = bi->dma;
+ dma_addr_t dma;
/* since we are recycling buffers we should seldom need to alloc */
if (likely(page))
return true;
/* alloc new page for storage */
- page = dev_alloc_page();
+ page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
/* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+ ixgbevf_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_page(page);
+ __free_pages(page, ixgbevf_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
@@ -587,7 +653,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
bi->dma = dma;
bi->page = page;
- bi->page_offset = 0;
+ bi->page_offset = ixgbevf_rx_offset(rx_ring);
bi->pagecnt_bias = 1;
rx_ring->rx_stats.alloc_rx_page++;
@@ -621,7 +687,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
- IXGBEVF_RX_BUFSZ,
+ ixgbevf_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* Refresh the desc even if pkt_addr didn't change
@@ -685,6 +751,10 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
union ixgbe_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
+ /* XDP packets use error pointer so abort at this point */
+ if (IS_ERR(skb))
+ return true;
+
/* verify that the packet does not have any known errors */
if (unlikely(ixgbevf_test_staterr(rx_desc,
IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
@@ -734,11 +804,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}
-static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
- struct page *page,
- const unsigned int truesize)
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
{
- unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+ struct page *page = rx_buffer->page;
/* avoid re-using remote pages */
if (unlikely(ixgbevf_page_is_reserved(page)))
@@ -746,17 +815,13 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely(page_ref_count(page) != pagecnt_bias))
+ if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
return false;
-
- /* flip page offset to other buffer */
- rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
#else
- /* move offset up to the next cache line */
- rx_buffer->page_offset += truesize;
+#define IXGBEVF_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
- if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+ if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
return false;
#endif
@@ -765,7 +830,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(pagecnt_bias == 1)) {
+ if (unlikely(!pagecnt_bias)) {
page_ref_add(page, USHRT_MAX);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
@@ -777,136 +842,268 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
* ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
* @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
*
* This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
**/
-static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
- u16 size,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ unsigned int size)
{
- struct page *page = rx_buffer->page;
- unsigned char *va = page_address(page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
- unsigned int truesize = IXGBEVF_RX_BUFSZ;
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
#else
- unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+ unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
#endif
- unsigned int pull_len;
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+ rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+}
- if (unlikely(skb_is_nonlinear(skb)))
- goto add_tail_frag;
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ struct xdp_buff *xdp,
+ union ixgbe_adv_rx_desc *rx_desc)
+{
+ unsigned int size = xdp->data_end - xdp->data;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
+#endif
+ unsigned int headlen;
+ struct sk_buff *skb;
- if (likely(size <= IXGBEVF_RX_HDR_SIZE)) {
- memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+ /* prefetch first cache line of first page */
+ prefetch(xdp->data);
+#if L1_CACHE_BYTES < 128
+ prefetch(xdp->data + L1_CACHE_BYTES);
+#endif
+ /* Note, we get here by enabling legacy-rx via:
+ *
+ * ethtool --set-priv-flags <dev> legacy-rx on
+ *
+ * In this mode, we currently get 0 extra XDP headroom as
+ * opposed to having legacy-rx off, where we process XDP
+ * packets going to stack via ixgbevf_build_skb().
+ *
+ * For ixgbevf_construct_skb() mode it means that the
+ * xdp->data_meta will always point to xdp->data, since
+ * the helper cannot expand the head. Should this ever
+ * change in the future for legacy-rx mode, let's also
+ * add xdp->data_meta handling here.
+ */
- /* page is not reserved, we can reuse buffer as is */
- if (likely(!ixgbevf_page_is_reserved(page)))
- return true;
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+ if (unlikely(!skb))
+ return NULL;
- /* this page cannot be reused so discard it */
- return false;
- }
-
- /* we need the header to contain the greater of either ETH_HLEN or
- * 60 bytes if the skb->len is less than 60 for skb_pad.
- */
- pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
+ /* Determine available headroom for copy */
+ headlen = size;
+ if (headlen > IXGBEVF_RX_HDR_SIZE)
+ headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+ memcpy(__skb_put(skb, headlen), xdp->data,
+ ALIGN(headlen, sizeof(long)));
/* update all of the pointers */
- va += pull_len;
- size -= pull_len;
+ size -= headlen;
+ if (size) {
+ skb_add_rx_frag(skb, 0, rx_buffer->page,
+ (xdp->data + headlen) -
+ page_address(rx_buffer->page),
+ size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+ } else {
+ rx_buffer->pagecnt_bias++;
+ }
-add_tail_frag:
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- (unsigned long)va & ~PAGE_MASK, size, truesize);
+ return skb;
+}
+
+static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
+ u32 qmask)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
- return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
}
-static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ struct xdp_buff *xdp,
+ union ixgbe_adv_rx_desc *rx_desc)
{
- struct ixgbevf_rx_buffer *rx_buffer;
- struct page *page;
- u16 size = le16_to_cpu(rx_desc->wb.upper.length);
+ unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
+#endif
+ struct sk_buff *skb;
- rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
- page = rx_buffer->page;
- prefetchw(page);
+ /* Prefetch first cache line of first page. If xdp->data_meta
+ * is unused, this points to xdp->data, otherwise, we likely
+ * have a consumer accessing first few bytes of meta data,
+ * and then actual data.
+ */
+ prefetch(xdp->data_meta);
+#if L1_CACHE_BYTES < 128
+ prefetch(xdp->data_meta + L1_CACHE_BYTES);
+#endif
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_buffer->dma,
- rx_buffer->page_offset,
- size,
- DMA_FROM_DEVICE);
+ /* build an skb around the page buffer */
+ skb = build_skb(xdp->data_hard_start, truesize);
+ if (unlikely(!skb))
+ return NULL;
- if (likely(!skb)) {
- void *page_addr = page_address(page) +
- rx_buffer->page_offset;
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ __skb_put(skb, xdp->data_end - xdp->data);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
- /* prefetch first cache line of first page */
- prefetch(page_addr);
-#if L1_CACHE_BYTES < 128
- prefetch(page_addr + L1_CACHE_BYTES);
+ /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
#endif
- /* allocate a skb to store the frags */
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- IXGBEVF_RX_HDR_SIZE);
- if (unlikely(!skb)) {
- rx_ring->rx_stats.alloc_rx_buff_failed++;
- return NULL;
- }
+ return skb;
+}
- /* we will be copying header into skb->data in
- * pskb_may_pull so it is in our interest to prefetch
- * it now to avoid a possible cache miss
- */
- prefetchw(skb->data);
- }
+#define IXGBEVF_XDP_PASS 0
+#define IXGBEVF_XDP_CONSUMED 1
+#define IXGBEVF_XDP_TX 2
- /* pull page into skb */
- if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
- /* hand second half of page back to the ring */
- ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
- } else {
- /* We are not reusing the buffer so unmap it and free
- * any references we are holding to it
- */
- dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE,
- IXGBEVF_RX_DMA_ATTR);
- __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
- }
+static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
+ struct xdp_buff *xdp)
+{
+ struct ixgbevf_tx_buffer *tx_buffer;
+ union ixgbe_adv_tx_desc *tx_desc;
+ u32 len, cmd_type;
+ dma_addr_t dma;
+ u16 i;
- /* clear contents of buffer_info */
- rx_buffer->dma = 0;
- rx_buffer->page = NULL;
+ len = xdp->data_end - xdp->data;
- return skb;
+ if (unlikely(!ixgbevf_desc_unused(ring)))
+ return IXGBEVF_XDP_CONSUMED;
+
+ dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(ring->dev, dma))
+ return IXGBEVF_XDP_CONSUMED;
+
+ /* record the location of the first descriptor for this packet */
+ tx_buffer = &ring->tx_buffer_info[ring->next_to_use];
+ tx_buffer->bytecount = len;
+ tx_buffer->gso_segs = 1;
+ tx_buffer->protocol = 0;
+
+ i = ring->next_to_use;
+ tx_desc = IXGBEVF_TX_DESC(ring, i);
+
+ dma_unmap_len_set(tx_buffer, len, len);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
+ tx_buffer->data = xdp->data;
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ /* put descriptor type bits */
+ cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+ IXGBE_ADVTXD_DCMD_DEXT |
+ IXGBE_ADVTXD_DCMD_IFCS;
+ cmd_type |= len | IXGBE_TXD_CMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ tx_desc->read.olinfo_status =
+ cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) |
+ IXGBE_ADVTXD_CC);
+
+ /* Avoid any potential race with cleanup */
+ smp_wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ i++;
+ if (i == ring->count)
+ i = 0;
+
+ tx_buffer->next_to_watch = tx_desc;
+ ring->next_to_use = i;
+
+ return IXGBEVF_XDP_TX;
}
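Editor's sketch: ixgbevf_xmit_xdp_ring() fills every descriptor field before the smp_wmb(), and only then publishes next_to_watch; the cleanup path uses that pointer as its "packet present" signal. The same publish ordering, modeled below with a C11 release store (the kernel's smp_wmb() followed by a plain pointer store achieves the equivalent effect here):

#include <stdatomic.h>

struct desc { unsigned long addr; unsigned int len; };

struct txq {
	struct desc ring[256];
	_Atomic(struct desc *) next_to_watch[256];
	unsigned short next_to_use;
};

static void publish(struct txq *q, unsigned long addr, unsigned int len)
{
	unsigned short i = q->next_to_use;

	q->ring[i].addr = addr;          /* fill descriptor fields first */
	q->ring[i].len = len;

	/* release store: no earlier write may be observed after this one,
	 * so a cleaner that sees next_to_watch sees a complete descriptor
	 */
	atomic_store_explicit(&q->next_to_watch[i], &q->ring[i],
			      memory_order_release);
	q->next_to_use = (unsigned short)((i + 1) % 256);
}

int main(void)
{
	static struct txq q;

	publish(&q, 0x1000, 64);
	return 0;
}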
-static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
- u32 qmask)
+static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring,
+ struct xdp_buff *xdp)
{
- struct ixgbe_hw *hw = &adapter->hw;
+ int result = IXGBEVF_XDP_PASS;
+ struct ixgbevf_ring *xdp_ring;
+ struct bpf_prog *xdp_prog;
+ u32 act;
- IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+ if (!xdp_prog)
+ goto xdp_out;
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ xdp_ring = adapter->xdp_ring[rx_ring->queue_index];
+ result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp);
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping packet */
+ case XDP_DROP:
+ result = IXGBEVF_XDP_CONSUMED;
+ break;
+ }
+xdp_out:
+ rcu_read_unlock();
+ return ERR_PTR(-result);
+}
+
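Editor's sketch: ixgbevf_run_xdp() overloads the skb return slot as a verdict channel. ERR_PTR(-IXGBEVF_XDP_PASS) is ERR_PTR(0), i.e. NULL, meaning "no program or XDP_PASS, go build an skb"; a real error-encoded pointer carries CONSUMED or TX back to the Rx loop. A standalone model of that encoding, with the ERR_PTR macros reimplemented for user space:

#include <stdio.h>

#define XDP_PASS_V     0   /* mirrors IXGBEVF_XDP_PASS */
#define XDP_CONSUMED_V 1
#define XDP_TX_V       2

#define MAX_ERRNO 4095
#define ERR_PTR(e) ((void *)(long)(e))
#define PTR_ERR(p) ((long)(p))
#define IS_ERR(p)  ((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)

int main(void)
{
	void *pass = ERR_PTR(-XDP_PASS_V);  /* == NULL: fall through to skb */
	void *tx   = ERR_PTR(-XDP_TX_V);

	printf("pass is NULL: %d\n", pass == NULL);
	if (IS_ERR(tx) && PTR_ERR(tx) == -XDP_TX_V)
		printf("buffer was queued on the XDP Tx ring\n");
	return 0;
}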
+static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+
+ rx_buffer->page_offset ^= truesize;
+#else
+ unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+
+ rx_buffer->page_offset += truesize;
+#endif
}
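Editor's sketch: ixgbevf_rx_buffer_flip() is the small-page half of the reuse scheme; XOR-ing page_offset with the slice size ping-pongs between the two halves of the backing page, so hardware DMA and the stack never touch the same half at once. A sketch assuming a 4 KiB page split into 2 KiB halves:

#include <stdio.h>

int main(void)
{
	unsigned int truesize = 2048;   /* half of the backing page */
	unsigned int offset = 0;
	int i;

	for (i = 0; i < 4; i++) {
		printf("use half at offset %u\n", offset);
		offset ^= truesize;     /* flip to the other half */
	}
	return 0;
}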
static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
@@ -914,11 +1111,18 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ struct ixgbevf_adapter *adapter = q_vector->adapter;
u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
struct sk_buff *skb = rx_ring->skb;
+ bool xdp_xmit = false;
+ struct xdp_buff xdp;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
while (likely(total_rx_packets < budget)) {
+ struct ixgbevf_rx_buffer *rx_buffer;
union ixgbe_adv_rx_desc *rx_desc;
+ unsigned int size;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
@@ -927,8 +1131,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
}
rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
- if (!rx_desc->wb.upper.length)
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+ if (!size)
break;
/* This memory barrier is needed to keep us from reading
@@ -937,15 +1141,48 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
*/
rmb();
+ rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
/* retrieve a buffer from the ring */
- skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
+ if (!skb) {
+ xdp.data = page_address(rx_buffer->page) +
+ rx_buffer->page_offset;
+ xdp.data_meta = xdp.data;
+ xdp.data_hard_start = xdp.data -
+ ixgbevf_rx_offset(rx_ring);
+ xdp.data_end = xdp.data + size;
+
+ skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
+ }
+
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) {
+ xdp_xmit = true;
+ ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
+ size);
+ } else {
+ rx_buffer->pagecnt_bias++;
+ }
+ total_rx_packets++;
+ total_rx_bytes += size;
+ } else if (skb) {
+ ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+ } else if (ring_uses_build_skb(rx_ring)) {
+ skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+ &xdp, rx_desc);
+ } else {
+ skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+ &xdp, rx_desc);
+ }
/* exit if we failed to retrieve a buffer */
if (!skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
+ rx_buffer->pagecnt_bias++;
break;
}
+ ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb);
cleaned_count++;
/* fetch next buffer in frame if non-eop */
@@ -987,6 +1224,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
/* place incomplete frames back on ring for completion */
rx_ring->skb = skb;
+ if (xdp_xmit) {
+ struct ixgbevf_ring *xdp_ring =
+ adapter->xdp_ring[rx_ring->queue_index];
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch.
+ */
+ wmb();
+ ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use);
+ }
+
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.packets += total_rx_packets;
rx_ring->stats.bytes += total_rx_bytes;
@@ -1260,85 +1508,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
return IRQ_HANDLED;
}
-static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx,
- int r_idx)
-{
- struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
- a->rx_ring[r_idx]->next = q_vector->rx.ring;
- q_vector->rx.ring = a->rx_ring[r_idx];
- q_vector->rx.count++;
-}
-
-static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx,
- int t_idx)
-{
- struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
- a->tx_ring[t_idx]->next = q_vector->tx.ring;
- q_vector->tx.ring = a->tx_ring[t_idx];
- q_vector->tx.count++;
-}
-
-/**
- * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code. Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible. You would add new
- * mapping configurations in here.
- **/
-static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
-{
- int q_vectors;
- int v_start = 0;
- int rxr_idx = 0, txr_idx = 0;
- int rxr_remaining = adapter->num_rx_queues;
- int txr_remaining = adapter->num_tx_queues;
- int i, j;
- int rqpv, tqpv;
-
- q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- /* The ideal configuration...
- * We have enough vectors to map one per queue.
- */
- if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
- for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
- map_vector_to_rxq(adapter, v_start, rxr_idx);
-
- for (; txr_idx < txr_remaining; v_start++, txr_idx++)
- map_vector_to_txq(adapter, v_start, txr_idx);
- return 0;
- }
-
- /* If we don't have enough vectors for a 1-to-1
- * mapping, we'll have to group them so there are
- * multiple queues per vector.
- */
- /* Re-adjusting *qpv takes care of the remainder. */
- for (i = v_start; i < q_vectors; i++) {
- rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
- for (j = 0; j < rqpv; j++) {
- map_vector_to_rxq(adapter, i, rxr_idx);
- rxr_idx++;
- rxr_remaining--;
- }
- }
- for (i = v_start; i < q_vectors; i++) {
- tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
- for (j = 0; j < tqpv; j++) {
- map_vector_to_txq(adapter, i, txr_idx);
- txr_idx++;
- txr_remaining--;
- }
- }
-
- return 0;
-}
-
/**
* ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
* @adapter: board private structure
@@ -1411,20 +1580,6 @@ free_queue_irqs:
return err;
}
-static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
-{
- int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- for (i = 0; i < q_vectors; i++) {
- struct ixgbevf_q_vector *q_vector = adapter->q_vector[i];
-
- q_vector->rx.ring = NULL;
- q_vector->tx.ring = NULL;
- q_vector->rx.count = 0;
- q_vector->tx.count = 0;
- }
-}
-
/**
* ixgbevf_request_irq - initialize interrupts
* @adapter: board private structure
@@ -1464,8 +1619,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
free_irq(adapter->msix_entries[i].vector,
adapter->q_vector[i]);
}
-
- ixgbevf_reset_q_vectors(adapter);
}
/**
@@ -1583,11 +1736,14 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
/* Setup the HW Tx Head and Tail descriptor pointers */
for (i = 0; i < adapter->num_tx_queues; i++)
ixgbevf_configure_tx_ring(adapter, adapter->tx_ring[i]);
+ for (i = 0; i < adapter->num_xdp_queues; i++)
+ ixgbevf_configure_tx_ring(adapter, adapter->xdp_ring[i]);
}
#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
-static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *ring, int index)
{
struct ixgbe_hw *hw = &adapter->hw;
u32 srrctl;
@@ -1595,7 +1751,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
srrctl = IXGBE_SRRCTL_DROP_EN;
srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
- srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ if (ring_uses_large_buffer(ring))
+ srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ else
+ srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
@@ -1767,10 +1926,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
ring->next_to_use = 0;
ring->next_to_alloc = 0;
- ixgbevf_configure_srrctl(adapter, reg_idx);
+ ixgbevf_configure_srrctl(adapter, ring, reg_idx);
+
+ /* RXDCTL.RLPML does not work on 82599 */
+ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+ rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+ IXGBE_RXDCTL_RLPML_EN);
- /* allow any size packet since we can handle overflow */
- rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+#if (PAGE_SIZE < 8192)
+ /* Limit the maximum frame size so we don't overrun the skb */
+ if (ring_uses_build_skb(ring) &&
+ !ring_uses_large_buffer(ring))
+ rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+ IXGBE_RXDCTL_RLPML_EN;
+#endif
+ }
rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
@@ -1779,6 +1949,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
}
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring)
+{
+ struct net_device *netdev = adapter->netdev;
+ unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+ /* set build_skb and buffer size flags */
+ clear_ring_build_skb_enabled(rx_ring);
+ clear_ring_uses_large_buffer(rx_ring);
+
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ return;
+
+ set_ring_build_skb_enabled(rx_ring);
+
+ if (PAGE_SIZE < 8192) {
+ if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+ return;
+
+ set_ring_uses_large_buffer(rx_ring);
+ }
+}
+
/**
* ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
* @adapter: board private structure
@@ -1806,8 +1999,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
/* Setup the HW Rx Head and Tail Descriptor Pointers and
* the Base and Length of the Rx Descriptor Ring
*/
- for (i = 0; i < adapter->num_rx_queues; i++)
- ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]);
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+ ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+ ixgbevf_configure_rx_ring(adapter, rx_ring);
+ }
}
static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
@@ -2136,13 +2333,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
- IXGBEVF_RX_BUFSZ,
+ ixgbevf_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev,
rx_buffer->dma,
- PAGE_SIZE,
+ ixgbevf_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBEVF_RX_DMA_ATTR);
@@ -2172,7 +2369,10 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring)
union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
/* Free all the Tx ring sk_buffs */
- dev_kfree_skb_any(tx_buffer->skb);
+ if (ring_is_xdp(tx_ring))
+ page_frag_free(tx_buffer->data);
+ else
+ dev_kfree_skb_any(tx_buffer->skb);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -2240,6 +2440,8 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter)
for (i = 0; i < adapter->num_tx_queues; i++)
ixgbevf_clean_tx_ring(adapter->tx_ring[i]);
+ for (i = 0; i < adapter->num_xdp_queues; i++)
+ ixgbevf_clean_tx_ring(adapter->xdp_ring[i]);
}
void ixgbevf_down(struct ixgbevf_adapter *adapter)
@@ -2278,6 +2480,13 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter)
IXGBE_TXDCTL_SWFLSH);
}
+ for (i = 0; i < adapter->num_xdp_queues; i++) {
+ u8 reg_idx = adapter->xdp_ring[i]->reg_idx;
+
+ IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx),
+ IXGBE_TXDCTL_SWFLSH);
+ }
+
if (!pci_channel_offline(adapter->pdev))
ixgbevf_reset(adapter);
@@ -2375,6 +2584,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
/* Start with base case */
adapter->num_rx_queues = 1;
adapter->num_tx_queues = 1;
+ adapter->num_xdp_queues = 0;
spin_lock_bh(&adapter->mbx_lock);
@@ -2396,8 +2606,13 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
case ixgbe_mbox_api_11:
case ixgbe_mbox_api_12:
case ixgbe_mbox_api_13:
+ if (adapter->xdp_prog &&
+ hw->mac.max_tx_queues == rss)
+ rss = rss > 3 ? 2 : 1;
+
adapter->num_rx_queues = rss;
adapter->num_tx_queues = rss;
+ adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0;
default:
break;
}
@@ -2405,105 +2620,209 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
}
/**
- * ixgbevf_alloc_queues - Allocate memory for all rings
+ * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
* @adapter: board private structure to initialize
*
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time. The polling_netdev array is
- * intended for Multiqueue, but should work fine with a single queue.
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
**/
-static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter)
+static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
{
+ int vector, v_budget;
+
+ /* It's easy to be greedy for MSI-X vectors, but it really
+ * doesn't do us much good if we have a lot more vectors
+ * than CPU's. So let's be conservative and only ask for
+ * (roughly) the same number of vectors as there are CPU's.
+ * The default is to use pairs of vectors.
+ */
+ v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
+ v_budget = min_t(int, v_budget, num_online_cpus());
+ v_budget += NON_Q_VECTORS;
+
+ adapter->msix_entries = kcalloc(v_budget,
+ sizeof(struct msix_entry), GFP_KERNEL);
+ if (!adapter->msix_entries)
+ return -ENOMEM;
+
+ for (vector = 0; vector < v_budget; vector++)
+ adapter->msix_entries[vector].entry = vector;
+
+ /* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+ * does not support any other modes, so we will simply fail here. Note
+ * that we clean up the msix_entries pointer elsewhere.
+ */
+ return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
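
/* Editorial sketch, not part of the patch: the vector budget in
 * isolation. NON_Q_VECTORS is assumed to be 1 here (the extra
 * mailbox/other-cause vector used by the VF driver).
 */
static int msix_vector_budget(int num_rx, int num_tx, int online_cpus)
{
	int v = num_rx > num_tx ? num_rx : num_tx; /* Rx/Tx pairs share a vector */

	if (v > online_cpus)	/* more vectors than CPUs buys nothing */
		v = online_cpus;
	return v + 1;		/* + NON_Q_VECTORS */
}
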
+
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+ struct ixgbevf_ring_container *head)
+{
+ ring->next = head->ring;
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @xdp_count: total number of XDP rings to allocate
+ * @xdp_idx: index of first XDP ring to allocate
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ **/
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+ int txr_count, int txr_idx,
+ int xdp_count, int xdp_idx,
+ int rxr_count, int rxr_idx)
+{
+ struct ixgbevf_q_vector *q_vector;
+ int reg_idx = txr_idx + xdp_idx;
struct ixgbevf_ring *ring;
- int rx = 0, tx = 0;
+ int ring_count, size;
+
+ ring_count = txr_count + xdp_count + rxr_count;
+ size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+
+ /* allocate q_vector and rings */
+ q_vector = kzalloc(size, GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ /* initialize NAPI */
+ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64);
- for (; tx < adapter->num_tx_queues; tx++) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- goto err_allocation;
+ /* tie q_vector and adapter together */
+ adapter->q_vector[v_idx] = q_vector;
+ q_vector->adapter = adapter;
+ q_vector->v_idx = v_idx;
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+
+ while (txr_count) {
+ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ ixgbevf_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
ring->count = adapter->tx_ring_count;
- ring->queue_index = tx;
- ring->reg_idx = tx;
+ ring->queue_index = txr_idx;
+ ring->reg_idx = reg_idx;
- adapter->tx_ring[tx] = ring;
+ /* assign ring to adapter */
+ adapter->tx_ring[txr_idx] = ring;
+
+ /* update count and index */
+ txr_count--;
+ txr_idx++;
+ reg_idx++;
+
+ /* push pointer to next ring */
+ ring++;
}
- for (; rx < adapter->num_rx_queues; rx++) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- goto err_allocation;
+ while (xdp_count) {
+ /* assign generic ring traits */
+ ring->dev = &adapter->pdev->dev;
+ ring->netdev = adapter->netdev;
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ ixgbevf_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
+ ring->count = adapter->tx_ring_count;
+ ring->queue_index = xdp_idx;
+ ring->reg_idx = reg_idx;
+ set_ring_xdp(ring);
+
+ /* assign ring to adapter */
+ adapter->xdp_ring[xdp_idx] = ring;
+
+ /* update count and index */
+ xdp_count--;
+ xdp_idx++;
+ reg_idx++;
+
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ while (rxr_count) {
+ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
ring->netdev = adapter->netdev;
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ ixgbevf_add_ring(ring, &q_vector->rx);
+
+ /* apply Rx specific ring traits */
ring->count = adapter->rx_ring_count;
- ring->queue_index = rx;
- ring->reg_idx = rx;
+ ring->queue_index = rxr_idx;
+ ring->reg_idx = rxr_idx;
- adapter->rx_ring[rx] = ring;
- }
+ /* assign ring to adapter */
+ adapter->rx_ring[rxr_idx] = ring;
- return 0;
+ /* update count and index */
+ rxr_count--;
+ rxr_idx++;
-err_allocation:
- while (tx) {
- kfree(adapter->tx_ring[--tx]);
- adapter->tx_ring[tx] = NULL;
+ /* push pointer to next ring */
+ ring++;
}
- while (rx) {
- kfree(adapter->rx_ring[--rx]);
- adapter->rx_ring[rx] = NULL;
- }
- return -ENOMEM;
+ return 0;
}
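
/* Editorial note: the q_vector and all of its rings come from the single
 * kzalloc() above; ring is assumed to be a flexible array member at the
 * end of struct ixgbevf_q_vector, so the "ring++" walks lay the Tx, XDP
 * and Rx rings back to back in one block:
 *
 *   kzalloc(sizeof(*q_vector) + ring_count * sizeof(*ring))
 *   +-----------+---------+----------+---------+
 *   | q_vector  | Tx ...  | XDP ...  | Rx ...  |
 *   +-----------+---------+----------+---------+
 */
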
/**
- * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
* @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
*
- * Attempt to configure the interrupts using the best available
- * capabilities of the hardware and the kernel.
+ * This function frees the memory allocated to the q_vector. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
**/
-static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
{
- struct net_device *netdev = adapter->netdev;
- int err;
- int vector, v_budget;
-
- /* It's easy to be greedy for MSI-X vectors, but it really
- * doesn't do us much good if we have a lot more vectors
- * than CPU's. So let's be conservative and only ask for
- * (roughly) the same number of vectors as there are CPU's.
- * The default is to use pairs of vectors.
- */
- v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
- v_budget = min_t(int, v_budget, num_online_cpus());
- v_budget += NON_Q_VECTORS;
-
- /* A failure in MSI-X entry allocation isn't fatal, but it does
- * mean we disable MSI-X capabilities of the adapter.
- */
- adapter->msix_entries = kcalloc(v_budget,
- sizeof(struct msix_entry), GFP_KERNEL);
- if (!adapter->msix_entries)
- return -ENOMEM;
+ struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+ struct ixgbevf_ring *ring;
- for (vector = 0; vector < v_budget; vector++)
- adapter->msix_entries[vector].entry = vector;
+ ixgbevf_for_each_ring(ring, q_vector->tx) {
+ if (ring_is_xdp(ring))
+ adapter->xdp_ring[ring->queue_index] = NULL;
+ else
+ adapter->tx_ring[ring->queue_index] = NULL;
+ }
- err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
- if (err)
- return err;
+ ixgbevf_for_each_ring(ring, q_vector->rx)
+ adapter->rx_ring[ring->queue_index] = NULL;
- err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
- if (err)
- return err;
+ adapter->q_vector[v_idx] = NULL;
+ netif_napi_del(&q_vector->napi);
- return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ /* ixgbevf_get_stats() might access the rings on this vector,
+ * so we must wait a grace period before freeing it.
+ */
+ kfree_rcu(q_vector, rcu);
}
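
/* Editorial sketch, not part of the patch: the reader this kfree_rcu()
 * pairs with. ixgbevf_get_stats() (later in this patch) dereferences
 * the rings under rcu_read_lock(), so the q_vector that embeds them
 * must survive until a grace period has elapsed:
 */
rcu_read_lock();
ring = adapter->xdp_ring[i];	/* may be NULLed by ixgbevf_free_q_vector() */
ixgbevf_get_tx_ring_stats(stats, ring);	/* tolerates ring == NULL */
rcu_read_unlock();
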
/**
@@ -2515,35 +2834,58 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
**/
static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
{
- int q_idx, num_q_vectors;
- struct ixgbevf_q_vector *q_vector;
+ int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ int rxr_remaining = adapter->num_rx_queues;
+ int txr_remaining = adapter->num_tx_queues;
+ int xdp_remaining = adapter->num_xdp_queues;
+ int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
+ int err;
+
+ if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
+ for (; rxr_remaining; v_idx++, q_vectors--) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
- num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ err = ixgbevf_alloc_q_vector(adapter, v_idx,
+ 0, 0, 0, 0, rqpv, rxr_idx);
+ if (err)
+ goto err_out;
- for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
- q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL);
- if (!q_vector)
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ rxr_idx += rqpv;
+ }
+ }
+
+ for (; q_vectors; v_idx++, q_vectors--) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
+ int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors);
+
+ err = ixgbevf_alloc_q_vector(adapter, v_idx,
+ tqpv, txr_idx,
+ xqpv, xdp_idx,
+ rqpv, rxr_idx);
+
+ if (err)
goto err_out;
- q_vector->adapter = adapter;
- q_vector->v_idx = q_idx;
- netif_napi_add(adapter->netdev, &q_vector->napi,
- ixgbevf_poll, 64);
- adapter->q_vector[q_idx] = q_vector;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ rxr_idx += rqpv;
+ txr_remaining -= tqpv;
+ txr_idx += tqpv;
+ xdp_remaining -= xqpv;
+ xdp_idx += xqpv;
}
return 0;
err_out:
- while (q_idx) {
- q_idx--;
- q_vector = adapter->q_vector[q_idx];
-#ifdef CONFIG_NET_RX_BUSY_POLL
- napi_hash_del(&q_vector->napi);
-#endif
- netif_napi_del(&q_vector->napi);
- kfree(q_vector);
- adapter->q_vector[q_idx] = NULL;
+ while (v_idx) {
+ v_idx--;
+ ixgbevf_free_q_vector(adapter, v_idx);
}
+
return -ENOMEM;
}
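
/* Worked example (editorial note): DIV_ROUND_UP spreads the remaining
 * rings evenly over the remaining vectors. With 4 Rx, 4 Tx and 4 XDP
 * rings on q_vectors == 4:
 *
 *   v_idx 0: rqpv = DIV_ROUND_UP(4, 4) = 1, tqpv = 1, xqpv = 1
 *   v_idx 1: rqpv = DIV_ROUND_UP(3, 3) = 1, tqpv = 1, xqpv = 1
 *   ...
 *
 * so every vector gets one ring of each kind, and the remainders reach
 * zero exactly when q_vectors does.
 */
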
@@ -2557,17 +2899,11 @@ err_out:
**/
static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
{
- int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
- struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx];
+ int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
- adapter->q_vector[q_idx] = NULL;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- napi_hash_del(&q_vector->napi);
-#endif
- netif_napi_del(&q_vector->napi);
- kfree(q_vector);
+ while (q_vectors) {
+ q_vectors--;
+ ixgbevf_free_q_vector(adapter, q_vectors);
}
}
@@ -2611,21 +2947,14 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
goto err_alloc_q_vectors;
}
- err = ixgbevf_alloc_queues(adapter);
- if (err) {
- pr_err("Unable to allocate memory for queues\n");
- goto err_alloc_queues;
- }
-
- hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
- (adapter->num_rx_queues > 1) ? "Enabled" :
- "Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
+ hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u, XDP Queue count = %u\n",
+ (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
+ adapter->num_rx_queues, adapter->num_tx_queues,
+ adapter->num_xdp_queues);
set_bit(__IXGBEVF_DOWN, &adapter->state);
return 0;
-err_alloc_queues:
- ixgbevf_free_q_vectors(adapter);
err_alloc_q_vectors:
ixgbevf_reset_interrupt_capability(adapter);
err_set_interrupt:
@@ -2641,18 +2970,8 @@ err_set_interrupt:
**/
static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- kfree(adapter->tx_ring[i]);
- adapter->tx_ring[i] = NULL;
- }
- for (i = 0; i < adapter->num_rx_queues; i++) {
- kfree(adapter->rx_ring[i]);
- adapter->rx_ring[i] = NULL;
- }
-
adapter->num_tx_queues = 0;
+ adapter->num_xdp_queues = 0;
adapter->num_rx_queues = 0;
ixgbevf_free_q_vectors(adapter);
@@ -2860,6 +3179,8 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter)
if (netif_carrier_ok(adapter->netdev)) {
for (i = 0; i < adapter->num_tx_queues; i++)
set_check_for_tx_hang(adapter->tx_ring[i]);
+ for (i = 0; i < adapter->num_xdp_queues; i++)
+ set_check_for_tx_hang(adapter->xdp_ring[i]);
}
/* get one bit for every active Tx/Rx interrupt vector */
@@ -3031,6 +3352,9 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter)
for (i = 0; i < adapter->num_tx_queues; i++)
if (adapter->tx_ring[i]->desc)
ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+ for (i = 0; i < adapter->num_xdp_queues; i++)
+ if (adapter->xdp_ring[i]->desc)
+ ixgbevf_free_tx_resources(adapter->xdp_ring[i]);
}
/**
@@ -3081,26 +3405,44 @@ err:
**/
static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
{
- int i, err = 0;
+ int i, j = 0, err = 0;
for (i = 0; i < adapter->num_tx_queues; i++) {
err = ixgbevf_setup_tx_resources(adapter->tx_ring[i]);
if (!err)
continue;
hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
+ goto err_setup_tx;
+ }
+
+ for (j = 0; j < adapter->num_xdp_queues; j++) {
+ err = ixgbevf_setup_tx_resources(adapter->xdp_ring[j]);
+ if (!err)
+ continue;
+ hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j);
break;
}
+ return 0;
+err_setup_tx:
+ /* rewind the index freeing the rings as we go */
+ while (j--)
+ ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
+ while (i--)
+ ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+
return err;
}
/**
* ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
* @rx_ring: Rx descriptor ring (for a specific queue) to setup
*
* Returns 0 on success, negative on failure
**/
-int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring)
{
int size;
@@ -3121,6 +3463,13 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
if (!rx_ring->desc)
goto err;
+ /* XDP RX-queue info */
+ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
+ rx_ring->queue_index) < 0)
+ goto err;
+
+ rx_ring->xdp_prog = adapter->xdp_prog;
+
return 0;
err:
vfree(rx_ring->rx_buffer_info);
@@ -3144,12 +3493,18 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
int i, err = 0;
for (i = 0; i < adapter->num_rx_queues; i++) {
- err = ixgbevf_setup_rx_resources(adapter->rx_ring[i]);
+ err = ixgbevf_setup_rx_resources(adapter, adapter->rx_ring[i]);
if (!err)
continue;
hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
- break;
+ goto err_setup_rx;
}
+
+ return 0;
+err_setup_rx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ixgbevf_free_rx_resources(adapter->rx_ring[i]);
return err;
}
@@ -3163,6 +3518,8 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring)
{
ixgbevf_clean_rx_ring(rx_ring);
+ rx_ring->xdp_prog = NULL;
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
@@ -3244,28 +3601,31 @@ int ixgbevf_open(struct net_device *netdev)
ixgbevf_configure(adapter);
- /* Map the Tx/Rx rings to the vectors we were allotted.
- * if request_irq will be called in this function map_rings
- * must be called *before* up_complete
- */
- ixgbevf_map_rings_to_vectors(adapter);
-
err = ixgbevf_request_irq(adapter);
if (err)
goto err_req_irq;
+ /* Notify the stack of the actual queue counts. */
+ err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+ if (err)
+ goto err_set_queues;
+
+ err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ if (err)
+ goto err_set_queues;
+
ixgbevf_up_complete(adapter);
return 0;
+err_set_queues:
+ ixgbevf_free_irq(adapter);
err_req_irq:
- ixgbevf_down(adapter);
-err_setup_rx:
ixgbevf_free_all_rx_resources(adapter);
-err_setup_tx:
+err_setup_rx:
ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
ixgbevf_reset(adapter);
-
err_setup_reset:
return err;
@@ -3707,11 +4067,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
return __ixgbevf_maybe_stop_tx(tx_ring, size);
}
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+ struct ixgbevf_ring *tx_ring)
{
- struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbevf_tx_buffer *first;
- struct ixgbevf_ring *tx_ring;
int tso;
u32 tx_flags = 0;
u16 count = TXD_USE_COUNT(skb_headlen(skb));
@@ -3726,8 +4085,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK;
}
- tx_ring = adapter->tx_ring[skb->queue_mapping];
-
/* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
* + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
* + 2 desc gap to keep tail from touching head,
@@ -3780,6 +4137,29 @@ out_drop:
return NETDEV_TX_OK;
}
+static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ struct ixgbevf_ring *tx_ring;
+
+ if (skb->len <= 0) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* The minimum packet size for olinfo paylen is 17, so pad the skb
+ * in order to meet this minimum size requirement.
+ */
+ if (skb->len < 17) {
+ if (skb_padto(skb, 17))
+ return NETDEV_TX_OK;
+ skb->len = 17;
+ }
+
+ tx_ring = adapter->tx_ring[skb->queue_mapping];
+ return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
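
/* Editorial note: skb_padto() zero-fills the tail up to the requested
 * size but deliberately leaves skb->len untouched, which is why the
 * code above bumps skb->len to 17 by hand. A 10-byte frame therefore
 * leaves this path as 10 bytes of data plus 7 zero bytes of padding,
 * meeting the minimum olinfo paylen.
 */
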
+
/**
* ixgbevf_set_mac - Change the Ethernet Address of the NIC
* @netdev: network interface device structure
@@ -3826,6 +4206,12 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
int ret;
+ /* prevent MTU being changed to a size unsupported by XDP */
+ if (adapter->xdp_prog) {
+ dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n");
+ return -EPERM;
+ }
+
spin_lock_bh(&adapter->mbx_lock);
/* notify the PF of our intent to use this size of frame */
ret = hw->mac.ops.set_rlpml(hw, max_frame);
@@ -3839,6 +4225,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
/* must set new MTU before calling down or up */
netdev->mtu = new_mtu;
+ if (netif_running(netdev))
+ ixgbevf_reinit_locked(adapter);
+
return 0;
}
@@ -3917,17 +4306,11 @@ static int ixgbevf_resume(struct pci_dev *pdev)
rtnl_lock();
err = ixgbevf_init_interrupt_scheme(adapter);
+ if (!err && netif_running(netdev))
+ err = ixgbevf_open(netdev);
rtnl_unlock();
- if (err) {
- dev_err(&pdev->dev, "Cannot initialize interrupts\n");
+ if (err)
return err;
- }
-
- if (netif_running(netdev)) {
- err = ixgbevf_open(netdev);
- if (err)
- return err;
- }
netif_device_attach(netdev);
@@ -3940,6 +4323,23 @@ static void ixgbevf_shutdown(struct pci_dev *pdev)
ixgbevf_suspend(pdev, PMSG_SUSPEND);
}
+static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats,
+ const struct ixgbevf_ring *ring)
+{
+ u64 bytes, packets;
+ unsigned int start;
+
+ if (ring) {
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ bytes = ring->stats.bytes;
+ packets = ring->stats.packets;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+ stats->tx_bytes += bytes;
+ stats->tx_packets += packets;
+ }
+}
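
/* Editorial sketch, not part of the patch: the writer side that the
 * fetch/retry loop above pairs with. On 32-bit kernels a 64-bit counter
 * update can tear, so writers bracket updates with the same per-ring
 * seqcount (total_bytes here is a stand-in for the completed byte count):
 */
u64_stats_update_begin(&ring->syncp);
ring->stats.packets++;
ring->stats.bytes += total_bytes;
u64_stats_update_end(&ring->syncp);
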
+
static void ixgbevf_get_stats(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -3953,6 +4353,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
+ rcu_read_lock();
for (i = 0; i < adapter->num_rx_queues; i++) {
ring = adapter->rx_ring[i];
do {
@@ -3966,14 +4367,14 @@ static void ixgbevf_get_stats(struct net_device *netdev,
for (i = 0; i < adapter->num_tx_queues; i++) {
ring = adapter->tx_ring[i];
- do {
- start = u64_stats_fetch_begin_irq(&ring->syncp);
- bytes = ring->stats.bytes;
- packets = ring->stats.packets;
- } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
- stats->tx_bytes += bytes;
- stats->tx_packets += packets;
+ ixgbevf_get_tx_ring_stats(stats, ring);
+ }
+
+ for (i = 0; i < adapter->num_xdp_queues; i++) {
+ ring = adapter->xdp_ring[i];
+ ixgbevf_get_tx_ring_stats(stats, ring);
}
+ rcu_read_unlock();
}
#define IXGBEVF_MAX_MAC_HDR_LEN 127
@@ -4010,6 +4411,64 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev,
return features;
}
+static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
+{
+ int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ struct ixgbevf_adapter *adapter = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+
+ /* verify ixgbevf ring attributes are sufficient for XDP */
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ixgbevf_ring *ring = adapter->rx_ring[i];
+
+ if (frame_size > ixgbevf_rx_bufsz(ring))
+ return -EINVAL;
+ }
+
+ old_prog = xchg(&adapter->xdp_prog, prog);
+
+ /* If transitioning XDP modes, reconfigure the rings */
+ if (!!prog != !!old_prog) {
+ /* Hardware has to reinitialize queues and interrupts to
+ * match packet buffer alignment. Unfortunately, the
+ * hardware is not flexible enough to do this dynamically.
+ */
+ if (netif_running(dev))
+ ixgbevf_close(dev);
+
+ ixgbevf_clear_interrupt_scheme(adapter);
+ ixgbevf_init_interrupt_scheme(adapter);
+
+ if (netif_running(dev))
+ ixgbevf_open(dev);
+ } else {
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
+ }
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
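
/* Editorial sketch, not part of the patch: the frame-size check above
 * in isolation, with the header math spelled out for a 1500-byte MTU:
 * 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522 bytes,
 * which must fit in a single Rx buffer for XDP to be accepted.
 */
static bool xdp_frame_fits(int mtu, int rx_bufsz)
{
	return mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN <= rx_bufsz;
}
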
+
+static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(dev);
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return ixgbevf_xdp_setup(dev, xdp->prog);
+ case XDP_QUERY_PROG:
+ xdp->prog_attached = !!(adapter->xdp_prog);
+ xdp->prog_id = adapter->xdp_prog ?
+ adapter->xdp_prog->aux->id : 0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops ixgbevf_netdev_ops = {
.ndo_open = ixgbevf_open,
.ndo_stop = ixgbevf_close,
@@ -4026,6 +4485,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = {
.ndo_poll_controller = ixgbevf_netpoll,
#endif
.ndo_features_check = ixgbevf_features_check,
+ .ndo_bpf = ixgbevf_xdp,
};
static void ixgbevf_assign_netdev_ops(struct net_device *dev)
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
index bc0442acae78..5ec947fe3d09 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/regs.h b/drivers/net/ethernet/intel/ixgbevf/regs.h
index 2764fd16261f..278f73980501 100644
--- a/drivers/net/ethernet/intel/ixgbevf/regs.h
+++ b/drivers/net/ethernet/intel/ixgbevf/regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
index c651fefcc3d2..194fbdaa4519 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*******************************************************************************
Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 25e9a551cc8c..eaa4bb80f1c9 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4655,8 +4655,8 @@ MODULE_DESCRIPTION("Marvell NETA Ethernet Driver - www.marvell.com");
MODULE_AUTHOR("Rami Rosen <[email protected]>, Thomas Petazzoni <[email protected]>");
MODULE_LICENSE("GPL");
-module_param(rxq_number, int, S_IRUGO);
-module_param(txq_number, int, S_IRUGO);
+module_param(rxq_number, int, 0444);
+module_param(txq_number, int, 0444);
-module_param(rxq_def, int, S_IRUGO);
-module_param(rx_copybreak, int, S_IRUGO | S_IWUSR);
+module_param(rxq_def, int, 0444);
+module_param(rx_copybreak, int, 0644);
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index a1d7b88cf083..7fc1bbf51c44 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -44,6 +44,7 @@
#define MVPP2_RX_ATTR_FIFO_SIZE_REG(port) (0x20 + 4 * (port))
#define MVPP2_RX_MIN_PKT_SIZE_REG 0x60
#define MVPP2_RX_FIFO_INIT_REG 0x64
+#define MVPP22_TX_FIFO_THRESH_REG(port) (0x8840 + 4 * (port))
#define MVPP22_TX_FIFO_SIZE_REG(port) (0x8860 + 4 * (port))
/* RX DMA Top Registers */
@@ -65,6 +66,10 @@
#define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000
#define MVPP2_RXQ_DISABLE_MASK BIT(31)
+/* Top Registers */
+#define MVPP2_MH_REG(port) (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED BIT(5)
+
/* Parser Registers */
#define MVPP2_PRS_INIT_LOOKUP_REG 0x1000
#define MVPP2_PRS_PORT_LU_MAX 0xf
@@ -254,6 +259,7 @@
#define MVPP2_BM_BPPI_READ_PTR_REG(pool) (0x6100 + ((pool) * 4))
#define MVPP2_BM_BPPI_PTRS_NUM_REG(pool) (0x6140 + ((pool) * 4))
#define MVPP2_BM_BPPI_PTR_NUM_MASK 0x7ff
+#define MVPP22_BM_POOL_PTRS_NUM_MASK 0xfff8
#define MVPP2_BM_BPPI_PREFETCH_FULL_MASK BIT(16)
#define MVPP2_BM_POOL_CTRL_REG(pool) (0x6200 + ((pool) * 4))
#define MVPP2_BM_START_MASK BIT(0)
@@ -473,6 +479,7 @@
#define MVPP2_ETH_TYPE_LEN 2
#define MVPP2_PPPOE_HDR_SIZE 8
#define MVPP2_VLAN_TAG_LEN 4
+#define MVPP2_VLAN_TAG_EDSA_LEN 8
/* Lbtd 802.3 type */
#define MVPP2_IP_LBDT_TYPE 0xfffa
@@ -536,6 +543,11 @@
/* TX FIFO constants */
#define MVPP22_TX_FIFO_DATA_SIZE_10KB 0xa
#define MVPP22_TX_FIFO_DATA_SIZE_3KB 0x3
+#define MVPP2_TX_FIFO_THRESHOLD_MIN 256
+#define MVPP2_TX_FIFO_THRESHOLD_10KB \
+ (MVPP22_TX_FIFO_DATA_SIZE_10KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
+#define MVPP2_TX_FIFO_THRESHOLD_3KB \
+ (MVPP22_TX_FIFO_DATA_SIZE_3KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
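
/* Worked values (editorial note), given the 256-byte minimum reserve:
 *   MVPP2_TX_FIFO_THRESHOLD_10KB = 10 * 1024 - 256 = 9984 bytes
 *   MVPP2_TX_FIFO_THRESHOLD_3KB  =  3 * 1024 - 256 = 2816 bytes
 */
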
/* RX buffer constants */
#define MVPP2_SKB_SHINFO_SIZE \
@@ -589,6 +601,9 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_TCAM_PROTO_MASK 0xff
#define MVPP2_PRS_TCAM_PROTO_MASK_L 0x3f
#define MVPP2_PRS_DBL_VLANS_MAX 100
+#define MVPP2_PRS_CAST_MASK BIT(0)
+#define MVPP2_PRS_MCAST_VAL BIT(0)
+#define MVPP2_PRS_UCAST_VAL 0x0
/* Tcam structure:
* - lookup ID - 4 bits
@@ -609,35 +624,81 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_TCAM_LU_BYTE 20
#define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2)
#define MVPP2_PRS_TCAM_INV_WORD 5
+
+#define MVPP2_PRS_VID_TCAM_BYTE 2
+
+/* TCAM range for unicast and multicast filtering. We have 25 entries per port,
+ * with 4 dedicated to UC filtering and the rest to multicast filtering.
+ * Additionally we reserve one entry for the broadcast address, and one for
+ * each port's own address.
+ */
+#define MVPP2_PRS_MAC_UC_MC_FILT_MAX 25
+#define MVPP2_PRS_MAC_RANGE_SIZE 80
+
+/* Number of entries per port dedicated to UC and MC filtering */
+#define MVPP2_PRS_MAC_UC_FILT_MAX 4
+#define MVPP2_PRS_MAC_MC_FILT_MAX (MVPP2_PRS_MAC_UC_MC_FILT_MAX - \
+ MVPP2_PRS_MAC_UC_FILT_MAX)
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33:
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filtering, not including the loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX 11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
/* Tcam entries ID */
#define MVPP2_PE_DROP_ALL 0
#define MVPP2_PE_FIRST_FREE_TID 1
-#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* MAC filtering range */
+#define MVPP2_PE_MAC_RANGE_END (MVPP2_PE_VID_FILT_RANGE_START - 1)
+#define MVPP2_PE_MAC_RANGE_START (MVPP2_PE_MAC_RANGE_END - \
+ MVPP2_PRS_MAC_RANGE_SIZE + 1)
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \
+ MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1)
#define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
-#define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
-#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
-#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
-#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
-#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
-#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
-#define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
-#define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
+#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
+#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
+#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 22)
+#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_VID_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
+/* reserved */
+#define MVPP2_PE_MAC_MC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
+#define MVPP2_PE_MAC_UC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
#define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+ ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
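
/* Worked example (editorial note): with MVPP2_PRS_VLAN_FILT_MAX = 11,
 * port 1's slice of the VID filter range is
 *   MVPP2_PRS_VID_PORT_FIRST(1) = range_start + 11
 *   MVPP2_PRS_VID_PORT_LAST(1)  = first + 9   (10 VID filter entries)
 *   MVPP2_PRS_VID_PORT_DFLT(1)  = first + 10  (the guard/default entry)
 * and 3 ports * 11 entries = 33 = MVPP2_PRS_VLAN_FILT_RANGE_SIZE.
 */
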
+
/* Sram structure
* The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
*/
@@ -725,6 +786,7 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4)
#define MVPP2_PRS_SINGLE_VLAN_AI 0
#define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT BIT(0)
/* DSA/EDSA type */
#define MVPP2_PRS_TAGGED true
@@ -747,6 +809,7 @@ enum mvpp2_prs_lookup {
MVPP2_PRS_LU_MAC,
MVPP2_PRS_LU_DSA,
MVPP2_PRS_LU_VLAN,
+ MVPP2_PRS_LU_VID,
MVPP2_PRS_LU_L2,
MVPP2_PRS_LU_PPPOE,
MVPP2_PRS_LU_IP4,
@@ -755,6 +818,12 @@ enum mvpp2_prs_lookup {
MVPP2_PRS_LU_LAST,
};
+/* L2 cast enum */
+enum mvpp2_prs_l2_cast {
+ MVPP2_PRS_L2_UNI_CAST,
+ MVPP2_PRS_L2_MULTI_CAST,
+};
+
/* L3 cast enum */
enum mvpp2_prs_l3_cast {
MVPP2_PRS_L3_UNI_CAST,
@@ -772,23 +841,26 @@ enum mvpp2_prs_l3_cast {
#define MVPP22_RSS_TABLE_ENTRIES 32
/* BM constants */
-#define MVPP2_BM_POOLS_NUM 8
+#define MVPP2_BM_JUMBO_BUF_NUM 512
#define MVPP2_BM_LONG_BUF_NUM 1024
#define MVPP2_BM_SHORT_BUF_NUM 2048
#define MVPP2_BM_POOL_SIZE_MAX (16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
#define MVPP2_BM_POOL_PTR_ALIGN 128
-#define MVPP2_BM_SWF_LONG_POOL(port) ((port > 2) ? 2 : port)
-#define MVPP2_BM_SWF_SHORT_POOL 3
/* BM cookie (32 bits) definition */
#define MVPP2_BM_COOKIE_POOL_OFFS 8
#define MVPP2_BM_COOKIE_CPU_OFFS 24
+#define MVPP2_BM_SHORT_FRAME_SIZE 512
+#define MVPP2_BM_LONG_FRAME_SIZE 2048
+#define MVPP2_BM_JUMBO_FRAME_SIZE 10240
/* BM short pool packet size
* These value assure that for SWF the total number
* of bytes allocated for each buffer will be 512
*/
-#define MVPP2_BM_SHORT_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(512)
+#define MVPP2_BM_SHORT_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_SHORT_FRAME_SIZE)
+#define MVPP2_BM_LONG_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_LONG_FRAME_SIZE)
+#define MVPP2_BM_JUMBO_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_JUMBO_FRAME_SIZE)
#define MVPP21_ADDR_SPACE_SZ 0
#define MVPP22_ADDR_SPACE_SZ SZ_64K
@@ -796,12 +868,18 @@ enum mvpp2_prs_l3_cast {
#define MVPP2_MAX_THREADS 8
#define MVPP2_MAX_QVECS MVPP2_MAX_THREADS
-enum mvpp2_bm_type {
- MVPP2_BM_FREE,
- MVPP2_BM_SWF_LONG,
- MVPP2_BM_SWF_SHORT
+enum mvpp2_bm_pool_log_num {
+ MVPP2_BM_SHORT,
+ MVPP2_BM_LONG,
+ MVPP2_BM_JUMBO,
+ MVPP2_BM_POOLS_NUM
};
+static struct {
+ int pkt_size;
+ int buf_num;
+} mvpp2_pools[MVPP2_BM_POOLS_NUM];
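
/* Editorial sketch, not part of the patch: how such a table would be
 * populated from the constants above (the patch fills it at probe time;
 * the helper name here is hypothetical):
 */
static void mvpp2_pools_init(void)
{
	mvpp2_pools[MVPP2_BM_SHORT].pkt_size = MVPP2_BM_SHORT_PKT_SIZE;
	mvpp2_pools[MVPP2_BM_SHORT].buf_num  = MVPP2_BM_SHORT_BUF_NUM;
	mvpp2_pools[MVPP2_BM_LONG].pkt_size  = MVPP2_BM_LONG_PKT_SIZE;
	mvpp2_pools[MVPP2_BM_LONG].buf_num   = MVPP2_BM_LONG_BUF_NUM;
	mvpp2_pools[MVPP2_BM_JUMBO].pkt_size = MVPP2_BM_JUMBO_PKT_SIZE;
	mvpp2_pools[MVPP2_BM_JUMBO].buf_num  = MVPP2_BM_JUMBO_BUF_NUM;
}
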
+
/* GMAC MIB Counters register definitions */
#define MVPP21_MIB_COUNTERS_OFFSET 0x1000
#define MVPP21_MIB_COUNTERS_PORT_SZ 0x400
@@ -1230,7 +1308,6 @@ struct mvpp2_cls_lookup_entry {
struct mvpp2_bm_pool {
/* Pool number in the range 0-7 */
int id;
- enum mvpp2_bm_type type;
/* Buffer Pointers Pool External (BPPE) size */
int size;
@@ -1282,6 +1359,10 @@ static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
return readl(priv->swth_base[0] + offset);
}
+static u32 mvpp2_read_relaxed(struct mvpp2 *priv, u32 offset)
+{
+ return readl_relaxed(priv->swth_base[0] + offset);
+}
/* These accessors should be used to access:
*
* - per-CPU registers, where each CPU has its own copy of the
@@ -1330,6 +1411,18 @@ static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
return readl(priv->swth_base[cpu] + offset);
}
+static void mvpp2_percpu_write_relaxed(struct mvpp2 *priv, int cpu,
+ u32 offset, u32 data)
+{
+ writel_relaxed(data, priv->swth_base[cpu] + offset);
+}
+
+static u32 mvpp2_percpu_read_relaxed(struct mvpp2 *priv, int cpu,
+ u32 offset)
+{
+ return readl_relaxed(priv->swth_base[cpu] + offset);
+}
+
static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
struct mvpp2_tx_desc *tx_desc)
{
@@ -1505,14 +1598,18 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe)
return 0;
}
-/* Read tcam entry from hw */
-static int mvpp2_prs_hw_read(struct mvpp2 *priv, struct mvpp2_prs_entry *pe)
+/* Initialize tcam entry from hw */
+static int mvpp2_prs_init_from_hw(struct mvpp2 *priv,
+ struct mvpp2_prs_entry *pe, int tid)
{
int i;
if (pe->index > MVPP2_PRS_TCAM_SRAM_SIZE - 1)
return -EINVAL;
+ memset(pe, 0, sizeof(*pe));
+ pe->index = tid;
+
/* Write tcam index - indirect access */
mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index);
@@ -1662,6 +1759,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
}
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+ unsigned short vid)
+{
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
/* Set bits in sram sw entry */
static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
int val)
@@ -1828,16 +1933,11 @@ static void mvpp2_prs_sram_offset_set(struct mvpp2_prs_entry *pe,
}
/* Find parser flow entry */
-static struct mvpp2_prs_entry *mvpp2_prs_flow_find(struct mvpp2 *priv, int flow)
+static int mvpp2_prs_flow_find(struct mvpp2 *priv, int flow)
{
- struct mvpp2_prs_entry *pe;
+ struct mvpp2_prs_entry pe;
int tid;
- pe = kzalloc(sizeof(*pe), GFP_KERNEL);
- if (!pe)
- return NULL;
- mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_FLOWS);
-
/* Go through all the entries with MVPP2_PRS_LU_FLOWS */
for (tid = MVPP2_PRS_TCAM_SRAM_SIZE - 1; tid >= 0; tid--) {
u8 bits;
@@ -1846,17 +1946,15 @@ static struct mvpp2_prs_entry *mvpp2_prs_flow_find(struct mvpp2 *priv, int flow)
priv->prs_shadow[tid].lu != MVPP2_PRS_LU_FLOWS)
continue;
- pe->index = tid;
- mvpp2_prs_hw_read(priv, pe);
- bits = mvpp2_prs_sram_ai_get(pe);
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
+ bits = mvpp2_prs_sram_ai_get(&pe);
/* Sram store classification lookup ID in AI bits [5:0] */
if ((bits & MVPP2_PRS_FLOW_ID_MASK) == flow)
- return pe;
+ return tid;
}
- kfree(pe);
- return NULL;
+ return -ENOENT;
}
/* Return first free tcam index, seeking from start to end */
@@ -1886,8 +1984,7 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add)
if (priv->prs_shadow[MVPP2_PE_DROP_ALL].valid) {
/* Entry exist - update port only */
- pe.index = MVPP2_PE_DROP_ALL;
- mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_init_from_hw(priv, &pe, MVPP2_PE_DROP_ALL);
} else {
/* Entry doesn't exist - create new */
memset(&pe, 0, sizeof(pe));
@@ -1914,78 +2011,42 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add)
mvpp2_prs_hw_write(priv, &pe);
}
-/* Set port to promiscuous mode */
-static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, bool add)
+/* Set port to unicast or multicast promiscuous mode */
+static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port,
+ enum mvpp2_prs_l2_cast l2_cast, bool add)
{
struct mvpp2_prs_entry pe;
+ unsigned char cast_match;
+ unsigned int ri;
+ int tid;
- /* Promiscuous mode - Accept unknown packets */
-
- if (priv->prs_shadow[MVPP2_PE_MAC_PROMISCUOUS].valid) {
- /* Entry exist - update port only */
- pe.index = MVPP2_PE_MAC_PROMISCUOUS;
- mvpp2_prs_hw_read(priv, &pe);
+ if (l2_cast == MVPP2_PRS_L2_UNI_CAST) {
+ cast_match = MVPP2_PRS_UCAST_VAL;
+ tid = MVPP2_PE_MAC_UC_PROMISCUOUS;
+ ri = MVPP2_PRS_RI_L2_UCAST;
} else {
- /* Entry doesn't exist - create new */
- memset(&pe, 0, sizeof(pe));
- mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
- pe.index = MVPP2_PE_MAC_PROMISCUOUS;
-
- /* Continue - set next lookup */
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
-
- /* Set result info bits */
- mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_UCAST,
- MVPP2_PRS_RI_L2_CAST_MASK);
-
- /* Shift to ethertype */
- mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
- /* Mask all ports */
- mvpp2_prs_tcam_port_map_set(&pe, 0);
-
- /* Update shadow table */
- mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+ cast_match = MVPP2_PRS_MCAST_VAL;
+ tid = MVPP2_PE_MAC_MC_PROMISCUOUS;
+ ri = MVPP2_PRS_RI_L2_MCAST;
}
- /* Update port mask */
- mvpp2_prs_tcam_port_set(&pe, port, add);
-
- mvpp2_prs_hw_write(priv, &pe);
-}
-
-/* Accept multicast */
-static void mvpp2_prs_mac_multi_set(struct mvpp2 *priv, int port, int index,
- bool add)
-{
- struct mvpp2_prs_entry pe;
- unsigned char da_mc;
-
- /* Ethernet multicast address first byte is
- * 0x01 for IPv4 and 0x33 for IPv6
- */
- da_mc = (index == MVPP2_PE_MAC_MC_ALL) ? 0x01 : 0x33;
-
- if (priv->prs_shadow[index].valid) {
- /* Entry exist - update port only */
- pe.index = index;
- mvpp2_prs_hw_read(priv, &pe);
+ /* promiscuous mode - Accept unknown unicast or multicast packets */
+ if (priv->prs_shadow[tid].valid) {
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
} else {
- /* Entry doesn't exist - create new */
memset(&pe, 0, sizeof(pe));
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
- pe.index = index;
+ pe.index = tid;
/* Continue - set next lookup */
mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
/* Set result info bits */
- mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_MCAST,
- MVPP2_PRS_RI_L2_CAST_MASK);
+ mvpp2_prs_sram_ri_update(&pe, ri, MVPP2_PRS_RI_L2_CAST_MASK);
- /* Update tcam entry data first byte */
- mvpp2_prs_tcam_data_byte_set(&pe, 0, da_mc, 0xff);
+ /* Match UC or MC addresses */
+ mvpp2_prs_tcam_data_byte_set(&pe, 0, cast_match,
+ MVPP2_PRS_CAST_MASK);
/* Shift to ethertype */
mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
@@ -2021,32 +2082,37 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
if (priv->prs_shadow[tid].valid) {
/* Entry exist - update port only */
- pe.index = tid;
- mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
} else {
/* Entry doesn't exist - create new */
memset(&pe, 0, sizeof(pe));
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
pe.index = tid;
- /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
- mvpp2_prs_sram_shift_set(&pe, shift,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
/* Update shadow table */
mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
if (tagged) {
/* Set tagged bit in DSA tag */
mvpp2_prs_tcam_data_byte_set(&pe, 0,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
- /* Clear all ai bits for next iteration */
- mvpp2_prs_sram_ai_update(&pe, 0,
- MVPP2_PRS_SRAM_AI_MASK);
- /* If packet is tagged continue check vlans */
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+ /* Set ai bits for next iteration */
+ if (extend)
+ mvpp2_prs_sram_ai_update(&pe, 1,
+ MVPP2_PRS_SRAM_AI_MASK);
+ else
+ mvpp2_prs_sram_ai_update(&pe, 0,
+ MVPP2_PRS_SRAM_AI_MASK);
+
+ /* If packet is tagged continue check vid filtering */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
} else {
+ /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag */
+ mvpp2_prs_sram_shift_set(&pe, shift,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
/* Set result info bits to 'no vlans' */
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2084,8 +2150,7 @@ static void mvpp2_prs_dsa_tag_ethertype_set(struct mvpp2 *priv, int port,
if (priv->prs_shadow[tid].valid) {
/* Entry exist - update port only */
- pe.index = tid;
- mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
} else {
/* Entry doesn't exist - create new */
memset(&pe, 0, sizeof(pe));
@@ -2133,17 +2198,11 @@ static void mvpp2_prs_dsa_tag_ethertype_set(struct mvpp2 *priv, int port,
}
/* Search for existing single/triple vlan entry */
-static struct mvpp2_prs_entry *mvpp2_prs_vlan_find(struct mvpp2 *priv,
- unsigned short tpid, int ai)
+static int mvpp2_prs_vlan_find(struct mvpp2 *priv, unsigned short tpid, int ai)
{
- struct mvpp2_prs_entry *pe;
+ struct mvpp2_prs_entry pe;
int tid;
- pe = kzalloc(sizeof(*pe), GFP_KERNEL);
- if (!pe)
- return NULL;
- mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
-
/* Go through all the entries with MVPP2_PRS_LU_VLAN */
for (tid = MVPP2_PE_FIRST_FREE_TID;
tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
@@ -2154,19 +2213,17 @@ static struct mvpp2_prs_entry *mvpp2_prs_vlan_find(struct mvpp2 *priv,
priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN)
continue;
- pe->index = tid;
-
- mvpp2_prs_hw_read(priv, pe);
- match = mvpp2_prs_tcam_data_cmp(pe, 0, swab16(tpid));
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
+ match = mvpp2_prs_tcam_data_cmp(&pe, 0, swab16(tpid));
if (!match)
continue;
/* Get vlan type */
- ri_bits = mvpp2_prs_sram_ri_get(pe);
+ ri_bits = mvpp2_prs_sram_ri_get(&pe);
ri_bits &= MVPP2_PRS_RI_VLAN_MASK;
/* Get current ai value from tcam */
- ai_bits = mvpp2_prs_tcam_ai_get(pe);
+ ai_bits = mvpp2_prs_tcam_ai_get(&pe);
/* Clear double vlan bit */
ai_bits &= ~MVPP2_PRS_DBL_VLAN_AI_BIT;
@@ -2175,34 +2232,31 @@ static struct mvpp2_prs_entry *mvpp2_prs_vlan_find(struct mvpp2 *priv,
if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE ||
ri_bits == MVPP2_PRS_RI_VLAN_TRIPLE)
- return pe;
+ return tid;
}
- kfree(pe);
- return NULL;
+ return -ENOENT;
}
/* Add/update single/triple vlan entry */
static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
unsigned int port_map)
{
- struct mvpp2_prs_entry *pe;
+ struct mvpp2_prs_entry pe;
int tid_aux, tid;
int ret = 0;
- pe = mvpp2_prs_vlan_find(priv, tpid, ai);
+ memset(&pe, 0, sizeof(pe));
+
+ tid = mvpp2_prs_vlan_find(priv, tpid, ai);
- if (!pe) {
+ if (tid < 0) {
/* Create new tcam entry */
tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_LAST_FREE_TID,
MVPP2_PE_FIRST_FREE_TID);
if (tid < 0)
return tid;
- pe = kzalloc(sizeof(*pe), GFP_KERNEL);
- if (!pe)
- return -ENOMEM;
-
/* Get last double vlan tid */
for (tid_aux = MVPP2_PE_LAST_FREE_TID;
tid_aux >= MVPP2_PE_FIRST_FREE_TID; tid_aux--) {
@@ -2212,50 +2266,46 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN)
continue;
- pe->index = tid_aux;
- mvpp2_prs_hw_read(priv, pe);
- ri_bits = mvpp2_prs_sram_ri_get(pe);
+ mvpp2_prs_init_from_hw(priv, &pe, tid_aux);
+ ri_bits = mvpp2_prs_sram_ri_get(&pe);
if ((ri_bits & MVPP2_PRS_RI_VLAN_MASK) ==
MVPP2_PRS_RI_VLAN_DOUBLE)
break;
}
- if (tid <= tid_aux) {
- ret = -EINVAL;
- goto free_pe;
- }
+ if (tid <= tid_aux)
+ return -EINVAL;
+
+ memset(&pe, 0, sizeof(pe));
+ pe.index = tid;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
- memset(pe, 0, sizeof(*pe));
- mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
- pe->index = tid;
+ mvpp2_prs_match_etype(&pe, 0, tpid);
- mvpp2_prs_match_etype(pe, 0, tpid);
+ /* VLAN tag detected, proceed with VID filtering */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
- mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
- /* Shift 4 bytes - skip 1 vlan tag */
- mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
/* Clear all ai bits for next iteration */
- mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
if (ai == MVPP2_PRS_SINGLE_VLAN_AI) {
- mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_SINGLE,
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_SINGLE,
MVPP2_PRS_RI_VLAN_MASK);
} else {
ai |= MVPP2_PRS_DBL_VLAN_AI_BIT;
- mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_TRIPLE,
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_TRIPLE,
MVPP2_PRS_RI_VLAN_MASK);
}
- mvpp2_prs_tcam_ai_update(pe, ai, MVPP2_PRS_SRAM_AI_MASK);
+ mvpp2_prs_tcam_ai_update(&pe, ai, MVPP2_PRS_SRAM_AI_MASK);
- mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_VLAN);
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN);
+ } else {
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
}
/* Update ports' mask */
- mvpp2_prs_tcam_port_map_set(pe, port_map);
+ mvpp2_prs_tcam_port_map_set(&pe, port_map);
- mvpp2_prs_hw_write(priv, pe);
-free_pe:
- kfree(pe);
+ mvpp2_prs_hw_write(priv, &pe);
return ret;
}
@@ -2274,18 +2324,12 @@ static int mvpp2_prs_double_vlan_ai_free_get(struct mvpp2 *priv)
}
/* Search for existing double vlan entry */
-static struct mvpp2_prs_entry *mvpp2_prs_double_vlan_find(struct mvpp2 *priv,
- unsigned short tpid1,
- unsigned short tpid2)
+static int mvpp2_prs_double_vlan_find(struct mvpp2 *priv, unsigned short tpid1,
+ unsigned short tpid2)
{
- struct mvpp2_prs_entry *pe;
+ struct mvpp2_prs_entry pe;
int tid;
- pe = kzalloc(sizeof(*pe), GFP_KERNEL);
- if (!pe)
- return NULL;
- mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
-
/* Go through all the entries with MVPP2_PRS_LU_VLAN */
for (tid = MVPP2_PE_FIRST_FREE_TID;
tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
@@ -2296,22 +2340,20 @@ static struct mvpp2_prs_entry *mvpp2_prs_double_vlan_find(struct mvpp2 *priv,
priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN)
continue;
- pe->index = tid;
- mvpp2_prs_hw_read(priv, pe);
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
- match = mvpp2_prs_tcam_data_cmp(pe, 0, swab16(tpid1))
- && mvpp2_prs_tcam_data_cmp(pe, 4, swab16(tpid2));
+ match = mvpp2_prs_tcam_data_cmp(&pe, 0, swab16(tpid1)) &&
+ mvpp2_prs_tcam_data_cmp(&pe, 4, swab16(tpid2));
if (!match)
continue;
- ri_mask = mvpp2_prs_sram_ri_get(pe) & MVPP2_PRS_RI_VLAN_MASK;
+ ri_mask = mvpp2_prs_sram_ri_get(&pe) & MVPP2_PRS_RI_VLAN_MASK;
if (ri_mask == MVPP2_PRS_RI_VLAN_DOUBLE)
- return pe;
+ return tid;
}
- kfree(pe);
- return NULL;
+ return -ENOENT;
}
/* Add or update double vlan entry */
@@ -2319,28 +2361,24 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
unsigned short tpid2,
unsigned int port_map)
{
- struct mvpp2_prs_entry *pe;
int tid_aux, tid, ai, ret = 0;
+ struct mvpp2_prs_entry pe;
- pe = mvpp2_prs_double_vlan_find(priv, tpid1, tpid2);
+ memset(&pe, 0, sizeof(pe));
- if (!pe) {
+ tid = mvpp2_prs_double_vlan_find(priv, tpid1, tpid2);
+
+ if (tid < 0) {
/* Create new tcam entry */
tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
MVPP2_PE_LAST_FREE_TID);
if (tid < 0)
return tid;
- pe = kzalloc(sizeof(*pe), GFP_KERNEL);
- if (!pe)
- return -ENOMEM;
-
/* Set ai value for new double vlan entry */
ai = mvpp2_prs_double_vlan_ai_free_get(priv);
- if (ai < 0) {
- ret = ai;
- goto free_pe;
- }
+ if (ai < 0)
+ return ai;
/* Get first single/triple vlan tid */
for (tid_aux = MVPP2_PE_FIRST_FREE_TID;
@@ -2351,46 +2389,44 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN)
continue;
- pe->index = tid_aux;
- mvpp2_prs_hw_read(priv, pe);
- ri_bits = mvpp2_prs_sram_ri_get(pe);
+ mvpp2_prs_init_from_hw(priv, &pe, tid_aux);
+ ri_bits = mvpp2_prs_sram_ri_get(&pe);
ri_bits &= MVPP2_PRS_RI_VLAN_MASK;
if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE ||
ri_bits == MVPP2_PRS_RI_VLAN_TRIPLE)
break;
}
- if (tid >= tid_aux) {
- ret = -ERANGE;
- goto free_pe;
- }
+ if (tid >= tid_aux)
+ return -ERANGE;
- memset(pe, 0, sizeof(*pe));
- mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
- pe->index = tid;
+ memset(&pe, 0, sizeof(pe));
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+ pe.index = tid;
priv->prs_double_vlans[ai] = true;
- mvpp2_prs_match_etype(pe, 0, tpid1);
- mvpp2_prs_match_etype(pe, 4, tpid2);
+ mvpp2_prs_match_etype(&pe, 0, tpid1);
+ mvpp2_prs_match_etype(&pe, 4, tpid2);
- mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
- /* Shift 8 bytes - skip 2 vlan tags */
- mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+ /* Shift 4 bytes - skip outer vlan tag */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
- mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
MVPP2_PRS_RI_VLAN_MASK);
- mvpp2_prs_sram_ai_update(pe, ai | MVPP2_PRS_DBL_VLAN_AI_BIT,
+ mvpp2_prs_sram_ai_update(&pe, ai | MVPP2_PRS_DBL_VLAN_AI_BIT,
MVPP2_PRS_SRAM_AI_MASK);
- mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_VLAN);
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN);
+ } else {
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
}
/* Update ports' mask */
- mvpp2_prs_tcam_port_map_set(pe, port_map);
- mvpp2_prs_hw_write(priv, pe);
-free_pe:
- kfree(pe);
+ mvpp2_prs_tcam_port_map_set(&pe, port_map);
+ mvpp2_prs_hw_write(priv, &pe);
+
return ret;
}
@@ -2694,11 +2730,10 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv)
mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
mvpp2_prs_hw_write(priv, &pe);
- /* place holders only - no ports */
+ /* Create dummy entries for drop all and promiscuous modes */
mvpp2_prs_mac_drop_all_set(priv, 0, false);
- mvpp2_prs_mac_promisc_set(priv, 0, false);
- mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_ALL, false);
- mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_IP6, false);
+ mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false);
+ mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false);
}
/* Set default entries for various types of dsa packets */
@@ -2755,6 +2790,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
mvpp2_prs_hw_write(priv, &pe);
}
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+ struct mvpp2_prs_entry pe;
+
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 4 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ /* Set default vid entry for extended DSA */
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+ MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Match basic ethertypes */
static int mvpp2_prs_etype_init(struct mvpp2 *priv)
{
@@ -3023,7 +3114,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
pe.index = MVPP2_PE_VLAN_DBL;
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
/* Clear ai for next iterations */
mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3478,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
return 0;
}
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+ u16 mask)
+{
+ unsigned char byte[2], enable[2];
+ struct mvpp2_prs_entry pe;
+ u16 rvid, rmask;
+ int tid;
+
+ /* Go through all the entries with MVPP2_PRS_LU_VID */
+ for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+ tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+ if (!priv->prs_shadow[tid].valid ||
+ priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+ continue;
+
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
+
+ mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+ mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+ rvid = ((byte[0] & 0xf) << 8) + byte[1];
+ rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+ if (rvid != vid || rmask != mask)
+ continue;
+
+ return tid;
+ }
+
+ return -ENOENT;
+}
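
/* Worked example (editorial note): a 12-bit VID spans TCAM data bytes
 * 2 and 3. For vid = 0xabc:
 *   byte[0] = 0x0a (only the low nibble carries VID bits, hence & 0xf)
 *   byte[1] = 0xbc
 *   rvid    = ((0x0a & 0xf) << 8) + 0xbc = 0xabc
 * The enable[] bytes yield the per-bit mask the same way.
 */
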
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+ unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+ port->id * MVPP2_PRS_VLAN_FILT_MAX;
+ unsigned int mask = 0xfff, reg_val, shift;
+ struct mvpp2 *priv = port->priv;
+ struct mvpp2_prs_entry pe;
+ int tid;
+
+ memset(&pe, 0, sizeof(pe));
+
+ /* Scan TCAM and see if an entry with this <vid,port> already exists */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ /* No such entry */
+ if (tid < 0) {
+ /* Go through all entries from first to last in vlan range */
+ tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+ vid_start +
+ MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+ /* There isn't room for a new VID filter */
+ if (tid < 0)
+ return tid;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+ pe.index = tid;
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+ } else {
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
+ }
+
+ /* Enable the current port */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Set match on VID */
+ mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ return 0;
+}
+
+/* Remove parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ /* Scan TCAM and see if an entry with this <vid,port> already exists */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+ /* No such entry */
+ if (tid < 0)
+ return;
+
+ mvpp2_prs_hw_inv(priv, tid);
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+ tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+ if (priv->prs_shadow[tid].valid) {
+ /* tid is a TCAM index, not a VID: invalidate directly */
+ mvpp2_prs_hw_inv(priv, tid);
+ priv->prs_shadow[tid].valid = false;
+ }
+ }
+}
+
+/* Remove VID filtering entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+
+ /* Invalidate the guard entry */
+ mvpp2_prs_hw_inv(priv, tid);
+
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+ unsigned int reg_val, shift;
+ struct mvpp2_prs_entry pe;
+
+ if (priv->prs_shadow[tid].valid)
+ return;
+
+ memset(&pe, 0, sizeof(pe));
+
+ pe.index = tid;
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+ /* Update port mask */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Drop VLAN packets that don't belong to any VIDs on this port */
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+ MVPP2_PRS_RI_DROP_MASK);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Parser default initialization */
static int mvpp2_prs_default_init(struct platform_device *pdev,
struct mvpp2 *priv)
@@ -3429,6 +3707,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
mvpp2_prs_dsa_init(priv);
+ mvpp2_prs_vid_init(priv);
+
err = mvpp2_prs_etype_init(priv);
if (err)
return err;
@@ -3472,21 +3752,16 @@ static bool mvpp2_prs_mac_range_equals(struct mvpp2_prs_entry *pe,
}
/* Find tcam entry with matched pair <MAC DA, port> */
-static struct mvpp2_prs_entry *
+static int
mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
unsigned char *mask, int udf_type)
{
- struct mvpp2_prs_entry *pe;
+ struct mvpp2_prs_entry pe;
int tid;
- pe = kzalloc(sizeof(*pe), GFP_ATOMIC);
- if (!pe)
- return NULL;
- mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
-
/* Go through all entries with MVPP2_PRS_LU_MAC */
- for (tid = MVPP2_PE_FIRST_FREE_TID;
- tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+ for (tid = MVPP2_PE_MAC_RANGE_START;
+ tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
unsigned int entry_pmap;
if (!priv->prs_shadow[tid].valid ||
@@ -3494,110 +3769,103 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
(priv->prs_shadow[tid].udf != udf_type))
continue;
- pe->index = tid;
- mvpp2_prs_hw_read(priv, pe);
- entry_pmap = mvpp2_prs_tcam_port_map_get(pe);
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
+ entry_pmap = mvpp2_prs_tcam_port_map_get(&pe);
- if (mvpp2_prs_mac_range_equals(pe, da, mask) &&
+ if (mvpp2_prs_mac_range_equals(&pe, da, mask) &&
entry_pmap == pmap)
- return pe;
+ return tid;
}
- kfree(pe);
- return NULL;
+ return -ENOENT;
}
/* Update parser's mac da entry */
-static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
- const u8 *da, bool add)
+static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
+ bool add)
{
- struct mvpp2_prs_entry *pe;
- unsigned int pmap, len, ri;
unsigned char mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ struct mvpp2 *priv = port->priv;
+ unsigned int pmap, len, ri;
+ struct mvpp2_prs_entry pe;
int tid;
+ memset(&pe, 0, sizeof(pe));
+
/* Scan TCAM and see if entry with this <MAC DA, port> already exists */
- pe = mvpp2_prs_mac_da_range_find(priv, (1 << port), da, mask,
- MVPP2_PRS_UDF_MAC_DEF);
+ tid = mvpp2_prs_mac_da_range_find(priv, BIT(port->id), da, mask,
+ MVPP2_PRS_UDF_MAC_DEF);
/* No such entry */
- if (!pe) {
+ if (tid < 0) {
if (!add)
return 0;
/* Create new TCAM entry */
- /* Find first range mac entry*/
- for (tid = MVPP2_PE_FIRST_FREE_TID;
- tid <= MVPP2_PE_LAST_FREE_TID; tid++)
- if (priv->prs_shadow[tid].valid &&
- (priv->prs_shadow[tid].lu == MVPP2_PRS_LU_MAC) &&
- (priv->prs_shadow[tid].udf ==
- MVPP2_PRS_UDF_MAC_RANGE))
- break;
-
/* Go through all entries from first to last */
- tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
- tid - 1);
+ tid = mvpp2_prs_tcam_first_free(priv,
+ MVPP2_PE_MAC_RANGE_START,
+ MVPP2_PE_MAC_RANGE_END);
if (tid < 0)
return tid;
- pe = kzalloc(sizeof(*pe), GFP_ATOMIC);
- if (!pe)
- return -ENOMEM;
- mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
- pe->index = tid;
+ pe.index = tid;
/* Mask all ports */
- mvpp2_prs_tcam_port_map_set(pe, 0);
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+ } else {
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
}
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
+
/* Update port mask */
- mvpp2_prs_tcam_port_set(pe, port, add);
+ mvpp2_prs_tcam_port_set(&pe, port->id, add);
/* Invalidate the entry if no ports are left enabled */
- pmap = mvpp2_prs_tcam_port_map_get(pe);
+ pmap = mvpp2_prs_tcam_port_map_get(&pe);
if (pmap == 0) {
- if (add) {
- kfree(pe);
+ if (add)
return -EINVAL;
- }
- mvpp2_prs_hw_inv(priv, pe->index);
- priv->prs_shadow[pe->index].valid = false;
- kfree(pe);
+
+ mvpp2_prs_hw_inv(priv, pe.index);
+ priv->prs_shadow[pe.index].valid = false;
return 0;
}
/* Continue - set next lookup */
- mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_DSA);
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
/* Set match on DA */
len = ETH_ALEN;
while (len--)
- mvpp2_prs_tcam_data_byte_set(pe, len, da[len], 0xff);
+ mvpp2_prs_tcam_data_byte_set(&pe, len, da[len], 0xff);
/* Set result info bits */
- if (is_broadcast_ether_addr(da))
+ if (is_broadcast_ether_addr(da)) {
ri = MVPP2_PRS_RI_L2_BCAST;
- else if (is_multicast_ether_addr(da))
+ } else if (is_multicast_ether_addr(da)) {
ri = MVPP2_PRS_RI_L2_MCAST;
- else
- ri = MVPP2_PRS_RI_L2_UCAST | MVPP2_PRS_RI_MAC_ME_MASK;
+ } else {
+ ri = MVPP2_PRS_RI_L2_UCAST;
+
+ if (ether_addr_equal(da, port->dev->dev_addr))
+ ri |= MVPP2_PRS_RI_MAC_ME_MASK;
+ }
- mvpp2_prs_sram_ri_update(pe, ri, MVPP2_PRS_RI_L2_CAST_MASK |
+ mvpp2_prs_sram_ri_update(&pe, ri, MVPP2_PRS_RI_L2_CAST_MASK |
MVPP2_PRS_RI_MAC_ME_MASK);
- mvpp2_prs_shadow_ri_set(priv, pe->index, ri, MVPP2_PRS_RI_L2_CAST_MASK |
+ mvpp2_prs_shadow_ri_set(priv, pe.index, ri, MVPP2_PRS_RI_L2_CAST_MASK |
MVPP2_PRS_RI_MAC_ME_MASK);
/* Shift to ethertype */
- mvpp2_prs_sram_shift_set(pe, 2 * ETH_ALEN,
+ mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
/* Update shadow table and hw entry */
- priv->prs_shadow[pe->index].udf = MVPP2_PRS_UDF_MAC_DEF;
- mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_MAC);
- mvpp2_prs_hw_write(priv, pe);
-
- kfree(pe);
+ priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_MAC_DEF;
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+ mvpp2_prs_hw_write(priv, &pe);
return 0;
}
@@ -3608,13 +3876,12 @@ static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
int err;
/* Remove old parser entry */
- err = mvpp2_prs_mac_da_accept(port->priv, port->id, dev->dev_addr,
- false);
+ err = mvpp2_prs_mac_da_accept(port, dev->dev_addr, false);
if (err)
return err;
/* Add new parser entry */
- err = mvpp2_prs_mac_da_accept(port->priv, port->id, da, true);
+ err = mvpp2_prs_mac_da_accept(port, da, true);
if (err)
return err;
@@ -3624,14 +3891,15 @@ static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
return 0;
}
-/* Delete all port's multicast simple (not range) entries */
-static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
+static void mvpp2_prs_mac_del_all(struct mvpp2_port *port)
{
+ struct mvpp2 *priv = port->priv;
struct mvpp2_prs_entry pe;
+ unsigned long pmap;
int index, tid;
- for (tid = MVPP2_PE_FIRST_FREE_TID;
- tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+ for (tid = MVPP2_PE_MAC_RANGE_START;
+ tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
unsigned char da[ETH_ALEN], da_mask[ETH_ALEN];
if (!priv->prs_shadow[tid].valid ||
@@ -3639,18 +3907,28 @@ static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
(priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF))
continue;
- /* Only simple mac entries */
- pe.index = tid;
- mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_init_from_hw(priv, &pe, tid);
+
+ pmap = mvpp2_prs_tcam_port_map_get(&pe);
+
+ /* We only want entries active on this port */
+ if (!test_bit(port->id, &pmap))
+ continue;
/* Read mac addr from entry */
for (index = 0; index < ETH_ALEN; index++)
mvpp2_prs_tcam_data_byte_get(&pe, index, &da[index],
&da_mask[index]);
- if (is_multicast_ether_addr(da) && !is_broadcast_ether_addr(da))
- /* Delete this entry */
- mvpp2_prs_mac_da_accept(priv, port, da, false);
+ /* Special cases: don't remove the broadcast entry or the port's
+ * own address
+ */
+ if (is_broadcast_ether_addr(da) ||
+ ether_addr_equal(da, port->dev->dev_addr))
+ continue;
+
+ /* Remove entry from TCAM */
+ mvpp2_prs_mac_da_accept(port, da, false);
}
}
@@ -3707,13 +3985,15 @@ static int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type)
/* Set prs flow for the port */
static int mvpp2_prs_def_flow(struct mvpp2_port *port)
{
- struct mvpp2_prs_entry *pe;
+ struct mvpp2_prs_entry pe;
int tid;
- pe = mvpp2_prs_flow_find(port->priv, port->id);
+ memset(&pe, 0, sizeof(pe));
+
+ tid = mvpp2_prs_flow_find(port->priv, port->id);
/* No such entry exists */
- if (!pe) {
+ if (tid < 0) {
/* Go through all entries from last to first */
tid = mvpp2_prs_tcam_first_free(port->priv,
MVPP2_PE_LAST_FREE_TID,
@@ -3721,24 +4001,21 @@ static int mvpp2_prs_def_flow(struct mvpp2_port *port)
if (tid < 0)
return tid;
- pe = kzalloc(sizeof(*pe), GFP_KERNEL);
- if (!pe)
- return -ENOMEM;
-
- mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_FLOWS);
- pe->index = tid;
+ pe.index = tid;
/* Set flow ID */
- mvpp2_prs_sram_ai_update(pe, port->id, MVPP2_PRS_FLOW_ID_MASK);
- mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_LU_DONE_BIT, 1);
+ mvpp2_prs_sram_ai_update(&pe, port->id, MVPP2_PRS_FLOW_ID_MASK);
+ mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_DONE_BIT, 1);
/* Update shadow table */
- mvpp2_prs_shadow_set(port->priv, pe->index, MVPP2_PRS_LU_FLOWS);
+ mvpp2_prs_shadow_set(port->priv, pe.index, MVPP2_PRS_LU_FLOWS);
+ } else {
+ mvpp2_prs_init_from_hw(port->priv, &pe, tid);
}
- mvpp2_prs_tcam_port_map_set(pe, (1 << port->id));
- mvpp2_prs_hw_write(port->priv, pe);
- kfree(pe);
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+ mvpp2_prs_tcam_port_map_set(&pe, (1 << port->id));
+ mvpp2_prs_hw_write(port->priv, &pe);
return 0;
}
@@ -3901,7 +4178,6 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
val |= MVPP2_BM_START_MASK;
mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
- bm_pool->type = MVPP2_BM_FREE;
bm_pool->size = size;
bm_pool->pkt_size = 0;
bm_pool->buf_num = 0;
@@ -3954,11 +4230,17 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
/* Free all buffers from the pool */
static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
- struct mvpp2_bm_pool *bm_pool)
+ struct mvpp2_bm_pool *bm_pool, int buf_num)
{
int i;
- for (i = 0; i < bm_pool->buf_num; i++) {
+ if (buf_num > bm_pool->buf_num) {
+ WARN(1, "Pool does not have so many bufs pool(%d) bufs(%d)\n",
+ bm_pool->id, buf_num);
+ buf_num = bm_pool->buf_num;
+ }
+
+ for (i = 0; i < buf_num; i++) {
dma_addr_t buf_dma_addr;
phys_addr_t buf_phys_addr;
void *data;
@@ -3980,16 +4262,39 @@ static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
bm_pool->buf_num -= i;
}
+/* Check number of buffers in BM pool */
+static int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool)
+{
+ int buf_num = 0;
+
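+ /* Sum the buffer pointers counted in the pool's external memory
+ * and in the internal BPPI
+ */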
+ buf_num += mvpp2_read(priv, MVPP2_BM_POOL_PTRS_NUM_REG(bm_pool->id)) &
+ MVPP22_BM_POOL_PTRS_NUM_MASK;
+ buf_num += mvpp2_read(priv, MVPP2_BM_BPPI_PTRS_NUM_REG(bm_pool->id)) &
+ MVPP2_BM_BPPI_PTR_NUM_MASK;
+
+ /* HW has one buffer ready which is not reflected in the counters */
+ if (buf_num)
+ buf_num += 1;
+
+ return buf_num;
+}
+
/* Cleanup pool */
static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
struct mvpp2 *priv,
struct mvpp2_bm_pool *bm_pool)
{
+ int buf_num;
u32 val;
- mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool);
- if (bm_pool->buf_num) {
- WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id);
+ buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
+ mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num);
+
+ /* Check buffer counters after free */
+ buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
+ if (buf_num) {
+ WARN(1, "cannot free all buffers in pool %d, buf_num left %d\n",
+ bm_pool->id, buf_num);
return 0;
}
@@ -4051,6 +4356,21 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
return 0;
}
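+/* Set the default buffer count and packet size of the three global pools
+ * (short, long and jumbo) shared by all ports
+ */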
+static void mvpp2_setup_bm_pool(void)
+{
+ /* Short pool */
+ mvpp2_pools[MVPP2_BM_SHORT].buf_num = MVPP2_BM_SHORT_BUF_NUM;
+ mvpp2_pools[MVPP2_BM_SHORT].pkt_size = MVPP2_BM_SHORT_PKT_SIZE;
+
+ /* Long pool */
+ mvpp2_pools[MVPP2_BM_LONG].buf_num = MVPP2_BM_LONG_BUF_NUM;
+ mvpp2_pools[MVPP2_BM_LONG].pkt_size = MVPP2_BM_LONG_PKT_SIZE;
+
+ /* Jumbo pool */
+ mvpp2_pools[MVPP2_BM_JUMBO].buf_num = MVPP2_BM_JUMBO_BUF_NUM;
+ mvpp2_pools[MVPP2_BM_JUMBO].pkt_size = MVPP2_BM_JUMBO_PKT_SIZE;
+}
+
/* Attach long pool to rxq */
static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
int lrxq, int long_pool)
@@ -4138,8 +4458,8 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
<< MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT) &
MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK;
- mvpp2_percpu_write(port->priv, cpu,
- MVPP22_BM_ADDR_HIGH_RLS_REG, val);
+ mvpp2_percpu_write_relaxed(port->priv, cpu,
+ MVPP22_BM_ADDR_HIGH_RLS_REG, val);
}
/* MVPP2_BM_VIRT_RLS_REG is not interpreted by HW, and simply
@@ -4147,10 +4467,10 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
* descriptor. Instead of storing the virtual address, we
* store the physical address
*/
- mvpp2_percpu_write(port->priv, cpu,
- MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
- mvpp2_percpu_write(port->priv, cpu,
- MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
+ mvpp2_percpu_write_relaxed(port->priv, cpu,
+ MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
+ mvpp2_percpu_write_relaxed(port->priv, cpu,
+ MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
put_cpu();
}
@@ -4189,13 +4509,11 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
bm_pool->buf_num += i;
netdev_dbg(port->dev,
- "%s pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
- bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+ "pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
bm_pool->id, bm_pool->pkt_size, buf_size, total_size);
netdev_dbg(port->dev,
- "%s pool %d: %d of %d buffers added\n",
- bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+ "pool %d: %d of %d buffers added\n",
bm_pool->id, i, buf_num);
return i;
}
@@ -4204,25 +4522,20 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
* pool pointer on success
*/
static struct mvpp2_bm_pool *
-mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
- int pkt_size)
+mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned int pool, int pkt_size)
{
struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
int num;
- if (new_pool->type != MVPP2_BM_FREE && new_pool->type != type) {
- netdev_err(port->dev, "mixing pool types is forbidden\n");
+ if (pool >= MVPP2_BM_POOLS_NUM) {
+ netdev_err(port->dev, "Invalid pool %u\n", pool);
return NULL;
}
- if (new_pool->type == MVPP2_BM_FREE)
- new_pool->type = type;
-
/* Allocate buffers in case BM pool is used as long pool, but packet
* size doesn't match MTU or BM pool hasn't being used yet
*/
- if (((type == MVPP2_BM_SWF_LONG) && (pkt_size > new_pool->pkt_size)) ||
- (new_pool->pkt_size == 0)) {
+ if (new_pool->pkt_size == 0) {
int pkts_num;
/* Set default buffer number or free all the buffers in case
@@ -4230,12 +4543,10 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
*/
pkts_num = new_pool->buf_num;
if (pkts_num == 0)
- pkts_num = type == MVPP2_BM_SWF_LONG ?
- MVPP2_BM_LONG_BUF_NUM :
- MVPP2_BM_SHORT_BUF_NUM;
+ pkts_num = mvpp2_pools[pool].buf_num;
else
mvpp2_bm_bufs_free(port->dev->dev.parent,
- port->priv, new_pool);
+ port->priv, new_pool, pkts_num);
new_pool->pkt_size = pkt_size;
new_pool->frag_size =
@@ -4261,16 +4572,28 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
{
int rxq;
+ enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool;
+
+ /* If port pkt_size is higher than 1518B:
+ * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
+ * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
+ */
+ if (port->pkt_size > MVPP2_BM_LONG_PKT_SIZE) {
+ long_log_pool = MVPP2_BM_JUMBO;
+ short_log_pool = MVPP2_BM_LONG;
+ } else {
+ long_log_pool = MVPP2_BM_LONG;
+ short_log_pool = MVPP2_BM_SHORT;
+ }
if (!port->pool_long) {
port->pool_long =
- mvpp2_bm_pool_use(port, MVPP2_BM_SWF_LONG_POOL(port->id),
- MVPP2_BM_SWF_LONG,
- port->pkt_size);
+ mvpp2_bm_pool_use(port, long_log_pool,
+ mvpp2_pools[long_log_pool].pkt_size);
if (!port->pool_long)
return -ENOMEM;
- port->pool_long->port_map |= (1 << port->id);
+ port->pool_long->port_map |= BIT(port->id);
for (rxq = 0; rxq < port->nrxqs; rxq++)
mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id);
@@ -4278,13 +4601,12 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
if (!port->pool_short) {
port->pool_short =
- mvpp2_bm_pool_use(port, MVPP2_BM_SWF_SHORT_POOL,
- MVPP2_BM_SWF_SHORT,
- MVPP2_BM_SHORT_PKT_SIZE);
+ mvpp2_bm_pool_use(port, short_log_pool,
+ mvpp2_pools[short_log_pool].pkt_size);
if (!port->pool_short)
return -ENOMEM;
- port->pool_short->port_map |= (1 << port->id);
+ port->pool_short->port_map |= BIT(port->id);
for (rxq = 0; rxq < port->nrxqs; rxq++)
mvpp2_rxq_short_pool_set(port, rxq,
@@ -4297,30 +4619,49 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
{
struct mvpp2_port *port = netdev_priv(dev);
- struct mvpp2_bm_pool *port_pool = port->pool_long;
- int num, pkts_num = port_pool->buf_num;
+ enum mvpp2_bm_pool_log_num new_long_pool;
int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
- /* Update BM pool with new buffer size */
- mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool);
- if (port_pool->buf_num) {
- WARN(1, "cannot free all buffers in pool %d\n", port_pool->id);
- return -EIO;
- }
-
- port_pool->pkt_size = pkt_size;
- port_pool->frag_size = SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) +
- MVPP2_SKB_SHINFO_SIZE;
- num = mvpp2_bm_bufs_add(port, port_pool, pkts_num);
- if (num != pkts_num) {
- WARN(1, "pool %d: %d of %d allocated\n",
- port_pool->id, num, pkts_num);
- return -EIO;
+ /* If port MTU is higher than 1518B:
+ * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
+ * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
+ */
+ if (pkt_size > MVPP2_BM_LONG_PKT_SIZE)
+ new_long_pool = MVPP2_BM_JUMBO;
+ else
+ new_long_pool = MVPP2_BM_LONG;
+
+ if (new_long_pool != port->pool_long->id) {
+ /* Remove port from old short & long pool */
+ port->pool_long = mvpp2_bm_pool_use(port, port->pool_long->id,
+ port->pool_long->pkt_size);
+ port->pool_long->port_map &= ~BIT(port->id);
+ port->pool_long = NULL;
+
+ port->pool_short = mvpp2_bm_pool_use(port, port->pool_short->id,
+ port->pool_short->pkt_size);
+ port->pool_short->port_map &= ~BIT(port->id);
+ port->pool_short = NULL;
+
+ port->pkt_size = pkt_size;
+
+ /* Add port to new short & long pool */
+ mvpp2_swf_bm_pool_init(port);
+
+ /* Update L4 checksum when jumbo enable/disable on port */
+ if (new_long_pool == MVPP2_BM_JUMBO && port->id != 0) {
+ dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ dev->hw_features &= ~(NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM);
+ } else {
+ dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ }
}
- mvpp2_bm_pool_bufsize_set(port->priv, port_pool,
- MVPP2_RX_BUF_SIZE(port_pool->pkt_size));
dev->mtu = mtu;
+ dev->wanted_features = dev->features;
+
netdev_update_features(dev);
return 0;
}
@@ -5221,7 +5562,8 @@ static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) {
/* Update number of occupied aggregated Tx descriptors */
int cpu = smp_processor_id();
- u32 val = mvpp2_read(priv, MVPP2_AGGR_TXQ_STATUS_REG(cpu));
+ u32 val = mvpp2_read_relaxed(priv,
+ MVPP2_AGGR_TXQ_STATUS_REG(cpu));
aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK;
}
@@ -5245,9 +5587,9 @@ static int mvpp2_txq_alloc_reserved_desc(struct mvpp2 *priv,
int cpu = smp_processor_id();
val = (txq->id << MVPP2_TXQ_RSVD_REQ_Q_OFFSET) | num;
- mvpp2_percpu_write(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
+ mvpp2_percpu_write_relaxed(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
- val = mvpp2_percpu_read(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
+ val = mvpp2_percpu_read_relaxed(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
return val & MVPP2_TXQ_RSVD_RSLT_MASK;
}
@@ -5352,8 +5694,8 @@ static inline int mvpp2_txq_sent_desc_proc(struct mvpp2_port *port,
u32 val;
/* Reading status reg resets transmitted descriptor counter */
- val = mvpp2_percpu_read(port->priv, smp_processor_id(),
- MVPP2_TXQ_SENT_REG(txq->id));
+ val = mvpp2_percpu_read_relaxed(port->priv, smp_processor_id(),
+ MVPP2_TXQ_SENT_REG(txq->id));
return (val & MVPP2_TRANSMITTED_COUNT_MASK) >>
MVPP2_TRANSMITTED_COUNT_OFFSET;
@@ -6719,8 +7061,8 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
*
* Each CPU has its own Rx/Tx cause register
*/
- cause_rx_tx = mvpp2_percpu_read(port->priv, qv->sw_thread_id,
- MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
+ cause_rx_tx = mvpp2_percpu_read_relaxed(port->priv, qv->sw_thread_id,
+ MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
if (cause_misc) {
@@ -7007,15 +7349,14 @@ static int mvpp2_open(struct net_device *dev)
0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
int err;
- err = mvpp2_prs_mac_da_accept(port->priv, port->id, mac_bcast, true);
+ err = mvpp2_prs_mac_da_accept(port, mac_bcast, true);
if (err) {
netdev_err(dev, "mvpp2_prs_mac_da_accept BC failed\n");
return err;
}
- err = mvpp2_prs_mac_da_accept(port->priv, port->id,
- dev->dev_addr, true);
+ err = mvpp2_prs_mac_da_accept(port, dev->dev_addr, true);
if (err) {
- netdev_err(dev, "mvpp2_prs_mac_da_accept MC failed\n");
+ netdev_err(dev, "mvpp2_prs_mac_da_accept own addr failed\n");
return err;
}
err = mvpp2_prs_tag_mode_set(port->priv, port->id, MVPP2_TAG_TYPE_MH);
@@ -7129,25 +7470,64 @@ static int mvpp2_stop(struct net_device *dev)
return 0;
}
+static int mvpp2_prs_mac_da_accept_list(struct mvpp2_port *port,
+ struct netdev_hw_addr_list *list)
+{
+ struct netdev_hw_addr *ha;
+ int ret;
+
+ netdev_hw_addr_list_for_each(ha, list) {
+ ret = mvpp2_prs_mac_da_accept(port, ha->addr, true);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
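+/* Enabling promiscuous mode also bypasses VLAN filtering, so that all
+ * tagged traffic is received as well
+ */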
+static void mvpp2_set_rx_promisc(struct mvpp2_port *port, bool enable)
+{
+ if (!enable && (port->dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ mvpp2_prs_vid_enable_filtering(port);
+ else
+ mvpp2_prs_vid_disable_filtering(port);
+
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_UNI_CAST, enable);
+
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_MULTI_CAST, enable);
+}
+
static void mvpp2_set_rx_mode(struct net_device *dev)
{
struct mvpp2_port *port = netdev_priv(dev);
- struct mvpp2 *priv = port->priv;
- struct netdev_hw_addr *ha;
- int id = port->id;
- bool allmulti = dev->flags & IFF_ALLMULTI;
- mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC);
- mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti);
- mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti);
+ /* Clear the whole UC and MC address lists */
+ mvpp2_prs_mac_del_all(port);
- /* Remove all port->id's mcast enries */
- mvpp2_prs_mcast_del_all(priv, id);
+ if (dev->flags & IFF_PROMISC) {
+ mvpp2_set_rx_promisc(port, true);
+ return;
+ }
+
+ mvpp2_set_rx_promisc(port, false);
- if (allmulti && !netdev_mc_empty(dev)) {
- netdev_for_each_mc_addr(ha, dev)
- mvpp2_prs_mac_da_accept(priv, id, ha->addr, true);
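+ /* Fall back to unicast promiscuous mode if the UC address list
+ * does not fit in the TCAM filter or an entry cannot be added
+ */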
+ if (netdev_uc_count(dev) > MVPP2_PRS_MAC_UC_FILT_MAX ||
+ mvpp2_prs_mac_da_accept_list(port, &dev->uc))
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_UNI_CAST, true);
+
+ if (dev->flags & IFF_ALLMULTI) {
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_MULTI_CAST, true);
+ return;
}
+
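+ /* Likewise, fall back to multicast promiscuous mode if the MC
+ * address list overflows the filter
+ */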
+ if (netdev_mc_count(dev) > MVPP2_PRS_MAC_MC_FILT_MAX ||
+ mvpp2_prs_mac_da_accept_list(port, &dev->mc))
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_MULTI_CAST, true);
}
static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7287,6 +7667,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return ret;
}
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret;
+
+ ret = mvpp2_prs_vid_entry_add(port, vid);
+ if (ret)
+ netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+ MVPP2_PRS_VLAN_FILT_MAX - 1);
+ return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ mvpp2_prs_vid_entry_remove(port, vid);
+ return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ netdev_features_t changed = dev->features ^ features;
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ mvpp2_prs_vid_enable_filtering(port);
+ } else {
+ /* Invalidate all registered VID filters for this
+ * port
+ */
+ mvpp2_prs_vid_remove_all(port);
+
+ mvpp2_prs_vid_disable_filtering(port);
+ }
+ }
+
+ return 0;
+}
+
/* Ethtool methods */
/* Set interrupt coalescing for ethtools */
@@ -7428,6 +7850,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
.ndo_change_mtu = mvpp2_change_mtu,
.ndo_get_stats64 = mvpp2_get_stats64,
.ndo_do_ioctl = mvpp2_ioctl,
+ .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid,
+ .ndo_set_features = mvpp2_set_features,
};
static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7938,16 +8363,25 @@ static int mvpp2_port_probe(struct platform_device *pdev,
}
}
- features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+ features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
- dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+ dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ if (port->pool_long->id == MVPP2_BM_JUMBO && port->id != 0) {
+ dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ dev->hw_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ }
+
dev->vlan_features |= features;
dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
+ dev->priv_flags |= IFF_UNICAST_FLT;
- /* MTU range: 68 - 9676 */
+ /* MTU range: 68 - 9704 */
dev->min_mtu = ETH_MIN_MTU;
- /* 9676 == 9700 - 20 and rounding to 8 */
- dev->max_mtu = 9676;
+ /* 9704 == 9728 - 20 and rounding to 8 */
+ dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
err = register_netdev(dev);
if (err < 0) {
@@ -8078,14 +8512,25 @@ static void mvpp22_rx_fifo_init(struct mvpp2 *priv)
mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
}
-/* Initialize Tx FIFO's */
+/* Initialize Tx FIFOs: the total FIFO size is 19kB on PPv2.2, and 10G
+ * interfaces must have a Tx FIFO size of 10kB. As only port 0 can do 10G,
+ * configure its Tx FIFO size to 10kB and the other ports' Tx FIFO size to 3kB.
+ */
static void mvpp22_tx_fifo_init(struct mvpp2 *priv)
{
- int port;
+ int port, size, thrs;
- for (port = 0; port < MVPP2_MAX_PORTS; port++)
- mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port),
- MVPP22_TX_FIFO_DATA_SIZE_3KB);
+ for (port = 0; port < MVPP2_MAX_PORTS; port++) {
+ if (port == 0) {
+ size = MVPP22_TX_FIFO_DATA_SIZE_10KB;
+ thrs = MVPP2_TX_FIFO_THRESHOLD_10KB;
+ } else {
+ size = MVPP22_TX_FIFO_DATA_SIZE_3KB;
+ thrs = MVPP2_TX_FIFO_THRESHOLD_3KB;
+ }
+ mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port), size);
+ mvpp2_write(priv, MVPP22_TX_FIFO_THRESH_REG(port), thrs);
+ }
}
static void mvpp2_axi_init(struct mvpp2 *priv)
@@ -8279,6 +8724,8 @@ static int mvpp2_probe(struct platform_device *pdev)
priv->sysctrl_base = NULL;
}
+ mvpp2_setup_bm_pool();
+
for (i = 0; i < MVPP2_MAX_THREADS; i++) {
u32 addr_space_sz;
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 31efc47c847e..9c08c3650c02 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -3783,7 +3783,7 @@ static int skge_device_event(struct notifier_block *unused,
break;
case NETDEV_UP:
- d = debugfs_create_file(dev->name, S_IRUGO,
+ d = debugfs_create_file(dev->name, 0444,
skge_debug, dev,
&skge_debug_fops);
if (!d || IS_ERR(d))
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 9fe85300e7b6..9b77db7c13d0 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4667,7 +4667,7 @@ static int sky2_device_event(struct notifier_block *unused,
break;
case NETDEV_UP:
- sky2->debugfs = debugfs_create_file(dev->name, S_IRUGO,
+ sky2->debugfs = debugfs_create_file(dev->name, 0444,
sky2_debug, dev,
&sky2_debug_fops);
if (IS_ERR(sky2->debugfs))
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ebc1f566a4d9..9a7a2f05ab35 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
"rx_xdp_drop",
"rx_xdp_tx",
"rx_xdp_tx_full",
+
+ /* phy statistics */
+ "rx_packets_phy", "rx_bytes_phy",
+ "tx_packets_phy", "tx_bytes_phy",
};
static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
if (bitmap_iterator_test(&it))
data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
+ for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
data[index++] = priv->tx_ring[TX][i]->packets;
data[index++] = priv->tx_ring[TX][i]->bytes;
@@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
strcpy(data + (index++) * ETH_GSTRING_LEN,
main_strings[strings]);
+ for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
sprintf(data + (index++) * ETH_GSTRING_LEN,
"tx%d_packets", i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 8fc51bc29003..e0adac4a9a19 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
last_i += NUM_XDP_STATS;
+
+ if (!mlx4_is_slave(dev))
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+ last_i += NUM_PHY_STATS;
}
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3630,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev,
mlx4_en_stop_port(dev, 1);
}
- en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
- ts_config.rx_filter,
- !!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
mlx4_en_safe_replace_resources(priv, tmp);
if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 1fa4849a6f56..0158b88bea5b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
priv->port_stats.xmit_more += READ_ONCE(ring->xmit_more);
}
- if (mlx4_is_master(mdev->dev)) {
- stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
- &mlx4_en_stats->RTOT_prio_1,
- NUM_PRIORITIES);
- stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
- &mlx4_en_stats->TTOT_prio_1,
- NUM_PRIORITIES);
- stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
- &mlx4_en_stats->ROCT_prio_1,
- NUM_PRIORITIES);
- stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
- &mlx4_en_stats->TOCT_prio_1,
- NUM_PRIORITIES);
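+ /* Physical port counters are not available to VFs (slaves) */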
+ if (!mlx4_is_slave(mdev->dev)) {
+ struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+ p_stats->rx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+ &mlx4_en_stats->RTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+ &mlx4_en_stats->TTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->rx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+ &mlx4_en_stats->ROCT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+ &mlx4_en_stats->TOCT_prio_1,
+ NUM_PRIORITIES);
+ if (mlx4_is_master(mdev->dev)) {
+ stats->rx_packets = p_stats->rx_packets_phy;
+ stats->tx_packets = p_stats->tx_packets_phy;
+ stats->rx_bytes = p_stats->rx_bytes_phy;
+ stats->tx_bytes = p_stats->tx_bytes_phy;
+ }
}
/* net device stats */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b4d144e67514..05787efef492 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -291,13 +291,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
sizeof(struct mlx4_en_rx_alloc));
- ring->rx_info = vzalloc_node(tmp, node);
+ ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node);
if (!ring->rx_info) {
- ring->rx_info = vzalloc(tmp);
- if (!ring->rx_info) {
- err = -ENOMEM;
- goto err_xdp_info;
- }
+ err = -ENOMEM;
+ goto err_xdp_info;
}
en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
@@ -318,7 +315,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
return 0;
err_info:
- vfree(ring->rx_info);
+ kvfree(ring->rx_info);
ring->rx_info = NULL;
err_xdp_info:
xdp_rxq_info_unreg(&ring->xdp_rxq);
@@ -447,7 +444,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
bpf_prog_put(old_prog);
xdp_rxq_info_unreg(&ring->xdp_rxq);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
- vfree(ring->rx_info);
+ kvfree(ring->rx_info);
ring->rx_info = NULL;
kfree(ring);
*pring = NULL;
@@ -649,6 +646,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
}
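+/* CQE status bits that flag any IP packet; the IPv6 bit is only included
+ * when IPv6 support is compiled in
+ */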
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -662,12 +665,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
int polled = 0;
int index;
- if (unlikely(!priv->port_up))
+ if (unlikely(!priv->port_up || budget <= 0))
return 0;
- if (unlikely(budget <= 0))
- return polled;
-
ring = priv->rx_ring[cq_ring];
/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -838,12 +838,7 @@ xdp_drop_no_cnt:
ring->csum_ok++;
} else {
if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
- (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
- MLX4_CQE_STATUS_IPV6))))
-#else
- 0))))
-#endif
+ (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
goto csum_none;
if (check_csum(cqe, skb, va, dev->features))
goto csum_none;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 4d84cab77105..5a26851b4ffd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2993,10 +2993,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
sprintf(info->dev_name, "mlx4_port%d", port);
info->port_attr.attr.name = info->dev_name;
- if (mlx4_is_mfunc(dev))
- info->port_attr.attr.mode = S_IRUGO;
- else {
- info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+ if (mlx4_is_mfunc(dev)) {
+ info->port_attr.attr.mode = 0444;
+ } else {
+ info->port_attr.attr.mode = 0644;
info->port_attr.store = set_port_type;
}
info->port_attr.show = show_port_type;
@@ -3011,10 +3011,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
info->port_mtu_attr.attr.name = info->dev_mtu_name;
- if (mlx4_is_mfunc(dev))
- info->port_mtu_attr.attr.mode = S_IRUGO;
- else {
- info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
+ if (mlx4_is_mfunc(dev)) {
+ info->port_mtu_attr.attr.mode = 0444;
+ } else {
+ info->port_mtu_attr.attr.mode = 0644;
info->port_mtu_attr.store = set_port_ib_mtu;
}
info->port_mtu_attr.show = show_port_ib_mtu;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f470ae37d937..f7c81133594f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -608,6 +608,7 @@ struct mlx4_en_priv {
struct mlx4_en_flow_stats_tx tx_flowstats;
struct mlx4_en_port_stats port_stats;
struct mlx4_en_xdp_stats xdp_stats;
+ struct mlx4_en_phy_stats phy_stats;
struct mlx4_en_stats_bitmap stats_bitmap;
struct list_head mc_list;
struct list_head curr_list;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index aab28eb27a30..86b6051da8ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats {
#define NUM_XDP_STATS 3
};
+struct mlx4_en_phy_stats {
+ unsigned long rx_packets_phy;
+ unsigned long rx_bytes_phy;
+ unsigned long tx_packets_phy;
+ unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS 4
+};
+
#define NUM_MAIN_STATS 21
#define MLX4_NUM_PRIORITIES 8
@@ -116,7 +124,7 @@ enum {
#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
- NUM_XDP_STATS)
+ NUM_XDP_STATS + NUM_PHY_STATS)
#define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
sizeof(((struct net_device_stats *)0)->n))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index 53e69edaedde..9f1b1939716a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -37,24 +37,11 @@
#include "mlx5_core.h"
#include "fpga/ipsec.h"
-void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd)
-{
- if (!MLX5_IPSEC_DEV(mdev))
- return ERR_PTR(-EOPNOTSUPP);
-
- return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd);
-}
-
-int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
-{
- return mlx5_fpga_ipsec_sa_cmd_wait(ctx);
-}
-
u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
{
return mlx5_fpga_ipsec_device_caps(mdev);
}
+EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
{
@@ -67,6 +54,21 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
}
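+/* The accel layer is currently a thin shim: ESP hardware contexts are
+ * backed only by the FPGA IPsec implementation
+ */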
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr,
+ spi, is_ipv6);
+}
+
+void mlx5_accel_esp_free_hw_context(void *context)
+{
+ mlx5_fpga_ipsec_delete_sa_ctx(context);
+}
+
int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
{
return mlx5_fpga_ipsec_init(mdev);
@@ -76,3 +78,32 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
mlx5_fpga_ipsec_cleanup(mdev);
}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ struct mlx5_accel_esp_xfrm *xfrm;
+
+ xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags);
+ if (IS_ERR(xfrm))
+ return xfrm;
+
+ xfrm->mdev = mdev;
+ return xfrm;
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
+
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ mlx5_fpga_esp_destroy_xfrm(xfrm);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
+
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index d6e20fea9554..024dbd22a89b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -34,89 +34,25 @@
#ifndef __MLX5_ACCEL_IPSEC_H__
#define __MLX5_ACCEL_IPSEC_H__
-#ifdef CONFIG_MLX5_ACCEL
-
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/accel.h>
-enum {
- MLX5_ACCEL_IPSEC_DEVICE = BIT(1),
- MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
- MLX5_ACCEL_IPSEC_ESP = BIT(3),
- MLX5_ACCEL_IPSEC_LSO = BIT(4),
-};
-
-#define MLX5_IPSEC_SADB_IP_AH BIT(7)
-#define MLX5_IPSEC_SADB_IP_ESP BIT(6)
-#define MLX5_IPSEC_SADB_SA_VALID BIT(5)
-#define MLX5_IPSEC_SADB_SPI_EN BIT(4)
-#define MLX5_IPSEC_SADB_DIR_SX BIT(3)
-#define MLX5_IPSEC_SADB_IPV6 BIT(2)
-
-enum {
- MLX5_IPSEC_CMD_ADD_SA = 0,
- MLX5_IPSEC_CMD_DEL_SA = 1,
-};
-
-enum mlx5_accel_ipsec_enc_mode {
- MLX5_IPSEC_SADB_MODE_NONE = 0,
- MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1,
- MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3,
-};
+#ifdef CONFIG_MLX5_ACCEL
#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
- MLX5_ACCEL_IPSEC_DEVICE)
-
-struct mlx5_accel_ipsec_sa {
- __be32 cmd;
- u8 key_enc[32];
- u8 key_auth[32];
- __be32 sip[4];
- __be32 dip[4];
- union {
- struct {
- __be32 reserved;
- u8 salt_iv[8];
- __be32 salt;
- } __packed gcm;
- struct {
- u8 salt[16];
- } __packed cbc;
- };
- __be32 spi;
- __be32 sw_sa_handle;
- __be16 tfclen;
- u8 enc_mode;
- u8 sip_masklen;
- u8 dip_masklen;
- u8 flags;
- u8 reserved[2];
-} __packed;
-
-/**
- * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command
- * @mdev: mlx5 device
- * @cmd: command to execute
- * May be called from atomic context. Returns context pointer, or error
- * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic
- * context, to cleanup the context pointer
- */
-void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd);
-
-/**
- * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion
- * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec
- * Sleeps (killable) until command execution is complete.
- * Returns the command result, or -EINTR if killed
- */
-int mlx5_accel_ipsec_sa_cmd_wait(void *context);
-
-u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
unsigned int count);
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6);
+void mlx5_accel_esp_free_hw_context(void *context);
+
int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
@@ -124,6 +60,20 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
#define MLX5_IPSEC_DEV(mdev) false
+static inline void *
+mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return NULL;
+}
+
+static inline void mlx5_accel_esp_free_hw_context(void *context)
+{
+}
+
static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
{
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 47239bf7bf43..323ffe8bf7e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -71,19 +71,24 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
}
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_buf *buf, int node)
+ struct mlx5_frag_buf *buf, int node)
{
dma_addr_t t;
buf->size = size;
buf->npages = 1;
buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
- buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size,
- &t, node);
- if (!buf->direct.buf)
+
+ buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
+ if (!buf->frags)
return -ENOMEM;
- buf->direct.map = t;
+ buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size,
+ &t, node);
+ if (!buf->frags->buf)
+ goto err_out;
+
+ buf->frags->map = t;
while (t & ((1 << buf->page_shift) - 1)) {
--buf->page_shift;
@@ -91,18 +96,24 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
}
return 0;
+err_out:
+ kfree(buf->frags);
+ return -ENOMEM;
}
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf)
{
return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
}
-EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+EXPORT_SYMBOL(mlx5_buf_alloc);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
- dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
- buf->direct.map);
+ dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
+ buf->frags->map);
+
+ kfree(buf->frags);
}
EXPORT_SYMBOL_GPL(mlx5_buf_free);
@@ -147,6 +158,7 @@ err_free_buf:
err_out:
return -ENOMEM;
}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
@@ -162,6 +174,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
}
kfree(buf->frags);
}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);
static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
int node)
@@ -275,13 +288,13 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
}
EXPORT_SYMBOL_GPL(mlx5_db_free);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
{
u64 addr;
int i;
for (i = 0; i < buf->npages; i++) {
- addr = buf->direct.map + (i << buf->page_shift);
+ addr = buf->frags->map + (i << buf->page_shift);
pas[i] = cpu_to_be64(addr);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index e9a1fbcc4adf..21cd1703a862 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -359,6 +359,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
@@ -501,6 +502,7 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT);
MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID);
MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV);
MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER);
MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
@@ -1802,7 +1804,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
cmd->checksum_disabled = 1;
cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
- cmd->bitmask = (1 << cmd->max_reg_cmds) - 1;
+ cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1;
cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
if (cmd->cmdif_rev > CMD_IF_REV) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 1016e05c7ec7..a4179122a279 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -58,8 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data)
tasklet_ctx.list) {
list_del_init(&mcq->tasklet_ctx.list);
mcq->tasklet_ctx.comp(mcq);
- if (refcount_dec_and_test(&mcq->refcount))
- complete(&mcq->free);
+ mlx5_cq_put(mcq);
if (time_after(jiffies, end))
break;
}
@@ -80,69 +79,19 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
* still arrive.
*/
if (list_empty_careful(&cq->tasklet_ctx.list)) {
- refcount_inc(&cq->refcount);
+ mlx5_cq_hold(cq);
list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
}
spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
}
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
-{
- struct mlx5_core_cq *cq;
- struct mlx5_cq_table *table = &dev->priv.cq_table;
-
- spin_lock(&table->lock);
- cq = radix_tree_lookup(&table->tree, cqn);
- if (likely(cq))
- refcount_inc(&cq->refcount);
- spin_unlock(&table->lock);
-
- if (!cq) {
- mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- ++cq->arm_sn;
-
- cq->comp(cq);
-
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
-}
-
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
-{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
- struct mlx5_core_cq *cq;
-
- spin_lock(&table->lock);
-
- cq = radix_tree_lookup(&table->tree, cqn);
- if (cq)
- refcount_inc(&cq->refcount);
-
- spin_unlock(&table->lock);
-
- if (!cq) {
- mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- cq->event(cq, event_type);
-
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
-}
-
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen)
{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+ u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
- u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
- int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
- c_eqn);
struct mlx5_eq *eq;
int err;
@@ -159,6 +108,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->cqn = MLX5_GET(create_cq_out, out, cqn);
cq->cons_index = 0;
cq->arm_sn = 0;
+ cq->eq = eq;
refcount_set(&cq->refcount, 1);
init_completion(&cq->free);
if (!cq->comp)
@@ -167,12 +117,16 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->tasklet_ctx.priv = &eq->tasklet_ctx;
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, cq->cqn, cq);
- spin_unlock_irq(&table->lock);
+ /* Add to comp EQ CQ tree to recv comp events */
+ err = mlx5_eq_add_cq(eq, cq);
if (err)
goto err_cmd;
+ /* Add to async EQ CQ tree to recv async events */
+ err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ goto err_cq_add;
+
cq->pid = current->pid;
err = mlx5_debug_cq_add(dev, cq);
if (err)
@@ -183,6 +137,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
return 0;
+err_cq_add:
+ mlx5_eq_del_cq(eq, cq);
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
@@ -195,23 +151,17 @@ EXPORT_SYMBOL(mlx5_core_create_cq);
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
- struct mlx5_core_cq *tmp;
int err;
- spin_lock_irq(&table->lock);
- tmp = radix_tree_delete(&table->tree, cq->cqn);
- spin_unlock_irq(&table->lock);
- if (!tmp) {
- mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
- return -EINVAL;
- }
- if (tmp != cq) {
- mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
- return -EINVAL;
- }
+ err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ return err;
+
+ err = mlx5_eq_del_cq(cq->eq, cq);
+ if (err)
+ return err;
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
@@ -222,8 +172,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
synchronize_irq(cq->irqn);
mlx5_debug_cq_remove(dev, cq);
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
+ mlx5_cq_put(cq);
wait_for_completion(&cq->free);
return 0;
@@ -270,21 +219,3 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
}
EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
-
-int mlx5_init_cq_table(struct mlx5_core_dev *dev)
-{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
- int err;
-
- memset(table, 0, sizeof(*table));
- spin_lock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
- err = mlx5_cq_debugfs_init(dev);
-
- return err;
-}
-
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
-{
- mlx5_cq_debugfs_cleanup(dev);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 17b723218b0c..b994b80d5714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
}
EXPORT_SYMBOL(mlx5_unregister_interface);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+ mutex_lock(&mlx5_intf_mutex);
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 0be4575b58a2..d93ff567b40d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -96,10 +96,10 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p,
"%pI4");
} else if (ethertype.v == ETH_P_IPV6) {
static const struct in6_addr full_ones = {
- .in6_u.u6_addr32 = {htonl(0xffffffff),
- htonl(0xffffffff),
- htonl(0xffffffff),
- htonl(0xffffffff)},
+ .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff)},
};
DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
@@ -246,6 +246,9 @@ const char *parse_fs_dst(struct trace_seq *p,
case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
trace_seq_printf(p, "counter_id=%u\n", counter_id);
break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
+ trace_seq_printf(p, "port\n");
+ break;
}
trace_seq_putc(p, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 80eef4163f52..09f178a3fcab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -136,6 +136,8 @@ TRACE_EVENT(mlx5_fs_del_fg,
{MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\
{MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\
{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP, "VLAN_POP"},\
{MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
TRACE_EVENT(mlx5_fs_set_fte,
@@ -163,9 +165,9 @@ TRACE_EVENT(mlx5_fs_set_fte,
fs_get_obj(__entry->fg, fte->node.parent);
__entry->group_index = __entry->fg->id;
__entry->index = fte->index;
- __entry->action = fte->action;
+ __entry->action = fte->action.action;
__entry->mask_enable = __entry->fg->mask.match_criteria_enable;
- __entry->flow_tag = fte->flow_tag;
+ __entry->flow_tag = fte->action.flow_tag;
memcpy(__entry->mask_outer,
MLX5_ADDR_OF(fte_match_param,
&__entry->fg->mask.match_criteria,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4c9360b25532..294bc9f175a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -781,7 +781,8 @@ struct mlx5e_priv {
struct net_device *netdev;
struct mlx5e_stats stats;
struct hwtstamp_config tstamp;
- u16 q_counter;
+ u16 q_counter;
+ u16 drop_rq_q_counter;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
#endif
@@ -1061,7 +1062,6 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
void mlx5e_update_stats_work(struct work_struct *work);
-u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
int mlx5e_bits_invert(unsigned long a, int size);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index bac5103efad3..cf58c9637904 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -38,17 +38,24 @@
#include <linux/module.h>
#include "en.h"
-#include "accel/ipsec.h"
#include "en_accel/ipsec.h"
#include "en_accel/ipsec_rxtx.h"
-struct mlx5e_ipsec_sa_entry {
- struct hlist_node hlist; /* Item in SADB_RX hashtable */
- unsigned int handle; /* Handle in SADB_RX */
- struct xfrm_state *x;
- struct mlx5e_ipsec *ipsec;
- void *context;
-};
+
+static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa;
+
+ if (!x)
+ return NULL;
+
+ sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ if (!sa)
+ return NULL;
+
+ WARN_ON(sa->x != x);
+ return sa;
+}
struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
unsigned int handle)
@@ -74,18 +81,16 @@ static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
unsigned long flags;
int ret;
- spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
if (ret < 0)
- goto out;
+ return ret;
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
sa_entry->handle = ret;
hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
- ret = 0;
-
-out:
spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
- return ret;
+
+ return 0;
}
static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -101,87 +106,99 @@ static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
{
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
- unsigned long flags;
- /* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */
- synchronize_rcu();
- spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ /* the xfrm layer already does an RCU sync between the del and free callbacks */
+
ida_simple_remove(&ipsec->halloc, sa_entry->handle);
- spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
}
-static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)
+static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
{
- unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4;
-
- switch (key_len) {
- case 16:
- return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128;
- case 32:
- return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128;
- default:
- netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n",
- key_len, x->aead->alg_name);
- return -1;
+ struct xfrm_replay_state_esn *replay_esn;
+ u32 seq_bottom;
+ u8 overlap;
+ u32 *esn;
+
+ if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
+ sa_entry->esn_state.trigger = 0;
+ return false;
+ }
+
+ replay_esn = sa_entry->x->replay_esn;
+ seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+ overlap = sa_entry->esn_state.overlap;
+
+ sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
+ htonl(seq_bottom));
+ esn = &sa_entry->esn_state.esn;
+
+ sa_entry->esn_state.trigger = 1;
+ if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+ ++(*esn);
+ sa_entry->esn_state.overlap = 0;
+ return true;
+ } else if (unlikely(!overlap &&
+ (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
+ sa_entry->esn_state.overlap = 1;
+ return true;
}
+
+ return false;
}
-static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry,
- struct mlx5_accel_ipsec_sa *hw_sa)
+static void
+mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_esp_xfrm_attrs *attrs)
{
struct xfrm_state *x = sa_entry->x;
+ struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm;
struct aead_geniv_ctx *geniv_ctx;
- unsigned int crypto_data_len;
struct crypto_aead *aead;
- unsigned int key_len;
+ unsigned int crypto_data_len, key_len;
int ivsize;
- memset(hw_sa, 0, sizeof(*hw_sa));
-
- if (op == MLX5_IPSEC_CMD_ADD_SA) {
- crypto_data_len = (x->aead->alg_key_len + 7) / 8;
- key_len = crypto_data_len - 4; /* 4 bytes salt at end */
- aead = x->data;
- geniv_ctx = crypto_aead_ctx(aead);
- ivsize = crypto_aead_ivsize(aead);
-
- memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len);
- /* Duplicate 128 bit key twice according to HW layout */
- if (key_len == 16)
- memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len);
- memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize);
- hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
- }
+ memset(attrs, 0, sizeof(*attrs));
- hw_sa->cmd = htonl(op);
- hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
- if (x->props.family == AF_INET) {
- hw_sa->sip[3] = x->props.saddr.a4;
- hw_sa->dip[3] = x->id.daddr.a4;
- hw_sa->sip_masklen = 32;
- hw_sa->dip_masklen = 32;
- } else {
- memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip));
- memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip));
- hw_sa->sip_masklen = 128;
- hw_sa->dip_masklen = 128;
- hw_sa->flags |= MLX5_IPSEC_SADB_IPV6;
- }
- hw_sa->spi = x->id.spi;
- hw_sa->sw_sa_handle = htonl(sa_entry->handle);
- switch (x->id.proto) {
- case IPPROTO_ESP:
- hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP;
- break;
- case IPPROTO_AH:
- hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH;
- break;
- default:
- break;
+ /* key */
+ crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+ key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+
+ memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
+ aes_gcm->key_len = key_len * 8;
+
+ /* salt and seq_iv */
+ aead = x->data;
+ geniv_ctx = crypto_aead_ctx(aead);
+ ivsize = crypto_aead_ivsize(aead);
+ memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
+ memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
+ sizeof(aes_gcm->salt));
+
+ /* icv len */
+ aes_gcm->icv_len = x->aead->alg_icv_len;
+
+ /* esn */
+ if (sa_entry->esn_state.trigger) {
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ attrs->esn = sa_entry->esn_state.esn;
+ if (sa_entry->esn_state.overlap)
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
}
- hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x);
- if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
- hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX;
+
+ /* rx handle */
+ attrs->sa_handle = sa_entry->handle;
+
+ /* algo type */
+ attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ /* action */
+ attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ?
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+ MLX5_ACCEL_ESP_ACTION_DECRYPT;
+ /* flags */
+ attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL;
}
static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
@@ -203,7 +220,9 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
netdev_info(netdev, "Cannot offload compressed xfrm states\n");
return -EINVAL;
}
- if (x->props.flags & XFRM_STATE_ESN) {
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_ESN)) {
netdev_info(netdev, "Cannot offload ESN xfrm states\n");
return -EINVAL;
}
@@ -251,7 +270,8 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
return -EINVAL;
}
if (x->props.family == AF_INET6 &&
- !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) {
+ !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_IPV6)) {
netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
return -EINVAL;
}
@@ -262,9 +282,10 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
struct net_device *netdev = x->xso.dev;
- struct mlx5_accel_ipsec_sa hw_sa;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
struct mlx5e_priv *priv;
- void *context;
+ __be32 saddr[4] = {0}, daddr[4] = {0}, spi;
+ bool is_ipv6 = false;
int err;
priv = netdev_priv(netdev);
@@ -291,22 +312,49 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
goto err_entry;
}
+ } else {
+ sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+ mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
}
- mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa);
- context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
- if (IS_ERR(context)) {
- err = PTR_ERR(context);
+ /* check esn */
+ mlx5e_ipsec_update_esn_state(sa_entry);
+
+ /* create xfrm */
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
+ sa_entry->xfrm =
+ mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
+ MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
+ if (IS_ERR(sa_entry->xfrm)) {
+ err = PTR_ERR(sa_entry->xfrm);
goto err_sadb_rx;
}
- err = mlx5_accel_ipsec_sa_cmd_wait(context);
- if (err)
- goto err_sadb_rx;
+ /* create hw context */
+ if (x->props.family == AF_INET) {
+ saddr[3] = x->props.saddr.a4;
+ daddr[3] = x->id.daddr.a4;
+ } else {
+ memcpy(saddr, x->props.saddr.a6, sizeof(saddr));
+ memcpy(daddr, x->id.daddr.a6, sizeof(daddr));
+ is_ipv6 = true;
+ }
+ spi = x->id.spi;
+ sa_entry->hw_context =
+ mlx5_accel_esp_create_hw_context(priv->mdev,
+ sa_entry->xfrm,
+ saddr, daddr, spi,
+ is_ipv6);
+ if (IS_ERR(sa_entry->hw_context)) {
+ err = PTR_ERR(sa_entry->hw_context);
+ goto err_xfrm;
+ }
x->xso.offload_handle = (unsigned long)sa_entry;
goto out;
+err_xfrm:
+ mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
err_sadb_rx:
if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
mlx5e_ipsec_sadb_rx_del(sa_entry);
@@ -320,43 +368,26 @@ out:
static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
- struct mlx5e_ipsec_sa_entry *sa_entry;
- struct mlx5_accel_ipsec_sa hw_sa;
- void *context;
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- if (!x->xso.offload_handle)
+ if (!sa_entry)
return;
- sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
- WARN_ON(sa_entry->x != x);
-
if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
mlx5e_ipsec_sadb_rx_del(sa_entry);
-
- mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa);
- context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
- if (IS_ERR(context))
- return;
-
- sa_entry->context = context;
}
static void mlx5e_xfrm_free_state(struct xfrm_state *x)
{
- struct mlx5e_ipsec_sa_entry *sa_entry;
- int res;
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- if (!x->xso.offload_handle)
+ if (!sa_entry)
return;
- sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
- WARN_ON(sa_entry->x != x);
-
- res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context);
- sa_entry->context = NULL;
- if (res) {
- /* Leftover object will leak */
- return;
+ if (sa_entry->hw_context) {
+ flush_workqueue(sa_entry->ipsec->wq);
+ mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
+ mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
}
if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
@@ -383,6 +414,14 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
ida_init(&ipsec->halloc);
ipsec->en_priv = priv;
ipsec->en_priv->ipsec = ipsec;
+ ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
+ ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
+ priv->netdev->name);
+ if (!ipsec->wq) {
+ kfree(ipsec);
+ return -ENOMEM;
+ }
netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
return 0;
}
@@ -394,6 +433,9 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
if (!ipsec)
return;
+ drain_workqueue(ipsec->wq);
+ destroy_workqueue(ipsec->wq);
+
ida_destroy(&ipsec->halloc);
kfree(ipsec);
priv->ipsec = NULL;
@@ -414,11 +456,58 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
return true;
}
+struct mlx5e_ipsec_modify_state_work {
+ struct work_struct work;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+};
+
+static void _update_xfrm_state(struct work_struct *work)
+{
+ int ret;
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry;
+
+ ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm,
+ &modify_work->attrs);
+ if (ret)
+ netdev_warn(sa_entry->ipsec->en_priv->netdev,
+ "failed to modify xfrm state, err %d\n", ret);
+
+ kfree(modify_work);
+}
+
+static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5e_ipsec_modify_state_work *modify_work;
+ bool need_update;
+
+ if (!sa_entry)
+ return;
+
+ need_update = mlx5e_ipsec_update_esn_state(sa_entry);
+ if (!need_update)
+ return;
+
+ modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC);
+ if (!modify_work)
+ return;
+
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
+ modify_work->sa_entry = sa_entry;
+
+ INIT_WORK(&modify_work->work, _update_xfrm_state);
+ WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work));
+}
+
static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
.xdo_dev_state_add = mlx5e_xfrm_add_state,
.xdo_dev_state_delete = mlx5e_xfrm_del_state,
.xdo_dev_state_free = mlx5e_xfrm_free_state,
.xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
+ .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
};
void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
@@ -429,7 +518,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
if (!priv->ipsec)
return;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) ||
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
!MLX5_CAP_ETH(mdev, swp)) {
mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
return;
@@ -448,7 +537,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
!MLX5_CAP_ETH(mdev, swp_lso)) {
mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
return;
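
Pulling the pieces of the reworked mlx5e_xfrm_add_state() together: the driver now builds accel attributes from the xfrm state, creates an accel xfrm object, then a HW context keyed by addresses and SPI. A condensed sketch under the names used in this hunk (IPv4-only, error unwinding abbreviated, function name hypothetical):

static int example_offload_sa(struct mlx5e_priv *priv,
			      struct mlx5e_ipsec_sa_entry *sa)
{
	struct mlx5_accel_esp_xfrm_attrs attrs;
	__be32 saddr[4] = {0}, daddr[4] = {0};
	int err;

	mlx5e_ipsec_update_esn_state(sa);		/* seed ESN trigger/overlap */
	mlx5e_ipsec_build_accel_xfrm_attrs(sa, &attrs);	/* SW state -> accel attrs */

	sa->xfrm = mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
					      MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
	if (IS_ERR(sa->xfrm))
		return PTR_ERR(sa->xfrm);

	saddr[3] = sa->x->props.saddr.a4;		/* IPv4 layout per the hunk */
	daddr[3] = sa->x->id.daddr.a4;
	sa->hw_context = mlx5_accel_esp_create_hw_context(priv->mdev, sa->xfrm,
							  saddr, daddr,
							  sa->x->id.spi, false);
	if (IS_ERR(sa->hw_context)) {
		err = PTR_ERR(sa->hw_context);
		mlx5_accel_esp_destroy_xfrm(sa->xfrm);
		return err;
	}

	sa->x->xso.offload_handle = (unsigned long)sa;	/* enables to_ipsec_sa_entry() */
	return 0;
}
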
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 56e00baf16cc..1198fc1eba4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -40,7 +40,11 @@
#include <net/xfrm.h>
#include <linux/idr.h>
+#include "accel/ipsec.h"
+
#define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+
#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
#define MLX5E_METADATA_ETHER_LEN 8
@@ -77,10 +81,30 @@ struct mlx5e_ipsec_stats {
struct mlx5e_ipsec {
struct mlx5e_priv *en_priv;
DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+ bool no_trailer;
spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
struct ida halloc;
struct mlx5e_ipsec_sw_stats sw_stats;
struct mlx5e_ipsec_stats stats;
+ struct workqueue_struct *wq;
+};
+
+struct mlx5e_ipsec_esn_state {
+ u32 esn;
+ u8 trigger: 1;
+ u8 overlap: 1;
+};
+
+struct mlx5e_ipsec_sa_entry {
+ struct hlist_node hlist; /* Item in SADB_RX hashtable */
+ struct mlx5e_ipsec_esn_state esn_state;
+ unsigned int handle; /* Handle in SADB_RX */
+ struct xfrm_state *x;
+ struct mlx5e_ipsec *ipsec;
+ struct mlx5_accel_esp_xfrm *xfrm;
+ void *hw_context;
+ void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
};
void mlx5e_ipsec_build_inverse_table(void);
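
The new MLX5E_IPSEC_ESN_SCOPE_MID constant splits the 32-bit sequence space at 2^31: the overlap bit flips each time the bottom of the replay window crosses the midpoint, and the high 32 bits advance when the low half wraps. A standalone restatement of the update rule from mlx5e_ipsec_update_esn_state() (userspace C, hypothetical names):

#include <stdbool.h>
#include <stdint.h>

#define ESN_SCOPE_MID 0x80000000u

static bool esn_window_update(uint32_t seq_bottom, uint8_t *overlap,
			      uint32_t *esn_hi)
{
	if (*overlap && seq_bottom < ESN_SCOPE_MID) {
		++*esn_hi;	/* low 32 bits wrapped around */
		*overlap = 0;
		return true;	/* HW needs a modify */
	}
	if (!*overlap && seq_bottom >= ESN_SCOPE_MID) {
		*overlap = 1;
		return true;
	}
	return false;		/* nothing to push to HW */
}
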
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 6a7c8b04447e..c245d8e78509 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -42,10 +42,11 @@
enum {
MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+ MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
};
struct mlx5e_ipsec_rx_metadata {
- unsigned char reserved;
+ unsigned char nexthdr;
__be32 sa_handle;
} __packed;
@@ -175,7 +176,30 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
}
}
-static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
+{
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ __u32 oseq = replay_esn->oseq;
+ int iv_offset;
+ __be64 seqno;
+ u32 seq_hi;
+
+ if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID &&
+ MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) {
+ seq_hi = xo->seq.hi - 1;
+ } else {
+ seq_hi = xo->seq.hi;
+ }
+
+ /* Place the SN in the IV field */
+ seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32));
+ iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+ skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
{
int iv_offset;
__be64 seqno;
@@ -227,6 +251,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo = xfrm_offload(skb);
struct mlx5e_ipsec_metadata *mdata;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
struct xfrm_state *x;
if (!xo)
@@ -261,7 +286,8 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
goto drop;
}
mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
- mlx5e_ipsec_set_iv(skb, xo);
+ sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ sa_entry->set_iv_op(skb, x, xo);
mlx5e_ipsec_set_metadata(skb, mdata, xo);
return skb;
@@ -301,10 +327,17 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
switch (mdata->syndrome) {
case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
xo->status = CRYPTO_SUCCESS;
+ if (likely(priv->ipsec->no_trailer)) {
+ xo->flags |= XFRM_ESP_NO_TRAILER;
+ xo->proto = mdata->content.rx.nexthdr;
+ }
break;
case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
break;
+ case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
+ xo->status = CRYPTO_INVALID_PROTOCOL;
+ break;
default:
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
return NULL;
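
mlx5e_ipsec_set_iv_esn() places the full 64-bit extended sequence number in the 8-byte IV slot right after the ESP header; the GSO special case rolls seq.hi back by one when the segments of the skb straddle a low-32-bit wrap, so the first segment gets the pre-wrap high half. The arithmetic, restated standalone (hypothetical helper name):

#include <stdint.h>

/* full ESN as written (big-endian) into the IV field */
static uint64_t esn_full_seq(uint32_t seq_hi, uint32_t seq_low)
{
	return ((uint64_t)seq_hi << 32) | seq_low;
}

/* e.g. hi=1, low=5 -> 0x100000005; a wrap inside a GSO burst means
 * the first segment still belongs to hi-1 */
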
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index e37ae2598dbb..2bfbbef1b054 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -37,6 +37,7 @@
#ifdef CONFIG_MLX5_EN_IPSEC
#include <linux/skbuff.h>
+#include <net/xfrm.h>
#include "en.h"
struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
@@ -46,6 +47,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_ipsec_inverse_table_init(void);
bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
netdev_features_t features);
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_tx_wqe *wqe,
struct sk_buff *skb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cc8048f68f11..d415e67b557b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -203,9 +203,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
{
int i, idx = 0;
- if (!data)
- return;
-
mutex_lock(&priv->state_lock);
mlx5e_update_stats(priv);
mutex_unlock(&priv->state_lock);
@@ -1066,6 +1063,57 @@ static int mlx5e_get_rxnfc(struct net_device *netdev,
return err;
}
+#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100
+#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000
+#define MLX5E_PFC_PREVEN_MINOR_PRECENT 85
+#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC 80
+#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \
+ max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \
+ (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100)
+
+static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev,
+ u16 *pfc_prevention_tout)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+ !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+ return -EOPNOTSUPP;
+
+ return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL);
+}
+
+static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev,
+ u16 pfc_preven)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 critical_tout;
+ u16 minor;
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+ !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+ return -EOPNOTSUPP;
+
+ critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ?
+ MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC :
+ pfc_preven;
+
+ if (critical_tout != PFC_STORM_PREVENTION_DISABLE &&
+ (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC ||
+ critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) {
+ netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n",
+ __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC,
+ MLX5E_PFC_PREVEN_TOUT_MAX_MSEC);
+ return -EINVAL;
+ }
+
+ minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout);
+ return mlx5_set_port_stall_watermark(mdev, critical_tout,
+ minor);
+}
+
static int mlx5e_get_tunable(struct net_device *dev,
const struct ethtool_tunable *tuna,
void *data)
@@ -1077,6 +1125,9 @@ static int mlx5e_get_tunable(struct net_device *dev,
case ETHTOOL_TX_COPYBREAK:
*(u32 *)data = priv->channels.params.tx_max_inline;
break;
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ err = mlx5e_get_pfc_prevention_tout(dev, data);
+ break;
default:
err = -EINVAL;
break;
@@ -1119,6 +1170,9 @@ static int mlx5e_set_tunable(struct net_device *dev,
mlx5e_switch_priv_channels(priv, &new_channels, NULL);
break;
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data);
+ break;
default:
err = -EINVAL;
break;
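
The stall-prevention tunable maps one user value to two device watermarks: the critical timeout is the value itself (or 100 ms for AUTO), and the minor watermark is 85% of it, floored at 80 ms. A standalone restatement (names are this sketch's; the constants match the hunk above):

#include <stdint.h>

#define PREVEN_AUTO_TOUT_MSEC	100
#define PREVEN_TOUT_MIN_MSEC	80
#define PREVEN_MINOR_PERCENT	85

static uint16_t minor_watermark(uint16_t critical_tout_msec)
{
	uint32_t minor = (uint32_t)critical_tout_msec * PREVEN_MINOR_PERCENT / 100;

	return minor < PREVEN_TOUT_MIN_MSEC ? PREVEN_TOUT_MIN_MSEC
					    : (uint16_t)minor;
}

/* AUTO: minor_watermark(100) == 85; a 90 ms request yields max(80, 76) == 80 */
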
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 47bab842c5ee..1d36d7569f44 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -615,8 +615,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
int next_state)
{
- struct mlx5e_channel *c = rq->channel;
- struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_core_dev *mdev = rq->mdev;
void *in;
void *rqc;
@@ -1768,13 +1767,18 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
param->wq.linear = 1;
}
-static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param)
+static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_rq_param *param)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
+ MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
+
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
}
static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
@@ -2634,18 +2638,22 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
struct mlx5e_cq *cq,
struct mlx5e_cq_param *param)
{
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+ param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev);
+
return mlx5e_alloc_cq_common(mdev, param, cq);
}
-static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev,
+static int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
struct mlx5e_rq *drop_rq)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_cq_param cq_param = {};
struct mlx5e_rq_param rq_param = {};
struct mlx5e_cq *cq = &drop_rq->cq;
int err;
- mlx5e_build_drop_rq_param(&rq_param);
+ mlx5e_build_drop_rq_param(priv, &rq_param);
err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
if (err)
@@ -2663,6 +2671,10 @@ static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev,
if (err)
goto err_free_rq;
+ err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted, err %d\n", err);
+
return 0;
err_free_rq:
@@ -2994,8 +3006,8 @@ static int mlx5e_setup_tc_block(struct net_device *dev,
}
#endif
-int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
+static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
{
switch (type) {
#ifdef CONFIG_MLX5_ESWITCH
@@ -3230,24 +3242,20 @@ static int mlx5e_set_features(struct net_device *netdev,
netdev_features_t features)
{
netdev_features_t oper_features = netdev->features;
- int err;
+ int err = 0;
+
+#define MLX5E_HANDLE_FEATURE(feature, handler) \
+ mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
- err = mlx5e_handle_feature(netdev, &oper_features, features,
- NETIF_F_LRO, set_feature_lro);
- err |= mlx5e_handle_feature(netdev, &oper_features, features,
- NETIF_F_HW_VLAN_CTAG_FILTER,
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
set_feature_cvlan_filter);
- err |= mlx5e_handle_feature(netdev, &oper_features, features,
- NETIF_F_HW_TC, set_feature_tc_num_filters);
- err |= mlx5e_handle_feature(netdev, &oper_features, features,
- NETIF_F_RXALL, set_feature_rx_all);
- err |= mlx5e_handle_feature(netdev, &oper_features, features,
- NETIF_F_RXFCS, set_feature_rx_fcs);
- err |= mlx5e_handle_feature(netdev, &oper_features, features,
- NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
#ifdef CONFIG_RFS_ACCEL
- err |= mlx5e_handle_feature(netdev, &oper_features, features,
- NETIF_F_NTUPLE, set_feature_arfs);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
#endif
if (err) {
@@ -3955,7 +3963,7 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
}
-u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
+static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
{
int i;
@@ -4098,6 +4106,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->vlan_features |= NETIF_F_RXCSUM;
netdev->vlan_features |= NETIF_F_RXHASH;
+ netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
+
if (!!MLX5_CAP_ETH(mdev, lro_cap))
netdev->vlan_features |= NETIF_F_LRO;
@@ -4177,7 +4188,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
mlx5e_ipsec_build_netdev(priv);
}
-static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
+static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
int err;
@@ -4187,14 +4198,21 @@ static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err);
priv->q_counter = 0;
}
+
+ err = mlx5_core_alloc_q_counter(mdev, &priv->drop_rq_q_counter);
+ if (err) {
+ mlx5_core_warn(mdev, "alloc drop RQ counter failed, %d\n", err);
+ priv->drop_rq_q_counter = 0;
+ }
}
-static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv)
+static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
{
- if (!priv->q_counter)
- return;
+ if (priv->q_counter)
+ mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
- mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
+ if (priv->drop_rq_q_counter)
+ mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter);
}
static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
@@ -4433,18 +4451,18 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
if (err)
goto out;
- err = mlx5e_open_drop_rq(mdev, &priv->drop_rq);
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
if (err) {
mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
- goto err_cleanup_tx;
+ goto err_destroy_q_counters;
}
err = profile->init_rx(priv);
if (err)
goto err_close_drop_rq;
- mlx5e_create_q_counter(priv);
-
if (profile->enable)
profile->enable(priv);
@@ -4453,7 +4471,8 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
-err_cleanup_tx:
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
profile->cleanup_tx(priv);
out:
@@ -4470,9 +4489,9 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
profile->disable(priv);
flush_workqueue(priv->wq);
- mlx5e_destroy_q_counter(priv);
profile->cleanup_rx(priv);
mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
profile->cleanup_tx(priv);
cancel_delayed_work_sync(&priv->update_stats_work);
}
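
The MLX5E_HANDLE_FEATURE() rewrite keeps the earlier semantics: every handler runs even if one fails, and failures are OR-ed together, so the return value only signals "some feature toggle failed" rather than a specific errno. The pattern, reduced to standalone C with hypothetical handler types:

typedef int (*feature_handler_t)(void *ctx, _Bool enable);

static int apply_features(void *ctx, const feature_handler_t *handlers,
			  const _Bool *enable, int n)
{
	int err = 0;

	for (int i = 0; i < n; i++)
		err |= handlers[i](ctx, enable[i]);	/* keep going on failure */

	/* OR-ing negative errnos mangles the value; treat it only as a
	 * boolean "at least one handler failed" indicator */
	return err;
}
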
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 363d8dcb7f17..ea4b255380a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
kfree(ppriv); /* mlx5e_rep_priv */
}
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+
+ return rpriv->netdev;
+}
+
static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
rep_if.load = mlx5e_vport_rep_load;
rep_if.unload = mlx5e_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
}
}
@@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
rep_if.load = mlx5e_nic_rep_load;
rep_if.unload = mlx5e_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
rep_if.priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 0d4bb0688faa..8cce90dc461d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -36,6 +36,7 @@
#include <linux/tcp.h>
#include <linux/bpf_trace.h>
#include <net/busy_poll.h>
+#include <net/ip6_checksum.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
@@ -52,7 +53,7 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
void *data)
{
- u32 ci = cqcc & cq->wq.sz_m1;
+ u32 ci = cqcc & cq->wq.fbc.sz_m1;
memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
}
@@ -74,9 +75,10 @@ static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
{
- u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
- u32 wq_sz = 1 << cq->wq.log_sz;
- u32 ci = cqcc & cq->wq.sz_m1;
+ struct mlx5_frag_buf_ctrl *fbc = &cq->wq.fbc;
+ u8 op_own = (cqcc >> fbc->log_sz) & 1;
+ u32 wq_sz = 1 << fbc->log_sz;
+ u32 ci = cqcc & fbc->sz_m1;
u32 ci_top = min_t(u32, wq_sz, ci + n);
for (; ci < ci_top; ci++, n--) {
@@ -101,7 +103,7 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
cq->title.op_own &= 0xf0;
- cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz);
+ cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
@@ -546,20 +548,33 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
return true;
}
+static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
+{
+ u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+ u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+ (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
+
+ tcp->check = 0;
+ tcp->psh = get_cqe_lro_tcppsh(cqe);
+
+ if (tcp_ack) {
+ tcp->ack = 1;
+ tcp->ack_seq = cqe->lro_ack_seq_num;
+ tcp->window = cqe->lro_tcp_win;
+ }
+}
+
static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
u32 cqe_bcnt)
{
struct ethhdr *eth = (struct ethhdr *)(skb->data);
struct tcphdr *tcp;
int network_depth = 0;
+ __wsum check;
__be16 proto;
u16 tot_len;
void *ip_p;
- u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
- u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
- (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
-
proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
tot_len = cqe_bcnt - network_depth;
@@ -576,23 +591,30 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
ipv4->check = 0;
ipv4->check = ip_fast_csum((unsigned char *)ipv4,
ipv4->ihl);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
+ tot_len - sizeof(struct iphdr),
+ IPPROTO_TCP, check);
} else {
+ u16 payload_len = tot_len - sizeof(struct ipv6hdr);
struct ipv6hdr *ipv6 = ip_p;
tcp = ip_p + sizeof(struct ipv6hdr);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
ipv6->hop_limit = cqe->lro_min_ttl;
- ipv6->payload_len = cpu_to_be16(tot_len -
- sizeof(struct ipv6hdr));
- }
-
- tcp->psh = get_cqe_lro_tcppsh(cqe);
-
- if (tcp_ack) {
- tcp->ack = 1;
- tcp->ack_seq = cqe->lro_ack_seq_num;
- tcp->window = cqe->lro_tcp_win;
+ ipv6->payload_len = cpu_to_be16(payload_len);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
+ IPPROTO_TCP, check);
}
}
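
The LRO path must recompute the TCP checksum because the merged super-packet has a new length and coalesced header fields: zero tcp->check, sum the TCP header seeded with the hardware checksum, then fold in the pseudo-header via csum_tcpudp_magic()/csum_ipv6_magic(). The underlying ones'-complement arithmetic, as a standalone sketch:

#include <stddef.h>
#include <stdint.h>

/* 32-bit accumulator over 16-bit big-endian words */
static uint32_t csum_add(uint32_t sum, const uint8_t *p, size_t len)
{
	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;	/* pad odd byte with zero */
	return sum;
}

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;	/* final complement */
}
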
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 5a4608281f38..707976482c09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -216,7 +216,8 @@ mlx5e_test_loopback_validate(struct sk_buff *skb,
if (iph->protocol != IPPROTO_UDP)
goto out;
- udph = udp_hdr(skb);
+ /* Don't assume skb_transport_header() was set */
+ udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl);
if (udph->dest != htons(9))
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 5f0f3493d747..c0dab9a8969e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -170,11 +170,24 @@ static const struct counter_desc q_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) },
};
+static const struct counter_desc drop_rq_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) },
+};
+
#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc)
+#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc)
static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv)
{
- return priv->q_counter ? NUM_Q_COUNTERS : 0;
+ int num_stats = 0;
+
+ if (priv->q_counter)
+ num_stats += NUM_Q_COUNTERS;
+
+ if (priv->drop_rq_q_counter)
+ num_stats += NUM_DROP_RQ_COUNTERS;
+
+ return num_stats;
}
static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
@@ -182,7 +195,13 @@ static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
int i;
for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
- strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format);
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ q_stats_desc[i].format);
+
+ for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ drop_rq_stats_desc[i].format);
+
return idx;
}
@@ -191,7 +210,11 @@ static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
int i;
for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
- data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, q_stats_desc, i);
+ data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+ q_stats_desc, i);
+ for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+ data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+ drop_rq_stats_desc, i);
return idx;
}
@@ -199,16 +222,76 @@ static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv)
{
struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)];
- int err;
- if (!priv->q_counter)
- return;
+ if (priv->q_counter &&
+ !mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out,
+ sizeof(out)))
+ qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
+ out, out_of_buffer);
+ if (priv->drop_rq_q_counter &&
+ !mlx5_core_query_q_counter(priv->mdev, priv->drop_rq_q_counter, 0,
+ out, sizeof(out)))
+ qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, out,
+ out_of_buffer);
+}
+
+#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
+static const struct counter_desc vnic_env_stats_desc[] = {
+ { "rx_steer_missed_packets",
+ VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) },
+};
+
+#define NUM_VNIC_ENV_COUNTERS ARRAY_SIZE(vnic_env_stats_desc)
+
+static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv)
+{
+ return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ?
+ NUM_VNIC_ENV_COUNTERS : 0;
+}
+
+static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ return idx;
+
+ for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vnic_env_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ return idx;
- err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out));
- if (err)
+ for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out,
+ vnic_env_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv)
+{
+ u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out;
+ int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out);
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
return;
- qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer);
+ MLX5_SET(query_vnic_env_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+ MLX5_SET(query_vnic_env_in, in, other_vport, 0);
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
}
#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
@@ -754,7 +837,15 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
{ "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
};
+static const struct counter_desc pport_pfc_stall_stats_desc[] = {
+ { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+ { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
+};
+
#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc)
+#define NUM_PPORT_PFC_STALL_COUNTERS(priv) (ARRAY_SIZE(pport_pfc_stall_stats_desc) * \
+ MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \
+ MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
{
@@ -790,7 +881,8 @@ static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv)
{
return (mlx5e_query_global_pause_combined(priv) +
hweight8(mlx5e_query_pfc_combined(priv))) *
- NUM_PPORT_PER_PRIO_PFC_COUNTERS;
+ NUM_PPORT_PER_PRIO_PFC_COUNTERS +
+ NUM_PPORT_PFC_STALL_COUNTERS(priv);
}
static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv,
@@ -818,6 +910,10 @@ static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv,
}
}
+ for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_pfc_stall_stats_desc[i].format);
+
return idx;
}
@@ -845,6 +941,10 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv,
}
}
+ for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+ pport_pfc_stall_stats_desc, i);
+
return idx;
}
@@ -1095,6 +1195,12 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = {
.update_stats = mlx5e_grp_q_update_stats,
},
{
+ .get_num_stats = mlx5e_grp_vnic_env_get_num_stats,
+ .fill_strings = mlx5e_grp_vnic_env_fill_strings,
+ .fill_stats = mlx5e_grp_vnic_env_fill_stats,
+ .update_stats = mlx5e_grp_vnic_env_update_stats,
+ },
+ {
.get_num_stats = mlx5e_grp_vport_get_num_stats,
.fill_strings = mlx5e_grp_vport_fill_strings,
.fill_stats = mlx5e_grp_vport_fill_stats,
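
Adding a stats group is purely table-driven: implement the four callbacks and append an entry to mlx5e_stats_grps[], exactly as the vnic_env group does above. A hedged skeleton of a new group, with hypothetical names:

static int my_grp_get_num_stats(struct mlx5e_priv *priv)
{
	return 0;	/* return 0 to hide the group when unsupported */
}

static int my_grp_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
{
	return idx;	/* advance idx by one per string written */
}

static int my_grp_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
{
	return idx;	/* advance idx by one per counter written */
}

static void my_grp_update_stats(struct mlx5e_priv *priv)
{
	/* query firmware/device counters into priv->stats here */
}
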
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 0b3320a2b072..43a72efa28c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -97,6 +97,11 @@ struct mlx5e_sw_stats {
struct mlx5e_qcounter_stats {
u32 rx_out_of_buffer;
+ u32 rx_if_down_packets;
+};
+
+struct mlx5e_vnic_env_stats {
+ __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
};
#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \
@@ -201,6 +206,7 @@ struct mlx5e_ch_stats {
struct mlx5e_stats {
struct mlx5e_sw_stats sw;
struct mlx5e_qcounter_stats qcnt;
+ struct mlx5e_vnic_env_stats vnic;
struct mlx5e_vport_stats vport;
struct mlx5e_pport_stats pport;
struct rtnl_link_stats64 vf_vport;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fd98b0dc610f..3e4a7e81b67f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -675,6 +675,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
+ .has_flow_tag = true,
.flow_tag = attr->flow_tag,
.encap_id = 0,
};
@@ -2529,11 +2530,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
} else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
- if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
- return -EOPNOTSUPP;
-
attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
- attr->vlan = tcf_vlan_push_vid(a);
+ attr->vlan_vid = tcf_vlan_push_vid(a);
+ if (mlx5_eswitch_vlan_actions_supported(priv->mdev)) {
+ attr->vlan_prio = tcf_vlan_push_prio(a);
+ attr->vlan_proto = tcf_vlan_push_proto(a);
+ if (!attr->vlan_proto)
+ attr->vlan_proto = htons(ETH_P_8021Q);
+ } else if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
+ tcf_vlan_push_prio(a)) {
+ return -EOPNOTSUPP;
+ }
} else { /* action is TCA_VLAN_ACT_MODIFY */
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 569b42a01026..11b4f1089d1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -176,7 +176,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
default:
hlen = mlx5e_skb_l2_header_offset(skb);
}
- return min_t(u16, hlen, skb->len);
+ return min_t(u16, hlen, skb_headlen(skb));
}
static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
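
The min_t() bound changes from skb->len to skb_headlen() because only the linear part of the skb is copied inline into the WQE; skb->len also counts paged frags, so the old bound could let the copy run past the end of the linear buffer. The invariant, spelled out (illustrative, not kernel code):

/* skb->len == skb_headlen(skb) + paged frag bytes; only the first
 * term is safe to memcpy from skb->data */
static inline u16 inline_copy_bound(const struct sk_buff *skb, u16 hlen)
{
	return min_t(u16, hlen, skb_headlen(skb));
}
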
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 25106e996a96..c1c94974e16b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -393,6 +393,51 @@ static void general_event_handler(struct mlx5_core_dev *dev,
}
}
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
+
+ spin_lock(&table->lock);
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ spin_unlock(&table->lock);
+
+ return cq;
+}
+
+static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ ++cq->arm_sn;
+
+ cq->comp(cq);
+
+ mlx5_cq_put(cq);
+}
+
+static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ cq->event(cq, event_type);
+
+ mlx5_cq_put(cq);
+}
+
static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
{
struct mlx5_eq *eq = eq_ptr;
@@ -415,7 +460,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
switch (eqe->type) {
case MLX5_EVENT_TYPE_COMP:
cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
- mlx5_cq_completion(dev, cqn);
+ mlx5_eq_cq_completion(eq, cqn);
break;
case MLX5_EVENT_TYPE_DCT_DRAINED:
rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
@@ -472,7 +517,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
cqn, eqe->data.cq_err.syndrome);
- mlx5_cq_event(dev, cqn, eqe->type);
+ mlx5_eq_cq_event(eq, cqn, eqe->type);
break;
case MLX5_EVENT_TYPE_PAGE_REQUEST:
@@ -567,6 +612,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
int nent, u64 mask, const char *name,
enum mlx5_eq_type type)
{
+ struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
irq_handler_t handler;
@@ -576,6 +622,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
u32 *in;
int err;
+ /* Init CQ table */
+ memset(cq_table, 0, sizeof(*cq_table));
+ spin_lock_init(&cq_table->lock);
+ INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
eq->type = type;
eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
@@ -669,7 +720,6 @@ err_buf:
mlx5_buf_free(dev, &eq->buf);
return err;
}
-EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
@@ -696,7 +746,40 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
return err;
}
-EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ int err;
+
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, cq->cqn, cq);
+ spin_unlock_irq(&table->lock);
+
+ return err;
+}
+
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *tmp;
+
+ spin_lock_irq(&table->lock);
+ tmp = radix_tree_delete(&table->tree, cq->cqn);
+ spin_unlock_irq(&table->lock);
+
+ if (!tmp) {
+ mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", cq->cqn, eq->eqn);
+ return -ENOENT;
+ }
+
+ if (tmp != cq) {
+ mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", cq->cqn, eq->eqn);
+ return -EINVAL;
+ }
+
+ return 0;
+}
int mlx5_eq_init(struct mlx5_core_dev *dev)
{
@@ -840,4 +923,3 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
-EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
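
The EQ now resolves CQNs through its own table and pins the CQ with a reference for the duration of the callback, closing the race where a CQ could be destroyed while its completion was still in flight. The hold/put discipline, condensed (hypothetical wrapper name; mirrors mlx5_eq_cq_completion() above):

static void example_handle_comp(struct mlx5_eq *eq, u32 cqn)
{
	/* mlx5_eq_cq_get() returns with a reference held (see above) */
	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);

	if (unlikely(!cq))
		return;		/* stale or bogus CQN, nothing to do */

	++cq->arm_sn;
	cq->comp(cq);		/* safe: the ref pins the CQ */
	mlx5_cq_put(cq);	/* may free the CQ on the last put */
}
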
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5ecf2cddc16d..332bc56306bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1529,6 +1529,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
+ /* Create steering drop counters for ingress and egress ACLs */
+ if (vport_num && esw->mode == SRIOV_LEGACY)
+ esw_vport_create_drop_counters(vport);
+
/* Restore old vport configuration */
esw_apply_vport_conf(esw, vport);
@@ -1545,10 +1549,6 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
if (!vport_num)
vport->info.trusted = true;
- /* create steering drop counters for ingress and egress ACLs */
- if (vport_num && esw->mode == SRIOV_LEGACY)
- esw_vport_create_drop_counters(vport);
-
esw_vport_change_handle_locked(vport);
esw->enabled_vports++;
@@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
esw->mode = mode;
- if (mode == SRIOV_LEGACY)
+ if (mode == SRIOV_LEGACY) {
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
- else
+ } else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
err = esw_offloads_init(esw, nvfs + 1);
+ }
+
if (err)
goto abort;
@@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
abort:
esw->mode = SRIOV_NONE;
+
+ if (mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
return err;
}
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
+ int old_mode;
int nvports;
int i;
@@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
else if (esw->mode == SRIOV_OFFLOADS)
esw_offloads_cleanup(esw, nvports);
+ old_mode = esw->mode;
esw->mode = SRIOV_NONE;
+
+ if (old_mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -2083,17 +2096,19 @@ unlock:
return err;
}
-static void mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
- int vport_idx,
- struct mlx5_vport_drop_stats *stats)
+static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
+ int vport_idx,
+ struct mlx5_vport_drop_stats *stats)
{
struct mlx5_eswitch *esw = dev->priv.eswitch;
struct mlx5_vport *vport = &esw->vports[vport_idx];
+ u64 rx_discard_vport_down, tx_discard_vport_down;
u64 bytes = 0;
u16 idx = 0;
+ int err = 0;
if (!vport->enabled || esw->mode != SRIOV_LEGACY)
- return;
+ return 0;
if (vport->egress.drop_counter) {
idx = vport->egress.drop_counter->id;
@@ -2104,6 +2119,23 @@ static void mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
idx = vport->ingress.drop_counter->id;
mlx5_fc_query(dev, idx, &stats->tx_dropped, &bytes);
}
+
+ if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
+ !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ return 0;
+
+ err = mlx5_query_vport_down_stats(dev, vport_idx,
+ &rx_discard_vport_down,
+ &tx_discard_vport_down);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
+ stats->rx_dropped += rx_discard_vport_down;
+ if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ stats->tx_dropped += tx_discard_vport_down;
+
+ return 0;
}
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
@@ -2167,7 +2199,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
vf_stats->broadcast =
MLX5_GET_CTR(out, received_eth_broadcast.packets);
- mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats);
+ err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats);
+ if (err)
+ goto free_out;
vf_stats->rx_dropped = stats.rx_dropped;
vf_stats->tx_dropped = stats.tx_dropped;
@@ -2175,3 +2209,9 @@ free_out:
kvfree(out);
return err;
}
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2fa037066b2f..4cd773fa55e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -37,19 +37,9 @@
#include <linux/if_link.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
#include "lib/mpfs.h"
-enum {
- SRIOV_NONE,
- SRIOV_LEGACY,
- SRIOV_OFFLOADS
-};
-
-enum {
- REP_ETH,
- NUM_REP_TYPES,
-};
-
#ifdef CONFIG_MLX5_ESWITCH
#define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_table *fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
- struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_handle *miss_rule_uni;
+ struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
} offloads;
};
};
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
- int (*load)(struct mlx5_core_dev *dev,
- struct mlx5_eswitch_rep *rep);
- void (*unload)(struct mlx5_eswitch_rep *rep);
- void *priv;
- bool valid;
-};
-
-struct mlx5_eswitch_rep {
- struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
- u16 vport;
- u8 hw_id[ETH_ALEN];
- u16 vlan;
- u32 vlan_refcount;
-};
-
struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
@@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
int vport,
struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
- u32 sqn);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
struct mlx5_flow_spec;
@@ -256,15 +227,14 @@ enum {
SET_VLAN_INSERT = BIT(1)
};
-#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x4000
-#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x8000
-
struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *in_rep;
struct mlx5_eswitch_rep *out_rep;
int action;
- u16 vlan;
+ __be16 vlan_proto;
+ u16 vlan_vid;
+ u8 vlan_prio;
bool vlan_handled;
u32 encap_id;
u32 mod_hdr_id;
@@ -278,13 +248,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- struct mlx5_eswitch_rep_if *rep_if,
- u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- u8 rep_type);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
@@ -294,6 +257,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
int vport, u16 vlan, u8 qos, u8 set_flags);
+static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan);
+}
+
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
#define esw_info(dev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 99f583a15cc3..35e256eb2f6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -58,8 +58,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (esw->mode != SRIOV_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
- /* per flow vlan pop/push is emulated, don't set that into the firmware */
- flow_act.action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ flow_act.action = attr->action;
+ /* when per-flow vlan pop/push is emulated, don't pass those action bits to the firmware */
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev))
+ flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ flow_act.vlan.ethtype = ntohs(attr->vlan_proto);
+ flow_act.vlan.vid = attr->vlan_vid;
+ flow_act.vlan.prio = attr->vlan_prio;
+ }
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
@@ -88,10 +96,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_id = attr->mod_hdr_id;
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
flow_act.encap_id = attr->encap_id;
rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb,
@@ -185,7 +193,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
/* protects against (1) setting rules with different vlans to push and
* (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0)
*/
- if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan))
+ if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid))
goto out_notsupp;
return 0;
@@ -202,6 +210,10 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
bool push, pop, fwd;
int err = 0;
+ /* no-op when the device supports native (non-emulated) vlan push/pop */
+ if (mlx5_eswitch_vlan_actions_supported(esw->dev))
+ return 0;
+
push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
@@ -239,11 +251,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (vport->vlan_refcount)
goto skip_set_push;
- err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0,
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid, 0,
SET_VLAN_INSERT | SET_VLAN_STRIP);
if (err)
goto out;
- vport->vlan = attr->vlan;
+ vport->vlan = attr->vlan_vid;
skip_set_push:
vport->vlan_refcount++;
}
@@ -261,6 +273,10 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
bool push, pop, fwd;
int err = 0;
+ /* no-op when the device supports native (non-emulated) vlan push/pop */
+ if (mlx5_eswitch_vlan_actions_supported(esw->dev))
+ return 0;
+
if (!attr->vlan_handled)
return 0;
@@ -338,6 +354,7 @@ out:
kvfree(spec);
return flow_rule;
}
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
{
@@ -350,7 +367,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle *flow_rule = NULL;
struct mlx5_flow_spec *spec;
+ void *headers_c;
+ void *headers_v;
int err = 0;
+ u8 *dmac_c;
+ u8 *dmac_v;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
@@ -358,6 +379,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
goto out;
}
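+ /* Match only on the multicast bit of the outer dmac: a clear bit
+ * hits the unicast miss rule, a set bit the multicast one.
+ */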
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+ outer_headers.dmac_47_16);
+ dmac_c[0] = 0x01;
+
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport_num = 0;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -366,11 +394,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
&flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
- esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err);
+ esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
goto out;
}
- esw->fdb_table.offloads.miss_rule = flow_rule;
+ esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
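+ /* Second miss rule: set the multicast bit in the match value so
+ * multicast misses are forwarded like unicast ones.
+ */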
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+ outer_headers.dmac_47_16);
+ dmac_v[0] = 0x01;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
out:
kvfree(spec);
return err;
@@ -426,6 +471,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
}
#define MAX_PF_SQ 256
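+/* Per-vport budget of send-to-vport rules used to size the slow-path
+ * FDB table below.
+ */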
+#define MAX_SQ_NVPORTS 32
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
@@ -438,6 +484,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
struct mlx5_flow_group *g;
void *match_criteria;
u32 *flow_group_in;
+ u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@@ -455,7 +502,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
if (err)
goto fast_fdb_err;
- table_size = nvports + MAX_PF_SQ + 1;
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
ft_attr.max_fte = table_size;
ft_attr.prio = FDB_SLOW_PATH;
@@ -478,7 +525,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
- ix = nvports + MAX_PF_SQ;
+ ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
@@ -492,10 +539,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
/* create miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
@@ -531,7 +584,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
return;
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
- mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
@@ -789,14 +843,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
- /* disable PF RoCE so missed packets don't go through RoCE steering */
- mlx5_dev_list_lock();
- mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
- goto create_fdb_err;
+ return err;
err = esw_create_offloads_table(esw);
if (err)
@@ -821,12 +870,6 @@ create_fg_err:
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
-create_fdb_err:
- /* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
return err;
}
@@ -844,9 +887,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
}
/* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
return err;
}
@@ -1160,10 +1201,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
rep_if->load = __rep_if->load;
rep_if->unload = __rep_if->unload;
+ rep_if->get_proto_dev = __rep_if->get_proto_dev;
rep_if->priv = __rep_if->priv;
rep_if->valid = true;
}
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
int vport_index, u8 rep_type)
@@ -1178,6 +1221,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
rep->rep_if[rep_type].valid = false;
}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
{
@@ -1188,3 +1232,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
rep = &offloads->vport_reps[UPLINK_REP_INDEX];
return rep->rep_if[rep_type].priv;
}
+
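+/* Return the protocol-specific device (e.g. the representor netdev)
+ * registered for this vport's rep, or NULL if none is registered.
+ */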
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ if (vport == FDB_UPLINK_VPORT)
+ vport = UPLINK_REP_INDEX;
+
+ rep = &offloads->vport_reps[vport];
+
+ if (rep->rep_if[rep_type].valid &&
+ rep->rep_if[rep_type].get_proto_dev)
+ return rep->rep_if[rep_type].get_proto_dev(rep);
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport)
+{
+ return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 35d0e33381ca..0f5da499a223 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -31,49 +31,91 @@
*
*/
+#include <linux/rhashtable.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
#include "mlx5_core.h"
+#include "fs_cmd.h"
#include "fpga/ipsec.h"
#include "fpga/sdk.h"
#include "fpga/core.h"
#define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC (60 * 1000)
-enum mlx5_ipsec_response_syndrome {
- MLX5_IPSEC_RESPONSE_SUCCESS = 0,
- MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
- MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2,
- MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+enum mlx5_fpga_ipsec_cmd_status {
+ MLX5_FPGA_IPSEC_CMD_PENDING,
+ MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
+ MLX5_FPGA_IPSEC_CMD_COMPLETE,
};
-enum mlx5_fpga_ipsec_sacmd_status {
- MLX5_FPGA_IPSEC_SACMD_PENDING,
- MLX5_FPGA_IPSEC_SACMD_SEND_FAIL,
- MLX5_FPGA_IPSEC_SACMD_COMPLETE,
-};
-
-struct mlx5_ipsec_command_context {
+struct mlx5_fpga_ipsec_cmd_context {
struct mlx5_fpga_dma_buf buf;
- struct mlx5_accel_ipsec_sa sa;
- enum mlx5_fpga_ipsec_sacmd_status status;
+ enum mlx5_fpga_ipsec_cmd_status status;
+ struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
int status_code;
struct completion complete;
struct mlx5_fpga_device *dev;
struct list_head list; /* Item in pending_cmds */
+ u8 command[0];
+};
+
+struct mlx5_fpga_esp_xfrm;
+
+struct mlx5_fpga_ipsec_sa_ctx {
+ struct rhash_head hash;
+ struct mlx5_ifc_fpga_ipsec_sa hw_sa;
+ struct mlx5_core_dev *dev;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+};
+
+struct mlx5_fpga_esp_xfrm {
+ unsigned int num_rules;
+ struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+ struct mutex lock; /* xfrm lock */
+ struct mlx5_accel_esp_xfrm accel_xfrm;
+};
+
+struct mlx5_fpga_ipsec_rule {
+ struct rb_node node;
+ struct fs_fte *fte;
+ struct mlx5_fpga_ipsec_sa_ctx *ctx;
};
-struct mlx5_ipsec_sadb_resp {
- __be32 syndrome;
- __be32 sw_sa_handle;
- u8 reserved[24];
-} __packed;
+static const struct rhashtable_params rhash_sa = {
+ .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+ .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+ .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
struct mlx5_fpga_ipsec {
+ struct mlx5_fpga_device *fdev;
struct list_head pending_cmds;
spinlock_t pending_cmds_lock; /* Protects pending_cmds */
u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
struct mlx5_fpga_conn *conn;
+
+ struct notifier_block fs_notifier_ingress_bypass;
+ struct notifier_block fs_notifier_egress;
+
+ /* Map hardware SA --> SA context
+ * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx)
+ * The hash prevents duplicate SAs in the FPGA, which are not
+ * allowed
+ */
+ struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
+ struct mutex sa_hash_lock;
+
+ /* Tree holding all rules for this fpga device
+ * Rules (mlx5_fpga_ipsec_rule) are keyed by their fte pointer
+ */
+ struct rb_root rules_rb;
+ struct mutex rules_rb_lock; /* rules lock */
};
static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
@@ -97,28 +139,29 @@ static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
struct mlx5_fpga_dma_buf *buf,
u8 status)
{
- struct mlx5_ipsec_command_context *context;
+ struct mlx5_fpga_ipsec_cmd_context *context;
if (status) {
- context = container_of(buf, struct mlx5_ipsec_command_context,
+ context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
buf);
mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
status);
- context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL;
+ context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
complete(&context->complete);
}
}
-static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
+static inline
+int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
{
switch (syndrome) {
- case MLX5_IPSEC_RESPONSE_SUCCESS:
+ case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
return 0;
- case MLX5_IPSEC_RESPONSE_SADB_ISSUE:
+ case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
return -EEXIST;
- case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+ case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
return -EINVAL;
- case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+ case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
return -EIO;
}
return -EIO;
@@ -126,9 +169,9 @@ static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
{
- struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data;
- struct mlx5_ipsec_command_context *context;
- enum mlx5_ipsec_response_syndrome syndrome;
+ struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
struct mlx5_fpga_device *fdev = cb_arg;
unsigned long flags;
@@ -138,12 +181,12 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
return;
}
- mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n",
- ntohl(resp->syndrome), ntohl(resp->sw_sa_handle));
+ mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
+ ntohl(resp->syndrome));
spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
- struct mlx5_ipsec_command_context,
+ struct mlx5_fpga_ipsec_cmd_context,
list);
if (context)
list_del(&context->list);
@@ -155,51 +198,48 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
}
mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
- if (context->sa.sw_sa_handle != resp->sw_sa_handle) {
- mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
- ntohl(context->sa.sw_sa_handle),
- ntohl(resp->sw_sa_handle));
- return;
- }
-
syndrome = ntohl(resp->syndrome);
context->status_code = syndrome_to_errno(syndrome);
- context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+ context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
+ memcpy(&context->resp, resp, sizeof(*resp));
if (context->status_code)
- mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n",
+ mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
syndrome);
+
complete(&context->complete);
}
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd)
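+/* Allocate a command context with the command copied inline, queue it
+ * on pending_cmds and send it over the FPGA SBU connection; the caller
+ * waits on the returned context for the response.
+ */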
+static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
+ const void *cmd, int cmd_size)
{
- struct mlx5_ipsec_command_context *context;
+ struct mlx5_fpga_ipsec_cmd_context *context;
struct mlx5_fpga_device *fdev = mdev->fpga;
unsigned long flags;
- int res = 0;
+ int res;
- BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0);
if (!fdev || !fdev->ipsec)
return ERR_PTR(-EOPNOTSUPP);
- context = kzalloc(sizeof(*context), GFP_ATOMIC);
+ if (cmd_size & 3)
+ return ERR_PTR(-EINVAL);
+
+ context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
if (!context)
return ERR_PTR(-ENOMEM);
- memcpy(&context->sa, cmd, sizeof(*cmd));
+ context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
+ context->dev = fdev;
context->buf.complete = mlx5_fpga_ipsec_send_complete;
- context->buf.sg[0].size = sizeof(context->sa);
- context->buf.sg[0].data = &context->sa;
init_completion(&context->complete);
- context->dev = fdev;
+ memcpy(&context->command, cmd, cmd_size);
+ context->buf.sg[0].size = cmd_size;
+ context->buf.sg[0].data = &context->command;
+
spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
- context->status = MLX5_FPGA_IPSEC_SACMD_PENDING;
-
res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
if (res) {
mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
@@ -214,47 +254,103 @@ void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
return context;
}
-int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
{
- struct mlx5_ipsec_command_context *context = ctx;
+ struct mlx5_fpga_ipsec_cmd_context *context = ctx;
+ unsigned long timeout =
+ msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC);
int res;
- res = wait_for_completion_killable(&context->complete);
- if (res) {
+ res = wait_for_completion_timeout(&context->complete, timeout);
+ if (!res) {
mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
- return -EINTR;
+ return -ETIMEDOUT;
}
- if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)
+ if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
res = context->status_code;
else
res = -EIO;
- kfree(context);
return res;
}
+static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
+{
+ if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command))
+ return true;
+ return false;
+}
+
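+/* Send an SA command (add/delete/modify) built from @hw_sa; devices
+ * without the v2 SADB take only the ipsec_sa_v1 prefix. The response
+ * must echo back the same sw_sa_handle.
+ */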
+static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
+ int opcode)
+{
+ struct mlx5_core_dev *dev = fdev->mdev;
+ struct mlx5_ifc_fpga_ipsec_sa *sa;
+ struct mlx5_fpga_ipsec_cmd_context *cmd_context;
+ size_t sa_cmd_size;
+ int err;
+
+ hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
+ if (is_v2_sadb_supported(fdev->ipsec))
+ sa_cmd_size = sizeof(*hw_sa);
+ else
+ sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
+
+ cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
+ mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
+ if (IS_ERR(cmd_context))
+ return PTR_ERR(cmd_context);
+
+ err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
+ if (err)
+ goto out;
+
+ sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
+ if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
+ mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+ ntohl(sa->ipsec_sa_v1.sw_sa_handle),
+ ntohl(cmd_context->resp.sw_sa_handle));
+ err = -EIO;
+ }
+
+out:
+ kfree(cmd_context);
+ return err;
+}
+
u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
u32 ret = 0;
- if (mlx5_fpga_is_ipsec_device(mdev))
- ret |= MLX5_ACCEL_IPSEC_DEVICE;
- else
+ if (mlx5_fpga_is_ipsec_device(mdev)) {
+ ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
+ ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
+ } else {
return ret;
+ }
if (!fdev->ipsec)
return ret;
if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
- ret |= MLX5_ACCEL_IPSEC_ESP;
+ ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
- ret |= MLX5_ACCEL_IPSEC_IPV6;
+ ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
- ret |= MLX5_ACCEL_IPSEC_LSO;
+ ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
+ ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
+ ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
+ ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
+ }
return ret;
}
@@ -318,6 +414,829 @@ out:
return ret;
}
+static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
+ int err;
+
+ cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
+ cmd.flags = htonl(flags);
+ context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
+ if (IS_ERR(context))
+ return PTR_ERR(context);
+
+ err = mlx5_fpga_ipsec_cmd_wait(context);
+ if (err)
+ goto out;
+
+ if ((context->resp.flags & cmd.flags) != cmd.flags) {
+ mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
+ cmd.flags,
+ context->resp.flags);
+ err = -EIO;
+ }
+
+out:
+ kfree(context);
+ return err;
+}
+
+static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
+{
+ u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
+ u32 flags = 0;
+
+ if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
+ flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
+
+ return mlx5_fpga_ipsec_set_caps(mdev, flags);
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+ const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
+
+ /* key */
+ memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
+ aes_gcm->key_len / 8);
+ /* A 128-bit key is written twice to fill the HW key layout */
+ if (aes_gcm->key_len == 128)
+ memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
+ aes_gcm->aes_key, aes_gcm->key_len / 8);
+
+ /* salt and seq_iv */
+ memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
+ sizeof(aes_gcm->seq_iv));
+ memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
+ sizeof(aes_gcm->salt));
+
+ /* esn */
+ if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
+ hw_sa->ipsec_sa_v1.flags |=
+ (xfrm_attrs->flags &
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+ hw_sa->esn = htonl(xfrm_attrs->esn);
+ } else {
+ hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
+ hw_sa->ipsec_sa_v1.flags &=
+ ~(xfrm_attrs->flags &
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP ?
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0);
+ hw_sa->esn = 0;
+ }
+
+ /* rx handle */
+ hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
+
+ /* enc mode */
+ switch (aes_gcm->key_len) {
+ case 128:
+ hw_sa->ipsec_sa_v1.enc_mode =
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
+ break;
+ case 256:
+ hw_sa->ipsec_sa_v1.enc_mode =
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
+ break;
+ }
+
+ /* flags */
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
+ MLX5_FPGA_IPSEC_SA_SPI_EN |
+ MLX5_FPGA_IPSEC_SA_IP_ESP;
+
+ if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
+ else
+ hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+ mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
+
+ /* IPs */
+ memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
+ memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
+
+ /* SPI */
+ hw_sa->ipsec_sa_v1.spi = spi;
+
+ /* flags */
+ if (is_ipv6)
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
+}
+
+static bool is_full_mask(const void *p, size_t len)
+{
+ WARN_ON(len % 4);
+
+ return !memchr_inv(p, 0xff, len);
+}
+
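+/* The FPGA SADB matches exact SAs: source/destination IPs and the ESP
+ * SPI must all be matched with a full mask.
+ */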
+static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
+ const u32 *match_c,
+ const u32 *match_v)
+{
+ const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ match_c,
+ misc_parameters);
+ const void *headers_c = MLX5_ADDR_OF(fte_match_param,
+ match_c,
+ outer_headers);
+ const void *headers_v = MLX5_ADDR_OF(fte_match_param,
+ match_v,
+ outer_headers);
+
+ if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
+ const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+ ipv4)) ||
+ !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+ ipv4)))
+ return false;
+ } else {
+ const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+
+ if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6)) ||
+ !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6)))
+ return false;
+ }
+
+ if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ outer_esp_spi),
+ MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
+ u8 match_criteria_enable,
+ const u32 *match_c,
+ const u32 *match_v)
+{
+ u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev);
+ bool ipv6_flow;
+
+ ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
+
+ if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
+ mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
+ mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
+ mlx5_fs_is_vxlan_flow(match_c) ||
+ !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
+ ipv6_flow))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
+ mlx5_fs_is_outer_ipsec_flow(match_c))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
+ ipv6_flow)
+ return false;
+
+ if (!validate_fpga_full_mask(dev, match_c, match_v))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
+ u8 match_criteria_enable,
+ const u32 *match_c,
+ const u32 *match_v,
+ struct mlx5_flow_act *flow_act)
+{
+ const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
+ MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
+ bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
+ MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
+ int ret;
+
+ ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
+ match_v);
+ if (!ret)
+ return ret;
+
+ if (is_dmac || is_smac ||
+ (match_criteria_enable &
+ ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
+ (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
+ flow_act->has_flow_tag)
+ return false;
+
+ return true;
+}
+
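+/* Create (or reuse) the SA context for an accel_xfrm. All rules bound
+ * to the same accel_xfrm must carry identical IPs and SPI, and a given
+ * SA image may be installed in the FPGA only once.
+ */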
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ container_of(accel_xfrm, typeof(*fpga_xfrm),
+ accel_xfrm);
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ int opcode, err;
+ void *context;
+
+ /* alloc SA */
+ sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
+ if (!sa_ctx)
+ return ERR_PTR(-ENOMEM);
+
+ sa_ctx->dev = mdev;
+
+ /* build candidate SA */
+ mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
+ saddr, daddr, spi, is_ipv6,
+ &sa_ctx->hw_sa);
+
+ mutex_lock(&fpga_xfrm->lock);
+
+ if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */
+ /* all rules must be with same IPs and SPI */
+ if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
+ sizeof(sa_ctx->hw_sa))) {
+ context = ERR_PTR(-EINVAL);
+ goto exists;
+ }
+
+ ++fpga_xfrm->num_rules;
+ context = fpga_xfrm->sa_ctx;
+ goto exists;
+ }
+
+ /* This is an unbound fpga_xfrm; try to add it to the hash */
+ mutex_lock(&fipsec->sa_hash_lock);
+
+ err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa);
+ if (err) {
+ /* Can't bind a different accel_xfrm to an existing sa_ctx:
+ * multiple keymats for the same IPs and SPI are not
+ * supported
+ */
+ context = ERR_PTR(-EEXIST);
+ goto unlock_hash;
+ }
+
+ /* Bind the accel_xfrm to the sa_ctx */
+ opcode = is_v2_sadb_supported(fdev->ipsec) ?
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+ sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err) {
+ context = ERR_PTR(err);
+ goto delete_hash;
+ }
+
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+ ++fpga_xfrm->num_rules;
+ fpga_xfrm->sa_ctx = sa_ctx;
+ sa_ctx->fpga_xfrm = fpga_xfrm;
+
+ mutex_unlock(&fpga_xfrm->lock);
+
+ return sa_ctx;
+
+delete_hash:
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa));
+unlock_hash:
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+exists:
+ mutex_unlock(&fpga_xfrm->lock);
+ kfree(sa_ctx);
+ return context;
+}
+
+static void *
+mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ struct mlx5_accel_esp_xfrm *accel_xfrm;
+ __be32 saddr[4], daddr[4], spi;
+ struct mlx5_flow_group *fg;
+ bool is_ipv6 = false;
+
+ fs_get_obj(fg, fte->node.parent);
+ /* validate */
+ if (is_egress &&
+ !mlx5_is_fpga_egress_ipsec_rule(mdev,
+ fg->mask.match_criteria_enable,
+ fg->mask.match_criteria,
+ fte->val,
+ &fte->action))
+ return ERR_PTR(-EINVAL);
+ else if (!mlx5_is_fpga_ipsec_rule(mdev,
+ fg->mask.match_criteria_enable,
+ fg->mask.match_criteria,
+ fte->val))
+ return ERR_PTR(-EINVAL);
+
+ /* get xfrm context */
+ accel_xfrm =
+ (struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
+
+ /* IPs */
+ if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
+ fte->val)) {
+ memcpy(&saddr[3],
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ fte->val,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ sizeof(saddr[3]));
+ memcpy(&daddr[3],
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ fte->val,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ sizeof(daddr[3]));
+ } else {
+ memcpy(saddr,
+ MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(saddr));
+ memcpy(daddr,
+ MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(daddr));
+ is_ipv6 = true;
+ }
+
+ /* SPI */
+ spi = MLX5_GET_BE(typeof(spi),
+ fte_match_param, fte->val,
+ misc_parameters.outer_esp_spi);
+
+ /* create */
+ return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
+ saddr, daddr,
+ spi, is_ipv6);
+}
+
+static void
+mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
+{
+ struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ int opcode = is_v2_sadb_supported(fdev->ipsec) ?
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
+ int err;
+
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+ sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (WARN_ON(err))
+ return;
+
+ mutex_lock(&fipsec->sa_hash_lock);
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa));
+ mutex_unlock(&fipsec->sa_hash_lock);
+}
+
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm;
+
+ mutex_lock(&fpga_xfrm->lock);
+ if (!--fpga_xfrm->num_rules) {
+ mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
+ fpga_xfrm->sa_ctx = NULL;
+ }
+ mutex_unlock(&fpga_xfrm->lock);
+}
+
+static inline struct mlx5_fpga_ipsec_rule *
+_rule_search(struct rb_root *root, struct fs_fte *fte)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct mlx5_fpga_ipsec_rule *rule =
+ container_of(node, struct mlx5_fpga_ipsec_rule,
+ node);
+
+ if (fte < rule->fte)
+ node = node->rb_left;
+ else if (fte > rule->fte)
+ node = node->rb_right;
+ else
+ return rule;
+ }
+ return NULL;
+}
+
+static struct mlx5_fpga_ipsec_rule *
+rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
+{
+ struct mlx5_fpga_ipsec_rule *rule;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ rule = _rule_search(&ipsec_dev->rules_rb, fte);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+ return rule;
+}
+
+static inline int _rule_insert(struct rb_root *root,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ struct rb_node **new = &root->rb_node, *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct mlx5_fpga_ipsec_rule *this =
+ container_of(*new, struct mlx5_fpga_ipsec_rule,
+ node);
+
+ parent = *new;
+ if (rule->fte < this->fte)
+ new = &((*new)->rb_left);
+ else if (rule->fte > this->fte)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&rule->node, parent, new);
+ rb_insert_color(&rule->node, root);
+
+ return 0;
+}
+
+static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ int ret;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ ret = _rule_insert(&ipsec_dev->rules_rb, rule);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+ return ret;
+}
+
+static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ struct rb_root *root = &ipsec_dev->rules_rb;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ rb_erase(&rule->node, root);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+}
+
+static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ _rule_delete(ipsec_dev, rule);
+ kfree(rule);
+}
+
+struct mailbox_mod {
+ uintptr_t saved_esp_id;
+ u32 saved_action;
+ u32 saved_outer_esp_spi_value;
+};
+
+static void restore_spec_mailbox(struct fs_fte *fte,
+ struct mailbox_mod *mbox_mod)
+{
+ char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ mbox_mod->saved_outer_esp_spi_value);
+ fte->action.action |= mbox_mod->saved_action;
+ fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id;
+}
+
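+/* Strip the crypto bits before passing the FTE to the default firmware
+ * commands: the FPGA handles encrypt/decrypt and, when the firmware
+ * cannot match on outer_esp_spi, the SPI match value as well. The saved
+ * fields are put back by restore_spec_mailbox().
+ */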
+static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
+ struct fs_fte *fte,
+ struct mailbox_mod *mbox_mod)
+{
+ char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ misc_parameters);
+
+ mbox_mod->saved_esp_id = fte->action.esp_id;
+ mbox_mod->saved_action = fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+ mbox_mod->saved_outer_esp_spi_value =
+ MLX5_GET(fte_match_set_misc, misc_params_v,
+ outer_esp_spi);
+
+ fte->action.esp_id = 0;
+ fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+ if (!MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
+}
+
+static enum fs_flow_table_type egress_to_fs_ft(bool egress)
+{
+ return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
+}
+
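+/* Wrap the default create_flow_group: if the firmware cannot match on
+ * outer_esp_spi, temporarily clear it from the group mask (dropping the
+ * misc criteria bit when nothing else remains) and restore it after.
+ */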
+static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id,
+ bool is_egress)
+{
+ int (*create_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft, u32 *in,
+ unsigned int *group_id) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
+ char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
+ match_criteria.misc_parameters);
+ u32 saved_outer_esp_spi_mask;
+ u8 match_criteria_enable;
+ int ret;
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ return create_flow_group(dev, ft, in, group_id);
+
+ match_criteria_enable =
+ MLX5_GET(create_flow_group_in, in, match_criteria_enable);
+ saved_outer_esp_spi_mask =
+ MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+ if (!match_criteria_enable || !saved_outer_esp_spi_mask)
+ return create_flow_group(dev, ft, in, group_id);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
+
+ if (!(*misc_params_c) &&
+ !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
+
+ ret = create_flow_group(dev, ft, in, group_id);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
+
+ return ret;
+}
+
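+/* Intercept FTEs that carry an ESP crypto action: install the SA in the
+ * FPGA, remember the rule in the rb-tree, then program the firmware
+ * with a sanitized copy of the FTE.
+ */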
+static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*create_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
+ struct mlx5_fpga_device *fdev = dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_ipsec_rule *rule;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return create_fte(dev, ft, fg, fte);
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return -ENOMEM;
+
+ rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
+ if (IS_ERR(rule->ctx)) {
+ int err = PTR_ERR(rule->ctx);
+ kfree(rule);
+ return err;
+ }
+
+ rule->fte = fte;
+ WARN_ON(rule_insert(fipsec, rule));
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = create_fte(dev, ft, fg, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+ if (ret) {
+ _rule_delete(fipsec, rule);
+ mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+ kfree(rule);
+ }
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*update_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return update_fte(dev, ft, group_id, modify_mask, fte);
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = update_fte(dev, ft, group_id, modify_mask, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*delete_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
+ struct mlx5_fpga_device *fdev = dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_ipsec_rule *rule;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return delete_fte(dev, ft, fte);
+
+ rule = rule_search(fipsec, fte);
+ if (!rule)
+ return -ENOENT;
+
+ mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+ rule_delete(fipsec, rule);
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = delete_fte(dev, ft, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+
+ return ret;
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+ true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_delete_fte(dev, ft, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+ false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_delete_fte(dev, ft, fte, false);
+}
+
+static struct mlx5_flow_cmds fpga_ipsec_ingress;
+static struct mlx5_flow_cmds fpga_ipsec_egress;
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ return &fpga_ipsec_ingress;
+ case FS_FT_NIC_TX:
+ return &fpga_ipsec_egress;
+ default:
+ WARN_ON(true);
+ return NULL;
+ }
+}
+
int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_conn_attr init_attr = {0};
@@ -332,6 +1251,8 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
if (!fdev->ipsec)
return -ENOMEM;
+ fdev->ipsec->fdev = fdev;
+
err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
fdev->ipsec->caps);
if (err) {
@@ -355,14 +1276,47 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
goto error;
}
fdev->ipsec->conn = conn;
+
+ err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
+ if (err)
+ goto err_destroy_conn;
+ mutex_init(&fdev->ipsec->sa_hash_lock);
+
+ fdev->ipsec->rules_rb = RB_ROOT;
+ mutex_init(&fdev->ipsec->rules_rb_lock);
+
+ err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
+ err);
+ goto err_destroy_hash;
+ }
+
return 0;
+err_destroy_hash:
+ rhashtable_destroy(&fdev->ipsec->sa_hash);
+
+err_destroy_conn:
+ mlx5_fpga_sbu_conn_destroy(conn);
+
error:
kfree(fdev->ipsec);
fdev->ipsec = NULL;
return err;
}
+static void destroy_rules_rb(struct rb_root *root)
+{
+ struct mlx5_fpga_ipsec_rule *r, *tmp;
+
+ rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
+ rb_erase(&r->node, root);
+ mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
+ kfree(r);
+ }
+}
+
void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -370,7 +1324,209 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
if (!mlx5_fpga_is_ipsec_device(mdev))
return;
+ destroy_rules_rb(&fdev->ipsec->rules_rb);
+ rhashtable_destroy(&fdev->ipsec->sa_hash);
+
mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
kfree(fdev->ipsec);
fdev->ipsec = NULL;
}
+
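+/* Build the IPSec-aware command sets: start from the default NIC RX/TX
+ * commands and override the flow-group and FTE hooks with the FPGA
+ * wrappers above.
+ */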
+void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+ /* ingress */
+ fpga_ipsec_ingress.create_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table;
+ fpga_ipsec_ingress.destroy_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table;
+ fpga_ipsec_ingress.modify_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table;
+ fpga_ipsec_ingress.create_flow_group =
+ mlx5_fpga_ipsec_fs_create_flow_group_ingress;
+ fpga_ipsec_ingress.destroy_flow_group =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group;
+ fpga_ipsec_ingress.create_fte =
+ mlx5_fpga_ipsec_fs_create_fte_ingress;
+ fpga_ipsec_ingress.update_fte =
+ mlx5_fpga_ipsec_fs_update_fte_ingress;
+ fpga_ipsec_ingress.delete_fte =
+ mlx5_fpga_ipsec_fs_delete_fte_ingress;
+ fpga_ipsec_ingress.update_root_ft =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft;
+
+ /* egress */
+ fpga_ipsec_egress.create_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table;
+ fpga_ipsec_egress.destroy_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table;
+ fpga_ipsec_egress.modify_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table;
+ fpga_ipsec_egress.create_flow_group =
+ mlx5_fpga_ipsec_fs_create_flow_group_egress;
+ fpga_ipsec_egress.destroy_flow_group =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group;
+ fpga_ipsec_egress.create_fte =
+ mlx5_fpga_ipsec_fs_create_fte_egress;
+ fpga_ipsec_egress.update_fte =
+ mlx5_fpga_ipsec_fs_update_fte_egress;
+ fpga_ipsec_egress.delete_fte =
+ mlx5_fpga_ipsec_fs_delete_fte_egress;
+ fpga_ipsec_egress.update_root_ft =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft;
+}
+
+static int
+mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ if (attrs->tfc_pad) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
+ mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.iv_algo !=
+ MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
+ mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.icv_len != 128) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.key_len != 128 &&
+ attrs->keymat.aes_gcm.key_len != 256) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
+ (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
+ v2_command))) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+
+ if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) {
+ mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL);
+ if (!fpga_xfrm)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&fpga_xfrm->lock);
+ memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs,
+ sizeof(fpga_xfrm->accel_xfrm.attrs));
+
+ return &fpga_xfrm->accel_xfrm;
+}
+
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ container_of(xfrm, struct mlx5_fpga_esp_xfrm,
+ accel_xfrm);
+ /* assumes no sa_ctx is still connected to this xfrm_ctx */
+ kfree(fpga_xfrm);
+}
+
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = xfrm->mdev;
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+ struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
+
+ int err = 0;
+
+ if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
+ return 0;
+
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ mlx5_core_warn(mdev, "Tried to modify an esp with unsupported attrs\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (is_v2_sadb_supported(fipsec)) {
+ mlx5_core_warn(mdev, "Modify esp is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
+
+ mutex_lock(&fpga_xfrm->lock);
+
+ if (!fpga_xfrm->sa_ctx)
+ /* Unbound xfrm; change only the sw attrs */
+ goto change_sw_xfrm_attrs;
+
+ /* copy original hw sa */
+ memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
+ mutex_lock(&fipsec->sa_hash_lock);
+ /* remove original hw sa from hash */
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash, rhash_sa));
+ /* update hw_sa with the new xfrm attrs */
+ mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
+ &fpga_xfrm->sa_ctx->hw_sa);
+ /* try to insert new hw_sa to hash */
+ err = rhashtable_insert_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash, rhash_sa);
+ if (err)
+ goto rollback_sa;
+
+ /* modify device with new hw_sa */
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
+ MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
+ fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err)
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash,
+ rhash_sa));
+rollback_sa:
+ if (err) {
+ /* return original hw_sa to hash */
+ memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
+ sizeof(org_hw_sa));
+ WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash,
+ rhash_sa));
+ }
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+change_sw_xfrm_attrs:
+ if (!err)
+ memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
+ mutex_unlock(&fpga_xfrm->lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index 26a3e4b56972..2b5e63b0d4d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -35,33 +35,38 @@
#define __MLX5_FPGA_IPSEC_H__
#include "accel/ipsec.h"
+#include "fs_cmd.h"
#ifdef CONFIG_MLX5_FPGA
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd);
-int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
-
u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
unsigned int counters_count);
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6);
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
+
int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_build_fs_cmds(void);
-#else
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags);
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
-static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd)
-{
- return ERR_PTR(-EOPNOTSUPP);
-}
+const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
-static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context)
-{
- return -EOPNOTSUPP;
-}
+#else
static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
{
@@ -80,6 +85,20 @@ static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
return 0;
}
+static inline void *
+mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return NULL;
+}
+
+static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+}
+
static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
{
return 0;
@@ -89,6 +108,35 @@ static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
}
+static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+}
+
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+}
+
+static inline int
+mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+ return mlx5_fs_cmd_get_default(type);
+}
+
#endif /* CONFIG_MLX5_FPGA */
#endif /* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 881e2e55840c..ef5afd7c9325 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -39,9 +39,81 @@
#include "mlx5_core.h"
#include "eswitch.h"
-int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft, u32 underlay_qpn,
- bool disconnect)
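+/* Stub command set: flow-steering calls succeed without programming the
+ * device (update_fte is rejected), for namespaces that have nothing to
+ * program.
+ */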
+static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft, u32 underlay_qpn,
+ bool disconnect)
{
u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0};
u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
@@ -71,12 +143,14 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
- u16 vport,
- enum fs_flow_table_op_mod op_mod,
- enum fs_flow_table_type type, unsigned int level,
- unsigned int log_size, struct mlx5_flow_table
- *next_ft, unsigned int *table_id, u32 flags)
+static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags)
{
int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
@@ -125,8 +199,8 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
return err;
}
-int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft)
+static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
{
u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
@@ -143,9 +217,9 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_table *next_ft)
+static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
{
u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0};
u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
@@ -188,10 +262,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- u32 *in,
- unsigned int *group_id)
+static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
{
u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -213,9 +287,9 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
return err;
}
-int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned int group_id)
+static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id)
{
u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0};
@@ -243,7 +317,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
struct mlx5_flow_rule *dst;
- void *in_flow_context;
+ void *in_flow_context, *vlan;
void *in_match_value;
void *in_dests;
u32 *in;
@@ -266,16 +340,25 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
- MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
- MLX5_SET(flow_context, in_flow_context, action, fte->action);
- MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id);
- MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id);
+
+ MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
+ MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+ MLX5_SET(flow_context, in_flow_context, modify_header_id,
+ fte->action.modify_id);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan.ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan.vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan.prio);
+
in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
match_value);
memcpy(in_match_value, &fte->val, sizeof(fte->val));
in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
- if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
int list_size = 0;
list_for_each_entry(dst, &fte->node.children, node.list) {
@@ -301,7 +384,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
list_size);
}
- if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
log_max_flow_counter,
ft->type));
@@ -332,19 +415,21 @@ err_out:
return err;
}
-int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned group_id,
- struct fs_fte *fte)
+static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
{
+ unsigned int group_id = group->id;
+
return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
}
-int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned group_id,
- int modify_mask,
- struct fs_fte *fte)
+static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
{
int opmod;
int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
@@ -357,9 +442,9 @@ int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
}
-int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned int index)
+static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
{
u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0};
@@ -367,7 +452,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
MLX5_SET(delete_fte_in, in, table_type, ft->type);
MLX5_SET(delete_fte_in, in, table_id, ft->id);
- MLX5_SET(delete_fte_in, in, flow_index, index);
+ MLX5_SET(delete_fte_in, in, flow_index, fte->index);
if (ft->vport) {
MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
MLX5_SET(delete_fte_in, in, other_vport, 1);
@@ -610,3 +695,53 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds = {
+ .create_flow_table = mlx5_cmd_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_modify_flow_table,
+ .create_flow_group = mlx5_cmd_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_destroy_flow_group,
+ .create_fte = mlx5_cmd_create_fte,
+ .update_fte = mlx5_cmd_update_fte,
+ .delete_fte = mlx5_cmd_delete_fte,
+ .update_root_ft = mlx5_cmd_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
+ .create_flow_table = mlx5_cmd_stub_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_stub_modify_flow_table,
+ .create_flow_group = mlx5_cmd_stub_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group,
+ .create_fte = mlx5_cmd_stub_create_fte,
+ .update_fte = mlx5_cmd_stub_update_fte,
+ .delete_fte = mlx5_cmd_stub_delete_fte,
+ .update_root_ft = mlx5_cmd_stub_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+{
+ return &mlx5_flow_cmds;
+}
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void)
+{
+ return &mlx5_flow_cmd_stubs;
+}
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ case FS_FT_ESW_EGRESS_ACL:
+ case FS_FT_ESW_INGRESS_ACL:
+ case FS_FT_FDB:
+ case FS_FT_SNIFFER_RX:
+ case FS_FT_SNIFFER_TX:
+ return mlx5_fs_cmd_get_fw_cmds();
+ case FS_FT_NIC_TX:
+ default:
+ return mlx5_fs_cmd_get_stub_cmds();
+ }
+}
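mlx5_fs_cmd_get_default() above chooses a const command set by table type: firmware-managed types get the real command implementations, while FS_FT_NIC_TX (and any unknown type) gets stubs that succeed without touching the device, so the flow-steering core can also manage software-only tables. A cut-down sketch of that selection, with invented types and a single op:

#include <stdio.h>

enum table_type { TT_RX, TT_TX };

struct ops {
	int (*create)(unsigned int id);
};

static int fw_create(unsigned int id)
{
	printf("fw: create table %u\n", id);	/* would talk to the device */
	return 0;
}

static int stub_create(unsigned int id)
{
	(void)id;
	return 0;	/* software-only table: nothing to program */
}

static const struct ops fw_ops   = { .create = fw_create };
static const struct ops stub_ops = { .create = stub_create };

static const struct ops *get_default(enum table_type type)
{
	switch (type) {
	case TT_RX:			/* types the firmware manages */
		return &fw_ops;
	case TT_TX:			/* kept in software only */
	default:
		return &stub_ops;
	}
}

int main(void)
{
	return get_default(TT_RX)->create(7);
}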
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 71e2d0f37ad9..6228ba7bfa1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -33,46 +33,52 @@
#ifndef _MLX5_FS_CMD_
#define _MLX5_FS_CMD_
-int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
- u16 vport,
- enum fs_flow_table_op_mod op_mod,
- enum fs_flow_table_type type, unsigned int level,
- unsigned int log_size, struct mlx5_flow_table
- *next_ft, unsigned int *table_id, u32 flags);
+#include "fs_core.h"
-int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft);
+struct mlx5_flow_cmds {
+ int (*create_flow_table)(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level, unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags);
+ int (*destroy_flow_table)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft);
-int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_table *next_ft);
+ int (*modify_flow_table)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft);
-int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- u32 *in, unsigned int *group_id);
+ int (*create_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id);
-int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned int group_id);
+ int (*destroy_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id);
-int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned group_id,
- struct fs_fte *fte);
+ int (*create_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte);
-int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned group_id,
- int modify_mask,
- struct fs_fte *fte);
+ int (*update_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte);
-int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned int index);
+ int (*delete_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte);
-int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft, u32 underlay_qpn,
- bool disconnect);
+ int (*update_root_ft)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect);
+};
int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
@@ -90,4 +96,6 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
struct mlx5_cmd_fc_bulk *b, u32 id,
u64 *packets, u64 *bytes);
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c025c98700e4..de51e7c39bc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -37,6 +37,8 @@
#include "fs_core.h"
#include "fs_cmd.h"
#include "diag/fs_tracepoint.h"
+#include "accel/ipsec.h"
+#include "fpga/ipsec.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
@@ -425,15 +427,17 @@ static void del_sw_prio(struct fs_node *node)
static void del_hw_flow_table(struct fs_node *node)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
int err;
fs_get_obj(ft, node);
dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
if (node->active) {
- err = mlx5_cmd_destroy_flow_table(dev, ft);
+ err = root->cmds->destroy_flow_table(dev, ft);
if (err)
mlx5_core_warn(dev, "flow steering can't destroy ft\n");
}
@@ -454,6 +458,7 @@ static void del_sw_flow_table(struct fs_node *node)
static void del_sw_hw_rule(struct fs_node *node)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_rule *rule;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
@@ -477,19 +482,20 @@ static void del_sw_hw_rule(struct fs_node *node)
if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
--fte->dests_size) {
modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
- fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
update_fte = true;
goto out;
}
- if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+ if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
--fte->dests_size) {
modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
update_fte = true;
}
out:
+ root = find_root(&ft->node);
if (update_fte && fte->dests_size) {
- err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte);
+ err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte);
if (err)
mlx5_core_warn(dev,
"%s can't del rule fg id=%d fte_index=%d\n",
@@ -500,6 +506,7 @@ out:
static void del_hw_fte(struct fs_node *node)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
struct mlx5_core_dev *dev;
@@ -512,9 +519,9 @@ static void del_hw_fte(struct fs_node *node)
trace_mlx5_fs_del_fte(fte);
dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
if (node->active) {
- err = mlx5_cmd_delete_fte(dev, ft,
- fte->index);
+ err = root->cmds->delete_fte(dev, ft, fte);
if (err)
mlx5_core_warn(dev,
"flow steering can't delete fte in index %d of flow group id %d\n",
@@ -542,6 +549,7 @@ static void del_sw_fte(struct fs_node *node)
static void del_hw_flow_group(struct fs_node *node)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_group *fg;
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
@@ -551,7 +559,8 @@ static void del_hw_flow_group(struct fs_node *node)
dev = get_dev(&ft->node);
trace_mlx5_fs_del_fg(fg);
- if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
+ root = find_root(&ft->node);
+ if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id))
mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
fg->id, ft->id);
}
@@ -615,10 +624,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
memcpy(fte->val, match_value, sizeof(fte->val));
fte->node.type = FS_TYPE_FLOW_ENTRY;
- fte->flow_tag = flow_act->flow_tag;
- fte->action = flow_act->action;
- fte->encap_id = flow_act->encap_id;
- fte->modify_id = flow_act->modify_id;
+ fte->action = *flow_act;
tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
@@ -797,15 +803,14 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
struct fs_prio *prio,
struct mlx5_flow_table *ft)
{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
struct mlx5_flow_table *iter;
int i = 0;
int err;
fs_for_each_ft(iter, prio) {
i++;
- err = mlx5_cmd_modify_flow_table(dev,
- iter,
- ft);
+ err = root->cmds->modify_flow_table(dev, iter, ft);
if (err) {
mlx5_core_warn(dev, "Failed to modify flow table %d\n",
iter->id);
@@ -853,12 +858,12 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
if (list_empty(&root->underlay_qpns)) {
/* Don't set any QPN (zero) in case QPN list is empty */
qpn = 0;
- err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false);
+ err = root->cmds->update_root_ft(root->dev, ft, qpn, false);
} else {
list_for_each_entry(uqp, &root->underlay_qpns, list) {
qpn = uqp->qpn;
- err = mlx5_cmd_update_root_ft(root->dev, ft, qpn,
- false);
+ err = root->cmds->update_root_ft(root->dev, ft,
+ qpn, false);
if (err)
break;
}
@@ -877,6 +882,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
struct mlx5_flow_destination *dest)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
struct fs_fte *fte;
@@ -884,17 +890,16 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
int err = 0;
fs_get_obj(fte, rule->node.parent);
- if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return -EINVAL;
down_write_ref_node(&fte->node);
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
memcpy(&rule->dest_attr, dest, sizeof(*dest));
- err = mlx5_cmd_update_fte(get_dev(&ft->node),
- ft, fg->id,
- modify_mask,
- fte);
+ root = find_root(&ft->node);
+ err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+ modify_mask, fte);
up_write_ref_node(&fte->node);
return err;
@@ -1035,9 +1040,9 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
next_ft = find_next_chained_ft(fs_prio);
- err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
- ft->level, log_table_sz, next_ft, &ft->id,
- ft->flags);
+ err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod,
+ ft->type, ft->level, log_table_sz,
+ next_ft, &ft->id, ft->flags);
if (err)
goto free_ft;
@@ -1053,7 +1058,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
mutex_unlock(&root->chain_lock);
return ft;
destroy_ft:
- mlx5_cmd_destroy_flow_table(root->dev, ft);
+ root->cmds->destroy_flow_table(root->dev, ft);
free_ft:
kfree(ft);
unlock_root:
@@ -1125,6 +1130,7 @@ EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
u32 *fg_in)
{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
fg_in, match_criteria);
u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
@@ -1152,7 +1158,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
if (IS_ERR(fg))
return fg;
- err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
+ err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id);
if (err) {
tree_put_node(&fg->node);
return ERR_PTR(err);
@@ -1275,6 +1281,7 @@ add_rule_fte(struct fs_fte *fte,
int dest_num,
bool update_action)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_handle *handle;
struct mlx5_flow_table *ft;
int modify_mask = 0;
@@ -1290,12 +1297,13 @@ add_rule_fte(struct fs_fte *fte,
modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
fs_get_obj(ft, fg->node.parent);
+ root = find_root(&fg->node);
if (!(fte->status & FS_FTE_STATUS_EXISTING))
- err = mlx5_cmd_create_fte(get_dev(&ft->node),
- ft, fg->id, fte);
+ err = root->cmds->create_fte(get_dev(&ft->node),
+ ft, fg, fte);
else
- err = mlx5_cmd_update_fte(get_dev(&ft->node),
- ft, fg->id, modify_mask, fte);
+ err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+ modify_mask, fte);
if (err)
goto free_handle;
@@ -1360,6 +1368,7 @@ out:
static int create_auto_flow_group(struct mlx5_flow_table *ft,
struct mlx5_flow_group *fg)
{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
struct mlx5_core_dev *dev = get_dev(&ft->node);
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
void *match_criteria_addr;
@@ -1380,7 +1389,7 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
memcpy(match_criteria_addr, fg->mask.match_criteria,
sizeof(fg->mask.match_criteria));
- err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id);
+ err = root->cmds->create_flow_group(dev, ft, in, &fg->id);
if (!err) {
fg->node.active = true;
trace_mlx5_fs_add_fg(fg);
@@ -1429,7 +1438,10 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_ENCAP |
- MLX5_FLOW_CONTEXT_ACTION_DECAP))
+ MLX5_FLOW_CONTEXT_ACTION_DECAP |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
return true;
return false;
@@ -1437,16 +1449,17 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
{
- if (check_conflicting_actions(flow_act->action, fte->action)) {
+ if (check_conflicting_actions(flow_act->action, fte->action.action)) {
mlx5_core_warn(get_dev(&fte->node),
"Found two FTEs with conflicting actions\n");
return -EEXIST;
}
- if (fte->flow_tag != flow_act->flow_tag) {
+ if (flow_act->has_flow_tag &&
+ fte->action.flow_tag != flow_act->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
- fte->flow_tag,
+ fte->action.flow_tag,
flow_act->flow_tag);
return -EEXIST;
}
@@ -1470,12 +1483,12 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
if (ret)
return ERR_PTR(ret);
- old_action = fte->action;
- fte->action |= flow_act->action;
+ old_action = fte->action.action;
+ fte->action.action |= flow_act->action;
handle = add_rule_fte(fte, fg, dest, dest_num,
old_action != flow_act->action);
if (IS_ERR(handle)) {
- fte->action = old_action;
+ fte->action.action = old_action;
return handle;
}
trace_mlx5_fs_set_fte(fte, false);
@@ -1636,7 +1649,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
list_for_each_entry(iter, match_head, list) {
nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT);
- ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL);
}
search_again_locked:
@@ -1758,8 +1770,11 @@ search_again_locked:
/* Collect all fgs which has a matching match_criteria */
err = build_match_list(&match_head, ft, spec);
- if (err)
+ if (err) {
+ if (take_write)
+ up_write_ref_node(&ft->node);
return ERR_PTR(err);
+ }
if (!take_write)
up_read_ref_node(&ft->node);
@@ -1768,8 +1783,11 @@ search_again_locked:
dest_num, version);
free_match_list(&match_head);
if (!IS_ERR(rule) ||
- (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN))
+ (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
+ if (take_write)
+ up_write_ref_node(&ft->node);
return rule;
+ }
if (!take_write) {
nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
@@ -1912,7 +1930,6 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
return 0;
new_root_ft = find_next_ft(ft);
-
if (!new_root_ft) {
root->root_ft = NULL;
return 0;
@@ -1921,13 +1938,14 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
if (list_empty(&root->underlay_qpns)) {
/* Don't set any QPN (zero) in case QPN list is empty */
qpn = 0;
- err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn,
- false);
+ err = root->cmds->update_root_ft(root->dev, new_root_ft,
+ qpn, false);
} else {
list_for_each_entry(uqp, &root->underlay_qpns, list) {
qpn = uqp->qpn;
- err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
- qpn, false);
+ err = root->cmds->update_root_ft(root->dev,
+ new_root_ft, qpn,
+ false);
if (err)
break;
}
@@ -2039,6 +2057,11 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return &steering->sniffer_tx_root_ns->ns;
else
return NULL;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ if (steering->egress_root_ns)
+ return &steering->egress_root_ns->ns;
+ else
+ return NULL;
default:
return NULL;
}
@@ -2229,13 +2252,18 @@ static int init_root_tree(struct mlx5_flow_steering *steering,
return 0;
}
-static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering,
- enum fs_flow_table_type
- table_type)
+static struct mlx5_flow_root_namespace
+*create_root_ns(struct mlx5_flow_steering *steering,
+ enum fs_flow_table_type table_type)
{
+ const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_namespace *ns;
+ if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+ (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
+ cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
+
/* Create the root namespace */
root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
if (!root_ns)
@@ -2243,6 +2271,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering
root_ns->dev = steering->dev;
root_ns->table_type = table_type;
+ root_ns->cmds = cmds;
INIT_LIST_HEAD(&root_ns->underlay_qpns);
@@ -2401,6 +2430,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
cleanup_root_ns(steering->fdb_root_ns);
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
+ cleanup_root_ns(steering->egress_root_ns);
mlx5_cleanup_fc_stats(dev);
kmem_cache_destroy(steering->ftes_cache);
kmem_cache_destroy(steering->fgs_cache);
@@ -2546,6 +2576,20 @@ cleanup_root_ns:
return err;
}
+static int init_egress_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->egress_root_ns = create_root_ns(steering,
+ FS_FT_NIC_TX);
+ if (!steering->egress_root_ns)
+ return -ENOMEM;
+
+ /* create 1 prio */
+ prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
int mlx5_init_fs(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering;
@@ -2611,6 +2655,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}
+ if (MLX5_IPSEC_DEV(dev)) {
+ err = init_egress_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
return 0;
err:
mlx5_cleanup_fs(dev);
@@ -2634,7 +2684,8 @@ int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
goto update_ft_fail;
}
- err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false);
+ err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+ false);
if (err) {
mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
underlay_qpn, err);
@@ -2677,7 +2728,8 @@ int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
goto out;
}
- err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true);
+ err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+ true);
if (err)
mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
underlay_qpn, err);
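The fs_core.c hunks above all make the same substitution: callers resolve the root namespace with find_root() and invoke root->cmds->op() instead of naming mlx5_cmd_*() directly, so every namespace carries the command set chosen for it at creation. A reduced sketch of the indirection, with invented names:

#include <stdio.h>

struct ops {
	int (*update_root)(int qpn);
};

static int fw_update_root(int qpn)
{
	printf("firmware: new root, underlay qpn %d\n", qpn);
	return 0;
}

struct root_ns {
	const struct ops *cmds;		/* fixed when the namespace is made */
};

static int move_root(struct root_ns *root, int qpn)
{
	/* The call site no longer knows which backend it is driving. */
	return root->cmds->update_root(qpn);
}

int main(void)
{
	static const struct ops fw_ops = { .update_root = fw_update_root };
	struct root_ns root = { .cmds = &fw_ops };

	return move_root(&root, 42);
}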
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 05262708f14b..e26d3e9d5f9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -48,6 +48,7 @@ enum fs_node_type {
enum fs_flow_table_type {
FS_FT_NIC_RX = 0x0,
+ FS_FT_NIC_TX = 0x1,
FS_FT_ESW_EGRESS_ACL = 0x2,
FS_FT_ESW_INGRESS_ACL = 0x3,
FS_FT_FDB = 0X4,
@@ -75,6 +76,7 @@ struct mlx5_flow_steering {
struct mlx5_flow_root_namespace **esw_ingress_root_ns;
struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
+ struct mlx5_flow_root_namespace *egress_root_ns;
};
struct fs_node {
@@ -174,11 +176,8 @@ struct fs_fte {
struct fs_node node;
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
- u32 flow_tag;
u32 index;
- u32 action;
- u32 encap_id;
- u32 modify_id;
+ struct mlx5_flow_act action;
enum fs_fte_status status;
struct mlx5_fc *counter;
struct rhash_head hash;
@@ -224,6 +223,7 @@ struct mlx5_flow_root_namespace {
/* Should be held when chaining flow tables */
struct mutex chain_lock;
struct list_head underlay_qpns;
+ const struct mlx5_flow_cmds *cmds;
};
int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
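The fs_core.h hunk above collapses the separate flow_tag/action/encap_id/modify_id fields of struct fs_fte into one embedded struct mlx5_flow_act, which alloc_fte() now fills with a single structure assignment (fte->action = *flow_act). A simplified sketch with cut-down types:

#include <stdio.h>

struct flow_act {
	unsigned int action;
	unsigned int flow_tag;
};

struct fte {
	unsigned int index;
	struct flow_act action;	/* was: separate action/flow_tag/... fields */
};

static void fte_init(struct fte *fte, const struct flow_act *act)
{
	/* One assignment replaces the per-field copies and automatically
	 * carries any member later added to flow_act (vlan, encap, ...).
	 */
	fte->action = *act;
}

int main(void)
{
	struct flow_act act = { .action = 0x4, .flow_tag = 9 };
	struct fte fte = { .index = 0 };

	fte_init(&fte, &act);
	printf("fte %u: action 0x%x tag %u\n",
	       fte.index, fte.action.action, fte.action.flow_tag);
	return 0;
}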
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 9d11e92fb541..d7bb10ab2173 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -183,6 +183,9 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, debug))
+ mlx5_core_get_caps(dev, MLX5_CAP_DEBUG);
+
if (MLX5_CAP_GEN(dev, pcam_reg))
mlx5_get_pcam_reg(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 21d29f7936f6..d39b0b7011b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -124,7 +124,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
trigger_cmd_completions(dev);
}
- mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
+ mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
mlx5_core_err(dev, "end\n");
unlock:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index e159243e0fcf..857035583ccd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -34,6 +34,7 @@
#include <linux/highmem.h>
#include <rdma/mlx5-abi.h>
#include "en.h"
+#include "clock.h"
enum {
MLX5_CYCLES_SHIFT = 23
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 2ef641c91c26..13b6f66310c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -58,6 +58,7 @@
#include "eswitch.h"
#include "lib/mlx5.h"
#include "fpga/core.h"
+#include "fpga/ipsec.h"
#include "accel/ipsec.h"
#include "lib/clock.h"
@@ -551,7 +552,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
MLX5_SET(cmd_hca_cap,
set_hca_cap,
cache_line_128byte,
- cache_line_size() == 128 ? 1 : 0);
+ cache_line_size() >= 128 ? 1 : 0);
if (MLX5_CAP_GEN_MAX(dev, dct))
MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
@@ -942,9 +943,9 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto out;
}
- err = mlx5_init_cq_table(dev);
+ err = mlx5_cq_debugfs_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize cq table\n");
+ dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
goto err_eq_cleanup;
}
@@ -1002,7 +1003,7 @@ err_tables_cleanup:
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
- mlx5_cleanup_cq_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
err_eq_cleanup:
mlx5_eq_cleanup(dev);
@@ -1023,7 +1024,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
- mlx5_cleanup_cq_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
mlx5_eq_cleanup(dev);
}
@@ -1173,6 +1174,18 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_affinity_hints;
}
+ err = mlx5_fpga_device_start(dev);
+ if (err) {
+ dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+ goto err_fpga_start;
+ }
+
+ err = mlx5_accel_ipsec_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+ goto err_ipsec_start;
+ }
+
err = mlx5_init_fs(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1191,17 +1204,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_sriov;
}
- err = mlx5_fpga_device_start(dev);
- if (err) {
- dev_err(&pdev->dev, "fpga device start failed %d\n", err);
- goto err_fpga_start;
- }
- err = mlx5_accel_ipsec_init(dev);
- if (err) {
- dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
- goto err_ipsec_start;
- }
-
if (mlx5_device_registered(dev)) {
mlx5_attach_device(dev);
} else {
@@ -1219,17 +1221,18 @@ out:
return 0;
err_reg_dev:
- mlx5_accel_ipsec_cleanup(dev);
-err_ipsec_start:
- mlx5_fpga_device_stop(dev);
-
-err_fpga_start:
mlx5_sriov_detach(dev);
err_sriov:
mlx5_cleanup_fs(dev);
err_fs:
+ mlx5_accel_ipsec_cleanup(dev);
+
+err_ipsec_start:
+ mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
mlx5_irq_clear_affinity_hints(dev);
err_affinity_hints:
@@ -1296,11 +1299,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
- mlx5_accel_ipsec_cleanup(dev);
- mlx5_fpga_device_stop(dev);
-
mlx5_sriov_detach(dev);
mlx5_cleanup_fs(dev);
+ mlx5_accel_ipsec_cleanup(dev);
+ mlx5_fpga_device_stop(dev);
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
@@ -1657,6 +1659,7 @@ static int __init init(void)
get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
mlx5_core_verify_params();
+ mlx5_fpga_ipsec_build_fs_cmds();
mlx5_register_debugfs();
err = pci_register_driver(&mlx5_core_driver);
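The main.c hunks above move FPGA and IPsec start ahead of flow-steering init and rewrite both the error ladder and mlx5_unload_one() so teardown runs in exactly the reverse order of bring-up. A compact sketch of that goto-label unwinding, with stub subsystems standing in for the real ones:

#include <stdio.h>

static int fpga_start(void)	{ puts("fpga up");	return 0; }
static void fpga_stop(void)	{ puts("fpga down");	}
static int ipsec_init(void)	{ puts("ipsec up");	return 0; }
static void ipsec_cleanup(void)	{ puts("ipsec down");	}
static int fs_init(void)	{ puts("fs init fails"); return -1; }

static int load_one(void)
{
	int err;

	err = fpga_start();
	if (err)
		goto err_fpga_start;
	err = ipsec_init();
	if (err)
		goto err_ipsec_start;
	err = fs_init();
	if (err)
		goto err_fs;
	return 0;

err_fs:
	ipsec_cleanup();	/* undo only what already succeeded... */
err_ipsec_start:
	fpga_stop();		/* ...in reverse order of bring-up */
err_fpga_start:
	return err;
}

int main(void)
{
	return load_one() ? 1 : 0;
}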
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 394552f36fcf..4e25f2b2e0bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,16 +38,11 @@
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
+#include <linux/mlx5/cq.h>
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "5.0-0"
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
- (MLX5_CAP_GEN(mdev, vport_group_manager) && \
- (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
- mlx5_core_is_pf(mdev))
-
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
@@ -115,9 +110,29 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+
+int mlx5_eq_init(struct mlx5_core_dev *dev);
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ int nent, u64 mask, const char *name,
+ enum mlx5_eq_type type);
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen);
+int mlx5_start_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
u8 access_reg_group);
@@ -186,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
int mlx5_lag_allow(struct mlx5_core_dev *dev);
int mlx5_lag_forbid(struct mlx5_core_dev *dev);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index c37d00cd472a..fa9d0760dd36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -483,6 +483,17 @@ int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
+static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out,
+ u32 out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ out_size, MLX5_REG_PFCC, 0, 0);
+}
+
int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
{
u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
@@ -500,13 +511,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pause);
int mlx5_query_port_pause(struct mlx5_core_dev *dev,
u32 *rx_pause, u32 *tx_pause)
{
- u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
int err;
- MLX5_SET(pfcc_reg, in, local_port, 1);
- err = mlx5_core_access_reg(dev, in, sizeof(in), out,
- sizeof(out), MLX5_REG_PFCC, 0, 0);
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
if (err)
return err;
@@ -520,6 +528,49 @@ int mlx5_query_port_pause(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_query_port_pause);
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 stall_critical_watermark,
+ u16 stall_minor_watermark)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pptx_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, pprx_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, ppan_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, critical_stall_mask, 1);
+ MLX5_SET(pfcc_reg, in, minor_stall_mask, 1);
+ MLX5_SET(pfcc_reg, in, device_stall_critical_watermark,
+ stall_critical_watermark);
+ MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 *stall_critical_watermark,
+ u16 *stall_minor_watermark)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (stall_critical_watermark)
+ *stall_critical_watermark = MLX5_GET(pfcc_reg, out,
+ device_stall_critical_watermark);
+
+ if (stall_minor_watermark)
+ *stall_minor_watermark = MLX5_GET(pfcc_reg, out,
+ device_stall_minor_watermark);
+
+ return 0;
+}
+
int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
{
u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
@@ -538,13 +589,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pfc);
int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
{
- u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
int err;
- MLX5_SET(pfcc_reg, in, local_port, 1);
- err = mlx5_core_access_reg(dev, in, sizeof(in), out,
- sizeof(out), MLX5_REG_PFCC, 0, 0);
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
if (err)
return err;
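The port.c hunks above factor the repeated PFCC register read into mlx5_query_pfcc_reg(), so mlx5_query_port_pause(), mlx5_query_port_pfc() and the new stall-watermark query each just parse fields out of one shared helper's output. A sketch of the shape, with an invented register layout and a faked register access:

#include <stdio.h>

struct pfcc { unsigned int rx_pause, tx_pause, minor_wm, critical_wm; };

/* Stand-in for the single mlx5_core_access_reg() call. */
static int query_pfcc(struct pfcc *out)
{
	out->rx_pause = 1;
	out->tx_pause = 1;
	out->minor_wm = 100;
	out->critical_wm = 300;
	return 0;
}

static int query_port_pause(unsigned int *rx, unsigned int *tx)
{
	struct pfcc reg;
	int err = query_pfcc(&reg);	/* shared read */

	if (err)
		return err;
	*rx = reg.rx_pause;		/* caller-specific parsing only */
	*tx = reg.tx_pause;
	return 0;
}

int main(void)
{
	unsigned int rx, tx;

	if (!query_port_pause(&rx, &tx))
		printf("pause rx %u tx %u\n", rx, tx);
	return 0;
}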
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index dfe36cf6fbea..177e076b8d17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1070,6 +1070,32 @@ free:
}
EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
+int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
+ u64 *rx_discard_vport_down,
+ u64 *tx_discard_vport_down)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+ int err;
+
+ MLX5_SET(query_vnic_env_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+ MLX5_SET(query_vnic_env_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+ vport_env.receive_discard_vport_down);
+ *tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+ vport_env.transmit_discard_vport_down);
+ return 0;
+}
+
int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
u8 other_vport, u8 port_num,
int vf,
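mlx5_query_vport_down_stats() above issues QUERY_VNIC_ENV for a given vport, setting other_vport only when the target is not the caller's own function, then extracts the two 64-bit discard counters from the reply. A stub sketch of the flow, with the command execution faked:

#include <stdint.h>
#include <stdio.h>

struct cmd_out { uint64_t rx_discard, tx_discard; };

/* Stand-in for mlx5_cmd_exec(): pretend the device answered. */
static int cmd_exec(uint16_t vport, int other_vport, struct cmd_out *out)
{
	(void)vport;
	(void)other_vport;
	out->rx_discard = 12;
	out->tx_discard = 3;
	return 0;
}

static int query_vport_down_stats(uint16_t vport, uint64_t *rx, uint64_t *tx)
{
	struct cmd_out out;
	int err = cmd_exec(vport, vport != 0, &out);	/* vport 0 == self */

	if (err)
		return err;
	*rx = out.rx_discard;
	*tx = out.tx_discard;
	return 0;
}

int main(void)
{
	uint64_t rx, tx;

	if (!query_vport_down_stats(0, &rx, &tx))
		printf("down discards: rx %llu tx %llu\n",
		       (unsigned long long)rx, (unsigned long long)tx);
	return 0;
}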
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 6bcfc25350f5..ea66448ba365 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -41,7 +41,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
{
- return wq->sz_m1 + 1;
+ return wq->fbc.sz_m1 + 1;
}
u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
@@ -62,7 +62,7 @@ static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
{
- return mlx5_cqwq_get_size(wq) << wq->log_stride;
+ return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
}
static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
@@ -92,7 +92,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->buf = wq_ctrl->buf.direct.buf;
+ wq->buf = wq_ctrl->buf.frags->buf;
wq->db = wq_ctrl->db.db;
wq_ctrl->mdev = mdev;
@@ -130,7 +130,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->rq.buf = wq_ctrl->buf.direct.buf;
+ wq->rq.buf = wq_ctrl->buf.frags->buf;
wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
@@ -151,11 +151,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
{
int err;
- wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
- wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
- wq->sz_m1 = (1 << wq->log_sz) - 1;
- wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
- wq->frag_sz_m1 = (1 << wq->log_frag_strides) - 1;
+ mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
@@ -172,7 +168,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->frag_buf = wq_ctrl->frag_buf;
+ wq->fbc.frag_buf = wq_ctrl->frag_buf;
wq->db = wq_ctrl->db.db;
wq_ctrl->mdev = mdev;
@@ -209,7 +205,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->buf = wq_ctrl->buf.direct.buf;
+ wq->buf = wq_ctrl->buf.frags->buf;
wq->db = wq_ctrl->db.db;
for (i = 0; i < wq->sz_m1; i++) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 718589d0cec2..fca90b94596d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -45,7 +45,7 @@ struct mlx5_wq_param {
struct mlx5_wq_ctrl {
struct mlx5_core_dev *mdev;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
};
@@ -68,14 +68,9 @@ struct mlx5_wq_qp {
};
struct mlx5_cqwq {
- struct mlx5_frag_buf frag_buf;
- __be32 *db;
- u32 sz_m1;
- u32 frag_sz_m1;
- u32 cc; /* consumer counter */
- u8 log_sz;
- u8 log_stride;
- u8 log_frag_strides;
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u32 cc; /* consumer counter */
};
struct mlx5_wq_ll {
@@ -131,20 +126,17 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
{
- return wq->cc & wq->sz_m1;
+ return wq->cc & wq->fbc.sz_m1;
}
static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
{
- unsigned int frag = (ix >> wq->log_frag_strides);
-
- return wq->frag_buf.frags[frag].buf +
- ((wq->frag_sz_m1 & ix) << wq->log_stride);
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
}
static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
{
- return wq->cc >> wq->log_sz;
+ return wq->cc >> wq->fbc.log_sz;
}
static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq)
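The wq.c/wq.h hunks above replace struct mlx5_cqwq's hand-rolled size and stride fields with a shared struct mlx5_frag_buf_ctrl, so entry lookup becomes mlx5_frag_buf_get_wqe(&wq->fbc, ix). The underlying math splits an index into a page-sized fragment number and an offset inside it; a sketch with invented names:

#include <stdio.h>

#define PAGE_SHIFT 12

struct frag_buf_ctrl {
	unsigned char log_stride;	/* log2 of the entry size */
	unsigned char log_frag_strides;	/* log2 of entries per page */
	unsigned int frag_sz_m1;	/* (1 << log_frag_strides) - 1 */
};

static void fbc_init(struct frag_buf_ctrl *fbc, unsigned char log_stride)
{
	fbc->log_stride = log_stride;
	fbc->log_frag_strides = PAGE_SHIFT - log_stride;
	fbc->frag_sz_m1 = (1u << fbc->log_frag_strides) - 1;
}

static void locate(const struct frag_buf_ctrl *fbc, unsigned int ix,
		   unsigned int *frag, unsigned int *offset)
{
	*frag = ix >> fbc->log_frag_strides;		/* which page */
	*offset = (ix & fbc->frag_sz_m1) << fbc->log_stride; /* where in it */
}

int main(void)
{
	struct frag_buf_ctrl fbc;
	unsigned int frag, off;

	fbc_init(&fbc, 6);		/* 64-byte CQEs */
	locate(&fbc, 100, &frag, &off);
	printf("entry 100 -> fragment %u, offset %u\n", frag, off);
	return 0;
}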
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index d56eea310509..f4d9c9975ac3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -76,6 +76,8 @@ config MLXSW_SPECTRUM
depends on PSAMPLE || PSAMPLE=n
depends on BRIDGE || BRIDGE=n
depends on IPV6 || IPV6=n
+ depends on NET_IPGRE || NET_IPGRE=n
+ depends on IPV6_GRE || IPV6_GRE=n
select PARMAN
select MLXFW
default m
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 9463c3fa254f..0cadcabfe86f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -20,7 +20,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_cnt.o spectrum_fid.o \
spectrum_ipip.o spectrum_acl_flex_actions.o \
spectrum_mr.o spectrum_mr_tcam.o \
- spectrum_qdisc.o
+ spectrum_qdisc.o spectrum_span.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index b698fb481b2e..3c0d882ba183 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <[email protected]>
*
* Redistribution and use in source and binary forms, with or without
@@ -443,6 +443,17 @@ int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
}
EXPORT_SYMBOL(mlxsw_afa_block_jump);
+int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block)
+{
+ if (block->finished)
+ return -EINVAL;
+ mlxsw_afa_set_goto_set(block->cur_set,
+ MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0);
+ block->finished = true;
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_terminate);
+
static struct mlxsw_afa_fwd_entry *
mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u8 local_port)
{
@@ -838,7 +849,6 @@ struct mlxsw_afa_mirror {
struct mlxsw_afa_resource resource;
int span_id;
u8 local_in_port;
- u8 local_out_port;
bool ingress;
};
@@ -848,7 +858,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
{
block->afa->ops->mirror_del(block->afa->ops_priv,
mirror->local_in_port,
- mirror->local_out_port,
+ mirror->span_id,
mirror->ingress);
kfree(mirror);
}
@@ -864,9 +874,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
}
static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
@@ -876,13 +885,12 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
return ERR_PTR(-ENOMEM);
err = block->afa->ops->mirror_add(block->afa->ops_priv,
- local_in_port, local_out_port,
+ local_in_port, out_dev,
ingress, &mirror->span_id);
if (err)
goto err_mirror_add;
mirror->ingress = ingress;
- mirror->local_out_port = local_out_port;
mirror->local_in_port = local_in_port;
mirror->resource.destructor = mlxsw_afa_mirror_destructor;
mlxsw_afa_resource_add(block, &mirror->resource);
@@ -909,13 +917,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
}
int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
- mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+ mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
ingress);
if (IS_ERR(mirror))
return PTR_ERR(mirror);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index 43132293475c..3a155d104384 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -36,6 +36,7 @@
#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
#include <linux/types.h>
+#include <linux/netdevice.h>
struct mlxsw_afa;
struct mlxsw_afa_block;
@@ -48,9 +49,10 @@ struct mlxsw_afa_ops {
void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
void (*counter_index_put)(void *priv, unsigned int counter_index);
- int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+ int (*mirror_add)(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id);
- void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+ void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
bool ingress);
};
@@ -65,12 +67,14 @@ char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block);
u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block);
int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
+int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block);
int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block);
int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
u16 trap_id);
int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
+ u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress);
int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
u8 local_port, bool in_port);
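The mlxsw_afa changes above rework the mirroring ops: mirror_add() now takes the destination net_device and returns an opaque span_id, and mirror_del() consumes that span_id instead of a raw output port, so resolving the device to hardware state stays behind the ops. A toy sketch of the API shape, all names invented:

#include <stdio.h>

struct net_device { const char *name; };

static int next_span_id = 1;

static int mirror_add(unsigned char in_port,
		      const struct net_device *out_dev, int *p_span_id)
{
	/* Mapping out_dev to a hardware analyzer port happens here,
	 * invisible to the caller.
	 */
	*p_span_id = next_span_id++;
	printf("mirror port %u -> %s as span %d\n",
	       in_port, out_dev->name, *p_span_id);
	return 0;
}

static void mirror_del(unsigned char in_port, int span_id)
{
	printf("unmirror port %u, span %d\n", in_port, span_id);
}

int main(void)
{
	struct net_device dev = { .name = "sw1p2" };
	int span_id;

	if (!mirror_add(8, &dev, &span_id))
		mirror_del(8, span_id);
	return 0;
}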
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index f6963b0b4a55..122506daa586 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -107,20 +107,20 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3),
MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9),
- MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8),
- MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2),
- MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6),
- MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32),
- MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32),
- MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8),
- MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x20, 8),
- MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x28, 8),
- MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x30, 8),
MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16),
MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
+ MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x20, 0, 32),
+ MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x24, 0, 32),
+ MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x20, 8),
+ MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x28, 8),
+ MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x30, 8),
+ MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x38, 8),
};
-#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x38
+#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40
struct mlxsw_afk_element_inst { /* element instance in actual block */
const struct mlxsw_afk_element_info *info;
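The element table above shifts the IP fields to higher offsets so they no longer share storage with the L4 ports at 0x14; the last element, DST_IP6_LO, now begins at 0x38 and spans 8 bytes, which is why MLXSW_AFK_ELEMENT_STORAGE_SIZE grows from 0x38 to 0x40. The arithmetic as a trivially checkable sketch (offsets copied from the table):

#include <assert.h>

#define DST_IP6_LO_OFFSET 0x38
#define DST_IP6_LO_SIZE   8
#define STORAGE_SIZE      0x40

int main(void)
{
	/* The last element must end inside the declared storage. */
	assert(DST_IP6_LO_OFFSET + DST_IP6_LO_SIZE <= STORAGE_SIZE);
	return 0;
}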
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index ab710e37af99..84185f8dfbae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -218,32 +218,32 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
switch (attr_type) {
case MLXSW_HWMON_ATTR_TYPE_TEMP:
mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show;
- mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
"temp%u_input", num + 1);
break;
case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX:
mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show;
- mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
"temp%u_highest", num + 1);
break;
case MLXSW_HWMON_ATTR_TYPE_TEMP_RST:
mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_temp_rst_store;
- mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0200;
snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
"temp%u_reset_history", num + 1);
break;
case MLXSW_HWMON_ATTR_TYPE_FAN_RPM:
mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show;
- mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
"fan%u_input", num + 1);
break;
case MLXSW_HWMON_ATTR_TYPE_PWM:
mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show;
mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store;
- mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR | S_IRUGO;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0644;
snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
"pwm%u", num + 1);
break;
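The hwmon hunk above swaps the symbolic permission macros for octal literals, the kernel's preferred sysfs style (checkpatch warns about S_IRUGO and friends because the octal form is easier to read at a glance). The equivalences it relies on, as a quick check:

#include <assert.h>
#include <sys/stat.h>

int main(void)
{
	assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444);	/* S_IRUGO */
	assert(S_IWUSR == 0200);			/* owner write only */
	assert((S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH) == 0644);
	return 0;
}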
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 85faa87bf42d..e30c6ce3dcb4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1519,8 +1519,7 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
u8 *p_status)
{
struct mlxsw_pci *mlxsw_pci = bus_priv;
- dma_addr_t in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
- dma_addr_t out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
+ dma_addr_t in_mapaddr = 0, out_mapaddr = 0;
bool evreq = mlxsw_pci->cmd.nopoll;
unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS);
bool *p_wait_done = &mlxsw_pci->cmd.wait_done;
@@ -1532,11 +1531,15 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
if (err)
return err;
- if (in_mbox)
+ if (in_mbox) {
memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size);
+ in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
+ }
mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr));
mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr));
+ if (out_mbox)
+ out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr));
mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr));
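The mlxsw PCI fix above stops programming the mailbox DMA addresses unconditionally: the device now sees zero for any mailbox the command does not use, rather than the address of an unrelated preallocated buffer. A sketch of the corrected call pattern, with invented helpers:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct mbox { uint64_t mapaddr; char buf[64]; };

static void write_in_param(uint64_t addr)
{
	printf("CIR_IN_PARAM <- 0x%llx\n", (unsigned long long)addr);
}

static void cmd_exec(struct mbox *premapped, const char *in, size_t len)
{
	uint64_t in_mapaddr = 0;	/* no input: device must see zero */

	if (in) {
		memcpy(premapped->buf, in, len);
		in_mapaddr = premapped->mapaddr;
	}
	write_in_param(in_mapaddr);
}

int main(void)
{
	struct mbox mbox = { .mapaddr = 0x1000 };

	cmd_exec(&mbox, "hello", 5);	/* programs the real address */
	cmd_exec(&mbox, NULL, 0);	/* programs zero */
	return 0;
}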
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0e08be41c8e0..6218231e379e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1,11 +1,11 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2016 Ido Schimmel <[email protected]>
* Copyright (c) 2015 Elad Raz <[email protected]>
* Copyright (c) 2015-2017 Jiri Pirko <[email protected]>
* Copyright (c) 2016 Yotam Gigi <[email protected]>
- * Copyright (c) 2017 Petr Machata <[email protected]>
+ * Copyright (c) 2017-2018 Petr Machata <[email protected]>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -2872,6 +2872,14 @@ static inline void mlxsw_reg_pmtu_pack(char *payload, u8 local_port,
MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN);
+/* reg_ptys_an_disable_admin
+ * Auto negotiation disable administrative configuration
+ * 0 - Device doesn't support AN disable.
+ * 1 - Device supports AN disable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, an_disable_admin, 0x00, 30, 1);
+
/* reg_ptys_local_port
* Local port number.
* Access: Index
@@ -3000,12 +3008,13 @@ MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port,
- u32 proto_admin)
+ u32 proto_admin, bool autoneg)
{
MLXSW_REG_ZERO(ptys, payload);
mlxsw_reg_ptys_local_port_set(payload, local_port);
mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH);
mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin);
+ mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg);
}
static inline void mlxsw_reg_ptys_eth_unpack(char *payload,
@@ -4216,6 +4225,12 @@ MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1);
*/
MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1);
+/* reg_ritr_ipv6_mc
+ * IPv6 multicast routing enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6_mc, 0x00, 26, 1);
+
enum mlxsw_reg_ritr_if_type {
/* VLAN interface. */
MLXSW_REG_RITR_VLAN_IF,
@@ -4281,6 +4296,14 @@ MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
*/
MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1);
+/* reg_ritr_ipv6_mc_fe
+ * IPv6 Multicast Forwarding Enable.
+ * When disabled, forwarding is blocked but local traffic (traps and IP to me)
+ * will be enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6_mc_fe, 0x04, 26, 1);
+
/* reg_ritr_lb_en
* Loop-back filter enable for unicast packets.
* If the flag is set then loop-back filter for unicast packets is
@@ -4504,12 +4527,14 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
mlxsw_reg_ritr_ipv4_set(payload, 1);
mlxsw_reg_ritr_ipv6_set(payload, 1);
mlxsw_reg_ritr_ipv4_mc_set(payload, 1);
+ mlxsw_reg_ritr_ipv6_mc_set(payload, 1);
mlxsw_reg_ritr_type_set(payload, type);
mlxsw_reg_ritr_op_set(payload, op);
mlxsw_reg_ritr_rif_set(payload, rif);
mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
mlxsw_reg_ritr_ipv6_fe_set(payload, 1);
mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1);
+ mlxsw_reg_ritr_ipv6_mc_fe_set(payload, 1);
mlxsw_reg_ritr_lb_en_set(payload, 1);
mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
mlxsw_reg_ritr_mtu_set(payload, mtu);
@@ -6293,30 +6318,34 @@ MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1);
*/
MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16);
-/* reg_rmft2_dip4
- * Destination IPv4 address
+/* reg_rmft2_dip{4,6}
+ * Destination IPv4/6 address
* Access: RW
*/
+MLXSW_ITEM_BUF(reg, rmft2, dip6, 0x10, 16);
MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32);
-/* reg_rmft2_dip4_mask
+/* reg_rmft2_dip{4,6}_mask
* A bit that is set directs the TCAM to compare the corresponding bit in key. A
* bit that is clear directs the TCAM to ignore the corresponding bit in key.
* Access: RW
*/
+MLXSW_ITEM_BUF(reg, rmft2, dip6_mask, 0x20, 16);
MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32);
-/* reg_rmft2_sip4
- * Source IPv4 address
+/* reg_rmft2_sip{4,6}
+ * Source IPv4/6 address
* Access: RW
*/
+MLXSW_ITEM_BUF(reg, rmft2, sip6, 0x30, 16);
MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32);
-/* reg_rmft2_sip4_mask
+/* reg_rmft2_sip{4,6}_mask
* A bit that is set directs the TCAM to compare the corresponding bit in key. A
* bit that is clear directs the TCAM to ignore the corresponding bit in key.
* Access: RW
*/
+MLXSW_ITEM_BUF(reg, rmft2, sip6_mask, 0x40, 16);
MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32);
/* reg_rmft2_flexible_action_set
@@ -6334,26 +6363,52 @@ MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80,
MLXSW_REG_FLEX_ACTION_SET_LEN);
static inline void
-mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router,
- enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
- u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask,
- const char *flexible_action_set)
+mlxsw_reg_rmft2_common_pack(char *payload, bool v, u16 offset,
+ u16 virtual_router,
+ enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+ const char *flex_action_set)
{
MLXSW_REG_ZERO(rmft2, payload);
mlxsw_reg_rmft2_v_set(payload, v);
- mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4);
mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE);
mlxsw_reg_rmft2_offset_set(payload, offset);
mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router);
mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask);
mlxsw_reg_rmft2_irif_set(payload, irif);
+ if (flex_action_set)
+ mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload,
+ flex_action_set);
+}
+
+static inline void
+mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router,
+ enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+ u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask,
+ const char *flexible_action_set)
+{
+ mlxsw_reg_rmft2_common_pack(payload, v, offset, virtual_router,
+ irif_mask, irif, flexible_action_set);
+ mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4);
mlxsw_reg_rmft2_dip4_set(payload, dip4);
mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask);
mlxsw_reg_rmft2_sip4_set(payload, sip4);
mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask);
- if (flexible_action_set)
- mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload,
- flexible_action_set);
+}
+
+static inline void
+mlxsw_reg_rmft2_ipv6_pack(char *payload, bool v, u16 offset, u16 virtual_router,
+ enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+ struct in6_addr dip6, struct in6_addr dip6_mask,
+ struct in6_addr sip6, struct in6_addr sip6_mask,
+ const char *flexible_action_set)
+{
+ mlxsw_reg_rmft2_common_pack(payload, v, offset, virtual_router,
+ irif_mask, irif, flexible_action_set);
+ mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV6);
+ mlxsw_reg_rmft2_dip6_memcpy_to(payload, (void *)&dip6);
+ mlxsw_reg_rmft2_dip6_mask_memcpy_to(payload, (void *)&dip6_mask);
+ mlxsw_reg_rmft2_sip6_memcpy_to(payload, (void *)&sip6);
+ mlxsw_reg_rmft2_sip6_mask_memcpy_to(payload, (void *)&sip6_mask);
}
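Note the field layout above: dip4 at offset 0x1C is the last word of the 16-byte dip6 field at 0x10 (and likewise sip4/sip6), so both protocols share one TCAM key slot. With the common-pack split, an IPv6 entry is packed much like the IPv4 one; a hypothetical catch-all (*,G) entry might look as follows (offset, virtual_router, flexible_action_set and the IRIF_MASK_IGNORE enumerator are assumed from surrounding code, not shown in this hunk):

    char rmft2_pl[MLXSW_REG_RMFT2_LEN];
    struct in6_addr any = IN6ADDR_ANY_INIT;
    int err;

    /* An all-zero mask directs the TCAM to ignore every key bit, so
     * this entry matches any source and any group. */
    mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, offset, virtual_router,
                              MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
                              any, any, any, any, flexible_action_set);
    err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);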
/* MFCR - Management Fan Control Register
@@ -6772,8 +6827,104 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
*/
MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
+enum mlxsw_reg_mpat_span_type {
+ /* Local SPAN Ethernet.
+ * The original packet is not encapsulated.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+ /* Encapsulated Remote SPAN Ethernet L3 GRE.
+ * The packet is encapsulated with GRE header.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
- u16 system_port, bool e)
+ u16 system_port, bool e,
+ enum mlxsw_reg_mpat_span_type span_type)
{
MLXSW_REG_ZERO(mpat, payload);
mlxsw_reg_mpat_pa_id_set(payload, pa_id);
@@ -6781,6 +6932,49 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
mlxsw_reg_mpat_e_set(payload, e);
mlxsw_reg_mpat_qos_set(payload, 1);
mlxsw_reg_mpat_be_set(payload, 1);
+ mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+ mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+ enum mlxsw_reg_mpat_eth_rspan_version version,
+ const char *mac,
+ bool tp)
+{
+ mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+ mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+ mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+ const char *smac,
+ u32 sip, u32 dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+ mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+ mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+ const char *smac,
+ struct in6_addr sip, struct in6_addr dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+ mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+ mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
}
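Putting the new MPAT pieces together: an encapsulated (ERSPAN-style) mirror agent is packed in layers, base entry first, then the L2 and L3 parts of the tunnel header. A sketch with hypothetical values (pa_id, system_port, dmac/smac buffers of ETH_ALEN bytes, and u32 sip/dip are assumptions for illustration):

    char mpat_pl[MLXSW_REG_MPAT_LEN];
    int err;

    mlxsw_reg_mpat_pack(mpat_pl, pa_id, system_port, true,
                        MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
    /* VERSION_NO_HEADER plus tp == false: plain GRE, no RSPAN mirror
     * header and no VLAN tag on the encapsulation. */
    mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
                                     MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
                                     dmac, false);
    mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl, 64, smac, sip, dip);
    err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);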
/* MPAR - Monitoring Port Analyzer Register
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3dcc58d61506..4aa84442e357 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2017 Jiri Pirko <[email protected]>
* Copyright (c) 2015 Ido Schimmel <[email protected]>
* Copyright (c) 2015 Elad Raz <[email protected]>
@@ -71,11 +71,12 @@
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_acl_flex_actions.h"
+#include "spectrum_span.h"
#include "../mlxfw/mlxfw.h"
#define MLXSW_FWREV_MAJOR 13
-#define MLXSW_FWREV_MINOR 1530
-#define MLXSW_FWREV_SUBMINOR 152
+#define MLXSW_FWREV_MINOR 1620
+#define MLXSW_FWREV_SUBMINOR 192
#define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
#define MLXSW_SP_FW_FILENAME \
@@ -487,327 +488,6 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
return 0;
}
-static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
-{
- int i;
-
- if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
- return -EIO;
-
- mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
- MAX_SPAN);
- mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
- sizeof(struct mlxsw_sp_span_entry),
- GFP_KERNEL);
- if (!mlxsw_sp->span.entries)
- return -ENOMEM;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++)
- INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
-
- return 0;
-}
-
-static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
-{
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
- }
- kfree(mlxsw_sp->span.entries);
-}
-
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- struct mlxsw_sp_span_entry *span_entry;
- char mpat_pl[MLXSW_REG_MPAT_LEN];
- u8 local_port = port->local_port;
- int index;
- int i;
- int err;
-
- /* find a free entry to use */
- index = -1;
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- if (!mlxsw_sp->span.entries[i].used) {
- index = i;
- span_entry = &mlxsw_sp->span.entries[i];
- break;
- }
- }
- if (index < 0)
- return NULL;
-
- /* create a new port analyzer entry for local_port */
- mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
- if (err)
- return NULL;
-
- span_entry->used = true;
- span_entry->id = index;
- span_entry->ref_count = 1;
- span_entry->local_port = local_port;
- return span_entry;
-}
-
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_span_entry *span_entry)
-{
- u8 local_port = span_entry->local_port;
- char mpat_pl[MLXSW_REG_MPAT_LEN];
- int pa_id = span_entry->id;
-
- mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
- span_entry->used = false;
-}
-
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
-{
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- if (curr->used && curr->local_port == local_port)
- return curr;
- }
- return NULL;
-}
-
-static struct mlxsw_sp_span_entry
-*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp_span_entry *span_entry;
-
- span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
- port->local_port);
- if (span_entry) {
- /* Already exists, just take a reference */
- span_entry->ref_count++;
- return span_entry;
- }
-
- return mlxsw_sp_span_entry_create(port);
-}
-
-static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_span_entry *span_entry)
-{
- WARN_ON(!span_entry->ref_count);
- if (--span_entry->ref_count == 0)
- mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
- return 0;
-}
-
-static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- struct mlxsw_sp_span_inspected_port *p;
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- list_for_each_entry(p, &curr->bound_ports_list, list)
- if (p->local_port == port->local_port &&
- p->type == MLXSW_SP_SPAN_EGRESS)
- return true;
- }
-
- return false;
-}
-
-static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
- int mtu)
-{
- return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
-}
-
-static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
- int err;
-
- /* If port is egress mirrored, the shared buffer size should be
- * updated according to the mtu value
- */
- if (mlxsw_sp_span_is_egress_mirror(port)) {
- u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
-
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- if (err) {
- netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
- return err;
- }
- }
-
- return 0;
-}
-
-static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry)
-{
- struct mlxsw_sp_span_inspected_port *p;
-
- list_for_each_entry(p, &span_entry->bound_ports_list, list)
- if (port->local_port == p->local_port)
- return p;
- return NULL;
-}
-
-static int
-mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char mpar_pl[MLXSW_REG_MPAR_LEN];
- int pa_id = span_entry->id;
-
- /* bind the port to the SPAN entry */
- mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
- (enum mlxsw_reg_mpar_i_e) type, bind, pa_id);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
-}
-
-static int
-mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp_span_inspected_port *inspected_port;
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
- int err;
-
- /* if it is an egress SPAN, bind a shared buffer to it */
- if (type == MLXSW_SP_SPAN_EGRESS) {
- u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
- port->dev->mtu);
-
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- if (err) {
- netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
- return err;
- }
- }
-
- if (bind) {
- err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- true);
- if (err)
- goto err_port_bind;
- }
-
- inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
- if (!inspected_port) {
- err = -ENOMEM;
- goto err_inspected_port_alloc;
- }
- inspected_port->local_port = port->local_port;
- inspected_port->type = type;
- list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
-
- return 0;
-
-err_inspected_port_alloc:
- if (bind)
- mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- false);
-err_port_bind:
- if (type == MLXSW_SP_SPAN_EGRESS) {
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- }
- return err;
-}
-
-static void
-mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp_span_inspected_port *inspected_port;
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
-
- inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
- if (!inspected_port)
- return;
-
- if (bind)
- mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- false);
- /* remove the SBIB buffer if it was egress SPAN */
- if (type == MLXSW_SP_SPAN_EGRESS) {
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- }
-
- mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-
- list_del(&inspected_port->list);
- kfree(inspected_port);
-}
-
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type, bool bind)
-{
- struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
- struct mlxsw_sp_span_entry *span_entry;
- int err;
-
- span_entry = mlxsw_sp_span_entry_get(to);
- if (!span_entry)
- return -ENOENT;
-
- netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
- span_entry->id);
-
- err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
- if (err)
- goto err_port_bind;
-
- return 0;
-
-err_port_bind:
- mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
- return err;
-}
-
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
- enum mlxsw_sp_span_type type, bool bind)
-{
- struct mlxsw_sp_span_entry *span_entry;
-
- span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
- destination_port);
- if (!span_entry) {
- netdev_err(from->dev, "no span entry found\n");
- return;
- }
-
- netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
- span_entry->id);
- mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
-}
-
static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
bool enable, u32 rate)
{
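All of the SPAN code removed here moves into the new spectrum_span.c (note the spectrum_span.h include added above), with entries keyed by a SPAN agent id and a destination netdev instead of a destination local port. Reconstructed from the call sites that remain in this diff, the module's exported surface is approximately:

    int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
    void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
    void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
    int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
                                 const struct net_device *to_dev,
                                 enum mlxsw_sp_span_type type,
                                 bool bind, int *p_span_id);
    void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
                                  enum mlxsw_sp_span_type type, bool bind);
    struct mlxsw_sp_span_entry *
    mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
                                     const struct net_device *to_dev);
    void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_span_entry *span_entry);

Decoupling the destination from a Spectrum local port is what allows mirroring to, e.g., a gretap device using the ERSPAN MPAT fields added in reg.h above.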
@@ -1360,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
xstats->tail_drop[i] =
mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
}
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+ i, ppcnt_pl);
+ if (err)
+ continue;
+
+ xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+ xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+ }
}
static void update_stats_cache(struct work_struct *work)
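The added loop extends the cached xstats with per-priority Tx counters, read from the PPCNT per-priority group the same way the per-TC drop counters are read above it. Consumers index by IEEE 802.1Q priority; e.g., a hypothetical aggregate over the new arrays:

    u64 total_tx_packets = 0;
    int prio;

    for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
        total_tx_packets += xstats->tx_packets[prio];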
@@ -1459,6 +1149,7 @@ mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
}
mlxsw_sp_port_vlan->mlxsw_sp_port = mlxsw_sp_port;
+ mlxsw_sp_port_vlan->ref_count = 1;
mlxsw_sp_port_vlan->vid = vid;
list_add(&mlxsw_sp_port_vlan->list, &mlxsw_sp_port->vlans_list);
@@ -1486,8 +1177,10 @@ mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
- if (mlxsw_sp_port_vlan)
+ if (mlxsw_sp_port_vlan) {
+ mlxsw_sp_port_vlan->ref_count++;
return mlxsw_sp_port_vlan;
+ }
return mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid);
}
@@ -1496,6 +1189,9 @@ void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+ if (--mlxsw_sp_port_vlan->ref_count != 0)
+ return;
+
if (mlxsw_sp_port_vlan->bridge_port)
mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
else if (fid)
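With the new counter, mlxsw_sp_port_vlan_get()/..._put() form a conventional reference-counted pair: get returns an existing {port, VID} entry with the count bumped, or creates one with ref_count == 1; put destroys only on the last reference. The expected calling pattern, assuming the ERR_PTR convention of _create():

    struct mlxsw_sp_port_vlan *pv;

    pv = mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid);
    if (IS_ERR(pv))
        return PTR_ERR(pv);

    /* ... use the {port, vid} binding ... */

    mlxsw_sp_port_vlan_put(pv); /* frees only when ref_count drops to 0 */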
@@ -1578,7 +1274,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
bool ingress)
{
enum mlxsw_sp_span_type span_type;
- struct mlxsw_sp_port *to_port;
struct net_device *to_dev;
to_dev = tcf_mirred_dev(a);
@@ -1587,17 +1282,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
return -EINVAL;
}
- if (!mlxsw_sp_port_dev_check(to_dev)) {
- netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
- return -EOPNOTSUPP;
- }
- to_port = netdev_priv(to_dev);
-
- mirror->to_local_port = to_port->local_port;
mirror->ingress = ingress;
span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
- true);
+ return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+ true, &mirror->span_id);
}
static void
@@ -1608,7 +1296,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
span_type = mirror->ingress ?
MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+ mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
span_type, true);
}
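The mirror entry now records the SPAN agent id handed back by mlxsw_sp_span_mirror_add() rather than a destination local port, so teardown needs no netdev lookup and the destination need not be a Spectrum port (hence the dropped -EOPNOTSUPP check). In outline:

    struct mlxsw_sp_port_mall_mirror_tc_entry mirror = { .ingress = ingress };
    int err;

    err = mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
                                   true, &mirror.span_id);
    if (err)
        return err;
    /* ... later, teardown is keyed purely by the agent id: */
    mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror.span_id, span_type, true);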
@@ -2702,7 +2390,7 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
int err;
autoneg = mlxsw_sp_port->link.autoneg;
- mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
if (err)
return err;
@@ -2736,7 +2424,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
bool autoneg;
int err;
- mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
if (err)
return err;
@@ -2754,7 +2442,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
}
mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
- eth_proto_new);
+ eth_proto_new, autoneg);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
if (err)
return err;
@@ -2965,7 +2653,7 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed);
mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
- eth_proto_admin);
+ eth_proto_admin, mlxsw_sp_port->link.autoneg);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
}
@@ -3692,6 +3380,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false),
/* Multicast Router Traps */
MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false),
+ MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
@@ -3995,14 +3684,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_afa_init;
}
+ err = mlxsw_sp_span_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+ goto err_span_init;
+ }
+
+ /* Initialize router after SPAN is initialized, so that the FIB and
+ * neighbor event handlers can issue SPAN respin.
+ */
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
goto err_router_init;
}
- /* Initialize netdevice notifier after router is initialized, so that
- * the event handler can use router structures.
+ /* Initialize netdevice notifier after router and SPAN are initialized,
+ * so that the event handler can use router structures and call SPAN
+ * respin.
*/
mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
@@ -4011,12 +3710,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_netdev_notifier;
}
- err = mlxsw_sp_span_init(mlxsw_sp);
- if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
- goto err_span_init;
- }
-
err = mlxsw_sp_acl_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -4042,12 +3735,12 @@ err_ports_create:
err_dpipe_init:
mlxsw_sp_acl_fini(mlxsw_sp);
err_acl_init:
- mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
+ mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -4073,9 +3766,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
- mlxsw_sp_span_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
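SPAN now comes up before the router and is torn down after it, and the error-unwind labels in mlxsw_sp_init() mirror the new mlxsw_sp_fini() exactly:

    /* init:  ... afa -> span -> router -> netdevice notifier -> acl ...
     * fini:  ... acl -> netdevice notifier -> router -> span -> afa ...
     *
     * SPAN must outlive both the router and the notifier because either
     * may call mlxsw_sp_span_respin() (see the comments added above).
     */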
@@ -4087,12 +3780,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
}
static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
- .used_max_vepa_channels = 1,
- .max_vepa_channels = 0,
.used_max_mid = 1,
.max_mid = MLXSW_SP_MID_MAX,
- .used_max_pgt = 1,
- .max_pgt = 0,
.used_flood_tables = 1,
.used_flood_mode = 1,
.flood_mode = 3,
@@ -4118,70 +3807,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
.resource_query_enable = 1,
};
-static bool
-mlxsw_sp_resource_kvd_granularity_validate(struct netlink_ext_ack *extack,
- u64 size)
-{
- const struct mlxsw_config_profile *profile;
-
- profile = &mlxsw_sp_config_profile;
- if (size % profile->kvd_hash_granularity) {
- NL_SET_ERR_MSG_MOD(extack, "resource set with wrong granularity");
- return false;
- }
- return true;
-}
-
-static int
-mlxsw_sp_resource_kvd_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- NL_SET_ERR_MSG_MOD(extack, "kvd size cannot be changed");
- return -EINVAL;
-}
-
-static int
-mlxsw_sp_resource_kvd_linear_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_single_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE)) {
- NL_SET_ERR_MSG_MOD(extack, "hash single size is smaller than minimum");
- return -EINVAL;
- }
- return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_double_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE)) {
- NL_SET_ERR_MSG_MOD(extack, "hash double size is smaller than minimum");
- return -EINVAL;
- }
- return 0;
-}
-
static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
{
struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -4190,30 +3815,16 @@ static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
}
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_ops = {
- .size_validate = mlxsw_sp_resource_kvd_size_validate,
-};
-
static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
- .size_validate = mlxsw_sp_resource_kvd_linear_size_validate,
.occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
};
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_single_ops = {
- .size_validate = mlxsw_sp_resource_kvd_hash_single_size_validate,
-};
-
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_double_ops = {
- .size_validate = mlxsw_sp_resource_kvd_hash_double_size_validate,
-};
-
-static struct devlink_resource_size_params mlxsw_sp_kvd_size_params;
-static struct devlink_resource_size_params mlxsw_sp_linear_size_params;
-static struct devlink_resource_size_params mlxsw_sp_hash_single_size_params;
-static struct devlink_resource_size_params mlxsw_sp_hash_double_size_params;
-
static void
-mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core)
+mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core,
+ struct devlink_resource_size_params *kvd_size_params,
+ struct devlink_resource_size_params *linear_size_params,
+ struct devlink_resource_size_params *hash_double_size_params,
+ struct devlink_resource_size_params *hash_single_size_params)
{
u32 single_size_min = MLXSW_CORE_RES_GET(mlxsw_core,
KVD_SINGLE_MIN_SIZE);
@@ -4222,37 +3833,35 @@ mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core)
u32 kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
u32 linear_size_min = 0;
- /* KVD top resource */
- mlxsw_sp_kvd_size_params.size_min = kvd_size;
- mlxsw_sp_kvd_size_params.size_max = kvd_size;
- mlxsw_sp_kvd_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY;
- mlxsw_sp_kvd_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY;
-
- /* Linear part init */
- mlxsw_sp_linear_size_params.size_min = linear_size_min;
- mlxsw_sp_linear_size_params.size_max = kvd_size - single_size_min -
- double_size_min;
- mlxsw_sp_linear_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY;
- mlxsw_sp_linear_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY;
-
- /* Hash double part init */
- mlxsw_sp_hash_double_size_params.size_min = double_size_min;
- mlxsw_sp_hash_double_size_params.size_max = kvd_size - single_size_min -
- linear_size_min;
- mlxsw_sp_hash_double_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY;
- mlxsw_sp_hash_double_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY;
-
- /* Hash single part init */
- mlxsw_sp_hash_single_size_params.size_min = single_size_min;
- mlxsw_sp_hash_single_size_params.size_max = kvd_size - double_size_min -
- linear_size_min;
- mlxsw_sp_hash_single_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY;
- mlxsw_sp_hash_single_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY;
+ devlink_resource_size_params_init(kvd_size_params, kvd_size, kvd_size,
+ MLXSW_SP_KVD_GRANULARITY,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ devlink_resource_size_params_init(linear_size_params, linear_size_min,
+ kvd_size - single_size_min -
+ double_size_min,
+ MLXSW_SP_KVD_GRANULARITY,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ devlink_resource_size_params_init(hash_double_size_params,
+ double_size_min,
+ kvd_size - single_size_min -
+ linear_size_min,
+ MLXSW_SP_KVD_GRANULARITY,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ devlink_resource_size_params_init(hash_single_size_params,
+ single_size_min,
+ kvd_size - double_size_min -
+ linear_size_min,
+ MLXSW_SP_KVD_GRANULARITY,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
}
static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
{
struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ struct devlink_resource_size_params hash_single_size_params;
+ struct devlink_resource_size_params hash_double_size_params;
+ struct devlink_resource_size_params linear_size_params;
+ struct devlink_resource_size_params kvd_size_params;
u32 kvd_size, single_size, double_size, linear_size;
const struct mlxsw_config_profile *profile;
int err;
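The four file-scope devlink_resource_size_params structs become locals of mlxsw_sp_resources_register(): devlink_resource_size_params_init() just fills a caller-provided struct, and stack allocation is safe assuming devlink copies the params during devlink_resource_register(), which is what this conversion relies on. The helper's shape:

    struct devlink_resource_size_params params;

    devlink_resource_size_params_init(&params, size_min, size_max,
                                      MLXSW_SP_KVD_GRANULARITY,
                                      DEVLINK_RESOURCE_UNIT_ENTRY);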
@@ -4261,48 +3870,55 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE))
return -EIO;
- mlxsw_sp_resource_size_params_prepare(mlxsw_core);
+ mlxsw_sp_resource_size_params_prepare(mlxsw_core, &kvd_size_params,
+ &linear_size_params,
+ &hash_double_size_params,
+ &hash_single_size_params);
+
kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD,
- true, kvd_size,
- MLXSW_SP_RESOURCE_KVD,
+ kvd_size, MLXSW_SP_RESOURCE_KVD,
DEVLINK_RESOURCE_ID_PARENT_TOP,
- &mlxsw_sp_kvd_size_params,
- &mlxsw_sp_resource_kvd_ops);
+ &kvd_size_params,
+ NULL);
if (err)
return err;
linear_size = profile->kvd_linear_size;
err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR,
- false, linear_size,
+ linear_size,
MLXSW_SP_RESOURCE_KVD_LINEAR,
MLXSW_SP_RESOURCE_KVD,
- &mlxsw_sp_linear_size_params,
+ &linear_size_params,
&mlxsw_sp_resource_kvd_linear_ops);
if (err)
return err;
+ err = mlxsw_sp_kvdl_resources_register(devlink);
+ if (err)
+ return err;
+
double_size = kvd_size - linear_size;
double_size *= profile->kvd_hash_double_parts;
double_size /= profile->kvd_hash_double_parts +
profile->kvd_hash_single_parts;
double_size = rounddown(double_size, profile->kvd_hash_granularity);
err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE,
- false, double_size,
+ double_size,
MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
MLXSW_SP_RESOURCE_KVD,
- &mlxsw_sp_hash_double_size_params,
- &mlxsw_sp_resource_kvd_hash_double_ops);
+ &hash_double_size_params,
+ NULL);
if (err)
return err;
single_size = kvd_size - double_size - linear_size;
err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE,
- false, single_size,
+ single_size,
MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
MLXSW_SP_RESOURCE_KVD,
- &mlxsw_sp_hash_single_size_params,
- &mlxsw_sp_resource_kvd_hash_single_ops);
+ &hash_single_size_params,
+ NULL);
if (err)
return err;
@@ -4556,13 +4172,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
u16 lag_id;
if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Exceeded number of supported LAG devices");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
return false;
}
if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
- NL_SET_ERR_MSG(extack,
- "spectrum: LAG device using unsupported Tx type");
+ NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
return false;
}
return true;
@@ -4804,8 +4418,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
!netif_is_lag_master(upper_dev) &&
!netif_is_bridge_master(upper_dev) &&
!netif_is_ovs_master(upper_dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Unknown upper device type");
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
return -EINVAL;
}
if (!info->linking)
@@ -4814,8 +4427,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
upper_dev))) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+ NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
}
if (netif_is_lag_master(upper_dev) &&
@@ -4823,24 +4435,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
info->upper_info, extack))
return -EINVAL;
if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Master device is a LAG master and this device has a VLAN");
+ NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN");
return -EINVAL;
}
if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
!netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Can not put a VLAN on a LAG port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
return -EINVAL;
}
if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Master device is an OVS master and this device has a VLAN");
+ NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
return -EINVAL;
}
if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Can not put a VLAN on an OVS port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
return -EINVAL;
}
break;
@@ -4953,7 +4561,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
case NETDEV_PRECHANGEUPPER:
upper_dev = info->upper_dev;
if (!netif_is_bridge_master(upper_dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers");
+ NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers");
return -EINVAL;
}
if (!info->linking)
@@ -4962,7 +4570,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
upper_dev))) {
- NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+ NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
}
break;
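These hunks all swap NL_SET_ERR_MSG for NL_SET_ERR_MSG_MOD, which prefixes the extack message with KBUILD_MODNAME and thus replaces the hand-written "spectrum: " prefixes:

    NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
    /* equivalent to NL_SET_ERR_MSG(extack, KBUILD_MODNAME ": ...");
     * user space sees e.g. "mlxsw_spectrum: Unknown upper device type". */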
@@ -5040,10 +4648,18 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlxsw_sp_span_entry *span_entry;
struct mlxsw_sp *mlxsw_sp;
int err = 0;
mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+ if (event == NETDEV_UNREGISTER) {
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+ if (span_entry)
+ mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+ }
+ mlxsw_sp_span_respin(mlxsw_sp);
+
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index bdd8f94a452c..21bee8f19894 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -70,16 +70,23 @@
#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear"
#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single"
#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
enum mlxsw_sp_resource_id {
MLXSW_SP_RESOURCE_KVD,
MLXSW_SP_RESOURCE_KVD_LINEAR,
MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
};
struct mlxsw_sp_port;
struct mlxsw_sp_rif;
+struct mlxsw_sp_span_entry;
struct mlxsw_sp_upper {
struct net_device *dev;
@@ -111,32 +118,13 @@ struct mlxsw_sp_mid {
unsigned long *ports_in_mid; /* bits array */
};
-enum mlxsw_sp_span_type {
- MLXSW_SP_SPAN_EGRESS,
- MLXSW_SP_SPAN_INGRESS
-};
-
-struct mlxsw_sp_span_inspected_port {
- struct list_head list;
- enum mlxsw_sp_span_type type;
- u8 local_port;
-};
-
-struct mlxsw_sp_span_entry {
- u8 local_port;
- bool used;
- struct list_head bound_ports_list;
- int ref_count;
- int id;
-};
-
enum mlxsw_sp_port_mall_action_type {
MLXSW_SP_PORT_MALL_MIRROR,
MLXSW_SP_PORT_MALL_SAMPLE,
};
struct mlxsw_sp_port_mall_mirror_tc_entry {
- u8 to_local_port;
+ int span_id;
bool ingress;
};
@@ -211,6 +199,7 @@ struct mlxsw_sp_port_vlan {
struct list_head list;
struct mlxsw_sp_port *mlxsw_sp_port;
struct mlxsw_sp_fid *fid;
+ unsigned int ref_count;
u16 vid;
struct mlxsw_sp_bridge_port *bridge_port;
struct list_head bridge_vlan_node;
@@ -222,6 +211,8 @@ struct mlxsw_sp_port_xstats {
u64 wred_drop[TC_MAX_QUEUE];
u64 tail_drop[TC_MAX_QUEUE];
u64 backlog[TC_MAX_QUEUE];
+ u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+ u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
};
struct mlxsw_sp_port {
@@ -259,6 +250,7 @@ struct mlxsw_sp_port {
struct mlxsw_sp_port_sample *sample;
struct list_head vlans_list;
struct mlxsw_sp_qdisc *root_qdisc;
+ struct mlxsw_sp_qdisc *tclass_qdiscs;
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
@@ -396,16 +388,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type,
- bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from,
- u8 destination_port,
- enum mlxsw_sp_span_type type,
- bool bind);
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
/* spectrum_dcb.c */
#ifdef CONFIG_MLXSW_SPECTRUM_DCB
@@ -461,6 +443,7 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
unsigned int entry_count,
unsigned int *p_alloc_size);
u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink);
struct mlxsw_sp_acl_rule_info {
unsigned int priority;
@@ -552,6 +535,7 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
u16 group_id);
+int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 0897a5435cc2..79b1fa27a9a4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -160,6 +160,13 @@ bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block)
return block->disable_count;
}
+static bool
+mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ /* We hold a reference on ruleset ourselves */
+ return ruleset->ref_count == 2;
+}
+
static int
mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
@@ -341,21 +348,8 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_ht_insert;
- if (!chain_index) {
- /* We only need ruleset with chain index 0, the implicit one,
- * to be directly bound to device. The rest of the rulesets
- * are bound by "Goto action set".
- */
- err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
- if (err)
- goto err_ruleset_bind;
- }
-
return ruleset;
-err_ruleset_bind:
- rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
- mlxsw_sp_acl_ruleset_ht_params);
err_ht_insert:
ops->ruleset_del(mlxsw_sp, ruleset->priv);
err_ops_ruleset_add:
@@ -369,12 +363,8 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_ruleset *ruleset)
{
const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
- struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
- u32 chain_index = ruleset->ht_key.chain_index;
struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
- if (!chain_index)
- mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, block);
rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
mlxsw_sp_acl_ruleset_ht_params);
ops->ruleset_del(mlxsw_sp, ruleset->priv);
@@ -528,6 +518,11 @@ int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
return mlxsw_afa_block_jump(rulei->act_block, group_id);
}
+int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei)
+{
+ return mlxsw_afa_block_terminate(rulei->act_block);
+}
+
int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
{
return mlxsw_afa_block_append_drop(rulei->act_block);
@@ -572,7 +567,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
struct net_device *out_dev)
{
struct mlxsw_sp_acl_block_binding *binding;
- struct mlxsw_sp_port *out_port;
struct mlxsw_sp_port *in_port;
if (!list_is_singular(&block->binding_list))
@@ -581,16 +575,10 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
binding = list_first_entry(&block->binding_list,
struct mlxsw_sp_acl_block_binding, list);
in_port = binding->mlxsw_sp_port;
- if (!mlxsw_sp_port_dev_check(out_dev))
- return -EINVAL;
-
- out_port = netdev_priv(out_dev);
- if (out_port->mlxsw_sp != mlxsw_sp)
- return -EINVAL;
return mlxsw_afa_block_append_mirror(rulei->act_block,
in_port->local_port,
- out_port->local_port,
+ out_dev,
binding->ingress);
}
@@ -695,10 +683,25 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_rhashtable_insert;
+ if (!ruleset->ht_key.chain_index &&
+ mlxsw_sp_acl_ruleset_is_singular(ruleset)) {
+ /* We only need ruleset with chain index 0, the implicit
+ * one, to be directly bound to device. The rest of the
+ * rulesets are bound by "Goto action set".
+ */
+ err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
+ ruleset->ht_key.block);
+ if (err)
+ goto err_ruleset_block_bind;
+ }
+
list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
ruleset->ht_key.block->rule_count++;
return 0;
+err_ruleset_block_bind:
+ rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
+ mlxsw_sp_acl_rule_ht_params);
err_rhashtable_insert:
ops->rule_del(mlxsw_sp, rule->priv);
return err;
@@ -712,6 +715,10 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
ruleset->ht_key.block->rule_count--;
list_del(&rule->list);
+ if (!ruleset->ht_key.chain_index &&
+ mlxsw_sp_acl_ruleset_is_singular(ruleset))
+ mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset,
+ ruleset->ht_key.block);
rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
mlxsw_sp_acl_rule_ht_params);
ops->rule_del(mlxsw_sp, rule->priv);
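Binding of the implicit chain-0 ruleset to the device thus moves from ruleset create/destroy to first-rule add/last-rule del. The ref_count == 2 test in mlxsw_sp_acl_ruleset_is_singular() reads as a small ledger:

    /* References held on a ruleset while a rule is being added/removed:
     *
     *   1 - taken at creation, for the entry in acl->ruleset_ht
     *   1 - held by the caller currently manipulating the rule
     *
     * ref_count == 2 therefore means no other user shares the ruleset:
     * any third reference would imply the ruleset is already bound, so
     * only the singular case performs the device bind/unbind here.
     */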
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index 6ca6894125f0..510ce48d87f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <[email protected]>
* Copyright (c) 2017 Yotam Gigi <[email protected]>
*
@@ -35,6 +35,7 @@
#include "spectrum_acl_flex_actions.h"
#include "core_acl_flex_actions.h"
+#include "spectrum_span.h"
#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
@@ -125,40 +126,23 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
}
static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id)
{
- struct mlxsw_sp_port *in_port, *out_port;
- struct mlxsw_sp_span_entry *span_entry;
+ struct mlxsw_sp_port *in_port;
struct mlxsw_sp *mlxsw_sp = priv;
enum mlxsw_sp_span_type type;
- int err;
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- out_port = mlxsw_sp->ports[local_out_port];
in_port = mlxsw_sp->ports[local_in_port];
- err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
- if (err)
- return err;
-
- span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
- if (!span_entry) {
- err = -ENOENT;
- goto err_span_entry_find;
- }
-
- *p_span_id = span_entry->id;
- return 0;
-
-err_span_entry_find:
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
- return err;
+ return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+ false, p_span_id);
}
static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
{
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *in_port;
@@ -167,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
in_port = mlxsw_sp->ports[local_in_port];
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+ mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
}
static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index c6e180c2be1e..ad1b548e3cac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -228,10 +228,6 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
- err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
- if (err)
- goto err_group_update;
-
err = rhashtable_init(&group->chunk_ht,
&mlxsw_sp_acl_tcam_chunk_ht_params);
if (err)
@@ -240,7 +236,6 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
return 0;
err_rhashtable_init:
-err_group_update:
mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 93728c694e6d..0a9adc5962fb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -385,13 +385,13 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = {
static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_SB_CM(10000, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index bbd238e50f05..54262af4e98f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -112,11 +112,11 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1,
[MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1,
[MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1,
+ [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1,
};
static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1,
- [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1,
};
static const int *mlxsw_sp_packet_type_sfgc_types[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 6ce00e28d4ea..89dbf569dff5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -65,7 +65,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
tcf_exts_to_list(exts, &actions);
list_for_each_entry(a, &actions, list) {
if (is_tcf_gact_ok(a)) {
- err = mlxsw_sp_acl_rulei_act_continue(rulei);
+ err = mlxsw_sp_acl_rulei_act_terminate(rulei);
if (err)
return err;
} else if (is_tcf_gact_shot(a)) {
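Switching gact "ok" from the continue action to the new terminate action aligns hardware with the software datapath, where "ok" ends classification for the packet instead of letting it keep matching further rules. The two rulei helpers, roughly:

    err = mlxsw_sp_acl_rulei_act_continue(rulei);  /* keep evaluating
                                                    * subsequent rules */
    err = mlxsw_sp_acl_rulei_act_terminate(rulei); /* stop the lookup and
                                                    * accept the packet */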
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 7502e53447bd..98d896c14b87 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -1,7 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <[email protected]>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <[email protected]>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -33,126 +33,125 @@
*/
#include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
#include "spectrum_ipip.h"
struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev)
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return tun->parms;
}
-static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms)
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+ struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+ return tun->parms;
+}
+
+static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
{
return !!(parms.i_flags & TUNNEL_KEY);
}
-static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms)
+static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
{
return !!(parms.o_flags & TUNNEL_KEY);
}
-static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
{
- return mlxsw_sp_ipip_parms_has_ikey(parms) ?
+ return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
be32_to_cpu(parms.i_key) : 0;
}
-static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
{
- return mlxsw_sp_ipip_parms_has_okey(parms) ?
+ return mlxsw_sp_ipip_parms4_has_okey(parms) ?
be32_to_cpu(parms.o_key) : 0;
}
-static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
{
- return parms.iph.saddr;
+ return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
}
static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto,
- struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
{
- switch (proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- return (union mlxsw_sp_l3addr) {
- .addr4 = mlxsw_sp_ipip_parms_saddr4(parms),
- };
- case MLXSW_SP_L3_PROTO_IPV6:
- break;
- }
-
- WARN_ON(1);
- return (union mlxsw_sp_l3addr) {
- .addr4 = 0,
- };
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
}
-static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
{
- return parms.iph.daddr;
+ return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
}
static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto,
- struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
{
+ struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
+
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- return (union mlxsw_sp_l3addr) {
- .addr4 = mlxsw_sp_ipip_parms_daddr4(parms),
- };
+ parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ return mlxsw_sp_ipip_parms4_saddr(parms4);
case MLXSW_SP_L3_PROTO_IPV6:
- break;
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_saddr(parms6);
}
WARN_ON(1);
- return (union mlxsw_sp_l3addr) {
- .addr4 = 0,
- };
-}
-
-static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ return (union mlxsw_sp_l3addr) {0};
}
-static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
{
- return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
-static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
+ struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
-static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
}
-union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
- return mlxsw_sp_ipip_parms_saddr(proto,
- mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
+ struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
-static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ return mlxsw_sp_ipip_parms4_daddr(parms4);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_daddr(parms6);
+ }
+
+ WARN_ON(1);
+ return (union mlxsw_sp_l3addr) {0};
}
-static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
- const struct net_device *ol_dev)
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
{
- return mlxsw_sp_ipip_parms_daddr(proto,
- mlxsw_sp_ipip_netdev_parms(ol_dev));
+ union mlxsw_sp_l3addr naddr = {0};
+
+ return !memcmp(&addr, &naddr, sizeof(naddr));
}
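The new helpers treat an all-zero address of either family as "unset". A minimal standalone sketch of the comparison, using simplified stand-in types rather than the driver's actual definitions:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-in for union mlxsw_sp_l3addr: the 4-byte IPv4 address and
     * the 16-byte IPv6 address share the same storage.
     */
    union l3addr {
            uint32_t addr4;
            uint8_t addr6[16];
    };

    static int l3addr_is_zero(union l3addr addr)
    {
            union l3addr zero = {0};

            /* Bytewise compare of the whole union keeps the check
             * protocol-agnostic; the driver builds these unions with
             * compound literals, which in practice zero the bytes
             * beyond the initialized member.
             */
            return !memcmp(&addr, &zero, sizeof(zero));
    }

    int main(void)
    {
            union l3addr a = { .addr4 = 0x0100007fU };  /* 127.0.0.1, LE */
            union l3addr b = {0};

            printf("a zero? %d, b zero? %d\n",
                   l3addr_is_zero(a), l3addr_is_zero(b));
            return 0;
    }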
static int
@@ -176,12 +175,17 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
u32 tunnel_index,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
- bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
- u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
char rtdp_pl[MLXSW_REG_RTDP_LEN];
+ struct ip_tunnel_parm parms;
unsigned int type_check;
+ bool has_ikey;
u32 daddr4;
+ u32 ikey;
+
+ parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
+ has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
+ ikey = mlxsw_sp_ipip_parms4_ikey(parms);
mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
@@ -243,15 +247,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
{
union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
- union mlxsw_sp_l3addr naddr = {0};
/* Tunnels with unset local or remote address are valid in Linux and
* used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
* (NBMA) tunnels. In principle these can be offloaded, but the driver
* currently doesn't support this. So punt.
*/
- return memcmp(&saddr, &naddr, sizeof(naddr)) &&
- memcmp(&daddr, &naddr, sizeof(naddr));
+ return !mlxsw_sp_l3addr_is_zero(saddr) &&
+ !mlxsw_sp_l3addr_is_zero(daddr);
}
static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
@@ -273,14 +276,15 @@ static struct mlxsw_sp_rif_ipip_lb_config
mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev)
{
+ struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
- lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+ lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
return (struct mlxsw_sp_rif_ipip_lb_config){
.lb_ipipt = lb_ipipt,
- .okey = mlxsw_sp_ipip_netdev_okey(ol_dev),
+ .okey = mlxsw_sp_ipip_parms4_okey(parms),
.ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
.saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
ol_dev),
@@ -300,16 +304,12 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
bool update_nhs = false;
int err = 0;
- new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev);
+ new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
- new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
- new_parms);
- old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
- ipip_entry->parms);
- new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
- new_parms);
- old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
- ipip_entry->parms);
+ new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
+ old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
+ new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
+ old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
@@ -326,14 +326,14 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
}
update_tunnel = true;
- } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) !=
- mlxsw_sp_ipip_parms_okey(new_parms)) ||
- ipip_entry->parms.link != new_parms.link) {
+ } else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
+ mlxsw_sp_ipip_parms4_okey(new_parms)) ||
+ ipip_entry->parms4.link != new_parms.link) {
update_tunnel = true;
} else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
update_nhs = true;
- } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) !=
- mlxsw_sp_ipip_parms_ikey(new_parms)) {
+ } else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
+ mlxsw_sp_ipip_parms4_ikey(new_parms)) {
update_decap = true;
}
@@ -350,7 +350,7 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
false, false, false,
extack);
- ipip_entry->parms = new_parms;
+ ipip_entry->parms4 = new_parms;
return err;
}
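Taken together, the branches above form a small decision ladder: which tunnel parameter changed decides how much of the offload has to be redone. An informal model of that classification (names are stand-ins, not the driver's code):

    /* saddr, okey or link change: rebuild the loopback RIF and tunnel;
     * daddr change: refresh nexthops only; ikey change: refresh only
     * the decap entry.
     */
    enum gre4_update { UPDATE_TUNNEL, UPDATE_NHS, UPDATE_DECAP, UPDATE_NONE };

    static enum gre4_update
    gre4_classify(int saddr_changed, int okey_or_link_changed,
                  int daddr_changed, int ikey_changed)
    {
            if (saddr_changed || okey_or_link_changed)
                    return UPDATE_TUNNEL;
            if (daddr_changed)
                    return UPDATE_NHS;
            if (ikey_changed)
                    return UPDATE_DECAP;
            return UPDATE_NONE;
    }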
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 04b08d9d76e9..6909d867bb59 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -1,7 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <[email protected]>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <[email protected]>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -37,14 +37,19 @@
#include "spectrum_router.h"
#include <net/ip_fib.h>
+#include <linux/if_tunnel.h>
struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev);
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_GRE4,
MLXSW_SP_IPIP_TYPE_MAX,
@@ -56,7 +61,9 @@ struct mlxsw_sp_ipip_entry {
struct mlxsw_sp_rif_ipip_lb *ol_lb;
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
- struct ip_tunnel_parm parms;
+ union {
+ struct ip_tunnel_parm parms4;
+ };
};
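The anonymous union leaves room for per-protocol tunnel parameters while only the IPv4 member exists so far; a sketch of how it might grow once an IPv6 underlay is supported (the parms6 member is an assumption, not part of this patch):

    union {
            struct ip_tunnel_parm parms4;
            struct __ip6_tnl_parm parms6;   /* hypothetical */
    };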
struct mlxsw_sp_ipip_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 55f9d2d70f9e..85503e93b93f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -67,7 +67,7 @@ struct mlxsw_sp_kvdl_part_info {
struct mlxsw_sp_kvdl_part {
struct list_head list;
- const struct mlxsw_sp_kvdl_part_info *info;
+ struct mlxsw_sp_kvdl_part_info *info;
unsigned long usage[0]; /* Entries */
};
@@ -188,21 +188,27 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+enum mlxsw_sp_kvdl_part_id {
+ MLXSW_SP_KVDL_PART_SINGLE,
+ MLXSW_SP_KVDL_PART_CHUNKS,
+ MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
+};
+
static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
{
- .part_index = 0,
+ .part_index = MLXSW_SP_KVDL_PART_SINGLE,
.start_index = MLXSW_SP_KVDL_SINGLE_BASE,
.end_index = MLXSW_SP_KVDL_SINGLE_END,
.alloc_size = 1,
},
{
- .part_index = 1,
+ .part_index = MLXSW_SP_KVDL_PART_CHUNKS,
.start_index = MLXSW_SP_KVDL_CHUNKS_BASE,
.end_index = MLXSW_SP_KVDL_CHUNKS_END,
.alloc_size = MLXSW_SP_CHUNK_MAX,
},
{
- .part_index = 2,
+ .part_index = MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
.start_index = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
.end_index = MLXSW_SP_KVDL_LARGE_CHUNKS_END,
.alloc_size = MLXSW_SP_LARGE_CHUNK_MAX,
@@ -222,27 +228,76 @@ mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index)
return NULL;
}
+static void
+mlxsw_sp_kvdl_part_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_kvdl_part *part, unsigned int size)
+{
+ struct mlxsw_sp_kvdl_part_info *info = part->info;
+
+ if (list_is_last(&part->list, &mlxsw_sp->kvdl->parts_list)) {
+ info->end_index = size - 1;
+ } else {
+ struct mlxsw_sp_kvdl_part *last_part;
+
+ last_part = list_next_entry(part, list);
+ info->start_index = last_part->info->end_index + 1;
+ info->end_index = info->start_index + size - 1;
+ }
+}
+
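The update keeps the partitions contiguous: a resized part inherits a boundary from its predecessor and recomputes its own end index. A standalone model, using an array in registration order (the driver walks a linked list kept in reverse registration order):

    #include <stdio.h>

    struct part { unsigned int start, end; };

    static void part_update(const struct part *prev, struct part *cur,
                            unsigned int size)
    {
            cur->start = prev ? prev->end + 1 : 0;  /* first part starts at 0 */
            cur->end = cur->start + size - 1;
    }

    int main(void)
    {
            struct part single, chunks;

            part_update(NULL, &single, 16384);      /* singles: [0, 16383] */
            part_update(&single, &chunks, 49152);   /* chunks: [16384, 65535] */
            printf("chunks: [%u, %u]\n", chunks.start, chunks.end);
            return 0;
    }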
static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
unsigned int part_index)
{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
const struct mlxsw_sp_kvdl_part_info *info;
+ enum mlxsw_sp_resource_id resource_id;
struct mlxsw_sp_kvdl_part *part;
+ bool need_update = true;
unsigned int nr_entries;
size_t usage_size;
+ u64 resource_size;
+ int err;
info = &kvdl_parts_info[part_index];
- nr_entries = (info->end_index - info->start_index + 1) /
- info->alloc_size;
+ switch (part_index) {
+ case MLXSW_SP_KVDL_PART_SINGLE:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE;
+ break;
+ case MLXSW_SP_KVDL_PART_CHUNKS:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS;
+ break;
+ case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = devlink_resource_size_get(devlink, resource_id, &resource_size);
+ if (err) {
+ need_update = false;
+ resource_size = info->end_index - info->start_index + 1;
+ }
+
+ nr_entries = div_u64(resource_size, info->alloc_size);
usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
if (!part)
return -ENOMEM;
- part->info = info;
- list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+ part->info = kmemdup(info, sizeof(*part->info), GFP_KERNEL);
+ if (!part->info)
+ goto err_part_info_alloc;
+ list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+ if (need_update)
+ mlxsw_sp_kvdl_part_update(mlxsw_sp, part, resource_size);
return 0;
+
+err_part_info_alloc:
+ kfree(part);
+ return -ENOMEM;
}
static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
@@ -255,6 +310,7 @@ static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
return;
list_del(&part->list);
+ kfree(part->info);
kfree(part);
}
@@ -312,6 +368,123 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
return occ;
}
+static u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_SINGLE);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_CHUNKS);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp,
+ MLXSW_SP_KVDL_PART_LARGE_CHUNKS);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
+ .occ_get = mlxsw_sp_kvdl_single_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
+ .occ_get = mlxsw_sp_kvdl_chunks_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
+ .occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_single_size_params = {
+ .size_min = 0,
+ .size_granularity = 1,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_chunks_size_params = {
+ .size_min = 0,
+ .size_granularity = MLXSW_SP_CHUNK_MAX,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_large_chunks_size_params = {
+ .size_min = 0,
+ .size_granularity = MLXSW_SP_LARGE_CHUNK_MAX,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static void
+mlxsw_sp_kvdl_resource_size_params_prepare(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ u32 kvdl_max_size;
+
+ kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+ mlxsw_sp_kvdl_single_size_params.size_max = kvdl_max_size;
+ mlxsw_sp_kvdl_chunks_size_params.size_max = kvdl_max_size;
+ mlxsw_sp_kvdl_large_chunks_size_params.size_max = kvdl_max_size;
+}
+
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
+{
+ int err;
+
+ mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+ MLXSW_SP_KVDL_SINGLE_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_single_size_params,
+ &mlxsw_sp_kvdl_single_ops);
+ if (err)
+ return err;
+
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_KVDL_CHUNKS_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_chunks_size_params,
+ &mlxsw_sp_kvdl_chunks_ops);
+ if (err)
+ return err;
+
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+ MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_large_chunks_size_params,
+ &mlxsw_sp_kvdl_chunks_large_ops);
+ return err;
+}
+
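With the three KVD-linear sub-resources registered, their sizes and occupancy become visible and adjustable through the devlink resource interface. A hypothetical session (PCI address, path and size are examples only):

    devlink resource show pci/0000:03:00.0
    devlink resource set pci/0000:03:00.0 path kvd/linear/singles size 16384
    devlink dev reload pci/0000:03:00.0

A new size is expected to take effect on reload, which is when mlxsw_sp_kvdl_part_init() re-reads the configured value via devlink_resource_size_get().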
int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_kvdl *kvdl;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index d20b143de3b4..a82539609d49 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -33,6 +33,7 @@
*/
#include <linux/rhashtable.h>
+#include <net/ipv6.h>
#include "spectrum_mr.h"
#include "spectrum_router.h"
@@ -47,6 +48,11 @@ struct mlxsw_sp_mr {
/* priv has to be always the last item */
};
+struct mlxsw_sp_mr_vif;
+struct mlxsw_sp_mr_vif_ops {
+ bool (*is_regular)(const struct mlxsw_sp_mr_vif *vif);
+};
+
struct mlxsw_sp_mr_vif {
struct net_device *dev;
const struct mlxsw_sp_rif *rif;
@@ -61,6 +67,9 @@ struct mlxsw_sp_mr_vif {
* instance is used as an ingress VIF
*/
struct list_head route_ivif_list;
+
+ /* Protocol-specific operations for a VIF */
+ const struct mlxsw_sp_mr_vif_ops *ops;
};
struct mlxsw_sp_mr_route_vif_entry {
@@ -70,6 +79,17 @@ struct mlxsw_sp_mr_route_vif_entry {
struct mlxsw_sp_mr_route *mr_route;
};
+struct mlxsw_sp_mr_table;
+struct mlxsw_sp_mr_table_ops {
+ bool (*is_route_valid)(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mr_mfc *mfc);
+ void (*key_create)(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mr_mfc *mfc);
+ bool (*is_route_starg)(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_mr_route *mr_route);
+};
+
struct mlxsw_sp_mr_table {
struct list_head node;
enum mlxsw_sp_l3proto proto;
@@ -78,6 +98,7 @@ struct mlxsw_sp_mr_table {
struct mlxsw_sp_mr_vif vifs[MAXVIFS];
struct list_head route_list;
struct rhashtable route_ht;
+ const struct mlxsw_sp_mr_table_ops *ops;
char catchall_route_priv[0];
/* catchall_route_priv has to be always the last item */
};
@@ -88,7 +109,7 @@ struct mlxsw_sp_mr_route {
struct mlxsw_sp_mr_route_key key;
enum mlxsw_sp_mr_route_action route_action;
u16 min_mtu;
- struct mfc_cache *mfc4;
+ struct mr_mfc *mfc;
void *route_priv;
const struct mlxsw_sp_mr_table *mr_table;
/* A list of route_vif_entry structs that point to the egress VIFs */
@@ -104,14 +125,9 @@ static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = {
.automatic_shrinking = true,
};
-static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif)
-{
- return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
-}
-
static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif)
{
- return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif;
+ return vif->ops->is_regular(vif) && vif->dev && vif->rif;
}
static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif)
@@ -122,18 +138,9 @@ static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif)
static bool
mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
{
- vifi_t ivif;
+ vifi_t ivif = mr_route->mfc->mfc_parent;
- switch (mr_route->mr_table->proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- ivif = mr_route->mfc4->mfc_parent;
- return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
- case MLXSW_SP_L3_PROTO_IPV6:
- /* fall through */
- default:
- WARN_ON_ONCE(1);
- }
- return false;
+ return mr_route->mfc->mfc_un.res.ttls[ivif] != 255;
}
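This leans on the kernel's mfc convention that an egress TTL of 255 means "not an egress VIF for this route", while any lower value is the minimum TTL for forwarding out of that VIF. A minimal restatement:

    #include <stdbool.h>

    /* True when the route's ingress VIF is also listed as an egress VIF. */
    static bool ivif_in_evifs(const unsigned char *ttls, int ivif)
    {
            return ttls[ivif] != 255;
    }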
static int
@@ -149,19 +156,6 @@ mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route)
return valid_evifs;
}
-static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route)
-{
- switch (mr_route->mr_table->proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY);
- case MLXSW_SP_L3_PROTO_IPV6:
- /* fall through */
- default:
- WARN_ON_ONCE(1);
- }
- return false;
-}
-
static enum mlxsw_sp_mr_route_action
mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
{
@@ -174,7 +168,8 @@ mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
/* The kernel does not match a (*,G) route whose ingress interface is
* not one of its egress interfaces, so trap these kinds of routes.
*/
- if (mlxsw_sp_mr_route_starg(mr_route) &&
+ if (mr_route->mr_table->ops->is_route_starg(mr_route->mr_table,
+ mr_route) &&
!mlxsw_sp_mr_route_ivif_in_evifs(mr_route))
return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
@@ -195,25 +190,11 @@ mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
static enum mlxsw_sp_mr_route_prio
mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route)
{
- return mlxsw_sp_mr_route_starg(mr_route) ?
+ return mr_route->mr_table->ops->is_route_starg(mr_route->mr_table,
+ mr_route) ?
MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG;
}
-static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
- struct mlxsw_sp_mr_route_key *key,
- const struct mfc_cache *mfc)
-{
- bool starg = (mfc->mfc_origin == htonl(INADDR_ANY));
-
- memset(key, 0, sizeof(*key));
- key->vrid = mr_table->vr_id;
- key->proto = mr_table->proto;
- key->group.addr4 = mfc->mfc_mcastgrp;
- key->group_mask.addr4 = htonl(0xffffffff);
- key->source.addr4 = mfc->mfc_origin;
- key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff);
-}
-
static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route,
struct mlxsw_sp_mr_vif *mr_vif)
{
@@ -343,8 +324,8 @@ static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table,
}
static struct mlxsw_sp_mr_route *
-mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
- struct mfc_cache *mfc)
+mlxsw_sp_mr_route_create(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc)
{
struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
struct mlxsw_sp_mr_route *mr_route;
@@ -356,12 +337,13 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
if (!mr_route)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&mr_route->evif_list);
- mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc);
/* Find min_mtu and link iVIF and eVIFs */
mr_route->min_mtu = ETH_MAX_MTU;
- ipmr_cache_hold(mfc);
- mr_route->mfc4 = mfc;
+ mr_cache_hold(mfc);
+ mr_route->mfc = mfc;
+ mr_table->ops->key_create(mr_table, &mr_route->key, mr_route->mfc);
+
mr_route->mr_table = mr_table;
for (i = 0; i < MAXVIFS; i++) {
if (mfc->mfc_un.res.ttls[i] != 255) {
@@ -374,59 +356,38 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
}
}
- mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+ mlxsw_sp_mr_route_ivif_link(mr_route,
+ &mr_table->vifs[mfc->mfc_parent]);
mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
return mr_route;
err:
- ipmr_cache_put(mfc);
+ mr_cache_put(mfc);
list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
mlxsw_sp_mr_route_evif_unlink(rve);
kfree(mr_route);
return ERR_PTR(err);
}
-static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table,
- struct mlxsw_sp_mr_route *mr_route)
+static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route)
{
struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
mlxsw_sp_mr_route_ivif_unlink(mr_route);
- ipmr_cache_put(mr_route->mfc4);
+ mr_cache_put(mr_route->mfc);
list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
mlxsw_sp_mr_route_evif_unlink(rve);
kfree(mr_route);
}
-static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
- struct mlxsw_sp_mr_route *mr_route)
-{
- switch (mr_table->proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
- break;
- case MLXSW_SP_L3_PROTO_IPV6:
- /* fall through */
- default:
- WARN_ON_ONCE(1);
- }
-}
-
static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
bool offload)
{
- switch (mr_route->mr_table->proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- if (offload)
- mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
- else
- mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
- break;
- case MLXSW_SP_L3_PROTO_IPV6:
- /* fall through */
- default:
- WARN_ON_ONCE(1);
- }
+ if (offload)
+ mr_route->mfc->mfc_flags |= MFC_OFFLOAD;
+ else
+ mr_route->mfc->mfc_flags &= ~MFC_OFFLOAD;
}
static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
@@ -448,25 +409,18 @@ static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
mlxsw_sp_mr_route_destroy(mr_table, mr_route);
}
-int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
- struct mfc_cache *mfc, bool replace)
+int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc, bool replace)
{
struct mlxsw_sp_mr_route *mr_orig_route = NULL;
struct mlxsw_sp_mr_route *mr_route;
int err;
- /* If the route is a (*,*) route, abort, as these kind of routes are
- * used for proxy routes.
- */
- if (mfc->mfc_origin == htonl(INADDR_ANY) &&
- mfc->mfc_mcastgrp == htonl(INADDR_ANY)) {
- dev_warn(mr_table->mlxsw_sp->bus_info->dev,
- "Offloading proxy routes is not supported.\n");
+ if (!mr_table->ops->is_route_valid(mr_table, mfc))
return -EINVAL;
- }
/* Create a new route */
- mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc);
+ mr_route = mlxsw_sp_mr_route_create(mr_table, mfc);
if (IS_ERR(mr_route))
return PTR_ERR(mr_route);
@@ -511,7 +465,7 @@ int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
&mr_orig_route->ht_node,
mlxsw_sp_mr_route_ht_params);
list_del(&mr_orig_route->node);
- mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route);
+ mlxsw_sp_mr_route_destroy(mr_table, mr_orig_route);
}
mlxsw_sp_mr_mfc_offload_update(mr_route);
@@ -524,17 +478,17 @@ err_rhashtable_insert:
list_del(&mr_route->node);
err_no_orig_route:
err_duplicate_route:
- mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
+ mlxsw_sp_mr_route_destroy(mr_table, mr_route);
return err;
}
-void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
- struct mfc_cache *mfc)
+void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc)
{
struct mlxsw_sp_mr_route *mr_route;
struct mlxsw_sp_mr_route_key key;
- mlxsw_sp_mr_route4_key(mr_table, &key, mfc);
+ mr_table->ops->key_create(mr_table, &key, mfc);
mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
mlxsw_sp_mr_route_ht_params);
if (mr_route)
@@ -839,6 +793,125 @@ void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
}
}
+/* Protocol-specific functions */
+static bool
+mlxsw_sp_mr_route4_validate(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mr_mfc *c)
+{
+ struct mfc_cache *mfc = (struct mfc_cache *) c;
+
+ /* If the route is a (*,*) route, abort, as these kinds of routes are
+ * used for proxy routes.
+ */
+ if (mfc->mfc_origin == htonl(INADDR_ANY) &&
+ mfc->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+ "Offloading proxy routes is not supported.\n");
+ return false;
+ }
+ return true;
+}
+
+static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mr_mfc *c)
+{
+ const struct mfc_cache *mfc = (struct mfc_cache *) c;
+ bool starg;
+
+ starg = (mfc->mfc_origin == htonl(INADDR_ANY));
+
+ memset(key, 0, sizeof(*key));
+ key->vrid = mr_table->vr_id;
+ key->proto = MLXSW_SP_L3_PROTO_IPV4;
+ key->group.addr4 = mfc->mfc_mcastgrp;
+ key->group_mask.addr4 = htonl(0xffffffff);
+ key->source.addr4 = mfc->mfc_origin;
+ key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff);
+}
+
+static bool mlxsw_sp_mr_route4_starg(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_mr_route *mr_route)
+{
+ return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY);
+}
+
+static bool mlxsw_sp_mr_vif4_is_regular(const struct mlxsw_sp_mr_vif *vif)
+{
+ return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
+}
+
+static bool
+mlxsw_sp_mr_route6_validate(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mr_mfc *c)
+{
+ struct mfc6_cache *mfc = (struct mfc6_cache *) c;
+
+ /* If the route is a (*,*) route, abort, as these kinds of routes are
+ * used for proxy routes.
+ */
+ if (ipv6_addr_any(&mfc->mf6c_origin) &&
+ ipv6_addr_any(&mfc->mf6c_mcastgrp)) {
+ dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+ "Offloading proxy routes is not supported.\n");
+ return false;
+ }
+ return true;
+}
+
+static void mlxsw_sp_mr_route6_key(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mr_mfc *c)
+{
+ const struct mfc6_cache *mfc = (struct mfc6_cache *) c;
+
+ memset(key, 0, sizeof(*key));
+ key->vrid = mr_table->vr_id;
+ key->proto = MLXSW_SP_L3_PROTO_IPV6;
+ key->group.addr6 = mfc->mf6c_mcastgrp;
+ memset(&key->group_mask.addr6, 0xff, sizeof(key->group_mask.addr6));
+ key->source.addr6 = mfc->mf6c_origin;
+ if (!ipv6_addr_any(&mfc->mf6c_origin))
+ memset(&key->source_mask.addr6, 0xff,
+ sizeof(key->source_mask.addr6));
+}
+
+static bool mlxsw_sp_mr_route6_starg(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_mr_route *mr_route)
+{
+ return ipv6_addr_any(&mr_route->key.source_mask.addr6);
+}
+
+static bool mlxsw_sp_mr_vif6_is_regular(const struct mlxsw_sp_mr_vif *vif)
+{
+ return !(vif->vif_flags & MIFF_REGISTER);
+}
+
+static struct mlxsw_sp_mr_vif_ops mlxsw_sp_mr_vif_ops_arr[] = {
+ {
+ .is_regular = mlxsw_sp_mr_vif4_is_regular,
+ },
+ {
+ .is_regular = mlxsw_sp_mr_vif6_is_regular,
+ },
+};
+
+static struct mlxsw_sp_mr_table_ops mlxsw_sp_mr_table_ops_arr[] = {
+ {
+ .is_route_valid = mlxsw_sp_mr_route4_validate,
+ .key_create = mlxsw_sp_mr_route4_key,
+ .is_route_starg = mlxsw_sp_mr_route4_starg,
+ },
+ {
+ .is_route_valid = mlxsw_sp_mr_route6_validate,
+ .key_create = mlxsw_sp_mr_route6_key,
+ .is_route_starg = mlxsw_sp_mr_route6_starg,
+ },
+};
+
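Both ops arrays are indexed positionally by enum mlxsw_sp_l3proto, so they rely on MLXSW_SP_L3_PROTO_IPV4 being 0 and MLXSW_SP_L3_PROTO_IPV6 being 1; the table create path below does mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto]. A minimal model of the dispatch, with designated initializers making the ordering dependency explicit (names are stand-ins):

    enum l3proto { L3_PROTO_IPV4, L3_PROTO_IPV6, L3_PROTO_MAX };

    struct table_ops { const char *name; };

    static const struct table_ops table_ops_arr[L3_PROTO_MAX] = {
            [L3_PROTO_IPV4] = { "ipv4" },
            [L3_PROTO_IPV6] = { "ipv6" },
    };

    /* Every later call goes through the stored ops pointer, so no code
     * path needs to switch on the protocol again.
     */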
struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
u32 vr_id,
enum mlxsw_sp_l3proto proto)
@@ -847,6 +920,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
.prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
.key = {
.vrid = vr_id,
+ .proto = proto,
},
.value = {
.route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP,
@@ -865,6 +939,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
mr_table->vr_id = vr_id;
mr_table->mlxsw_sp = mlxsw_sp;
mr_table->proto = proto;
+ mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto];
INIT_LIST_HEAD(&mr_table->route_list);
err = rhashtable_init(&mr_table->route_ht,
@@ -875,6 +950,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
for (i = 0; i < MAXVIFS; i++) {
INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list);
INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list);
+ mr_table->vifs[i].ops = &mlxsw_sp_mr_vif_ops_arr[proto];
}
err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
@@ -941,18 +1017,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets,
&bytes);
- switch (mr_route->mr_table->proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- if (mr_route->mfc4->mfc_un.res.pkt != packets)
- mr_route->mfc4->mfc_un.res.lastuse = jiffies;
- mr_route->mfc4->mfc_un.res.pkt = packets;
- mr_route->mfc4->mfc_un.res.bytes = bytes;
- break;
- case MLXSW_SP_L3_PROTO_IPV6:
- /* fall through */
- default:
- WARN_ON_ONCE(1);
- }
+ if (mr_route->mfc->mfc_un.res.pkt != packets)
+ mr_route->mfc->mfc_un.res.lastuse = jiffies;
+ mr_route->mfc->mfc_un.res.pkt = packets;
+ mr_route->mfc->mfc_un.res.bytes = bytes;
}
static void mlxsw_sp_mr_stats_update(struct work_struct *work)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
index 5d26a122af49..7c864a86811d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
@@ -36,6 +36,7 @@
#define _MLXSW_SPECTRUM_MCROUTER_H
#include <linux/mroute.h>
+#include <linux/mroute6.h>
#include "spectrum_router.h"
#include "spectrum.h"
@@ -109,10 +110,10 @@ struct mlxsw_sp_mr_table;
int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_mr_ops *mr_ops);
void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
- struct mfc_cache *mfc, bool replace);
-void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
- struct mfc_cache *mfc);
+int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc, bool replace);
+void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc);
int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
struct net_device *dev, vifi_t vif_index,
unsigned long vif_flags,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
index 4c7f32d4288d..4f4c0d311883 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
@@ -51,7 +51,7 @@ struct mlxsw_sp_mr_tcam_region {
};
struct mlxsw_sp_mr_tcam {
- struct mlxsw_sp_mr_tcam_region ipv4_tcam_region;
+ struct mlxsw_sp_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
};
/* This struct maps to one RIGR2 register entry */
@@ -316,20 +316,37 @@ static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
mlxsw_afa_block_first_set(afa_block));
break;
case MLXSW_SP_L3_PROTO_IPV6:
- default:
- WARN_ON_ONCE(1);
+ mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
+ key->vrid,
+ MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+ key->group.addr6,
+ key->group_mask.addr6,
+ key->source.addr6,
+ key->source_mask.addr6,
+ mlxsw_afa_block_first_set(afa_block));
}
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
}
static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid,
+ struct mlxsw_sp_mr_route_key *key,
struct parman_item *parman_item)
{
+ struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
char rmft2_pl[MLXSW_REG_RMFT2_LEN];
- mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, vrid,
- 0, 0, 0, 0, 0, 0, NULL);
+ switch (key->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
+ vrid, 0, 0, 0, 0, 0, 0, NULL);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
+ vrid, 0, 0, zero_addr, zero_addr,
+ zero_addr, zero_addr, NULL);
+ break;
+ }
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
}
@@ -353,27 +370,30 @@ mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+static struct mlxsw_sp_mr_tcam_region *
+mlxsw_sp_mr_tcam_protocol_region(struct mlxsw_sp_mr_tcam *mr_tcam,
+ enum mlxsw_sp_l3proto proto)
+{
+ return &mr_tcam->tcam_regions[proto];
+}
+
static int
mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam,
struct mlxsw_sp_mr_tcam_route *route,
enum mlxsw_sp_mr_route_prio prio)
{
- struct parman_prio *parman_prio = NULL;
+ struct mlxsw_sp_mr_tcam_region *tcam_region;
int err;
- switch (route->key.proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- parman_prio = &mr_tcam->ipv4_tcam_region.parman_prios[prio];
- err = parman_item_add(mr_tcam->ipv4_tcam_region.parman,
- parman_prio, &route->parman_item);
- if (err)
- return err;
- break;
- case MLXSW_SP_L3_PROTO_IPV6:
- default:
- WARN_ON_ONCE(1);
- }
- route->parman_prio = parman_prio;
+ tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
+ route->key.proto);
+ err = parman_item_add(tcam_region->parman,
+ &tcam_region->parman_prios[prio],
+ &route->parman_item);
+ if (err)
+ return err;
+
+ route->parman_prio = &tcam_region->parman_prios[prio];
return 0;
}
@@ -381,15 +401,13 @@ static void
mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam,
struct mlxsw_sp_mr_tcam_route *route)
{
- switch (route->key.proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- parman_item_remove(mr_tcam->ipv4_tcam_region.parman,
- route->parman_prio, &route->parman_item);
- break;
- case MLXSW_SP_L3_PROTO_IPV6:
- default:
- WARN_ON_ONCE(1);
- }
+ struct mlxsw_sp_mr_tcam_region *tcam_region;
+
+ tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
+ route->key.proto);
+
+ parman_item_remove(tcam_region->parman,
+ route->parman_prio, &route->parman_item);
}
static int
@@ -462,7 +480,7 @@ static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_mr_tcam *mr_tcam = priv;
mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid,
- &route->parman_item);
+ &route->key, &route->parman_item);
mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
@@ -806,21 +824,42 @@ mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
{
struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+ struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+ u32 rtar_key;
+ int err;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) ||
!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
return -EIO;
- return mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
- &mr_tcam->ipv4_tcam_region,
- MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST);
+ rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
+ err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
+ &region[MLXSW_SP_L3_PROTO_IPV4],
+ rtar_key);
+ if (err)
+ return err;
+
+ rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
+ err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
+ &region[MLXSW_SP_L3_PROTO_IPV6],
+ rtar_key);
+ if (err)
+ goto err_ipv6_region_init;
+
+ return 0;
+
+err_ipv6_region_init:
+ mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+ return err;
}
static void mlxsw_sp_mr_tcam_fini(void *priv)
{
struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+ struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
- mlxsw_sp_mr_tcam_region_fini(&mr_tcam->ipv4_tcam_region);
+ mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
+ mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
}
const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 0b7670459051..91262b0573e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -42,6 +42,8 @@
#include "reg.h"
#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+ MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_NO_QDISC,
@@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
struct mlxsw_sp_qdisc {
u32 handle;
u8 tclass_num;
+ u8 prio_bitmap;
union {
struct red_stats red;
} xstats_base;
@@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
mlxsw_sp_qdisc->handle == handle;
}
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+ bool root_only)
+{
+ int tclass, child_index;
+
+ if (parent == TC_H_ROOT)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (root_only || !mlxsw_sp_port->root_qdisc ||
+ !mlxsw_sp_port->root_qdisc->ops ||
+ TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+ TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+ return NULL;
+
+ child_index = TC_H_MIN(parent);
+ tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+ return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+ int i;
+
+ if (mlxsw_sp_port->root_qdisc->handle == handle)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+ return NULL;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+ return &mlxsw_sp_port->tclass_qdiscs[i];
+
+ return NULL;
+}
+
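The lookups above rely on the TC handle layout: the upper 16 bits of a 32-bit handle are the qdisc major, the lower 16 bits the class minor, and an offloaded prio qdisc exposes band N as class minor N + 1. A standalone illustration (the macros are equivalent to the uapi definitions):

    #include <stdint.h>
    #include <stdio.h>

    #define TC_H_MAJ(h) ((h) & 0xFFFF0000U)
    #define TC_H_MIN(h) ((h) & 0x0000FFFFU)

    int main(void)
    {
            uint32_t parent = 0x80010003;   /* qdisc 8001:, class minor 3 */

            printf("maj=%x min=%u band=%u\n",
                   (unsigned)(TC_H_MAJ(parent) >> 16),
                   (unsigned)TC_H_MIN(parent),
                   (unsigned)TC_H_MIN(parent) - 1);  /* minor 3 is band 2 */
            return 0;
    }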
static int
mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
return -EOPNOTSUPP;
}
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+ u8 prio_bitmap, u64 *tx_packets,
+ u64 *tx_bytes)
+{
+ int i;
+
+ *tx_packets = 0;
+ *tx_bytes = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (prio_bitmap & BIT(i)) {
+ *tx_packets += xstats->tx_packets[i];
+ *tx_bytes += xstats->tx_bytes[i];
+ }
+ }
+}
+
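Per-band packet and byte counts are thus an aggregation over only those priorities currently mapped to the band. A minimal restatement of the loop:

    #include <stdint.h>

    static void bstats_get(const uint64_t pkts[8], const uint64_t bytes[8],
                           uint8_t prio_bitmap,
                           uint64_t *tx_pkts, uint64_t *tx_bytes)
    {
            *tx_pkts = 0;
            *tx_bytes = 0;
            for (int i = 0; i < 8; i++) {
                    if (prio_bitmap & (1u << i)) {  /* priority i feeds this band */
                            *tx_pkts += pkts[i];
                            *tx_bytes += bytes[i];
                    }
            }
    }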
static int
mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
@@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
struct red_stats *red_base;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
red_base = &mlxsw_sp_qdisc->xstats_base.red;
- stats_base->tx_packets = stats->tx_packets;
- stats_base->tx_bytes = stats->tx_bytes;
-
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &stats_base->tx_packets,
+ &stats_base->tx_bytes);
red_base->prob_mark = xstats->ecn;
red_base->prob_drop = xstats->wred_drop[tclass_num];
red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -255,6 +312,12 @@ static int
mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
+ struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+ if (root_qdisc != mlxsw_sp_qdisc)
+ root_qdisc->stats_base.backlog -=
+ mlxsw_sp_qdisc->stats_base.backlog;
+
return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
mlxsw_sp_qdisc->tclass_num);
}
@@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
p->qstats->backlog -= backlog;
+ mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static int
@@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
- tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
- tx_packets = stats->tx_packets - stats_base->tx_packets;
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &tx_packets, &tx_bytes);
+ tx_bytes = tx_bytes - stats_base->tx_bytes;
+ tx_packets = tx_packets - stats_base->tx_packets;
+
overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
stats_base->overlimits;
drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
if (p->command == TC_RED_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
{
int i;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
MLXSW_SP_PORT_DEFAULT_TCLASS);
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[i]);
+ mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+ }
return 0;
}
@@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
- int tclass, i;
+ struct mlxsw_sp_qdisc *child_qdisc;
+ int tclass, i, band, backlog;
+ u8 old_priomap;
int err;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
- err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
- if (err)
- return err;
+ for (band = 0; band < p->bands; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ old_priomap = child_qdisc->prio_bitmap;
+ child_qdisc->prio_bitmap = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (p->priomap[i] == band) {
+ child_qdisc->prio_bitmap |= BIT(i);
+ if (BIT(i) & old_priomap)
+ continue;
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+ i, tclass);
+ if (err)
+ return err;
+ }
+ }
+ if (old_priomap != child_qdisc->prio_bitmap &&
+ child_qdisc->ops && child_qdisc->ops->clean_stats) {
+ backlog = child_qdisc->stats_base.backlog;
+ child_qdisc->ops->clean_stats(mlxsw_sp_port,
+ child_qdisc);
+ child_qdisc->stats_base.backlog = backlog;
+ }
+ }
+ for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ child_qdisc->prio_bitmap = 0;
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
}
-
return 0;
}
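With IEEE_8021QAZ_MAX_TCS equal to 8, MLXSW_SP_PRIO_BAND_TO_TCLASS(band) evaluates to 8 - band - 1, so bands map onto traffic classes in reverse:

    band:   0  1  2  3  4  5  6  7
    tclass: 7  6  5  4  3  2  1  0

A 3-band prio qdisc therefore occupies tclasses 7..5, and the trailing loop above releases the child qdiscs on the unused tclasses 4..0.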
@@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
drops += xstats->tail_drop[i];
+ drops += xstats->wred_drop[i];
backlog += xstats->backlog[i];
}
drops = drops - stats_base->drops;
@@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
stats_base->tx_bytes = stats->tx_bytes;
stats_base->drops = 0;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
stats_base->drops += xstats->tail_drop[i];
+ stats_base->drops += xstats->wred_drop[i];
+ }
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
@@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
};
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc that previously occupied the graft
+ * location. (However, grafting a qdisc onto the location it already
+ * occupies is ignored completely and causes no un-offloading.)
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_prio_qopt_offload_graft_params *p)
+{
+ int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+ struct mlxsw_sp_qdisc *old_qdisc;
+
+ /* Check if the grafted qdisc is already in its "new" location. If so,
+ * nothing needs to be done.
+ */
+ if (p->band < IEEE_8021QAZ_MAX_TCS &&
+ mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+ return 0;
+
+ /* See if the grafted qdisc is already offloaded on any tclass. If so,
+ * unoffload it.
+ */
+ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+ p->child_handle);
+ if (old_qdisc)
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+ return -EOPNOTSUPP;
+}
+
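A hypothetical shell sequence that exercises this path (device name, handles and RED parameters are examples only):

    tc qdisc replace dev sw1p5 root handle 1: prio bands 3
    tc qdisc replace dev sw1p5 parent 1:2 handle 10: red \
            limit 400000 min 30000 max 300000 avpkt 1000 burst 40

Re-grafting 10: under a different band (say parent 1:3) hits the un-offload behaviour described in the comment above, while re-grafting it under 1:2 is a no-op.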
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
if (p->command == TC_PRIO_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
case TC_PRIO_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
+ case TC_PRIO_GRAFT:
+ return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->graft_params);
default:
return -EOPNOTSUPP;
}
@@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
- mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
- GFP_KERNEL);
- if (!mlxsw_sp_port->root_qdisc)
- return -ENOMEM;
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+ int i;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_root_qdisc_init;
+
+ mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+ mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+ GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_tclass_qdiscs_init;
+
+ mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
return 0;
+
+err_tclass_qdiscs_init:
+ kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+ return -ENOMEM;
}
void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
+ kfree(mlxsw_sp_port->tclass_qdiscs);
kfree(mlxsw_sp_port->root_qdisc);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index f0b25baba09a..a9ccd974c620 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1,10 +1,10 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
- * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <[email protected]>
* Copyright (c) 2016 Ido Schimmel <[email protected]>
* Copyright (c) 2016 Yotam Gigi <[email protected]>
- * Copyright (c) 2017 Petr Machata <[email protected]>
+ * Copyright (c) 2017-2018 Petr Machata <[email protected]>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -70,6 +70,7 @@
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
+#include "spectrum_span.h"
struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
@@ -466,7 +467,7 @@ struct mlxsw_sp_vr {
unsigned int rif_count;
struct mlxsw_sp_fib *fib4;
struct mlxsw_sp_fib *fib6;
- struct mlxsw_sp_mr_table *mr4_table;
+ struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
};
static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -710,7 +711,9 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
- return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
+ return !!vr->fib4 || !!vr->fib6 ||
+ !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
+ !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
}
static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
@@ -788,45 +791,61 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
u32 tb_id,
struct netlink_ext_ack *extack)
{
+ struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
+ struct mlxsw_sp_fib *fib4;
+ struct mlxsw_sp_fib *fib6;
struct mlxsw_sp_vr *vr;
int err;
vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
if (!vr) {
- NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
return ERR_PTR(-EBUSY);
}
- vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
- if (IS_ERR(vr->fib4))
- return ERR_CAST(vr->fib4);
- vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
- if (IS_ERR(vr->fib6)) {
- err = PTR_ERR(vr->fib6);
+ fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(fib4))
+ return ERR_CAST(fib4);
+ fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(fib6)) {
+ err = PTR_ERR(fib6);
goto err_fib6_create;
}
- vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
- MLXSW_SP_L3_PROTO_IPV4);
- if (IS_ERR(vr->mr4_table)) {
- err = PTR_ERR(vr->mr4_table);
- goto err_mr_table_create;
+ mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+ MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(mr4_table)) {
+ err = PTR_ERR(mr4_table);
+ goto err_mr4_table_create;
+ }
+ mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(mr6_table)) {
+ err = PTR_ERR(mr6_table);
+ goto err_mr6_table_create;
}
+
+ vr->fib4 = fib4;
+ vr->fib6 = fib6;
+ vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
+ vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
vr->tb_id = tb_id;
return vr;
-err_mr_table_create:
- mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
- vr->fib6 = NULL;
+err_mr6_table_create:
+ mlxsw_sp_mr_table_destroy(mr4_table);
+err_mr4_table_create:
+ mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
- mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
- vr->fib4 = NULL;
+ mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
return ERR_PTR(err);
}
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_vr *vr)
{
- mlxsw_sp_mr_table_destroy(vr->mr4_table);
- vr->mr4_table = NULL;
+ mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
+ vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
+ mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
+ vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
vr->fib6 = NULL;
mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
@@ -849,7 +868,8 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
list_empty(&vr->fib6->node_list) &&
- mlxsw_sp_mr_table_empty(vr->mr4_table))
+ mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
+ mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}
@@ -1020,9 +1040,11 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
struct net_device *ol_dev)
{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct mlxsw_sp_ipip_entry *ret = NULL;
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
if (!ipip_entry)
return ERR_PTR(-ENOMEM);
@@ -1036,7 +1058,15 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
ipip_entry->ipipt = ipipt;
ipip_entry->ol_dev = ol_dev;
- ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
+
+ switch (ipip_ops->ul_proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ WARN_ON(1);
+ break;
+ }
return ipip_entry;
@@ -2316,6 +2346,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
read_unlock_bh(&n->lock);
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
entry_connected = nud_state & NUD_VALID && !dead;
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
if (!entry_connected && !neigh_entry)
@@ -2413,7 +2445,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
mlxsw_core_schedule_work(&net_work->work);
mlxsw_sp_port_dev_put(mlxsw_sp_port);
break;
- case NETEVENT_MULTIPATH_HASH_UPDATE:
+ case NETEVENT_IPV4_MPATH_HASH_UPDATE:
+ case NETEVENT_IPV6_MPATH_HASH_UPDATE:
net = ptr;
if (!net_eq(net, &init_net))
@@ -3790,6 +3823,9 @@ mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
int i;
+ if (!list_is_singular(&nh_grp->fib_list))
+ return;
+
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
@@ -5357,10 +5393,20 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+static struct mlxsw_sp_mr_table *
+mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
+{
+ if (family == RTNL_FAMILY_IPMR)
+ return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
+ else
+ return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
+}
+
static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
struct mfc_entry_notifier_info *men_info,
bool replace)
{
+ struct mlxsw_sp_mr_table *mrt;
struct mlxsw_sp_vr *vr;
if (mlxsw_sp->router->aborted)
@@ -5370,12 +5416,14 @@ static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
if (IS_ERR(vr))
return PTR_ERR(vr);
- return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
+ mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
+ return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
}
static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
struct mfc_entry_notifier_info *men_info)
{
+ struct mlxsw_sp_mr_table *mrt;
struct mlxsw_sp_vr *vr;
if (mlxsw_sp->router->aborted)
@@ -5385,7 +5433,8 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
if (WARN_ON(!vr))
return;
- mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
+ mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
+ mlxsw_sp_mr_route_del(mrt, men_info->mfc);
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
@@ -5393,6 +5442,7 @@ static int
mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
struct vif_entry_notifier_info *ven_info)
{
+ struct mlxsw_sp_mr_table *mrt;
struct mlxsw_sp_rif *rif;
struct mlxsw_sp_vr *vr;
@@ -5403,8 +5453,9 @@ mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
if (IS_ERR(vr))
return PTR_ERR(vr);
+ mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
- return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
+ return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
ven_info->vif_index,
ven_info->vif_flags, rif);
}
@@ -5413,6 +5464,7 @@ static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
struct vif_entry_notifier_info *ven_info)
{
+ struct mlxsw_sp_mr_table *mrt;
struct mlxsw_sp_vr *vr;
if (mlxsw_sp->router->aborted)
@@ -5422,7 +5474,8 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
if (WARN_ON(!vr))
return;
- mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
+ mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
+ mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
@@ -5514,7 +5567,7 @@ static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
- int i;
+ int i, j;
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
@@ -5522,7 +5575,8 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
if (!mlxsw_sp_vr_is_used(vr))
continue;
- mlxsw_sp_mr_table_flush(vr->mr4_table);
+ for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
+ mlxsw_sp_mr_table_flush(vr->mr_table[j]);
mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
/* If virtual router was only used for IPv4, then it's no
@@ -5572,6 +5626,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
/* Protect internal structures from changes */
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
@@ -5614,6 +5670,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
int err;
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD:
@@ -5657,11 +5715,11 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
replace);
if (err)
mlxsw_sp_router_fib_abort(mlxsw_sp);
- ipmr_cache_put(fib_work->men_info.mfc);
+ mr_cache_put(fib_work->men_info.mfc);
break;
case FIB_EVENT_ENTRY_DEL:
mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
- ipmr_cache_put(fib_work->men_info.mfc);
+ mr_cache_put(fib_work->men_info.mfc);
break;
case FIB_EVENT_VIF_ADD:
err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
@@ -5741,7 +5799,7 @@ mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
- ipmr_cache_hold(fib_work->men_info.mfc);
+ mr_cache_hold(fib_work->men_info.mfc);
break;
case FIB_EVENT_VIF_ADD: /* fall through */
case FIB_EVENT_VIF_DEL:
@@ -5783,10 +5841,14 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
if (!ipmr_rule_default(rule) && !rule->l3mdev)
err = -1;
break;
+ case RTNL_FAMILY_IP6MR:
+ if (!ip6mr_rule_default(rule) && !rule->l3mdev)
+ err = -1;
+ break;
}
if (err < 0)
- NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
+ NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
return err;
}
@@ -5802,7 +5864,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
if (!net_eq(info->net, &init_net) ||
(info->family != AF_INET && info->family != AF_INET6 &&
- info->family != RTNL_FAMILY_IPMR))
+ info->family != RTNL_FAMILY_IPMR &&
+ info->family != RTNL_FAMILY_IP6MR))
return NOTIFY_DONE;
router = container_of(nb, struct mlxsw_sp_router, fib_nb);
@@ -5832,6 +5895,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
mlxsw_sp_router_fib6_event(fib_work, info);
break;
+ case RTNL_FAMILY_IP6MR:
case RTNL_FAMILY_IPMR:
INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
mlxsw_sp_router_fibmr_event(fib_work, info);
@@ -6013,7 +6077,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif;
struct mlxsw_sp_vr *vr;
u16 rif_index;
- int err;
+ int i, err;
type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
ops = mlxsw_sp->router->rif_ops_arr[type];
@@ -6025,7 +6089,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
if (err) {
- NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
goto err_rif_index_alloc;
}
@@ -6053,9 +6117,11 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_configure;
- err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
- if (err)
- goto err_mr_rif_add;
+ for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
+ err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
+ if (err)
+ goto err_mr_rif_add;
+ }
mlxsw_sp_rif_counters_alloc(rif);
mlxsw_sp->router->rifs[rif_index] = rif;
@@ -6063,6 +6129,8 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
return rif;
err_mr_rif_add:
+ for (i--; i >= 0; i--)
+ mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
ops->deconfigure(rif);
err_configure:
if (fid)
@@ -6082,13 +6150,15 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
struct mlxsw_sp_fid *fid = rif->fid;
struct mlxsw_sp_vr *vr;
+ int i;
mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
vr = &mlxsw_sp->router->vrs[rif->vr_id];
mlxsw_sp->router->rifs[rif->rif_index] = NULL;
mlxsw_sp_rif_counters_free(rif);
- mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
+ for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
+ mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
ops->deconfigure(rif);
if (fid)
/* Loopback RIFs are not associated with a FID. */
@@ -6495,13 +6565,16 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
if (rif->mtu != dev->mtu) {
struct mlxsw_sp_vr *vr;
+ int i;
/* The RIF is relevant only to its mr_table instance, as unlike
* unicast routing, in multicast routing a RIF cannot be shared
* between several multicast routing tables.
*/
vr = &mlxsw_sp->router->vrs[rif->vr_id];
- mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
+ for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
+ mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
+ rif, dev->mtu);
}
ether_addr_copy(rif->addr, dev->dev_addr);
@@ -7006,13 +7079,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
+ bool only_l3 = !ip6_multipath_hash_policy(&init_net);
+
mlxsw_sp_mp_hash_header_set(recr2_pl,
MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
- mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+ if (only_l3) {
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+ } else {
+ mlxsw_sp_mp_hash_header_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_SPORT);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_DPORT);
+ }
}
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 1fb82246ce96..a01edcf56797 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -41,6 +41,7 @@
enum mlxsw_sp_l3proto {
MLXSW_SP_L3_PROTO_IPV4,
MLXSW_SP_L3_PROTO_IPV6,
+#define MLXSW_SP_L3_PROTO_MAX (MLXSW_SP_L3_PROTO_IPV6 + 1)
};
union mlxsw_sp_l3addr {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
new file mode 100644
index 000000000000..65a77708ff61
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -0,0 +1,824 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
+
+#include "spectrum.h"
+#include "spectrum_span.h"
+#include "spectrum_ipip.h"
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
+ return -EIO;
+
+ mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_SPAN);
+ mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
+ sizeof(struct mlxsw_sp_span_entry),
+ GFP_KERNEL);
+ if (!mlxsw_sp->span.entries)
+ return -ENOMEM;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ INIT_LIST_HEAD(&curr->bound_ports_list);
+ curr->id = i;
+ }
+
+ return 0;
+}
+
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
+ }
+ kfree(mlxsw_sp->span.entries);
+}
+
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = netdev_priv(to_dev);
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analyzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_reg_mpat_span_type span_type)
+{
+ struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+ .can_handle = mlxsw_sp_port_dev_check,
+ .parms = mlxsw_sp_span_entry_phys_parms,
+ .configure = mlxsw_sp_span_entry_phys_configure,
+ .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *l3edev,
+ unsigned char dmac[ETH_ALEN])
+{
+ struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+ int err = 0;
+
+ if (!neigh) {
+ neigh = neigh_create(tbl, pkey, l3edev);
+ if (IS_ERR(neigh))
+ return PTR_ERR(neigh);
+ }
+
+ neigh_event_send(neigh, NULL);
+
+ read_lock_bh(&neigh->lock);
+ if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+ memcpy(dmac, neigh->ha, ETH_ALEN);
+ else
+ err = -ENOENT;
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+ return err;
+}
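(A minimal sketch of how the GRE handlers below use this resolver; IPv4 passes &arp_tbl and IPv6 passes &nd_tbl, with `gw` and `l3edev` as computed in mlxsw_sp_span_entry_tunnel_parms_common:)

	unsigned char dmac[ETH_ALEN];

	/* Kick neighbour resolution; if no valid MAC comes back, the
	 * mirror is simply left unoffloaded rather than failing hard.
	 */
	if (mlxsw_sp_span_dmac(&arp_tbl, &gw, l3edev, dmac))
		return mlxsw_sp_span_entry_unoffloadable(sparmsp);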
+
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = NULL;
+ return 0;
+}
+
+static __maybe_unused int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+ union mlxsw_sp_l3addr saddr,
+ union mlxsw_sp_l3addr daddr,
+ union mlxsw_sp_l3addr gw,
+ __u8 ttl,
+ struct neigh_table *tbl,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ unsigned char dmac[ETH_ALEN];
+
+ if (mlxsw_sp_l3addr_is_zero(gw))
+ gw = daddr;
+
+ if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+ mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ sparmsp->dest_port = netdev_priv(l3edev);
+ sparmsp->ttl = ttl;
+ memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+ memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+ sparmsp->saddr = saddr;
+ sparmsp->daddr = daddr;
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+ __be32 *saddrp, __be32 *daddrp)
+{
+ struct ip_tunnel *tun = netdev_priv(to_dev);
+ struct net_device *dev = NULL;
+ struct ip_tunnel_parm parms;
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
+
+ /* We assume "dev" stays valid after rt is put. */
+ ASSERT_RTNL();
+
+ parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+ 0, 0, parms.link, tun->fwmark);
+
+ rt = ip_route_output_key(tun->net, &fl4);
+ if (IS_ERR(rt))
+ return NULL;
+
+ if (rt->rt_type != RTN_UNICAST)
+ goto out;
+
+ dev = rt->dst.dev;
+ *saddrp = fl4.saddr;
+ *daddrp = rt->rt_gateway;
+
+out:
+ ip_rt_put(rt);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+ union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+ bool inherit_tos = tparm.iph.tos & 0x1;
+ bool inherit_ttl = !tparm.iph.ttl;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.iph.ttl,
+ &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analyzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+ sparms.ttl, sparms.smac,
+ be32_to_cpu(sparms.saddr.addr4),
+ be32_to_cpu(sparms.daddr.addr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+ .can_handle = is_gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap4_parms,
+ .configure = mlxsw_sp_span_entry_gretap4_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6_GRE)
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+ struct in6_addr *saddrp,
+ struct in6_addr *daddrp)
+{
+ struct ip6_tnl *t = netdev_priv(to_dev);
+ struct flowi6 fl6 = t->fl.u.ip6;
+ struct net_device *dev = NULL;
+ struct dst_entry *dst;
+ struct rt6_info *rt6;
+
+ /* We assume "dev" stays valid after dst is released. */
+ ASSERT_RTNL();
+
+ fl6.flowi6_mark = t->parms.fwmark;
+ if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+ return NULL;
+
+ dst = ip6_route_output(t->net, NULL, &fl6);
+ if (!dst || dst->error)
+ goto out;
+
+ rt6 = container_of(dst, struct rt6_info, dst);
+
+ dev = dst->dev;
+ *saddrp = fl6.saddr;
+ *daddrp = rt6->rt6i_gateway;
+
+out:
+ dst_release(dst);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+ bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+ union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+ union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+ bool inherit_ttl = !tparm.hop_limit;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.hop_limit,
+ &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analyzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+ sparms.saddr.addr6,
+ sparms.daddr.addr6);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+ .can_handle = is_ip6gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap6_parms,
+ .configure = mlxsw_sp_span_entry_gretap6_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+#endif
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+ &mlxsw_sp_span_entry_ops_phys,
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+ &mlxsw_sp_span_entry_ops_gretap4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6_GRE)
+ &mlxsw_sp_span_entry_ops_gretap6,
+#endif
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+ .parms = mlxsw_sp_span_entry_nop_parms,
+ .configure = mlxsw_sp_span_entry_nop_configure,
+ .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ if (sparms.dest_port) {
+ if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+ netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ } else if (span_entry->ops->configure(span_entry, sparms)) {
+ netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ }
+ }
+
+ span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ if (span_entry->parms.dest_port)
+ span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry = NULL;
+ int i;
+
+ /* find a free entry to use */
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ if (!mlxsw_sp->span.entries[i].ref_count) {
+ span_entry = &mlxsw_sp->span.entries[i];
+ break;
+ }
+ }
+ if (!span_entry)
+ return NULL;
+
+ span_entry->ops = ops;
+ span_entry->ref_count = 1;
+ span_entry->to_dev = to_dev;
+ mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
+ return span_entry;
+}
+
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+}
+
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->to_dev == to_dev)
+ return curr;
+ }
+ return NULL;
+}
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+ span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->id == span_id)
+ return curr;
+ }
+ return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry;
+
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
+ if (span_entry) {
+ /* Already exists, just take a reference */
+ span_entry->ref_count++;
+ return span_entry;
+ }
+
+ return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
+}
+
+static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ WARN_ON(!span_entry->ref_count);
+ if (--span_entry->ref_count == 0)
+ mlxsw_sp_span_entry_destroy(span_entry);
+ return 0;
+}
+
+static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ struct mlxsw_sp_span_inspected_port *p;
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ list_for_each_entry(p, &curr->bound_ports_list, list)
+ if (p->local_port == port->local_port &&
+ p->type == MLXSW_SP_SPAN_EGRESS)
+ return true;
+ }
+
+ return false;
+}
+
+static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
+ int mtu)
+{
+ return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
+}
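(A worked example of the sizing above, assuming mlxsw_sp_bytes_cells() rounds bytes up to whole buffer cells: with a 1500-byte MTU the buffer is dimensioned for 1500 * 5 / 2 = 3750 bytes, i.e. two and a half maximum-size frames, plus one extra cell:)

	/* mtu = 1500:
	 *   1500 * 5 / 2    = 3750 bytes
	 *   bytes -> cells  = 3750 rounded up to whole cells
	 *   buffsize        = that cell count + 1
	 */
	u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, 1500);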
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+ int err;
+
+ /* If the port is egress mirrored, the shared buffer size should be
+ * updated according to the MTU value.
+ */
+ if (mlxsw_sp_span_is_egress_mirror(port)) {
+ u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
+
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ if (err) {
+ netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static struct mlxsw_sp_span_inspected_port *
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ struct mlxsw_sp_port *port,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *p;
+
+ list_for_each_entry(p, &span_entry->bound_ports_list, list)
+ if (type == p->type &&
+ port->local_port == p->local_port &&
+ bind == p->bound)
+ return p;
+ return NULL;
+}
+
+static int
+mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char mpar_pl[MLXSW_REG_MPAR_LEN];
+ int pa_id = span_entry->id;
+
+ /* bind the port to the SPAN entry */
+ mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
+ (enum mlxsw_reg_mpar_i_e)type, bind, pa_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
+}
+
+static int
+mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *inspected_port;
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+ int i;
+ int err;
+
+ /* A given (source port, direction) can only be bound to one analyzer,
+ * so if a binding is requested, check for conflicts.
+ */
+ if (bind)
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr =
+ &mlxsw_sp->span.entries[i];
+
+ if (mlxsw_sp_span_entry_bound_port_find(curr, type,
+ port, bind))
+ return -EEXIST;
+ }
+
+ /* if it is an egress SPAN, bind a shared buffer to it */
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
+ port->dev->mtu);
+
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ if (err) {
+ netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
+ return err;
+ }
+ }
+
+ if (bind) {
+ err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ true);
+ if (err)
+ goto err_port_bind;
+ }
+
+ inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
+ if (!inspected_port) {
+ err = -ENOMEM;
+ goto err_inspected_port_alloc;
+ }
+ inspected_port->local_port = port->local_port;
+ inspected_port->type = type;
+ inspected_port->bound = bind;
+ list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
+
+ return 0;
+
+err_inspected_port_alloc:
+ if (bind)
+ mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ false);
+err_port_bind:
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ }
+ return err;
+}
+
+static void
+mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *inspected_port;
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+
+ inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type,
+ port, bind);
+ if (!inspected_port)
+ return;
+
+ if (bind)
+ mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ false);
+ /* remove the SBIB buffer if it was an egress SPAN */
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ }
+
+ mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+
+ list_del(&inspected_port->list);
+ kfree(inspected_port);
+}
+
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+ if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+ return mlxsw_sp_span_entry_types[i];
+
+ return NULL;
+}
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type, bool bind,
+ int *p_span_id)
+{
+ struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms sparms = {NULL};
+ struct mlxsw_sp_span_entry *span_entry;
+ int err;
+
+ ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+ if (!ops) {
+ netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->parms(to_dev, &sparms);
+ if (err)
+ return err;
+
+ span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
+ if (!span_entry)
+ return -ENOENT;
+
+ netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
+ span_entry->id);
+
+ err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
+ if (err)
+ goto err_port_bind;
+
+ *p_span_id = span_entry->id;
+ return 0;
+
+err_port_bind:
+ mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+ return err;
+}
+
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind)
+{
+ struct mlxsw_sp_span_entry *span_entry;
+
+ span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
+ if (!span_entry) {
+ netdev_err(from->dev, "no span entry found\n");
+ return;
+ }
+
+ netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
+ span_entry->id);
+ mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
+}
+
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+ int err;
+
+ ASSERT_RTNL();
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+ struct mlxsw_sp_span_parms sparms = {NULL};
+
+ if (!curr->ref_count)
+ continue;
+
+ err = curr->ops->parms(curr->to_dev, &sparms);
+ if (err)
+ continue;
+
+ if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+ mlxsw_sp_span_entry_deconfigure(curr);
+ mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+ }
+ }
+}
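(This is the function behind the mlxsw_sp_span_respin() calls added to the FIB event workers earlier in this diff, always under rtnl_lock(); the memcmp() is only sound because every parms structure starts fully zero-initialized. A caller-side sketch:)

	rtnl_lock();
	/* Routing may have changed under active mirrors: recompute each
	 * entry's egress parameters and re-program those that moved.
	 */
	mlxsw_sp_span_respin(mlxsw_sp);
	/* ... process the FIB event itself ... */
	rtnl_unlock();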
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
new file mode 100644
index 000000000000..4b87ec20e658
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -0,0 +1,107 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_SPAN_H
+#define _MLXSW_SPECTRUM_SPAN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+
+enum mlxsw_sp_span_type {
+ MLXSW_SP_SPAN_EGRESS,
+ MLXSW_SP_SPAN_INGRESS
+};
+
+struct mlxsw_sp_span_inspected_port {
+ struct list_head list;
+ enum mlxsw_sp_span_type type;
+ u8 local_port;
+
+ /* Whether this is a directly bound mirror (port-to-port) or an ACL. */
+ bool bound;
+};
+
+struct mlxsw_sp_span_parms {
+ struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+ unsigned int ttl;
+ unsigned char dmac[ETH_ALEN];
+ unsigned char smac[ETH_ALEN];
+ union mlxsw_sp_l3addr daddr;
+ union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
+struct mlxsw_sp_span_entry {
+ const struct net_device *to_dev;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms parms;
+ struct list_head bound_ports_list;
+ int ref_count;
+ int id;
+};
+
+struct mlxsw_sp_span_entry_ops {
+ bool (*can_handle)(const struct net_device *to_dev);
+ int (*parms)(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp);
+ int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms);
+ void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type,
+ bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind);
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry);
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
+
+#endif
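(The ops table is the extension point: a new mirror-target type supplies one of these vtables and appends it to mlxsw_sp_span_entry_types[] in spectrum_span.c. The stubs below are hypothetical, modeled on the nop ops, purely to show the shape:)

static bool mlxsw_sp_span_example_can_handle(const struct net_device *to_dev)
{
	return false;	/* hypothetical: match no device */
}

static int
mlxsw_sp_span_entry_example_parms(const struct net_device *to_dev,
				  struct mlxsw_sp_span_parms *sparmsp)
{
	/* Resolve the egress port and encapsulation here; report
	 * unoffloadable when that fails.
	 */
	return mlxsw_sp_span_entry_unoffloadable(sparmsp);
}

static int
mlxsw_sp_span_entry_example_configure(struct mlxsw_sp_span_entry *span_entry,
				      struct mlxsw_sp_span_parms sparms)
{
	return 0;	/* program the MPAT (or similar) register here */
}

static void
mlxsw_sp_span_entry_example_deconfigure(struct mlxsw_sp_span_entry *span_entry)
{
}

static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_example = {
	.can_handle  = mlxsw_sp_span_example_can_handle,
	.parms       = mlxsw_sp_span_entry_example_parms,
	.configure   = mlxsw_sp_span_entry_example_configure,
	.deconfigure = mlxsw_sp_span_entry_example_deconfigure,
};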
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 593ad31be749..c11c9a635866 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1203,6 +1203,7 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
bool dynamic)
{
char *sfd_pl;
+ u8 num_rec;
int err;
sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
@@ -1212,9 +1213,16 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
mac, fid, action, local_port);
+ num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
- kfree(sfd_pl);
+ if (err)
+ goto out;
+
+ if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+ err = -EBUSY;
+out:
+ kfree(sfd_pl);
return err;
}
@@ -1239,6 +1247,7 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
bool adding, bool dynamic)
{
char *sfd_pl;
+ u8 num_rec;
int err;
sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
@@ -1249,9 +1258,16 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP,
lag_vid, lag_id);
+ num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
- kfree(sfd_pl);
+ if (err)
+ goto out;
+
+ if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+ err = -EBUSY;
+out:
+ kfree(sfd_pl);
return err;
}
@@ -1296,6 +1312,7 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
u16 fid, u16 mid_idx, bool adding)
{
char *sfd_pl;
+ u8 num_rec;
int err;
sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
@@ -1305,7 +1322,15 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid,
MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx);
+ num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+ if (err)
+ goto out;
+
+ if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+ err = -EBUSY;
+
+out:
kfree(sfd_pl);
return err;
}
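(All three hunks above add the same guard. It appears to rely on the SFD write response echoing back how many records the device actually consumed, so a shortfall, say with a full FDB, turns an otherwise successful write into -EBUSY. The pattern, condensed:)

	u8 num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);	/* records packed */

	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
	if (!err && num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
		err = -EBUSY;	/* device consumed fewer records */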
@@ -1819,7 +1844,7 @@ mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
if (is_vlan_dev(bridge_port->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge");
+ NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge");
return -EINVAL;
}
@@ -1882,20 +1907,16 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct net_device *dev = bridge_port->dev;
u16 vid;
- if (!is_vlan_dev(bridge_port->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge");
- return -EINVAL;
- }
- vid = vlan_dev_vlan_id(bridge_port->dev);
-
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
if (WARN_ON(!mlxsw_sp_port_vlan))
return -EINVAL;
if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port");
return -EINVAL;
}
@@ -1912,8 +1933,10 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_port *mlxsw_sp_port)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
- u16 vid = vlan_dev_vlan_id(bridge_port->dev);
+ struct net_device *dev = bridge_port->dev;
+ u16 vid;
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
if (WARN_ON(!mlxsw_sp_port_vlan))
return;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index f3c29bbf07e2..c87b0934a405 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -789,7 +789,7 @@ mlxsw_sx_port_get_link_ksettings(struct net_device *dev,
u32 supported, advertising, lp_advertising;
int err;
- mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
if (err) {
netdev_err(dev, "Failed to get proto");
@@ -879,7 +879,7 @@ mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
mlxsw_sx_to_ptys_advert_link(advertising) :
mlxsw_sx_to_ptys_speed(speed);
- mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
if (err) {
netdev_err(dev, "Failed to get proto");
@@ -897,7 +897,7 @@ mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
return 0;
mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
- eth_proto_new);
+ eth_proto_new, true);
err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
if (err) {
netdev_err(dev, "Failed to set proto admin");
@@ -1029,7 +1029,7 @@ mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width)
eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed);
mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
- eth_proto_admin);
+ eth_proto_admin, true);
return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index ec6cef8267ae..399e9d6993f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -77,6 +77,7 @@ enum {
MLXSW_TRAP_ID_IPV6_DHCP = 0x69,
MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
+ MLXSW_TRAP_ID_IPV6_PIM = 0x79,
MLXSW_TRAP_ID_IPV4_BGP = 0x88,
MLXSW_TRAP_ID_IPV6_BGP = 0x89,
MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig
index 36a09d94b368..71dca8bd51ac 100644
--- a/drivers/net/ethernet/microchip/Kconfig
+++ b/drivers/net/ethernet/microchip/Kconfig
@@ -42,4 +42,14 @@ config ENCX24J600
To compile this driver as a module, choose M here. The module will be
called encx24j600.
+config LAN743X
+ tristate "LAN743x support"
+ depends on PCI
+ select PHYLIB
+ ---help---
+ Support for the Microchip LAN743x PCI Express Gigabit Ethernet chip.
+
+ To compile this driver as a module, choose M here. The module will be
+ called lan743x.
+
endif # NET_VENDOR_MICROCHIP
diff --git a/drivers/net/ethernet/microchip/Makefile b/drivers/net/ethernet/microchip/Makefile
index ff78f621b59a..2e982cc249fb 100644
--- a/drivers/net/ethernet/microchip/Makefile
+++ b/drivers/net/ethernet/microchip/Makefile
@@ -4,3 +4,6 @@
obj-$(CONFIG_ENC28J60) += enc28j60.o
obj-$(CONFIG_ENCX24J600) += encx24j600.o encx24j600-regmap.o
+obj-$(CONFIG_LAN743X) += lan743x.o
+
+lan743x-objs := lan743x_main.o
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
new file mode 100644
index 000000000000..dd947e4dd3ce
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -0,0 +1,2771 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/crc32.h>
+#include <linux/microchipphy.h>
+#include <linux/net_tstamp.h>
+#include <linux/phy.h>
+#include <linux/rtnetlink.h>
+#include <linux/iopoll.h>
+#include "lan743x_main.h"
+
+static void lan743x_pci_cleanup(struct lan743x_adapter *adapter)
+{
+ pci_release_selected_regions(adapter->pdev,
+ pci_select_bars(adapter->pdev,
+ IORESOURCE_MEM));
+ pci_disable_device(adapter->pdev);
+}
+
+static int lan743x_pci_init(struct lan743x_adapter *adapter,
+ struct pci_dev *pdev)
+{
+ unsigned long bars = 0;
+ int ret;
+
+ adapter->pdev = pdev;
+ ret = pci_enable_device_mem(pdev);
+ if (ret)
+ goto return_error;
+
+ netif_info(adapter, probe, adapter->netdev,
+ "PCI: Vendor ID = 0x%04X, Device ID = 0x%04X\n",
+ pdev->vendor, pdev->device);
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (!test_bit(0, &bars))
+ goto disable_device;
+
+ ret = pci_request_selected_regions(pdev, bars, DRIVER_NAME);
+ if (ret)
+ goto disable_device;
+
+ pci_set_master(pdev);
+ return 0;
+
+disable_device:
+ pci_disable_device(adapter->pdev);
+
+return_error:
+ return ret;
+}
+
+static u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
+{
+ return ioread32(&adapter->csr.csr_address[offset]);
+}
+
+static void lan743x_csr_write(struct lan743x_adapter *adapter, int offset,
+ u32 data)
+{
+ iowrite32(data, &adapter->csr.csr_address[offset]);
+}
+
+#define LAN743X_CSR_READ_OP(offset) lan743x_csr_read(adapter, offset)
+
+static int lan743x_csr_light_reset(struct lan743x_adapter *adapter)
+{
+ u32 data;
+
+ data = lan743x_csr_read(adapter, HW_CFG);
+ data |= HW_CFG_LRST_;
+ lan743x_csr_write(adapter, HW_CFG, data);
+
+ return readx_poll_timeout(LAN743X_CSR_READ_OP, HW_CFG, data,
+ !(data & HW_CFG_LRST_), 100000, 10000000);
+}
+
+static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter,
+ int offset, u32 bit_mask,
+ int target_value, int usleep_min,
+ int usleep_max, int count)
+{
+ u32 data;
+
+ return readx_poll_timeout(LAN743X_CSR_READ_OP, offset, data,
+ target_value == ((data & bit_mask) ? 1 : 0),
+ usleep_max, usleep_min * count);
+}
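(Both pollers are built on readx_poll_timeout() from <linux/iopoll.h>, whose read op takes exactly one argument; the LAN743X_CSR_READ_OP macro defined above captures the caller's `adapter` so the CSR offset can be that argument. For the light reset this works out to roughly:)

	u32 data;
	int ret;

	/* Repeats data = lan743x_csr_read(adapter, HW_CFG) about every
	 * 100 ms until HW_CFG_LRST_ self-clears, giving up with
	 * -ETIMEDOUT after 10 s.
	 */
	ret = readx_poll_timeout(LAN743X_CSR_READ_OP, HW_CFG, data,
				 !(data & HW_CFG_LRST_), 100000, 10000000);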
+
+static int lan743x_csr_init(struct lan743x_adapter *adapter)
+{
+ struct lan743x_csr *csr = &adapter->csr;
+ resource_size_t bar_start, bar_length;
+ int result;
+
+ bar_start = pci_resource_start(adapter->pdev, 0);
+ bar_length = pci_resource_len(adapter->pdev, 0);
+ csr->csr_address = devm_ioremap(&adapter->pdev->dev,
+ bar_start, bar_length);
+ if (!csr->csr_address) {
+ result = -ENOMEM;
+ goto clean_up;
+ }
+
+ csr->id_rev = lan743x_csr_read(adapter, ID_REV);
+ csr->fpga_rev = lan743x_csr_read(adapter, FPGA_REV);
+ netif_info(adapter, probe, adapter->netdev,
+ "ID_REV = 0x%08X, FPGA_REV = %d.%d\n",
+ csr->id_rev, FPGA_REV_GET_MAJOR_(csr->fpga_rev),
+ FPGA_REV_GET_MINOR_(csr->fpga_rev));
+ if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) {
+ result = -ENODEV;
+ goto clean_up;
+ }
+
+ csr->flags = LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
+ switch (csr->id_rev & ID_REV_CHIP_REV_MASK_) {
+ case ID_REV_CHIP_REV_A0_:
+ csr->flags |= LAN743X_CSR_FLAG_IS_A0;
+ csr->flags &= ~LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
+ break;
+ case ID_REV_CHIP_REV_B0_:
+ csr->flags |= LAN743X_CSR_FLAG_IS_B0;
+ break;
+ }
+
+ result = lan743x_csr_light_reset(adapter);
+ if (result)
+ goto clean_up;
+ return 0;
+clean_up:
+ return result;
+}
+
+static void lan743x_intr_software_isr(void *context)
+{
+ struct lan743x_adapter *adapter = context;
+ struct lan743x_intr *intr = &adapter->intr;
+ u32 int_sts;
+
+ int_sts = lan743x_csr_read(adapter, INT_STS);
+ if (int_sts & INT_BIT_SW_GP_) {
+ lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_);
+ intr->software_isr_flag = 1;
+ }
+}
+
+static void lan743x_tx_isr(void *context, u32 int_sts, u32 flags)
+{
+ struct lan743x_tx *tx = context;
+ struct lan743x_adapter *adapter = tx->adapter;
+ bool enable_flag = true;
+ u32 int_en = 0;
+
+ int_en = lan743x_csr_read(adapter, INT_EN_SET);
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ INT_BIT_DMA_TX_(tx->channel_number));
+ }
+
+ if (int_sts & INT_BIT_DMA_TX_(tx->channel_number)) {
+ u32 ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
+ u32 dmac_int_sts;
+ u32 dmac_int_en;
+
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
+ dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+ else
+ dmac_int_sts = ioc_bit;
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
+ dmac_int_en = lan743x_csr_read(adapter,
+ DMAC_INT_EN_SET);
+ else
+ dmac_int_en = ioc_bit;
+
+ dmac_int_en &= ioc_bit;
+ dmac_int_sts &= dmac_int_en;
+ if (dmac_int_sts & ioc_bit) {
+ napi_schedule(&tx->napi);
+ enable_flag = false; /* poll function will re-enable later */
+ }
+ }
+
+ if (enable_flag)
+ /* enable isr */
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_TX_(tx->channel_number));
+}
+
+static void lan743x_rx_isr(void *context, u32 int_sts, u32 flags)
+{
+ struct lan743x_rx *rx = context;
+ struct lan743x_adapter *adapter = rx->adapter;
+ bool enable_flag = true;
+
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ }
+
+ if (int_sts & INT_BIT_DMA_RX_(rx->channel_number)) {
+ u32 rx_frame_bit = DMAC_INT_BIT_RXFRM_(rx->channel_number);
+ u32 dmac_int_sts;
+ u32 dmac_int_en;
+
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
+ dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+ else
+ dmac_int_sts = rx_frame_bit;
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
+ dmac_int_en = lan743x_csr_read(adapter,
+ DMAC_INT_EN_SET);
+ else
+ dmac_int_en = rx_frame_bit;
+
+ dmac_int_en &= rx_frame_bit;
+ dmac_int_sts &= dmac_int_en;
+ if (dmac_int_sts & rx_frame_bit) {
+ napi_schedule(&rx->napi);
+ enable_flag = false; /* poll function will re-enable later */
+ }
+ }
+
+ if (enable_flag) {
+ /* enable isr */
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ }
+}
+
+static void lan743x_intr_shared_isr(void *context, u32 int_sts, u32 flags)
+{
+ struct lan743x_adapter *adapter = context;
+ unsigned int channel;
+
+ if (int_sts & INT_BIT_ALL_RX_) {
+ for (channel = 0; channel < LAN743X_USED_RX_CHANNELS;
+ channel++) {
+ u32 int_bit = INT_BIT_DMA_RX_(channel);
+
+ if (int_sts & int_bit) {
+ lan743x_rx_isr(&adapter->rx[channel],
+ int_bit, flags);
+ int_sts &= ~int_bit;
+ }
+ }
+ }
+ if (int_sts & INT_BIT_ALL_TX_) {
+ for (channel = 0; channel < LAN743X_USED_TX_CHANNELS;
+ channel++) {
+ u32 int_bit = INT_BIT_DMA_TX_(channel);
+
+ if (int_sts & int_bit) {
+ lan743x_tx_isr(&adapter->tx[channel],
+ int_bit, flags);
+ int_sts &= ~int_bit;
+ }
+ }
+ }
+ if (int_sts & INT_BIT_ALL_OTHER_) {
+ if (int_sts & INT_BIT_SW_GP_) {
+ lan743x_intr_software_isr(adapter);
+ int_sts &= ~INT_BIT_SW_GP_;
+ }
+ }
+ if (int_sts)
+ lan743x_csr_write(adapter, INT_EN_CLR, int_sts);
+}
+
+static irqreturn_t lan743x_intr_entry_isr(int irq, void *ptr)
+{
+ struct lan743x_vector *vector = ptr;
+ struct lan743x_adapter *adapter = vector->adapter;
+ irqreturn_t result = IRQ_NONE;
+ u32 int_enables;
+ u32 int_sts;
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ) {
+ int_sts = lan743x_csr_read(adapter, INT_STS);
+ } else if (vector->flags &
+ (LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)) {
+ int_sts = lan743x_csr_read(adapter, INT_STS_R2C);
+ } else {
+ /* use mask as implied status */
+ int_sts = vector->int_mask | INT_BIT_MAS_;
+ }
+
+ if (!(int_sts & INT_BIT_MAS_))
+ goto irq_done;
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR)
+ /* disable vector interrupt */
+ lan743x_csr_write(adapter,
+ INT_VEC_EN_CLR,
+ INT_VEC_EN_(vector->vector_index));
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR)
+ /* disable master interrupt */
+ lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_);
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK) {
+ int_enables = lan743x_csr_read(adapter, INT_EN_SET);
+ } else {
+ /* use vector mask as implied enable mask */
+ int_enables = vector->int_mask;
+ }
+
+ int_sts &= int_enables;
+ int_sts &= vector->int_mask;
+ if (int_sts) {
+ if (vector->handler) {
+ vector->handler(vector->context,
+ int_sts, vector->flags);
+ } else {
+ /* disable interrupts on this vector */
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ vector->int_mask);
+ }
+ result = IRQ_HANDLED;
+ }
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET)
+ /* enable master interrupt */
+ lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET)
+ /* enable vector interrupt */
+ lan743x_csr_write(adapter,
+ INT_VEC_EN_SET,
+ INT_VEC_EN_(vector->vector_index));
+irq_done:
+ return result;
+}
+
+static int lan743x_intr_test_isr(struct lan743x_adapter *adapter)
+{
+ struct lan743x_intr *intr = &adapter->intr;
+ int result = -ENODEV;
+ int timeout = 10;
+
+ intr->software_isr_flag = 0;
+
+ /* enable interrupt */
+ lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_SW_GP_);
+
+ /* trigger the software interrupt */
+ lan743x_csr_write(adapter, INT_SET, INT_BIT_SW_GP_);
+ while ((timeout > 0) && (!(intr->software_isr_flag))) {
+ usleep_range(1000, 20000);
+ timeout--;
+ }
+
+ if (intr->software_isr_flag)
+ result = 0;
+
+ /* disable interrupts */
+ lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_);
+ return result;
+}
+
+static int lan743x_intr_register_isr(struct lan743x_adapter *adapter,
+ int vector_index, u32 flags,
+ u32 int_mask,
+ lan743x_vector_handler handler,
+ void *context)
+{
+ struct lan743x_vector *vector = &adapter->intr.vector_list
+ [vector_index];
+ int ret;
+
+ vector->adapter = adapter;
+ vector->flags = flags;
+ vector->vector_index = vector_index;
+ vector->int_mask = int_mask;
+ vector->handler = handler;
+ vector->context = context;
+
+ ret = request_irq(vector->irq,
+ lan743x_intr_entry_isr,
+ (flags & LAN743X_VECTOR_FLAG_IRQ_SHARED) ?
+ IRQF_SHARED : 0, DRIVER_NAME, vector);
+ if (ret) {
+ vector->handler = NULL;
+ vector->context = NULL;
+ vector->int_mask = 0;
+ vector->flags = 0;
+ }
+ return ret;
+}
+
+static void lan743x_intr_unregister_isr(struct lan743x_adapter *adapter,
+ int vector_index)
+{
+ struct lan743x_vector *vector = &adapter->intr.vector_list
+ [vector_index];
+
+ free_irq(vector->irq, vector);
+ vector->handler = NULL;
+ vector->context = NULL;
+ vector->int_mask = 0;
+ vector->flags = 0;
+}
+
+static u32 lan743x_intr_get_vector_flags(struct lan743x_adapter *adapter,
+ u32 int_mask)
+{
+ int index;
+
+ for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) {
+ if (adapter->intr.vector_list[index].int_mask & int_mask)
+ return adapter->intr.vector_list[index].flags;
+ }
+ return 0;
+}
+
+static void lan743x_intr_close(struct lan743x_adapter *adapter)
+{
+ struct lan743x_intr *intr = &adapter->intr;
+ int index = 0;
+
+ lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_);
+ lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0x000000FF);
+
+ for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) {
+ if (intr->flags & INTR_FLAG_IRQ_REQUESTED(index)) {
+ lan743x_intr_unregister_isr(adapter, index);
+ intr->flags &= ~INTR_FLAG_IRQ_REQUESTED(index);
+ }
+ }
+
+ if (intr->flags & INTR_FLAG_MSI_ENABLED) {
+ pci_disable_msi(adapter->pdev);
+ intr->flags &= ~INTR_FLAG_MSI_ENABLED;
+ }
+
+ if (intr->flags & INTR_FLAG_MSIX_ENABLED) {
+ pci_disable_msix(adapter->pdev);
+ intr->flags &= ~INTR_FLAG_MSIX_ENABLED;
+ }
+}
+
+static int lan743x_intr_open(struct lan743x_adapter *adapter)
+{
+ struct msix_entry msix_entries[LAN743X_MAX_VECTOR_COUNT];
+ struct lan743x_intr *intr = &adapter->intr;
+ u32 int_vec_en_auto_clr = 0;
+ u32 int_vec_map0 = 0;
+ u32 int_vec_map1 = 0;
+ int ret = -ENODEV;
+ int index = 0;
+ u32 flags = 0;
+
+ intr->number_of_vectors = 0;
+
+ /* Try to set up MSIX interrupts */
+ memset(&msix_entries[0], 0,
+ sizeof(struct msix_entry) * LAN743X_MAX_VECTOR_COUNT);
+ for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++)
+ msix_entries[index].entry = index;
+ ret = pci_enable_msix_range(adapter->pdev,
+ msix_entries, 1,
+ 1 + LAN743X_USED_TX_CHANNELS +
+ LAN743X_USED_RX_CHANNELS);
+
+ if (ret > 0) {
+ intr->flags |= INTR_FLAG_MSIX_ENABLED;
+ intr->number_of_vectors = ret;
+ intr->using_vectors = true;
+ for (index = 0; index < intr->number_of_vectors; index++)
+ intr->vector_list[index].irq = msix_entries
+ [index].vector;
+ netif_info(adapter, ifup, adapter->netdev,
+ "using MSIX interrupts, number of vectors = %d\n",
+ intr->number_of_vectors);
+ }
+
+ /* If MSI-X failed, try to set up MSI interrupts */
+ if (!intr->number_of_vectors) {
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+ if (!pci_enable_msi(adapter->pdev)) {
+ intr->flags |= INTR_FLAG_MSI_ENABLED;
+ intr->number_of_vectors = 1;
+ intr->using_vectors = true;
+ intr->vector_list[0].irq =
+ adapter->pdev->irq;
+ netif_info(adapter, ifup, adapter->netdev,
+ "using MSI interrupts, number of vectors = %d\n",
+ intr->number_of_vectors);
+ }
+ }
+ }
+
+ /* If both MSI-X and MSI failed, set up a legacy interrupt */
+ if (!intr->number_of_vectors) {
+ intr->number_of_vectors = 1;
+ intr->using_vectors = false;
+ intr->vector_list[0].irq = intr->irq;
+ netif_info(adapter, ifup, adapter->netdev,
+ "using legacy interrupts\n");
+ }
+
+ /* At this point we must have at least one IRQ */
+ lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0xFFFFFFFF);
+
+ /* map all interrupts to vector 0 */
+ lan743x_csr_write(adapter, INT_VEC_MAP0, 0x00000000);
+ lan743x_csr_write(adapter, INT_VEC_MAP1, 0x00000000);
+ lan743x_csr_write(adapter, INT_VEC_MAP2, 0x00000000);
+ flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR;
+
+ if (intr->using_vectors) {
+ flags |= LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+ } else {
+ flags |= LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR |
+ LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET |
+ LAN743X_VECTOR_FLAG_IRQ_SHARED;
+ }
+
+ if (adapter->csr.flags & LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+ flags &= ~LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ;
+ flags &= ~LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C;
+ flags &= ~LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR;
+ flags &= ~LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK;
+ flags |= LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C;
+ flags |= LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C;
+ }
+
+ ret = lan743x_intr_register_isr(adapter, 0, flags,
+ INT_BIT_ALL_RX_ | INT_BIT_ALL_TX_ |
+ INT_BIT_ALL_OTHER_,
+ lan743x_intr_shared_isr, adapter);
+ if (ret)
+ goto clean_up;
+ intr->flags |= INTR_FLAG_IRQ_REQUESTED(0);
+
+ if (intr->using_vectors)
+ lan743x_csr_write(adapter, INT_VEC_EN_SET,
+ INT_VEC_EN_(0));
+
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+ lan743x_csr_write(adapter, INT_MOD_CFG0, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG1, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG2, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG3, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG4, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG5, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG6, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG7, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00005432);
+ lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00000001);
+ lan743x_csr_write(adapter, INT_MOD_MAP2, 0x00FFFFFF);
+ }
+
+ /* enable interrupts */
+ lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
+ ret = lan743x_intr_test_isr(adapter);
+ if (ret)
+ goto clean_up;
+
+ if (intr->number_of_vectors > 1) {
+ int number_of_tx_vectors = intr->number_of_vectors - 1;
+
+ if (number_of_tx_vectors > LAN743X_USED_TX_CHANNELS)
+ number_of_tx_vectors = LAN743X_USED_TX_CHANNELS;
+ flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+
+ if (adapter->csr.flags &
+ LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+ flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
+ }
+
+ for (index = 0; index < number_of_tx_vectors; index++) {
+ u32 int_bit = INT_BIT_DMA_TX_(index);
+ int vector = index + 1;
+
+ /* map TX interrupt to vector */
+ int_vec_map1 |= INT_VEC_MAP1_TX_VEC_(index, vector);
+ lan743x_csr_write(adapter, INT_VEC_MAP1, int_vec_map1);
+ if (flags &
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
+ int_vec_en_auto_clr |= INT_VEC_EN_(vector);
+ lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
+ int_vec_en_auto_clr);
+ }
+
+ /* Remove TX interrupt from shared mask */
+ intr->vector_list[0].int_mask &= ~int_bit;
+ ret = lan743x_intr_register_isr(adapter, vector, flags,
+ int_bit, lan743x_tx_isr,
+ &adapter->tx[index]);
+ if (ret)
+ goto clean_up;
+ intr->flags |= INTR_FLAG_IRQ_REQUESTED(vector);
+ if (!(flags &
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET))
+ lan743x_csr_write(adapter, INT_VEC_EN_SET,
+ INT_VEC_EN_(vector));
+ }
+ }
+ if ((intr->number_of_vectors - LAN743X_USED_TX_CHANNELS) > 1) {
+ int number_of_rx_vectors = intr->number_of_vectors -
+ LAN743X_USED_TX_CHANNELS - 1;
+
+ if (number_of_rx_vectors > LAN743X_USED_RX_CHANNELS)
+ number_of_rx_vectors = LAN743X_USED_RX_CHANNELS;
+
+ flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+
+ if (adapter->csr.flags &
+ LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+ flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
+ }
+ for (index = 0; index < number_of_rx_vectors; index++) {
+ int vector = index + 1 + LAN743X_USED_TX_CHANNELS;
+ u32 int_bit = INT_BIT_DMA_RX_(index);
+
+ /* map RX interrupt to vector */
+ int_vec_map0 |= INT_VEC_MAP0_RX_VEC_(index, vector);
+ lan743x_csr_write(adapter, INT_VEC_MAP0, int_vec_map0);
+ if (flags &
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
+ int_vec_en_auto_clr |= INT_VEC_EN_(vector);
+ lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
+ int_vec_en_auto_clr);
+ }
+
+ /* Remove RX interrupt from shared mask */
+ intr->vector_list[0].int_mask &= ~int_bit;
+ ret = lan743x_intr_register_isr(adapter, vector, flags,
+ int_bit, lan743x_rx_isr,
+ &adapter->rx[index]);
+ if (ret)
+ goto clean_up;
+ intr->flags |= INTR_FLAG_IRQ_REQUESTED(vector);
+
+ lan743x_csr_write(adapter, INT_VEC_EN_SET,
+ INT_VEC_EN_(vector));
+ }
+ }
+ return 0;
+
+clean_up:
+ lan743x_intr_close(adapter);
+ return ret;
+}
+
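+/* Data port access, as implemented below: select the target RAM through
+ * DP_SEL, then for each 32-bit word write DP_ADDR and DP_DATA_0, issue
+ * DP_CMD_WRITE_, and poll DP_SEL_DPRDY_ before moving to the next word.
+ */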
+static int lan743x_dp_write(struct lan743x_adapter *adapter,
+ u32 select, u32 addr, u32 length, u32 *buf)
+{
+ int ret = -EIO;
+ u32 dp_sel;
+ int i;
+
+ mutex_lock(&adapter->dp_lock);
+ if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+ 1, 40, 100, 100))
+ goto unlock;
+ dp_sel = lan743x_csr_read(adapter, DP_SEL);
+ dp_sel &= ~DP_SEL_MASK_;
+ dp_sel |= select;
+ lan743x_csr_write(adapter, DP_SEL, dp_sel);
+
+ for (i = 0; i < length; i++) {
+ lan743x_csr_write(adapter, DP_ADDR, addr + i);
+ lan743x_csr_write(adapter, DP_DATA_0, buf[i]);
+ lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_);
+ if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+ 1, 40, 100, 100))
+ goto unlock;
+ }
+ ret = 0;
+
+unlock:
+ mutex_unlock(&adapter->dp_lock);
+ return ret;
+}
+
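+/* Builds the MAC_MII_ACC command word: the PHY address and register index
+ * are packed into their fields, the direction bit selects read or write,
+ * and the busy bit starts the transaction. For example, reading register
+ * 2 of the PHY at address 1 amounts to writing
+ * lan743x_mac_mii_access(1, 2, MAC_MII_READ) to MAC_MII_ACC, polling
+ * until MAC_MII_ACC_MII_BUSY_ clears, then reading MAC_MII_DATA (see
+ * lan743x_mdiobus_read below).
+ */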
+static u32 lan743x_mac_mii_access(u16 id, u16 index, int read)
+{
+ u32 ret;
+
+ ret = (id << MAC_MII_ACC_PHY_ADDR_SHIFT_) &
+ MAC_MII_ACC_PHY_ADDR_MASK_;
+ ret |= (index << MAC_MII_ACC_MIIRINDA_SHIFT_) &
+ MAC_MII_ACC_MIIRINDA_MASK_;
+
+ if (read)
+ ret |= MAC_MII_ACC_MII_READ_;
+ else
+ ret |= MAC_MII_ACC_MII_WRITE_;
+ ret |= MAC_MII_ACC_MII_BUSY_;
+
+ return ret;
+}
+
+static int lan743x_mac_mii_wait_till_not_busy(struct lan743x_adapter *adapter)
+{
+ u32 data;
+
+ return readx_poll_timeout(LAN743X_CSR_READ_OP, MAC_MII_ACC, data,
+ !(data & MAC_MII_ACC_MII_BUSY_), 0, 1000000);
+}
+
+static int lan743x_mdiobus_read(struct mii_bus *bus, int phy_id, int index)
+{
+ struct lan743x_adapter *adapter = bus->priv;
+ u32 val, mii_access;
+ int ret;
+
+ /* confirm MII not busy */
+ ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+ if (ret < 0)
+ return ret;
+
+ /* set the address, index & direction (read from PHY) */
+ mii_access = lan743x_mac_mii_access(phy_id, index, MAC_MII_READ);
+ lan743x_csr_write(adapter, MAC_MII_ACC, mii_access);
+ ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+ if (ret < 0)
+ return ret;
+
+ val = lan743x_csr_read(adapter, MAC_MII_DATA);
+ return (int)(val & 0xFFFF);
+}
+
+static int lan743x_mdiobus_write(struct mii_bus *bus,
+ int phy_id, int index, u16 regval)
+{
+ struct lan743x_adapter *adapter = bus->priv;
+ u32 val, mii_access;
+ int ret;
+
+ /* confirm MII not busy */
+ ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+ if (ret < 0)
+ return ret;
+ val = (u32)regval;
+ lan743x_csr_write(adapter, MAC_MII_DATA, val);
+
+ /* set the address, index & direction (write to PHY) */
+ mii_access = lan743x_mac_mii_access(phy_id, index, MAC_MII_WRITE);
+ lan743x_csr_write(adapter, MAC_MII_ACC, mii_access);
+ ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+ return ret;
+}
+
+static void lan743x_mac_set_address(struct lan743x_adapter *adapter,
+ u8 *addr)
+{
+ u32 addr_lo, addr_hi;
+
+ addr_lo = addr[0] |
+ addr[1] << 8 |
+ addr[2] << 16 |
+ addr[3] << 24;
+ addr_hi = addr[4] |
+ addr[5] << 8;
+ lan743x_csr_write(adapter, MAC_RX_ADDRL, addr_lo);
+ lan743x_csr_write(adapter, MAC_RX_ADDRH, addr_hi);
+
+ ether_addr_copy(adapter->mac_address, addr);
+ netif_info(adapter, drv, adapter->netdev,
+ "MAC address set to %pM\n", addr);
+}
+
+static int lan743x_mac_init(struct lan743x_adapter *adapter)
+{
+ bool mac_address_valid = true;
+ struct net_device *netdev;
+ u32 mac_addr_hi = 0;
+ u32 mac_addr_lo = 0;
+ u32 data;
+ int ret;
+
+ netdev = adapter->netdev;
+ lan743x_csr_write(adapter, MAC_CR, MAC_CR_RST_);
+ ret = lan743x_csr_wait_for_bit(adapter, MAC_CR, MAC_CR_RST_,
+ 0, 1000, 20000, 100);
+ if (ret)
+ return ret;
+
+ /* setup auto duplex, and speed detection */
+ data = lan743x_csr_read(adapter, MAC_CR);
+ data |= MAC_CR_ADD_ | MAC_CR_ASD_;
+ data |= MAC_CR_CNTR_RST_;
+ lan743x_csr_write(adapter, MAC_CR, data);
+
+ mac_addr_hi = lan743x_csr_read(adapter, MAC_RX_ADDRH);
+ mac_addr_lo = lan743x_csr_read(adapter, MAC_RX_ADDRL);
+ adapter->mac_address[0] = mac_addr_lo & 0xFF;
+ adapter->mac_address[1] = (mac_addr_lo >> 8) & 0xFF;
+ adapter->mac_address[2] = (mac_addr_lo >> 16) & 0xFF;
+ adapter->mac_address[3] = (mac_addr_lo >> 24) & 0xFF;
+ adapter->mac_address[4] = mac_addr_hi & 0xFF;
+ adapter->mac_address[5] = (mac_addr_hi >> 8) & 0xFF;
+
+ if (((mac_addr_hi & 0x0000FFFF) == 0x0000FFFF) &&
+ mac_addr_lo == 0xFFFFFFFF) {
+ mac_address_valid = false;
+ } else if (!is_valid_ether_addr(adapter->mac_address)) {
+ mac_address_valid = false;
+ }
+
+ if (!mac_address_valid)
+ random_ether_addr(adapter->mac_address);
+ lan743x_mac_set_address(adapter, adapter->mac_address);
+ ether_addr_copy(netdev->dev_addr, adapter->mac_address);
+ return 0;
+}
+
+static int lan743x_mac_open(struct lan743x_adapter *adapter)
+{
+ int ret = 0;
+ u32 temp;
+
+ temp = lan743x_csr_read(adapter, MAC_RX);
+ lan743x_csr_write(adapter, MAC_RX, temp | MAC_RX_RXEN_);
+ temp = lan743x_csr_read(adapter, MAC_TX);
+ lan743x_csr_write(adapter, MAC_TX, temp | MAC_TX_TXEN_);
+ return ret;
+}
+
+static void lan743x_mac_close(struct lan743x_adapter *adapter)
+{
+ u32 temp;
+
+ temp = lan743x_csr_read(adapter, MAC_TX);
+ temp &= ~MAC_TX_TXEN_;
+ lan743x_csr_write(adapter, MAC_TX, temp);
+ lan743x_csr_wait_for_bit(adapter, MAC_TX, MAC_TX_TXD_,
+ 1, 1000, 20000, 100);
+
+ temp = lan743x_csr_read(adapter, MAC_RX);
+ temp &= ~MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, temp);
+ lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+ 1, 1000, 20000, 100);
+}
+
+static void lan743x_mac_flow_ctrl_set_enables(struct lan743x_adapter *adapter,
+ bool tx_enable, bool rx_enable)
+{
+ u32 flow_setting = 0;
+
+ /* set maximum pause time; when fifo space frees up,
+ * a zero-value pause frame will be sent to release the pause
+ */
+ flow_setting = MAC_FLOW_CR_FCPT_MASK_;
+ if (tx_enable)
+ flow_setting |= MAC_FLOW_CR_TX_FCEN_;
+ if (rx_enable)
+ flow_setting |= MAC_FLOW_CR_RX_FCEN_;
+ lan743x_csr_write(adapter, MAC_FLOW, flow_setting);
+}
+
+static int lan743x_mac_set_mtu(struct lan743x_adapter *adapter, int new_mtu)
+{
+ int enabled = 0;
+ u32 mac_rx = 0;
+
+ mac_rx = lan743x_csr_read(adapter, MAC_RX);
+ if (mac_rx & MAC_RX_RXEN_) {
+ enabled = 1;
+ if (mac_rx & MAC_RX_RXD_) {
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ mac_rx &= ~MAC_RX_RXD_;
+ }
+ mac_rx &= ~MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+ 1, 1000, 20000, 100);
+ lan743x_csr_write(adapter, MAC_RX, mac_rx | MAC_RX_RXD_);
+ }
+
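+ /* new max frame size = MTU + Ethernet header + 4 (presumably the FCS) */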
+ mac_rx &= ~(MAC_RX_MAX_SIZE_MASK_);
+ mac_rx |= (((new_mtu + ETH_HLEN + 4) << MAC_RX_MAX_SIZE_SHIFT_) &
+ MAC_RX_MAX_SIZE_MASK_);
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+
+ if (enabled) {
+ mac_rx |= MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ }
+ return 0;
+}
+
+/* PHY */
+static int lan743x_phy_reset(struct lan743x_adapter *adapter)
+{
+ u32 data;
+
+ /* Only called from within probe, and before mdiobus_register */
+
+ data = lan743x_csr_read(adapter, PMT_CTL);
+ data |= PMT_CTL_ETH_PHY_RST_;
+ lan743x_csr_write(adapter, PMT_CTL, data);
+
+ return readx_poll_timeout(LAN743X_CSR_READ_OP, PMT_CTL, data,
+ (!(data & PMT_CTL_ETH_PHY_RST_) &&
+ (data & PMT_CTL_READY_)),
+ 50000, 1000000);
+}
+
+static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter,
+ u8 duplex, u16 local_adv,
+ u16 remote_adv)
+{
+ struct lan743x_phy *phy = &adapter->phy;
+ u8 cap;
+
+ if (phy->fc_autoneg)
+ cap = mii_resolve_flowctrl_fdx(local_adv, remote_adv);
+ else
+ cap = phy->fc_request_control;
+
+ lan743x_mac_flow_ctrl_set_enables(adapter,
+ cap & FLOW_CTRL_TX,
+ cap & FLOW_CTRL_RX);
+}
+
+static int lan743x_phy_init(struct lan743x_adapter *adapter)
+{
+ return lan743x_phy_reset(adapter);
+}
+
+static void lan743x_phy_link_status_change(struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ struct phy_device *phydev = netdev->phydev;
+
+ phy_print_status(phydev);
+ if (phydev->state == PHY_RUNNING) {
+ struct ethtool_link_ksettings ksettings;
+ int remote_advertisement = 0;
+ int local_advertisement = 0;
+
+ memset(&ksettings, 0, sizeof(ksettings));
+ phy_ethtool_get_link_ksettings(netdev, &ksettings);
+ local_advertisement = phy_read(phydev, MII_ADVERTISE);
+ if (local_advertisement < 0)
+ return;
+
+ remote_advertisement = phy_read(phydev, MII_LPA);
+ if (remote_advertisement < 0)
+ return;
+
+ lan743x_phy_update_flowcontrol(adapter,
+ ksettings.base.duplex,
+ local_advertisement,
+ remote_advertisement);
+ }
+}
+
+static void lan743x_phy_close(struct lan743x_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ phy_stop(netdev->phydev);
+ phy_disconnect(netdev->phydev);
+ netdev->phydev = NULL;
+}
+
+static int lan743x_phy_open(struct lan743x_adapter *adapter)
+{
+ struct lan743x_phy *phy = &adapter->phy;
+ struct phy_device *phydev;
+ struct net_device *netdev;
+ int ret = -EIO;
+ u32 mii_adv;
+
+ netdev = adapter->netdev;
+ phydev = phy_find_first(adapter->mdiobus);
+ if (!phydev)
+ goto return_error;
+
+ ret = phy_connect_direct(netdev, phydev,
+ lan743x_phy_link_status_change,
+ PHY_INTERFACE_MODE_GMII);
+ if (ret)
+ goto return_error;
+
+ /* MAC doesn't support 1000T Half */
+ phydev->supported &= ~SUPPORTED_1000baseT_Half;
+
+ /* support both flow controls */
+ phy->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX);
+ phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+ mii_adv = (u32)mii_advertise_flowctrl(phy->fc_request_control);
+ phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv);
+ phy->fc_autoneg = phydev->autoneg;
+
+ phy_start(phydev);
+ phy_start_aneg(phydev);
+ return 0;
+
+return_error:
+ return ret;
+}
+
+static void lan743x_rfe_update_mac_address(struct lan743x_adapter *adapter)
+{
+ u8 *mac_addr;
+ u32 mac_addr_hi = 0;
+ u32 mac_addr_lo = 0;
+
+ /* Add mac address to the perfect filter */
+ mac_addr = adapter->mac_address;
+ mac_addr_lo = ((((u32)(mac_addr[0])) << 0) |
+ (((u32)(mac_addr[1])) << 8) |
+ (((u32)(mac_addr[2])) << 16) |
+ (((u32)(mac_addr[3])) << 24));
+ mac_addr_hi = ((((u32)(mac_addr[4])) << 0) |
+ (((u32)(mac_addr[5])) << 8));
+
+ lan743x_csr_write(adapter, RFE_ADDR_FILT_LO(0), mac_addr_lo);
+ lan743x_csr_write(adapter, RFE_ADDR_FILT_HI(0),
+ mac_addr_hi | RFE_ADDR_FILT_HI_VALID_);
+}
+
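+/* Receive filtering scheme (a summary of the function below): broadcast
+ * is always accepted; the first 32 multicast addresses are placed in
+ * perfect filters 1..32, and any further addresses fall back to a 512-bit
+ * hash table indexed by bits 31..23 of the CRC of the address. E.g. a CRC
+ * whose top 9 bits are 0x1FF sets bit 31 of hash_table[15]. The table is
+ * then written to the RFE RAM through the data port.
+ */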
+static void lan743x_rfe_set_multicast(struct lan743x_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ u32 hash_table[DP_SEL_VHF_HASH_LEN];
+ u32 rfctl;
+ u32 data;
+
+ rfctl = lan743x_csr_read(adapter, RFE_CTL);
+ rfctl &= ~(RFE_CTL_AU_ | RFE_CTL_AM_ |
+ RFE_CTL_DA_PERFECT_ | RFE_CTL_MCAST_HASH_);
+ rfctl |= RFE_CTL_AB_;
+ if (netdev->flags & IFF_PROMISC) {
+ rfctl |= RFE_CTL_AM_ | RFE_CTL_AU_;
+ } else {
+ if (netdev->flags & IFF_ALLMULTI)
+ rfctl |= RFE_CTL_AM_;
+ }
+
+ memset(hash_table, 0, DP_SEL_VHF_HASH_LEN * sizeof(u32));
+ if (netdev_mc_count(netdev)) {
+ struct netdev_hw_addr *ha;
+ int i;
+
+ rfctl |= RFE_CTL_DA_PERFECT_;
+ i = 1;
+ netdev_for_each_mc_addr(ha, netdev) {
+ /* set first 32 into the perfect filter */
+ if (i < 33) {
+ lan743x_csr_write(adapter,
+ RFE_ADDR_FILT_HI(i), 0);
+ data = ha->addr[3];
+ data = ha->addr[2] | (data << 8);
+ data = ha->addr[1] | (data << 8);
+ data = ha->addr[0] | (data << 8);
+ lan743x_csr_write(adapter,
+ RFE_ADDR_FILT_LO(i), data);
+ data = ha->addr[5];
+ data = ha->addr[4] | (data << 8);
+ data |= RFE_ADDR_FILT_HI_VALID_;
+ lan743x_csr_write(adapter,
+ RFE_ADDR_FILT_HI(i), data);
+ } else {
+ u32 bitnum = (ether_crc(ETH_ALEN, ha->addr) >>
+ 23) & 0x1FF;
+ hash_table[bitnum / 32] |= (1 << (bitnum % 32));
+ rfctl |= RFE_CTL_MCAST_HASH_;
+ }
+ i++;
+ }
+ }
+
+ lan743x_dp_write(adapter, DP_SEL_RFE_RAM,
+ DP_SEL_VHF_VLAN_LEN,
+ DP_SEL_VHF_HASH_LEN, hash_table);
+ lan743x_csr_write(adapter, RFE_CTL, rfctl);
+}
+
+static int lan743x_dmac_init(struct lan743x_adapter *adapter)
+{
+ u32 data = 0;
+
+ lan743x_csr_write(adapter, DMAC_CMD, DMAC_CMD_SWR_);
+ lan743x_csr_wait_for_bit(adapter, DMAC_CMD, DMAC_CMD_SWR_,
+ 0, 1000, 20000, 100);
+ switch (DEFAULT_DMA_DESCRIPTOR_SPACING) {
+ case DMA_DESCRIPTOR_SPACING_16:
+ data = DMAC_CFG_MAX_DSPACE_16_;
+ break;
+ case DMA_DESCRIPTOR_SPACING_32:
+ data = DMAC_CFG_MAX_DSPACE_32_;
+ break;
+ case DMA_DESCRIPTOR_SPACING_64:
+ data = DMAC_CFG_MAX_DSPACE_64_;
+ break;
+ case DMA_DESCRIPTOR_SPACING_128:
+ data = DMAC_CFG_MAX_DSPACE_128_;
+ break;
+ default:
+ return -EPERM;
+ }
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+ data |= DMAC_CFG_COAL_EN_;
+ data |= DMAC_CFG_CH_ARB_SEL_RX_HIGH_;
+ data |= DMAC_CFG_MAX_READ_REQ_SET_(6);
+ lan743x_csr_write(adapter, DMAC_CFG, data);
+ data = DMAC_COAL_CFG_TIMER_LIMIT_SET_(1);
+ data |= DMAC_COAL_CFG_TIMER_TX_START_;
+ data |= DMAC_COAL_CFG_FLUSH_INTS_;
+ data |= DMAC_COAL_CFG_INT_EXIT_COAL_;
+ data |= DMAC_COAL_CFG_CSR_EXIT_COAL_;
+ data |= DMAC_COAL_CFG_TX_THRES_SET_(0x0A);
+ data |= DMAC_COAL_CFG_RX_THRES_SET_(0x0C);
+ lan743x_csr_write(adapter, DMAC_COAL_CFG, data);
+ data = DMAC_OBFF_TX_THRES_SET_(0x08);
+ data |= DMAC_OBFF_RX_THRES_SET_(0x0A);
+ lan743x_csr_write(adapter, DMAC_OBFF_CFG, data);
+ return 0;
+}
+
+static int lan743x_dmac_tx_get_state(struct lan743x_adapter *adapter,
+ int tx_channel)
+{
+ u32 dmac_cmd = 0;
+
+ dmac_cmd = lan743x_csr_read(adapter, DMAC_CMD);
+ return DMAC_CHANNEL_STATE_SET((dmac_cmd &
+ DMAC_CMD_START_T_(tx_channel)),
+ (dmac_cmd &
+ DMAC_CMD_STOP_T_(tx_channel)));
+}
+
+static int lan743x_dmac_tx_wait_till_stopped(struct lan743x_adapter *adapter,
+ int tx_channel)
+{
+ int timeout = 100;
+ int result = 0;
+
+ while (timeout &&
+ ((result = lan743x_dmac_tx_get_state(adapter, tx_channel)) ==
+ DMAC_CHANNEL_STATE_STOP_PENDING)) {
+ usleep_range(1000, 20000);
+ timeout--;
+ }
+ if (result == DMAC_CHANNEL_STATE_STOP_PENDING)
+ result = -ENODEV;
+ return result;
+}
+
+static int lan743x_dmac_rx_get_state(struct lan743x_adapter *adapter,
+ int rx_channel)
+{
+ u32 dmac_cmd = 0;
+
+ dmac_cmd = lan743x_csr_read(adapter, DMAC_CMD);
+ return DMAC_CHANNEL_STATE_SET((dmac_cmd &
+ DMAC_CMD_START_R_(rx_channel)),
+ (dmac_cmd &
+ DMAC_CMD_STOP_R_(rx_channel)));
+}
+
+static int lan743x_dmac_rx_wait_till_stopped(struct lan743x_adapter *adapter,
+ int rx_channel)
+{
+ int timeout = 100;
+ int result = 0;
+
+ while (timeout &&
+ ((result = lan743x_dmac_rx_get_state(adapter, rx_channel)) ==
+ DMAC_CHANNEL_STATE_STOP_PENDING)) {
+ usleep_range(1000, 20000);
+ timeout--;
+ }
+ if (result == DMAC_CHANNEL_STATE_STOP_PENDING)
+ result = -ENODEV;
+ return result;
+}
+
+static void lan743x_tx_release_desc(struct lan743x_tx *tx,
+ int descriptor_index, bool cleanup)
+{
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+ struct lan743x_tx_descriptor *descriptor = NULL;
+ u32 descriptor_type = 0;
+
+ descriptor = &tx->ring_cpu_ptr[descriptor_index];
+ buffer_info = &tx->buffer_info[descriptor_index];
+ if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_ACTIVE))
+ goto done;
+
+ descriptor_type = (descriptor->data0) &
+ TX_DESC_DATA0_DTYPE_MASK_;
+ if (descriptor_type == TX_DESC_DATA0_DTYPE_DATA_)
+ goto clean_up_data_descriptor;
+ else
+ goto clear_active;
+
+clean_up_data_descriptor:
+ if (buffer_info->dma_ptr) {
+ if (buffer_info->flags &
+ TX_BUFFER_INFO_FLAG_SKB_FRAGMENT) {
+ dma_unmap_page(&tx->adapter->pdev->dev,
+ buffer_info->dma_ptr,
+ buffer_info->buffer_length,
+ DMA_TO_DEVICE);
+ } else {
+ dma_unmap_single(&tx->adapter->pdev->dev,
+ buffer_info->dma_ptr,
+ buffer_info->buffer_length,
+ DMA_TO_DEVICE);
+ }
+ buffer_info->dma_ptr = 0;
+ buffer_info->buffer_length = 0;
+ }
+ if (buffer_info->skb) {
+ dev_kfree_skb(buffer_info->skb);
+ buffer_info->skb = NULL;
+ }
+
+clear_active:
+ buffer_info->flags &= ~TX_BUFFER_INFO_FLAG_ACTIVE;
+
+done:
+ memset(buffer_info, 0, sizeof(*buffer_info));
+ memset(descriptor, 0, sizeof(*descriptor));
+}
+
+static int lan743x_tx_next_index(struct lan743x_tx *tx, int index)
+{
+ return ((++index) % tx->ring_size);
+}
+
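+/* *tx->head_cpu_ptr is the head-pointer write-back location that the
+ * hardware updates by DMA (see TX_HEAD_WRITEBACK_ADDR in lan743x_tx_open);
+ * every descriptor between last_head and it has completed and may be
+ * released.
+ */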
+static void lan743x_tx_release_completed_descriptors(struct lan743x_tx *tx)
+{
+ while ((*tx->head_cpu_ptr) != (tx->last_head)) {
+ lan743x_tx_release_desc(tx, tx->last_head, false);
+ tx->last_head = lan743x_tx_next_index(tx, tx->last_head);
+ }
+}
+
+static void lan743x_tx_release_all_descriptors(struct lan743x_tx *tx)
+{
+ u32 original_head = 0;
+
+ original_head = tx->last_head;
+ do {
+ lan743x_tx_release_desc(tx, tx->last_head, true);
+ tx->last_head = lan743x_tx_next_index(tx, tx->last_head);
+ } while (tx->last_head != original_head);
+ memset(tx->ring_cpu_ptr, 0,
+ sizeof(*tx->ring_cpu_ptr) * (tx->ring_size));
+ memset(tx->buffer_info, 0,
+ sizeof(*tx->buffer_info) * (tx->ring_size));
+}
+
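+/* Worked example: a GSO skb with 3 page fragments needs 5 descriptors,
+ * one for the linear head, one extension descriptor for LSO, and one per
+ * fragment.
+ */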
+static int lan743x_tx_get_desc_cnt(struct lan743x_tx *tx,
+ struct sk_buff *skb)
+{
+ int result = 1; /* 1 for the main skb buffer */
+ int nr_frags = 0;
+
+ if (skb_is_gso(skb))
+ result++; /* requires an extension descriptor */
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ result += nr_frags; /* 1 for each fragment buffer */
+ return result;
+}
+
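+/* Standard ring arithmetic: one slot is always left unused so a full ring
+ * can be distinguished from an empty one. E.g. with ring_size = 8,
+ * last_head = 2 and last_tail = 5 this returns 8 - 5 + 2 - 1 = 4 free
+ * descriptors.
+ */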
+static int lan743x_tx_get_avail_desc(struct lan743x_tx *tx)
+{
+ int last_head = tx->last_head;
+ int last_tail = tx->last_tail;
+
+ if (last_tail >= last_head)
+ return tx->ring_size - last_tail + last_head - 1;
+ else
+ return last_head - last_tail - 1;
+}
+
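+/* Frame assembly protocol used by the helpers below: each step fills in a
+ * descriptor but keeps its data0 word cached in tx->frame_data0; the next
+ * step (or lan743x_tx_frame_end) writes that data0 back before moving on,
+ * so a descriptor is never exposed to the hardware half-built.
+ * lan743x_tx_frame_end finally sets LS and IOC, publishes the last data0,
+ * and advances TX_TAIL.
+ */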
+static int lan743x_tx_frame_start(struct lan743x_tx *tx,
+ unsigned char *first_buffer,
+ unsigned int first_buffer_length,
+ unsigned int frame_length,
+ bool check_sum)
+{
+ /* called only from within lan743x_tx_xmit_frame.
+ * assuming tx->ring_lock has already been acquired.
+ */
+ struct lan743x_tx_descriptor *tx_descriptor = NULL;
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+ struct lan743x_adapter *adapter = tx->adapter;
+ struct device *dev = &adapter->pdev->dev;
+ dma_addr_t dma_ptr;
+
+ tx->frame_flags |= TX_FRAME_FLAG_IN_PROGRESS;
+ tx->frame_first = tx->last_tail;
+ tx->frame_tail = tx->frame_first;
+
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ buffer_info = &tx->buffer_info[tx->frame_tail];
+ dma_ptr = dma_map_single(dev, first_buffer, first_buffer_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_ptr))
+ return -ENOMEM;
+
+ tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr);
+ tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr);
+ tx_descriptor->data3 = (frame_length << 16) &
+ TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_;
+
+ buffer_info->skb = NULL;
+ buffer_info->dma_ptr = dma_ptr;
+ buffer_info->buffer_length = first_buffer_length;
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+
+ tx->frame_data0 = (first_buffer_length &
+ TX_DESC_DATA0_BUF_LENGTH_MASK_) |
+ TX_DESC_DATA0_DTYPE_DATA_ |
+ TX_DESC_DATA0_FS_ |
+ TX_DESC_DATA0_FCS_;
+
+ if (check_sum)
+ tx->frame_data0 |= TX_DESC_DATA0_ICE_ |
+ TX_DESC_DATA0_IPE_ |
+ TX_DESC_DATA0_TPE_;
+
+ /* data0 will be programmed in one of the other frame assembler functions */
+ return 0;
+}
+
+static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx,
+ unsigned int frame_length)
+{
+ /* called only from within lan743x_tx_xmit_frame.
+ * assuming tx->ring_lock has already been acquired.
+ */
+ struct lan743x_tx_descriptor *tx_descriptor = NULL;
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+
+ /* wrap up previous descriptor */
+ tx->frame_data0 |= TX_DESC_DATA0_EXT_;
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ tx_descriptor->data0 = tx->frame_data0;
+
+ /* move to next descriptor */
+ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ buffer_info = &tx->buffer_info[tx->frame_tail];
+
+ /* add extension descriptor */
+ tx_descriptor->data1 = 0;
+ tx_descriptor->data2 = 0;
+ tx_descriptor->data3 = 0;
+
+ buffer_info->skb = NULL;
+ buffer_info->dma_ptr = 0;
+ buffer_info->buffer_length = 0;
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+
+ tx->frame_data0 = (frame_length & TX_DESC_DATA0_EXT_PAY_LENGTH_MASK_) |
+ TX_DESC_DATA0_DTYPE_EXT_ |
+ TX_DESC_DATA0_EXT_LSO_;
+
+ /* data0 will be programmed in one of the other frame assembler functions */
+}
+
+static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx,
+ const struct skb_frag_struct *fragment,
+ unsigned int frame_length)
+{
+ /* called only from within lan743x_tx_xmit_frame
+ * assuming tx->ring_lock has already been acquired
+ */
+ struct lan743x_tx_descriptor *tx_descriptor = NULL;
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+ struct lan743x_adapter *adapter = tx->adapter;
+ struct device *dev = &adapter->pdev->dev;
+ unsigned int fragment_length = 0;
+ dma_addr_t dma_ptr;
+
+ fragment_length = skb_frag_size(fragment);
+ if (!fragment_length)
+ return 0;
+
+ /* wrap up previous descriptor */
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ tx_descriptor->data0 = tx->frame_data0;
+
+ /* move to next descriptor */
+ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ buffer_info = &tx->buffer_info[tx->frame_tail];
+ dma_ptr = skb_frag_dma_map(dev, fragment,
+ 0, fragment_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_ptr)) {
+ int desc_index;
+
+ /* cleanup all previously setup descriptors */
+ desc_index = tx->frame_first;
+ while (desc_index != tx->frame_tail) {
+ lan743x_tx_release_desc(tx, desc_index, true);
+ desc_index = lan743x_tx_next_index(tx, desc_index);
+ }
+ dma_wmb();
+ tx->frame_flags &= ~TX_FRAME_FLAG_IN_PROGRESS;
+ tx->frame_first = 0;
+ tx->frame_data0 = 0;
+ tx->frame_tail = 0;
+ return -ENOMEM;
+ }
+
+ tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr);
+ tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr);
+ tx_descriptor->data3 = (frame_length << 16) &
+ TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_;
+
+ buffer_info->skb = NULL;
+ buffer_info->dma_ptr = dma_ptr;
+ buffer_info->buffer_length = fragment_length;
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_SKB_FRAGMENT;
+
+ tx->frame_data0 = (fragment_length & TX_DESC_DATA0_BUF_LENGTH_MASK_) |
+ TX_DESC_DATA0_DTYPE_DATA_ |
+ TX_DESC_DATA0_FCS_;
+
+ /* data0 will be programmed in one of the other frame assembler functions */
+ return 0;
+}
+
+static void lan743x_tx_frame_end(struct lan743x_tx *tx,
+ struct sk_buff *skb,
+ bool ignore_sync)
+{
+ /* called only from within lan743x_tx_xmit_frame
+ * assuming tx->ring_lock has already been acquired
+ */
+ struct lan743x_tx_descriptor *tx_descriptor = NULL;
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+ struct lan743x_adapter *adapter = tx->adapter;
+ u32 tx_tail_flags = 0;
+
+ /* wrap up previous descriptor */
+ tx->frame_data0 |= TX_DESC_DATA0_LS_;
+ tx->frame_data0 |= TX_DESC_DATA0_IOC_;
+
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ buffer_info = &tx->buffer_info[tx->frame_tail];
+ buffer_info->skb = skb;
+ if (ignore_sync)
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC;
+
+ tx_descriptor->data0 = tx->frame_data0;
+ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+ tx->last_tail = tx->frame_tail;
+
+ dma_wmb();
+
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
+ tx_tail_flags |= TX_TAIL_SET_TOP_INT_VEC_EN_;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET)
+ tx_tail_flags |= TX_TAIL_SET_DMAC_INT_EN_ |
+ TX_TAIL_SET_TOP_INT_EN_;
+
+ lan743x_csr_write(adapter, TX_TAIL(tx->channel_number),
+ tx_tail_flags | tx->frame_tail);
+ tx->frame_flags &= ~TX_FRAME_FLAG_IN_PROGRESS;
+}
+
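+/* If the ring lacks room for the whole frame, the skb is parked in
+ * tx->overflow_skb and the queue is stopped; lan743x_tx_napi_poll resends
+ * it once enough descriptors have been released. Frames that can never
+ * fit (more descriptors than ring_size - 1) are dropped outright.
+ */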
+static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
+ struct sk_buff *skb)
+{
+ int required_number_of_descriptors = 0;
+ unsigned int start_frame_length = 0;
+ unsigned int frame_length = 0;
+ unsigned int head_length = 0;
+ unsigned long irq_flags = 0;
+ bool ignore_sync = false;
+ int nr_frags = 0;
+ bool gso = false;
+ int j;
+
+ required_number_of_descriptors = lan743x_tx_get_desc_cnt(tx, skb);
+
+ spin_lock_irqsave(&tx->ring_lock, irq_flags);
+ if (required_number_of_descriptors >
+ lan743x_tx_get_avail_desc(tx)) {
+ if (required_number_of_descriptors > (tx->ring_size - 1)) {
+ dev_kfree_skb(skb);
+ } else {
+ /* save to overflow buffer */
+ tx->overflow_skb = skb;
+ netif_stop_queue(tx->adapter->netdev);
+ }
+ goto unlock;
+ }
+
+ /* space available, transmit skb */
+ head_length = skb_headlen(skb);
+ frame_length = skb_pagelen(skb);
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ start_frame_length = frame_length;
+ gso = skb_is_gso(skb);
+ if (gso) {
+ start_frame_length = max(skb_shinfo(skb)->gso_size,
+ (unsigned short)8);
+ }
+
+ if (lan743x_tx_frame_start(tx,
+ skb->data, head_length,
+ start_frame_length,
+ skb->ip_summed == CHECKSUM_PARTIAL)) {
+ dev_kfree_skb(skb);
+ goto unlock;
+ }
+
+ if (gso)
+ lan743x_tx_frame_add_lso(tx, frame_length);
+
+ if (nr_frags <= 0)
+ goto finish;
+
+ for (j = 0; j < nr_frags; j++) {
+ const struct skb_frag_struct *frag;
+
+ frag = &(skb_shinfo(skb)->frags[j]);
+ if (lan743x_tx_frame_add_fragment(tx, frag, frame_length)) {
+ /* upon error there is no need to call
+ * lan743x_tx_frame_end; frame assembler
+ * clean up was already performed inside
+ * lan743x_tx_frame_add_fragment
+ */
+ dev_kfree_skb(skb);
+ goto unlock;
+ }
+ }
+
+finish:
+ lan743x_tx_frame_end(tx, skb, ignore_sync);
+
+unlock:
+ spin_unlock_irqrestore(&tx->ring_lock, irq_flags);
+ return NETDEV_TX_OK;
+}
+
+static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
+{
+ struct lan743x_tx *tx = container_of(napi, struct lan743x_tx, napi);
+ struct lan743x_adapter *adapter = tx->adapter;
+ bool start_transmitter = false;
+ unsigned long irq_flags = 0;
+ u32 ioc_bit = 0;
+ u32 int_sts = 0;
+
+ ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
+ int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C)
+ lan743x_csr_write(adapter, DMAC_INT_STS, ioc_bit);
+ spin_lock_irqsave(&tx->ring_lock, irq_flags);
+
+ /* clean up tx ring */
+ lan743x_tx_release_completed_descriptors(tx);
+ if (netif_queue_stopped(adapter->netdev)) {
+ if (tx->overflow_skb) {
+ if (lan743x_tx_get_desc_cnt(tx, tx->overflow_skb) <=
+ lan743x_tx_get_avail_desc(tx))
+ start_transmitter = true;
+ } else {
+ netif_wake_queue(adapter->netdev);
+ }
+ }
+ spin_unlock_irqrestore(&tx->ring_lock, irq_flags);
+
+ if (start_transmitter) {
+ /* space is now available, transmit overflow skb */
+ lan743x_tx_xmit_frame(tx, tx->overflow_skb);
+ tx->overflow_skb = NULL;
+ netif_wake_queue(adapter->netdev);
+ }
+
+ if (!napi_complete_done(napi, weight))
+ goto done;
+
+ /* enable isr */
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_TX_(tx->channel_number));
+ lan743x_csr_read(adapter, INT_STS);
+
+done:
+ return weight;
+}
+
+static void lan743x_tx_ring_cleanup(struct lan743x_tx *tx)
+{
+ if (tx->head_cpu_ptr) {
+ pci_free_consistent(tx->adapter->pdev,
+ sizeof(*tx->head_cpu_ptr),
+ (void *)(tx->head_cpu_ptr),
+ tx->head_dma_ptr);
+ tx->head_cpu_ptr = NULL;
+ tx->head_dma_ptr = 0;
+ }
+ kfree(tx->buffer_info);
+ tx->buffer_info = NULL;
+
+ if (tx->ring_cpu_ptr) {
+ pci_free_consistent(tx->adapter->pdev,
+ tx->ring_allocation_size,
+ tx->ring_cpu_ptr,
+ tx->ring_dma_ptr);
+ tx->ring_allocation_size = 0;
+ tx->ring_cpu_ptr = NULL;
+ tx->ring_dma_ptr = 0;
+ }
+ tx->ring_size = 0;
+}
+
+static int lan743x_tx_ring_init(struct lan743x_tx *tx)
+{
+ size_t ring_allocation_size = 0;
+ void *cpu_ptr = NULL;
+ dma_addr_t dma_ptr;
+ int ret = -ENOMEM;
+
+ tx->ring_size = LAN743X_TX_RING_SIZE;
+ if (tx->ring_size & ~TX_CFG_B_TX_RING_LEN_MASK_) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ ring_allocation_size = ALIGN(tx->ring_size *
+ sizeof(struct lan743x_tx_descriptor),
+ PAGE_SIZE);
+ dma_ptr = 0;
+ cpu_ptr = pci_zalloc_consistent(tx->adapter->pdev,
+ ring_allocation_size, &dma_ptr);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ tx->ring_allocation_size = ring_allocation_size;
+ tx->ring_cpu_ptr = (struct lan743x_tx_descriptor *)cpu_ptr;
+ tx->ring_dma_ptr = dma_ptr;
+
+ cpu_ptr = kcalloc(tx->ring_size, sizeof(*tx->buffer_info), GFP_KERNEL);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ tx->buffer_info = (struct lan743x_tx_buffer_info *)cpu_ptr;
+ dma_ptr = 0;
+ cpu_ptr = pci_zalloc_consistent(tx->adapter->pdev,
+ sizeof(*tx->head_cpu_ptr), &dma_ptr);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ tx->head_cpu_ptr = cpu_ptr;
+ tx->head_dma_ptr = dma_ptr;
+ if (tx->head_dma_ptr & 0x3) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ lan743x_tx_ring_cleanup(tx);
+ return ret;
+}
+
+static void lan743x_tx_close(struct lan743x_tx *tx)
+{
+ struct lan743x_adapter *adapter = tx->adapter;
+
+ lan743x_csr_write(adapter,
+ DMAC_CMD,
+ DMAC_CMD_STOP_T_(tx->channel_number));
+ lan743x_dmac_tx_wait_till_stopped(adapter, tx->channel_number);
+
+ lan743x_csr_write(adapter,
+ DMAC_INT_EN_CLR,
+ DMAC_INT_BIT_TX_IOC_(tx->channel_number));
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ INT_BIT_DMA_TX_(tx->channel_number));
+ napi_disable(&tx->napi);
+ netif_napi_del(&tx->napi);
+
+ lan743x_csr_write(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_DIS_(tx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_EN_(tx->channel_number),
+ 0, 1000, 20000, 100);
+
+ lan743x_tx_release_all_descriptors(tx);
+
+ if (tx->overflow_skb) {
+ dev_kfree_skb(tx->overflow_skb);
+ tx->overflow_skb = NULL;
+ }
+
+ lan743x_tx_ring_cleanup(tx);
+}
+
+static int lan743x_tx_open(struct lan743x_tx *tx)
+{
+ struct lan743x_adapter *adapter = NULL;
+ u32 data = 0;
+ int ret;
+
+ adapter = tx->adapter;
+ ret = lan743x_tx_ring_init(tx);
+ if (ret)
+ return ret;
+
+ /* initialize fifo */
+ lan743x_csr_write(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_RESET_(tx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_RESET_(tx->channel_number),
+ 0, 1000, 20000, 100);
+
+ /* enable fifo */
+ lan743x_csr_write(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_EN_(tx->channel_number));
+
+ /* reset tx channel */
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_TX_SWR_(tx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, DMAC_CMD,
+ DMAC_CMD_TX_SWR_(tx->channel_number),
+ 0, 1000, 20000, 100);
+
+ /* Write TX_BASE_ADDR */
+ lan743x_csr_write(adapter,
+ TX_BASE_ADDRH(tx->channel_number),
+ DMA_ADDR_HIGH32(tx->ring_dma_ptr));
+ lan743x_csr_write(adapter,
+ TX_BASE_ADDRL(tx->channel_number),
+ DMA_ADDR_LOW32(tx->ring_dma_ptr));
+
+ /* Write TX_CFG_B */
+ data = lan743x_csr_read(adapter, TX_CFG_B(tx->channel_number));
+ data &= ~TX_CFG_B_TX_RING_LEN_MASK_;
+ data |= ((tx->ring_size) & TX_CFG_B_TX_RING_LEN_MASK_);
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+ data |= TX_CFG_B_TDMABL_512_;
+ lan743x_csr_write(adapter, TX_CFG_B(tx->channel_number), data);
+
+ /* Write TX_CFG_A */
+ data = TX_CFG_A_TX_TMR_HPWB_SEL_IOC_ | TX_CFG_A_TX_HP_WB_EN_;
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+ data |= TX_CFG_A_TX_HP_WB_ON_INT_TMR_;
+ data |= TX_CFG_A_TX_PF_THRES_SET_(0x10);
+ data |= TX_CFG_A_TX_PF_PRI_THRES_SET_(0x04);
+ data |= TX_CFG_A_TX_HP_WB_THRES_SET_(0x07);
+ }
+ lan743x_csr_write(adapter, TX_CFG_A(tx->channel_number), data);
+
+ /* Write TX_HEAD_WRITEBACK_ADDR */
+ lan743x_csr_write(adapter,
+ TX_HEAD_WRITEBACK_ADDRH(tx->channel_number),
+ DMA_ADDR_HIGH32(tx->head_dma_ptr));
+ lan743x_csr_write(adapter,
+ TX_HEAD_WRITEBACK_ADDRL(tx->channel_number),
+ DMA_ADDR_LOW32(tx->head_dma_ptr));
+
+ /* set last head */
+ tx->last_head = lan743x_csr_read(adapter, TX_HEAD(tx->channel_number));
+
+ /* write TX_TAIL */
+ tx->last_tail = 0;
+ lan743x_csr_write(adapter, TX_TAIL(tx->channel_number),
+ (u32)(tx->last_tail));
+ tx->vector_flags = lan743x_intr_get_vector_flags(adapter,
+ INT_BIT_DMA_TX_
+ (tx->channel_number));
+ netif_napi_add(adapter->netdev,
+ &tx->napi, lan743x_tx_napi_poll,
+ tx->ring_size - 1);
+ napi_enable(&tx->napi);
+
+ data = 0;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR)
+ data |= TX_CFG_C_TX_TOP_INT_EN_AUTO_CLR_;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR)
+ data |= TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C)
+ data |= TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)
+ data |= TX_CFG_C_TX_INT_EN_R2C_;
+ lan743x_csr_write(adapter, TX_CFG_C(tx->channel_number), data);
+
+ if (!(tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET))
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_TX_(tx->channel_number));
+ lan743x_csr_write(adapter, DMAC_INT_EN_SET,
+ DMAC_INT_BIT_TX_IOC_(tx->channel_number));
+
+ /* start dmac channel */
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_START_T_(tx->channel_number));
+ return 0;
+}
+
+static int lan743x_rx_next_index(struct lan743x_rx *rx, int index)
+{
+ return ((++index) % rx->ring_size);
+}
+
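+/* RX buffers are sized for the largest possible frame, so every packet is
+ * expected to fit in a single descriptor (the multi-buffer path in
+ * lan743x_rx_process_packet only cleans up). RX_HEAD_PADDING is reserved
+ * at the head of the skb to match the padding the hardware inserts
+ * (RX_CFG_B_RX_PAD_2_), presumably keeping the IP header aligned.
+ */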
+static int lan743x_rx_allocate_ring_element(struct lan743x_rx *rx, int index)
+{
+ struct lan743x_rx_buffer_info *buffer_info;
+ struct lan743x_rx_descriptor *descriptor;
+ int length = 0;
+
+ length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING);
+ descriptor = &rx->ring_cpu_ptr[index];
+ buffer_info = &rx->buffer_info[index];
+ buffer_info->skb = __netdev_alloc_skb(rx->adapter->netdev,
+ length,
+ GFP_ATOMIC | GFP_DMA);
+ if (!(buffer_info->skb))
+ return -ENOMEM;
+ buffer_info->dma_ptr = dma_map_single(&rx->adapter->pdev->dev,
+ buffer_info->skb->data,
+ length,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(&rx->adapter->pdev->dev,
+ buffer_info->dma_ptr)) {
+ buffer_info->dma_ptr = 0;
+ return -ENOMEM;
+ }
+
+ buffer_info->buffer_length = length;
+ descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr);
+ descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr);
+ descriptor->data3 = 0;
+ descriptor->data0 = (RX_DESC_DATA0_OWN_ |
+ (length & RX_DESC_DATA0_BUF_LENGTH_MASK_));
+ skb_reserve(buffer_info->skb, RX_HEAD_PADDING);
+
+ return 0;
+}
+
+static void lan743x_rx_reuse_ring_element(struct lan743x_rx *rx, int index)
+{
+ struct lan743x_rx_buffer_info *buffer_info;
+ struct lan743x_rx_descriptor *descriptor;
+
+ descriptor = &rx->ring_cpu_ptr[index];
+ buffer_info = &rx->buffer_info[index];
+
+ descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr);
+ descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr);
+ descriptor->data3 = 0;
+ descriptor->data0 = (RX_DESC_DATA0_OWN_ |
+ ((buffer_info->buffer_length) &
+ RX_DESC_DATA0_BUF_LENGTH_MASK_));
+}
+
+static void lan743x_rx_release_ring_element(struct lan743x_rx *rx, int index)
+{
+ struct lan743x_rx_buffer_info *buffer_info;
+ struct lan743x_rx_descriptor *descriptor;
+
+ descriptor = &rx->ring_cpu_ptr[index];
+ buffer_info = &rx->buffer_info[index];
+
+ memset(descriptor, 0, sizeof(*descriptor));
+
+ if (buffer_info->dma_ptr) {
+ dma_unmap_single(&rx->adapter->pdev->dev,
+ buffer_info->dma_ptr,
+ buffer_info->buffer_length,
+ DMA_FROM_DEVICE);
+ buffer_info->dma_ptr = 0;
+ }
+
+ if (buffer_info->skb) {
+ dev_kfree_skb(buffer_info->skb);
+ buffer_info->skb = NULL;
+ }
+
+ memset(buffer_info, 0, sizeof(*buffer_info));
+}
+
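+/* Walks the ring from last_head toward the hardware head pointer, looking
+ * for a complete frame: a descriptor with FS set, a descriptor with LS
+ * set, and, when LS carries EXT, one extension descriptor holding the RX
+ * timestamp. Descriptors still flagged OWN belong to the hardware and
+ * stop the walk.
+ */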
+static int lan743x_rx_process_packet(struct lan743x_rx *rx)
+{
+ struct skb_shared_hwtstamps *hwtstamps = NULL;
+ int result = RX_PROCESS_RESULT_NOTHING_TO_DO;
+ struct lan743x_rx_buffer_info *buffer_info;
+ struct lan743x_rx_descriptor *descriptor;
+ int current_head_index = -1;
+ int extension_index = -1;
+ int first_index = -1;
+ int last_index = -1;
+
+ current_head_index = *rx->head_cpu_ptr;
+ if (current_head_index < 0 || current_head_index >= rx->ring_size)
+ goto done;
+
+ if (rx->last_head < 0 || rx->last_head >= rx->ring_size)
+ goto done;
+
+ if (rx->last_head != current_head_index) {
+ descriptor = &rx->ring_cpu_ptr[rx->last_head];
+ if (descriptor->data0 & RX_DESC_DATA0_OWN_)
+ goto done;
+
+ if (!(descriptor->data0 & RX_DESC_DATA0_FS_))
+ goto done;
+
+ first_index = rx->last_head;
+ if (descriptor->data0 & RX_DESC_DATA0_LS_) {
+ last_index = rx->last_head;
+ } else {
+ int index;
+
+ index = lan743x_rx_next_index(rx, first_index);
+ while (index != current_head_index) {
+ descriptor = &rx->ring_cpu_ptr[index];
+ if (descriptor->data0 & RX_DESC_DATA0_OWN_)
+ goto done;
+
+ if (descriptor->data0 & RX_DESC_DATA0_LS_) {
+ last_index = index;
+ break;
+ }
+ index = lan743x_rx_next_index(rx, index);
+ }
+ }
+ if (last_index >= 0) {
+ descriptor = &rx->ring_cpu_ptr[last_index];
+ if (descriptor->data0 & RX_DESC_DATA0_EXT_) {
+ /* extension is expected to follow */
+ int index = lan743x_rx_next_index(rx,
+ last_index);
+ if (index != current_head_index) {
+ descriptor = &rx->ring_cpu_ptr[index];
+ if (descriptor->data0 &
+ RX_DESC_DATA0_OWN_) {
+ goto done;
+ }
+ if (descriptor->data0 &
+ RX_DESC_DATA0_EXT_) {
+ extension_index = index;
+ } else {
+ goto done;
+ }
+ } else {
+ /* extension is not yet available */
+ /* prevent processing of this packet */
+ first_index = -1;
+ last_index = -1;
+ }
+ }
+ }
+ }
+ if (first_index >= 0 && last_index >= 0) {
+ int real_last_index = last_index;
+ struct sk_buff *skb = NULL;
+ u32 ts_sec = 0;
+ u32 ts_nsec = 0;
+
+ /* packet is available */
+ if (first_index == last_index) {
+ /* single buffer packet */
+ int packet_length;
+
+ buffer_info = &rx->buffer_info[first_index];
+ skb = buffer_info->skb;
+ descriptor = &rx->ring_cpu_ptr[first_index];
+
+ /* unmap from dma */
+ if (buffer_info->dma_ptr) {
+ dma_unmap_single(&rx->adapter->pdev->dev,
+ buffer_info->dma_ptr,
+ buffer_info->buffer_length,
+ DMA_FROM_DEVICE);
+ buffer_info->dma_ptr = 0;
+ buffer_info->buffer_length = 0;
+ }
+ buffer_info->skb = NULL;
+ packet_length = RX_DESC_DATA0_FRAME_LENGTH_GET_
+ (descriptor->data0);
+ skb_put(skb, packet_length - 4);
+ skb->protocol = eth_type_trans(skb,
+ rx->adapter->netdev);
+ lan743x_rx_allocate_ring_element(rx, first_index);
+ } else {
+ int index = first_index;
+
+ /* multi-buffer packet not supported; this should
+ * not happen, since buffers are allocated to be at
+ * least jumbo size
+ */
+
+ /* clean up buffers */
+ if (first_index <= last_index) {
+ while ((index >= first_index) &&
+ (index <= last_index)) {
+ lan743x_rx_release_ring_element(rx,
+ index);
+ lan743x_rx_allocate_ring_element(rx,
+ index);
+ index = lan743x_rx_next_index(rx,
+ index);
+ }
+ } else {
+ while ((index >= first_index) ||
+ (index <= last_index)) {
+ lan743x_rx_release_ring_element(rx,
+ index);
+ lan743x_rx_allocate_ring_element(rx,
+ index);
+ index = lan743x_rx_next_index(rx,
+ index);
+ }
+ }
+ }
+
+ if (extension_index >= 0) {
+ descriptor = &rx->ring_cpu_ptr[extension_index];
+ buffer_info = &rx->buffer_info[extension_index];
+
+ ts_sec = descriptor->data1;
+ ts_nsec = (descriptor->data2 &
+ RX_DESC_DATA2_TS_NS_MASK_);
+ lan743x_rx_reuse_ring_element(rx, extension_index);
+ real_last_index = extension_index;
+ }
+
+ if (!skb) {
+ result = RX_PROCESS_RESULT_PACKET_DROPPED;
+ goto move_forward;
+ }
+
+ if (extension_index < 0)
+ goto pass_packet_to_os;
+ hwtstamps = skb_hwtstamps(skb);
+ if (hwtstamps)
+ hwtstamps->hwtstamp = ktime_set(ts_sec, ts_nsec);
+
+pass_packet_to_os:
+ /* pass packet to OS */
+ napi_gro_receive(&rx->napi, skb);
+ result = RX_PROCESS_RESULT_PACKET_RECEIVED;
+
+move_forward:
+ /* push tail and head forward */
+ rx->last_tail = real_last_index;
+ rx->last_head = lan743x_rx_next_index(rx, real_last_index);
+ }
+done:
+ return result;
+}
+
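+/* NAPI budget accounting: only frames delivered to the stack count toward
+ * weight; dropped frames do not consume budget. Returning a count below
+ * weight allows napi_complete_done() to succeed and the interrupt to be
+ * re-armed.
+ */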
+static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight)
+{
+ struct lan743x_rx *rx = container_of(napi, struct lan743x_rx, napi);
+ struct lan743x_adapter *adapter = rx->adapter;
+ u32 rx_tail_flags = 0;
+ int count;
+
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C) {
+ /* clear int status bit before reading packet */
+ lan743x_csr_write(adapter, DMAC_INT_STS,
+ DMAC_INT_BIT_RXFRM_(rx->channel_number));
+ }
+ count = 0;
+ while (count < weight) {
+ int rx_process_result = -1;
+
+ rx_process_result = lan743x_rx_process_packet(rx);
+ if (rx_process_result == RX_PROCESS_RESULT_PACKET_RECEIVED) {
+ count++;
+ } else if (rx_process_result ==
+ RX_PROCESS_RESULT_NOTHING_TO_DO) {
+ break;
+ } else if (rx_process_result ==
+ RX_PROCESS_RESULT_PACKET_DROPPED) {
+ continue;
+ }
+ }
+ rx->frame_count += count;
+ if (count == weight)
+ goto done;
+
+ if (!napi_complete_done(napi, count))
+ goto done;
+
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
+ rx_tail_flags |= RX_TAIL_SET_TOP_INT_VEC_EN_;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET) {
+ rx_tail_flags |= RX_TAIL_SET_TOP_INT_EN_;
+ } else {
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ }
+
+ /* update RX_TAIL */
+ lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
+ rx_tail_flags | rx->last_tail);
+done:
+ return count;
+}
+
+static void lan743x_rx_ring_cleanup(struct lan743x_rx *rx)
+{
+ if (rx->buffer_info && rx->ring_cpu_ptr) {
+ int index;
+
+ for (index = 0; index < rx->ring_size; index++)
+ lan743x_rx_release_ring_element(rx, index);
+ }
+
+ if (rx->head_cpu_ptr) {
+ pci_free_consistent(rx->adapter->pdev,
+ sizeof(*rx->head_cpu_ptr),
+ rx->head_cpu_ptr,
+ rx->head_dma_ptr);
+ rx->head_cpu_ptr = NULL;
+ rx->head_dma_ptr = 0;
+ }
+
+ kfree(rx->buffer_info);
+ rx->buffer_info = NULL;
+
+ if (rx->ring_cpu_ptr) {
+ pci_free_consistent(rx->adapter->pdev,
+ rx->ring_allocation_size,
+ rx->ring_cpu_ptr,
+ rx->ring_dma_ptr);
+ rx->ring_allocation_size = 0;
+ rx->ring_cpu_ptr = NULL;
+ rx->ring_dma_ptr = 0;
+ }
+
+ rx->ring_size = 0;
+ rx->last_head = 0;
+}
+
+static int lan743x_rx_ring_init(struct lan743x_rx *rx)
+{
+ size_t ring_allocation_size = 0;
+ dma_addr_t dma_ptr = 0;
+ void *cpu_ptr = NULL;
+ int ret = -ENOMEM;
+ int index = 0;
+
+ rx->ring_size = LAN743X_RX_RING_SIZE;
+ if (rx->ring_size <= 1) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ if (rx->ring_size & ~RX_CFG_B_RX_RING_LEN_MASK_) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ ring_allocation_size = ALIGN(rx->ring_size *
+ sizeof(struct lan743x_rx_descriptor),
+ PAGE_SIZE);
+ dma_ptr = 0;
+ cpu_ptr = pci_zalloc_consistent(rx->adapter->pdev,
+ ring_allocation_size, &dma_ptr);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ rx->ring_allocation_size = ring_allocation_size;
+ rx->ring_cpu_ptr = (struct lan743x_rx_descriptor *)cpu_ptr;
+ rx->ring_dma_ptr = dma_ptr;
+
+ cpu_ptr = kcalloc(rx->ring_size, sizeof(*rx->buffer_info),
+ GFP_KERNEL);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ rx->buffer_info = (struct lan743x_rx_buffer_info *)cpu_ptr;
+ dma_ptr = 0;
+ cpu_ptr = pci_zalloc_consistent(rx->adapter->pdev,
+ sizeof(*rx->head_cpu_ptr), &dma_ptr);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ rx->head_cpu_ptr = cpu_ptr;
+ rx->head_dma_ptr = dma_ptr;
+ if (rx->head_dma_ptr & 0x3) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ rx->last_head = 0;
+ for (index = 0; index < rx->ring_size; index++) {
+ ret = lan743x_rx_allocate_ring_element(rx, index);
+ if (ret)
+ goto cleanup;
+ }
+ return 0;
+
+cleanup:
+ lan743x_rx_ring_cleanup(rx);
+ return ret;
+}
+
+static void lan743x_rx_close(struct lan743x_rx *rx)
+{
+ struct lan743x_adapter *adapter = rx->adapter;
+
+ lan743x_csr_write(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_DIS_(rx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_EN_(rx->channel_number),
+ 0, 1000, 20000, 100);
+
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_STOP_R_(rx->channel_number));
+ lan743x_dmac_rx_wait_till_stopped(adapter, rx->channel_number);
+
+ lan743x_csr_write(adapter, DMAC_INT_EN_CLR,
+ DMAC_INT_BIT_RXFRM_(rx->channel_number));
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ napi_disable(&rx->napi);
+
+ netif_napi_del(&rx->napi);
+
+ lan743x_rx_ring_cleanup(rx);
+}
+
+static int lan743x_rx_open(struct lan743x_rx *rx)
+{
+ struct lan743x_adapter *adapter = rx->adapter;
+ u32 data = 0;
+ int ret;
+
+ rx->frame_count = 0;
+ ret = lan743x_rx_ring_init(rx);
+ if (ret)
+ goto return_error;
+
+ netif_napi_add(adapter->netdev,
+ &rx->napi, lan743x_rx_napi_poll,
+ rx->ring_size - 1);
+
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_RX_SWR_(rx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, DMAC_CMD,
+ DMAC_CMD_RX_SWR_(rx->channel_number),
+ 0, 1000, 20000, 100);
+
+ /* set ring base address */
+ lan743x_csr_write(adapter,
+ RX_BASE_ADDRH(rx->channel_number),
+ DMA_ADDR_HIGH32(rx->ring_dma_ptr));
+ lan743x_csr_write(adapter,
+ RX_BASE_ADDRL(rx->channel_number),
+ DMA_ADDR_LOW32(rx->ring_dma_ptr));
+
+ /* set rx write back address */
+ lan743x_csr_write(adapter,
+ RX_HEAD_WRITEBACK_ADDRH(rx->channel_number),
+ DMA_ADDR_HIGH32(rx->head_dma_ptr));
+ lan743x_csr_write(adapter,
+ RX_HEAD_WRITEBACK_ADDRL(rx->channel_number),
+ DMA_ADDR_LOW32(rx->head_dma_ptr));
+ data = RX_CFG_A_RX_HP_WB_EN_;
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+ data |= (RX_CFG_A_RX_WB_ON_INT_TMR_ |
+ RX_CFG_A_RX_WB_THRES_SET_(0x7) |
+ RX_CFG_A_RX_PF_THRES_SET_(16) |
+ RX_CFG_A_RX_PF_PRI_THRES_SET_(4));
+ }
+
+ /* set RX_CFG_A */
+ lan743x_csr_write(adapter,
+ RX_CFG_A(rx->channel_number), data);
+
+ /* set RX_CFG_B */
+ data = lan743x_csr_read(adapter, RX_CFG_B(rx->channel_number));
+ data &= ~RX_CFG_B_RX_PAD_MASK_;
+ if (!RX_HEAD_PADDING)
+ data |= RX_CFG_B_RX_PAD_0_;
+ else
+ data |= RX_CFG_B_RX_PAD_2_;
+ data &= ~RX_CFG_B_RX_RING_LEN_MASK_;
+ data |= ((rx->ring_size) & RX_CFG_B_RX_RING_LEN_MASK_);
+ data |= RX_CFG_B_TS_ALL_RX_;
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+ data |= RX_CFG_B_RDMABL_512_;
+
+ lan743x_csr_write(adapter, RX_CFG_B(rx->channel_number), data);
+ rx->vector_flags = lan743x_intr_get_vector_flags(adapter,
+ INT_BIT_DMA_RX_
+ (rx->channel_number));
+
+ /* set RX_CFG_C */
+ data = 0;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR)
+ data |= RX_CFG_C_RX_TOP_INT_EN_AUTO_CLR_;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR)
+ data |= RX_CFG_C_RX_DMA_INT_STS_AUTO_CLR_;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C)
+ data |= RX_CFG_C_RX_INT_STS_R2C_MODE_MASK_;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)
+ data |= RX_CFG_C_RX_INT_EN_R2C_;
+ lan743x_csr_write(adapter, RX_CFG_C(rx->channel_number), data);
+
+ rx->last_tail = ((u32)(rx->ring_size - 1));
+ lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
+ rx->last_tail);
+ rx->last_head = lan743x_csr_read(adapter, RX_HEAD(rx->channel_number));
+ if (rx->last_head) {
+ ret = -EIO;
+ goto napi_delete;
+ }
+
+ napi_enable(&rx->napi);
+
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ lan743x_csr_write(adapter, DMAC_INT_STS,
+ DMAC_INT_BIT_RXFRM_(rx->channel_number));
+ lan743x_csr_write(adapter, DMAC_INT_EN_SET,
+ DMAC_INT_BIT_RXFRM_(rx->channel_number));
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_START_R_(rx->channel_number));
+
+ /* initialize fifo */
+ lan743x_csr_write(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_RESET_(rx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_RESET_(rx->channel_number),
+ 0, 1000, 20000, 100);
+ lan743x_csr_write(adapter, FCT_FLOW(rx->channel_number),
+ FCT_FLOW_CTL_REQ_EN_ |
+ FCT_FLOW_CTL_ON_THRESHOLD_SET_(0x2A) |
+ FCT_FLOW_CTL_OFF_THRESHOLD_SET_(0xA));
+
+ /* enable fifo */
+ lan743x_csr_write(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_EN_(rx->channel_number));
+ return 0;
+
+napi_delete:
+ netif_napi_del(&rx->napi);
+ lan743x_rx_ring_cleanup(rx);
+
+return_error:
+ return ret;
+}
+
+static int lan743x_netdev_close(struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ int index;
+
+ lan743x_tx_close(&adapter->tx[0]);
+
+ for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++)
+ lan743x_rx_close(&adapter->rx[index]);
+
+ lan743x_phy_close(adapter);
+
+ lan743x_mac_close(adapter);
+
+ lan743x_intr_close(adapter);
+
+ return 0;
+}
+
+static int lan743x_netdev_open(struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ int index;
+ int ret;
+
+ ret = lan743x_intr_open(adapter);
+ if (ret)
+ goto return_error;
+
+ ret = lan743x_mac_open(adapter);
+ if (ret)
+ goto close_intr;
+
+ ret = lan743x_phy_open(adapter);
+ if (ret)
+ goto close_mac;
+
+ for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+ ret = lan743x_rx_open(&adapter->rx[index]);
+ if (ret)
+ goto close_rx;
+ }
+
+ ret = lan743x_tx_open(&adapter->tx[0]);
+ if (ret)
+ goto close_rx;
+
+ return 0;
+
+close_rx:
+ for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+ if (adapter->rx[index].ring_cpu_ptr)
+ lan743x_rx_close(&adapter->rx[index]);
+ }
+ lan743x_phy_close(adapter);
+
+close_mac:
+ lan743x_mac_close(adapter);
+
+close_intr:
+ lan743x_intr_close(adapter);
+
+return_error:
+ netif_warn(adapter, ifup, adapter->netdev,
+ "Error opening LAN743x\n");
+ return ret;
+}
+
+static netdev_tx_t lan743x_netdev_xmit_frame(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ return lan743x_tx_xmit_frame(&adapter->tx[0], skb);
+}
+
+static int lan743x_netdev_ioctl(struct net_device *netdev,
+ struct ifreq *ifr, int cmd)
+{
+ if (!netif_running(netdev))
+ return -EINVAL;
+ return phy_mii_ioctl(netdev->phydev, ifr, cmd);
+}
+
+static void lan743x_netdev_set_multicast(struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ lan743x_rfe_set_multicast(adapter);
+}
+
+static int lan743x_netdev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ int ret = 0;
+
+ ret = lan743x_mac_set_mtu(adapter, new_mtu);
+ if (!ret)
+ netdev->mtu = new_mtu;
+ return ret;
+}
+
+static void lan743x_netdev_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ stats->rx_packets = lan743x_csr_read(adapter, STAT_RX_TOTAL_FRAMES);
+ stats->tx_packets = lan743x_csr_read(adapter, STAT_TX_TOTAL_FRAMES);
+ stats->rx_bytes = lan743x_csr_read(adapter,
+ STAT_RX_UNICAST_BYTE_COUNT) +
+ lan743x_csr_read(adapter,
+ STAT_RX_BROADCAST_BYTE_COUNT) +
+ lan743x_csr_read(adapter,
+ STAT_RX_MULTICAST_BYTE_COUNT);
+ stats->tx_bytes = lan743x_csr_read(adapter,
+ STAT_TX_UNICAST_BYTE_COUNT) +
+ lan743x_csr_read(adapter,
+ STAT_TX_BROADCAST_BYTE_COUNT) +
+ lan743x_csr_read(adapter,
+ STAT_TX_MULTICAST_BYTE_COUNT);
+ stats->rx_errors = lan743x_csr_read(adapter, STAT_RX_FCS_ERRORS) +
+ lan743x_csr_read(adapter,
+ STAT_RX_ALIGNMENT_ERRORS) +
+ lan743x_csr_read(adapter, STAT_RX_JABBER_ERRORS) +
+ lan743x_csr_read(adapter,
+ STAT_RX_UNDERSIZE_FRAME_ERRORS) +
+ lan743x_csr_read(adapter,
+ STAT_RX_OVERSIZE_FRAME_ERRORS);
+ stats->tx_errors = lan743x_csr_read(adapter, STAT_TX_FCS_ERRORS) +
+ lan743x_csr_read(adapter,
+ STAT_TX_EXCESS_DEFERRAL_ERRORS) +
+ lan743x_csr_read(adapter, STAT_TX_CARRIER_ERRORS);
+ stats->rx_dropped = lan743x_csr_read(adapter,
+ STAT_RX_DROPPED_FRAMES);
+ stats->tx_dropped = lan743x_csr_read(adapter,
+ STAT_TX_EXCESSIVE_COLLISION);
+ stats->multicast = lan743x_csr_read(adapter,
+ STAT_RX_MULTICAST_FRAMES) +
+ lan743x_csr_read(adapter,
+ STAT_TX_MULTICAST_FRAMES);
+ stats->collisions = lan743x_csr_read(adapter,
+ STAT_TX_SINGLE_COLLISIONS) +
+ lan743x_csr_read(adapter,
+ STAT_TX_MULTIPLE_COLLISIONS) +
+ lan743x_csr_read(adapter,
+ STAT_TX_LATE_COLLISIONS);
+}
+
+static int lan743x_netdev_set_mac_address(struct net_device *netdev,
+ void *addr)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ struct sockaddr *sock_addr = addr;
+ int ret;
+
+ ret = eth_prepare_mac_addr_change(netdev, sock_addr);
+ if (ret)
+ return ret;
+ ether_addr_copy(netdev->dev_addr, sock_addr->sa_data);
+ lan743x_mac_set_address(adapter, sock_addr->sa_data);
+ lan743x_rfe_update_mac_address(adapter);
+ return 0;
+}
+
+static const struct net_device_ops lan743x_netdev_ops = {
+ .ndo_open = lan743x_netdev_open,
+ .ndo_stop = lan743x_netdev_close,
+ .ndo_start_xmit = lan743x_netdev_xmit_frame,
+ .ndo_do_ioctl = lan743x_netdev_ioctl,
+ .ndo_set_rx_mode = lan743x_netdev_set_multicast,
+ .ndo_change_mtu = lan743x_netdev_change_mtu,
+ .ndo_get_stats64 = lan743x_netdev_get_stats64,
+ .ndo_set_mac_address = lan743x_netdev_set_mac_address,
+};
+
+static void lan743x_hardware_cleanup(struct lan743x_adapter *adapter)
+{
+ lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
+}
+
+static void lan743x_mdiobus_cleanup(struct lan743x_adapter *adapter)
+{
+ mdiobus_unregister(adapter->mdiobus);
+}
+
+static void lan743x_full_cleanup(struct lan743x_adapter *adapter)
+{
+ unregister_netdev(adapter->netdev);
+
+ lan743x_mdiobus_cleanup(adapter);
+ lan743x_hardware_cleanup(adapter);
+ lan743x_pci_cleanup(adapter);
+}
+
+static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+ struct pci_dev *pdev)
+{
+ struct lan743x_tx *tx;
+ int index;
+ int ret;
+
+ adapter->intr.irq = adapter->pdev->irq;
+ lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
+ mutex_init(&adapter->dp_lock);
+ ret = lan743x_mac_init(adapter);
+ if (ret)
+ return ret;
+
+ ret = lan743x_phy_init(adapter);
+ if (ret)
+ return ret;
+
+ lan743x_rfe_update_mac_address(adapter);
+
+ ret = lan743x_dmac_init(adapter);
+ if (ret)
+ return ret;
+
+ for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+ adapter->rx[index].adapter = adapter;
+ adapter->rx[index].channel_number = index;
+ }
+
+ tx = &adapter->tx[0];
+ tx->adapter = adapter;
+ tx->channel_number = 0;
+ spin_lock_init(&tx->ring_lock);
+ return 0;
+}
+
+static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
+{
+ int ret;
+
+ adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev);
+ if (!(adapter->mdiobus)) {
+ ret = -ENOMEM;
+ goto return_error;
+ }
+
+ adapter->mdiobus->priv = (void *)adapter;
+ adapter->mdiobus->read = lan743x_mdiobus_read;
+ adapter->mdiobus->write = lan743x_mdiobus_write;
+ adapter->mdiobus->name = "lan743x-mdiobus";
+ snprintf(adapter->mdiobus->id, MII_BUS_ID_SIZE,
+ "pci-%s", pci_name(adapter->pdev));
+
+ /* A set bit in phy_mask suppresses probing of that address, so
+ * ~BIT(1) restricts the bus scan to address 1, the internal PHY.
+ */
+ adapter->mdiobus->phy_mask = ~(u32)BIT(1);
+
+ /* register mdiobus */
+ ret = mdiobus_register(adapter->mdiobus);
+ if (ret < 0)
+ goto return_error;
+ return 0;
+
+return_error:
+ return ret;
+}
+
+/**
+ * lan743x_pcidev_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @id: entry in lan743x_pci_tbl
+ *
+ * Returns 0 on success, negative on failure.
+ *
+ * Initializes an adapter identified by a pci_dev structure; performs
+ * OS initialization, configures the adapter private structure, and
+ * resets the hardware.
+ **/
+static int lan743x_pcidev_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct lan743x_adapter *adapter = NULL;
+ struct net_device *netdev = NULL;
+ int ret = -ENODEV;
+
+ netdev = devm_alloc_etherdev(&pdev->dev,
+ sizeof(struct lan743x_adapter));
+ if (!netdev)
+ goto return_error;
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+ pci_set_drvdata(pdev, netdev);
+ adapter = netdev_priv(netdev);
+ adapter->netdev = netdev;
+ adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE |
+ NETIF_MSG_LINK | NETIF_MSG_IFUP |
+ NETIF_MSG_IFDOWN | NETIF_MSG_TX_QUEUED;
+ netdev->max_mtu = LAN743X_MAX_FRAME_SIZE;
+
+ ret = lan743x_pci_init(adapter, pdev);
+ if (ret)
+ goto return_error;
+
+ ret = lan743x_csr_init(adapter);
+ if (ret)
+ goto cleanup_pci;
+
+ ret = lan743x_hardware_init(adapter, pdev);
+ if (ret)
+ goto cleanup_pci;
+
+ ret = lan743x_mdiobus_init(adapter);
+ if (ret)
+ goto cleanup_hardware;
+
+ adapter->netdev->netdev_ops = &lan743x_netdev_ops;
+ adapter->netdev->features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM;
+ adapter->netdev->hw_features = adapter->netdev->features;
+
+ /* carrier off reporting is important to ethtool even BEFORE open */
+ netif_carrier_off(netdev);
+
+ ret = register_netdev(adapter->netdev);
+ if (ret < 0)
+ goto cleanup_mdiobus;
+ return 0;
+
+cleanup_mdiobus:
+ lan743x_mdiobus_cleanup(adapter);
+
+cleanup_hardware:
+ lan743x_hardware_cleanup(adapter);
+
+cleanup_pci:
+ lan743x_pci_cleanup(adapter);
+
+return_error:
+ pr_warn("Initialization failed\n");
+ return ret;
+}
+
+/**
+ * lan743x_pcidev_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * This is called by the PCI subsystem to alert the driver that it
+ * should release a PCI device. This could be caused by a hot-plug
+ * event, or because the driver is going to be removed from memory.
+ **/
+static void lan743x_pcidev_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ lan743x_full_cleanup(adapter);
+}
+
+static void lan743x_pcidev_shutdown(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ rtnl_lock();
+ netif_device_detach(netdev);
+
+ /* Close the netdev only when it is in the running state. For
+ * instance, this is true when the system sleeps via pm-suspend, but
+ * false when it sleeps via the suspend GUI menu.
+ */
+ if (netif_running(netdev))
+ lan743x_netdev_close(netdev);
+ rtnl_unlock();
+
+ /* clean up lan743x portion */
+ lan743x_hardware_cleanup(adapter);
+}
+
+static const struct pci_device_id lan743x_pcidev_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
+ { 0, }
+};
+
+static struct pci_driver lan743x_pcidev_driver = {
+ .name = DRIVER_NAME,
+ .id_table = lan743x_pcidev_tbl,
+ .probe = lan743x_pcidev_probe,
+ .remove = lan743x_pcidev_remove,
+ .shutdown = lan743x_pcidev_shutdown,
+};
+
+module_pci_driver(lan743x_pcidev_driver);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
new file mode 100644
index 000000000000..73b463a9df61
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -0,0 +1,597 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#ifndef _LAN743X_H
+#define _LAN743X_H
+
+#define DRIVER_AUTHOR "Bryan Whitehead <[email protected]>"
+#define DRIVER_DESC "LAN743x PCIe Gigabit Ethernet Driver"
+#define DRIVER_NAME "lan743x"
+
+/* Register Definitions */
+#define ID_REV (0x00)
+#define ID_REV_IS_VALID_CHIP_ID_(id_rev) \
+ (((id_rev) & 0xFFF00000) == 0x74300000)
+#define ID_REV_CHIP_REV_MASK_ (0x0000FFFF)
+#define ID_REV_CHIP_REV_A0_ (0x00000000)
+#define ID_REV_CHIP_REV_B0_ (0x00000010)
+
+#define FPGA_REV (0x04)
+#define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF)
+#define FPGA_REV_GET_MAJOR_(fpga_rev) ((fpga_rev) & 0x000000FF)
+
+#define HW_CFG (0x010)
+#define HW_CFG_LRST_ BIT(1)
+
+#define PMT_CTL (0x014)
+#define PMT_CTL_READY_ BIT(7)
+#define PMT_CTL_ETH_PHY_RST_ BIT(4)
+
+#define DP_SEL (0x024)
+#define DP_SEL_DPRDY_ BIT(31)
+#define DP_SEL_MASK_ (0x0000001F)
+#define DP_SEL_RFE_RAM (0x00000001)
+
+#define DP_SEL_VHF_HASH_LEN (16)
+#define DP_SEL_VHF_VLAN_LEN (128)
+
+#define DP_CMD (0x028)
+#define DP_CMD_WRITE_ (0x00000001)
+
+#define DP_ADDR (0x02C)
+
+#define DP_DATA_0 (0x030)
+
+#define FCT_RX_CTL (0xAC)
+#define FCT_RX_CTL_EN_(channel) BIT(28 + (channel))
+#define FCT_RX_CTL_DIS_(channel) BIT(24 + (channel))
+#define FCT_RX_CTL_RESET_(channel) BIT(20 + (channel))
+
+#define FCT_TX_CTL (0xC4)
+#define FCT_TX_CTL_EN_(channel) BIT(28 + (channel))
+#define FCT_TX_CTL_DIS_(channel) BIT(24 + (channel))
+#define FCT_TX_CTL_RESET_(channel) BIT(20 + (channel))
+
+#define FCT_FLOW(rx_channel) (0xE0 + ((rx_channel) << 2))
+#define FCT_FLOW_CTL_OFF_THRESHOLD_ (0x00007F00)
+#define FCT_FLOW_CTL_OFF_THRESHOLD_SET_(value) \
+ (((value) << 8) & FCT_FLOW_CTL_OFF_THRESHOLD_)
+#define FCT_FLOW_CTL_REQ_EN_ BIT(7)
+#define FCT_FLOW_CTL_ON_THRESHOLD_ (0x0000007F)
+#define FCT_FLOW_CTL_ON_THRESHOLD_SET_(value) \
+ (((value) << 0) & FCT_FLOW_CTL_ON_THRESHOLD_)
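+
+/* Worked example: FCT_FLOW_CTL_OFF_THRESHOLD_SET_(0x10) ==
+ * (0x10 << 8) & 0x7F00 == 0x1000; values wider than 7 bits are
+ * silently truncated by the mask.
+ */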
+
+#define MAC_CR (0x100)
+#define MAC_CR_ADD_ BIT(12)
+#define MAC_CR_ASD_ BIT(11)
+#define MAC_CR_CNTR_RST_ BIT(5)
+#define MAC_CR_RST_ BIT(0)
+
+#define MAC_RX (0x104)
+#define MAC_RX_MAX_SIZE_SHIFT_ (16)
+#define MAC_RX_MAX_SIZE_MASK_ (0x3FFF0000)
+#define MAC_RX_RXD_ BIT(1)
+#define MAC_RX_RXEN_ BIT(0)
+
+#define MAC_TX (0x108)
+#define MAC_TX_TXD_ BIT(1)
+#define MAC_TX_TXEN_ BIT(0)
+
+#define MAC_FLOW (0x10C)
+#define MAC_FLOW_CR_TX_FCEN_ BIT(30)
+#define MAC_FLOW_CR_RX_FCEN_ BIT(29)
+#define MAC_FLOW_CR_FCPT_MASK_ (0x0000FFFF)
+
+#define MAC_RX_ADDRH (0x118)
+
+#define MAC_RX_ADDRL (0x11C)
+
+#define MAC_MII_ACC (0x120)
+#define MAC_MII_ACC_PHY_ADDR_SHIFT_ (11)
+#define MAC_MII_ACC_PHY_ADDR_MASK_ (0x0000F800)
+#define MAC_MII_ACC_MIIRINDA_SHIFT_ (6)
+#define MAC_MII_ACC_MIIRINDA_MASK_ (0x000007C0)
+#define MAC_MII_ACC_MII_READ_ (0x00000000)
+#define MAC_MII_ACC_MII_WRITE_ (0x00000002)
+#define MAC_MII_ACC_MII_BUSY_ BIT(0)
+
+#define MAC_MII_DATA (0x124)
+
+/* offset 0x400 - 0x500, x may range from 0 to 32, for a total of 33 entries */
+#define RFE_ADDR_FILT_HI(x) (0x400 + (8 * (x)))
+#define RFE_ADDR_FILT_HI_VALID_ BIT(31)
+
+/* offset 0x404 - 0x504, x may range from 0 to 32, for a total of 33 entries */
+#define RFE_ADDR_FILT_LO(x) (0x404 + (8 * (x)))
+
+#define RFE_CTL (0x508)
+#define RFE_CTL_AB_ BIT(10)
+#define RFE_CTL_AM_ BIT(9)
+#define RFE_CTL_AU_ BIT(8)
+#define RFE_CTL_MCAST_HASH_ BIT(3)
+#define RFE_CTL_DA_PERFECT_ BIT(1)
+
+#define INT_STS (0x780)
+#define INT_BIT_DMA_RX_(channel) BIT(24 + (channel))
+#define INT_BIT_ALL_RX_ (0x0F000000)
+#define INT_BIT_DMA_TX_(channel) BIT(16 + (channel))
+#define INT_BIT_ALL_TX_ (0x000F0000)
+#define INT_BIT_SW_GP_ BIT(9)
+#define INT_BIT_ALL_OTHER_ (0x00000280)
+#define INT_BIT_MAS_ BIT(0)
+
+#define INT_SET (0x784)
+
+#define INT_EN_SET (0x788)
+
+#define INT_EN_CLR (0x78C)
+
+#define INT_STS_R2C (0x790)
+
+#define INT_VEC_EN_SET (0x794)
+#define INT_VEC_EN_CLR (0x798)
+#define INT_VEC_EN_AUTO_CLR (0x79C)
+#define INT_VEC_EN_(vector_index) BIT(0 + (vector_index))
+
+#define INT_VEC_MAP0 (0x7A0)
+#define INT_VEC_MAP0_RX_VEC_(channel, vector) \
+ (((u32)(vector)) << ((channel) << 2))
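+
+/* Worked example: routing RX channel 2 to MSI-X vector 3 gives
+ * INT_VEC_MAP0_RX_VEC_(2, 3) == 3 << (2 << 2) == 0x00000300, i.e.
+ * each channel occupies its own 4-bit nibble of the map register.
+ */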
+
+#define INT_VEC_MAP1 (0x7A4)
+#define INT_VEC_MAP1_TX_VEC_(channel, vector) \
+ (((u32)(vector)) << ((channel) << 2))
+
+#define INT_VEC_MAP2 (0x7A8)
+
+#define INT_MOD_MAP0 (0x7B0)
+
+#define INT_MOD_MAP1 (0x7B4)
+
+#define INT_MOD_MAP2 (0x7B8)
+
+#define INT_MOD_CFG0 (0x7C0)
+#define INT_MOD_CFG1 (0x7C4)
+#define INT_MOD_CFG2 (0x7C8)
+#define INT_MOD_CFG3 (0x7CC)
+#define INT_MOD_CFG4 (0x7D0)
+#define INT_MOD_CFG5 (0x7D4)
+#define INT_MOD_CFG6 (0x7D8)
+#define INT_MOD_CFG7 (0x7DC)
+
+#define DMAC_CFG (0xC00)
+#define DMAC_CFG_COAL_EN_ BIT(16)
+#define DMAC_CFG_CH_ARB_SEL_RX_HIGH_ (0x00000000)
+#define DMAC_CFG_MAX_READ_REQ_MASK_ (0x00000070)
+#define DMAC_CFG_MAX_READ_REQ_SET_(val) \
+ ((((u32)(val)) << 4) & DMAC_CFG_MAX_READ_REQ_MASK_)
+#define DMAC_CFG_MAX_DSPACE_16_ (0x00000000)
+#define DMAC_CFG_MAX_DSPACE_32_ (0x00000001)
+#define DMAC_CFG_MAX_DSPACE_64_ (0x00000002)
+#define DMAC_CFG_MAX_DSPACE_128_ (0x00000003)
+
+#define DMAC_COAL_CFG (0xC04)
+#define DMAC_COAL_CFG_TIMER_LIMIT_MASK_ (0xFFF00000)
+#define DMAC_COAL_CFG_TIMER_LIMIT_SET_(val) \
+ ((((u32)(val)) << 20) & DMAC_COAL_CFG_TIMER_LIMIT_MASK_)
+#define DMAC_COAL_CFG_TIMER_TX_START_ BIT(19)
+#define DMAC_COAL_CFG_FLUSH_INTS_ BIT(18)
+#define DMAC_COAL_CFG_INT_EXIT_COAL_ BIT(17)
+#define DMAC_COAL_CFG_CSR_EXIT_COAL_ BIT(16)
+#define DMAC_COAL_CFG_TX_THRES_MASK_ (0x0000FF00)
+#define DMAC_COAL_CFG_TX_THRES_SET_(val) \
+ ((((u32)(val)) << 8) & DMAC_COAL_CFG_TX_THRES_MASK_)
+#define DMAC_COAL_CFG_RX_THRES_MASK_ (0x000000FF)
+#define DMAC_COAL_CFG_RX_THRES_SET_(val) \
+ (((u32)(val)) & DMAC_COAL_CFG_RX_THRES_MASK_)
+
+#define DMAC_OBFF_CFG (0xC08)
+#define DMAC_OBFF_TX_THRES_MASK_ (0x0000FF00)
+#define DMAC_OBFF_TX_THRES_SET_(val) \
+ ((((u32)(val)) << 8) & DMAC_OBFF_TX_THRES_MASK_)
+#define DMAC_OBFF_RX_THRES_MASK_ (0x000000FF)
+#define DMAC_OBFF_RX_THRES_SET_(val) \
+ (((u32)(val)) & DMAC_OBFF_RX_THRES_MASK_)
+
+#define DMAC_CMD (0xC0C)
+#define DMAC_CMD_SWR_ BIT(31)
+#define DMAC_CMD_TX_SWR_(channel) BIT(24 + (channel))
+#define DMAC_CMD_START_T_(channel) BIT(20 + (channel))
+#define DMAC_CMD_STOP_T_(channel) BIT(16 + (channel))
+#define DMAC_CMD_RX_SWR_(channel) BIT(8 + (channel))
+#define DMAC_CMD_START_R_(channel) BIT(4 + (channel))
+#define DMAC_CMD_STOP_R_(channel) BIT(0 + (channel))
+
+#define DMAC_INT_STS (0xC10)
+#define DMAC_INT_EN_SET (0xC14)
+#define DMAC_INT_EN_CLR (0xC18)
+#define DMAC_INT_BIT_RXFRM_(channel) BIT(16 + (channel))
+#define DMAC_INT_BIT_TX_IOC_(channel) BIT(0 + (channel))
+
+#define RX_CFG_A(channel) (0xC40 + ((channel) << 6))
+#define RX_CFG_A_RX_WB_ON_INT_TMR_ BIT(30)
+#define RX_CFG_A_RX_WB_THRES_MASK_ (0x1F000000)
+#define RX_CFG_A_RX_WB_THRES_SET_(val) \
+ ((((u32)(val)) << 24) & RX_CFG_A_RX_WB_THRES_MASK_)
+#define RX_CFG_A_RX_PF_THRES_MASK_ (0x001F0000)
+#define RX_CFG_A_RX_PF_THRES_SET_(val) \
+ ((((u32)(val)) << 16) & RX_CFG_A_RX_PF_THRES_MASK_)
+#define RX_CFG_A_RX_PF_PRI_THRES_MASK_ (0x00001F00)
+#define RX_CFG_A_RX_PF_PRI_THRES_SET_(val) \
+ ((((u32)(val)) << 8) & RX_CFG_A_RX_PF_PRI_THRES_MASK_)
+#define RX_CFG_A_RX_HP_WB_EN_ BIT(5)
+
+#define RX_CFG_B(channel) (0xC44 + ((channel) << 6))
+#define RX_CFG_B_TS_ALL_RX_ BIT(29)
+#define RX_CFG_B_RX_PAD_MASK_ (0x03000000)
+#define RX_CFG_B_RX_PAD_0_ (0x00000000)
+#define RX_CFG_B_RX_PAD_2_ (0x02000000)
+#define RX_CFG_B_RDMABL_512_ (0x00040000)
+#define RX_CFG_B_RX_RING_LEN_MASK_ (0x0000FFFF)
+
+#define RX_BASE_ADDRH(channel) (0xC48 + ((channel) << 6))
+
+#define RX_BASE_ADDRL(channel) (0xC4C + ((channel) << 6))
+
+#define RX_HEAD_WRITEBACK_ADDRH(channel) (0xC50 + ((channel) << 6))
+
+#define RX_HEAD_WRITEBACK_ADDRL(channel) (0xC54 + ((channel) << 6))
+
+#define RX_HEAD(channel) (0xC58 + ((channel) << 6))
+
+#define RX_TAIL(channel) (0xC5C + ((channel) << 6))
+#define RX_TAIL_SET_TOP_INT_EN_ BIT(30)
+#define RX_TAIL_SET_TOP_INT_VEC_EN_ BIT(29)
+
+#define RX_CFG_C(channel) (0xC64 + ((channel) << 6))
+#define RX_CFG_C_RX_TOP_INT_EN_AUTO_CLR_ BIT(6)
+#define RX_CFG_C_RX_INT_EN_R2C_ BIT(4)
+#define RX_CFG_C_RX_DMA_INT_STS_AUTO_CLR_ BIT(3)
+#define RX_CFG_C_RX_INT_STS_R2C_MODE_MASK_ (0x00000007)
+
+#define TX_CFG_A(channel) (0xD40 + ((channel) << 6))
+#define TX_CFG_A_TX_HP_WB_ON_INT_TMR_ BIT(30)
+#define TX_CFG_A_TX_TMR_HPWB_SEL_IOC_ (0x10000000)
+#define TX_CFG_A_TX_PF_THRES_MASK_ (0x001F0000)
+#define TX_CFG_A_TX_PF_THRES_SET_(value) \
+ ((((u32)(value)) << 16) & TX_CFG_A_TX_PF_THRES_MASK_)
+#define TX_CFG_A_TX_PF_PRI_THRES_MASK_ (0x00001F00)
+#define TX_CFG_A_TX_PF_PRI_THRES_SET_(value) \
+ ((((u32)(value)) << 8) & TX_CFG_A_TX_PF_PRI_THRES_MASK_)
+#define TX_CFG_A_TX_HP_WB_EN_ BIT(5)
+#define TX_CFG_A_TX_HP_WB_THRES_MASK_ (0x0000000F)
+#define TX_CFG_A_TX_HP_WB_THRES_SET_(value) \
+ (((u32)(value)) & TX_CFG_A_TX_HP_WB_THRES_MASK_)
+
+#define TX_CFG_B(channel) (0xD44 + ((channel) << 6))
+#define TX_CFG_B_TDMABL_512_ (0x00040000)
+#define TX_CFG_B_TX_RING_LEN_MASK_ (0x0000FFFF)
+
+#define TX_BASE_ADDRH(channel) (0xD48 + ((channel) << 6))
+
+#define TX_BASE_ADDRL(channel) (0xD4C + ((channel) << 6))
+
+#define TX_HEAD_WRITEBACK_ADDRH(channel) (0xD50 + ((channel) << 6))
+
+#define TX_HEAD_WRITEBACK_ADDRL(channel) (0xD54 + ((channel) << 6))
+
+#define TX_HEAD(channel) (0xD58 + ((channel) << 6))
+
+#define TX_TAIL(channel) (0xD5C + ((channel) << 6))
+#define TX_TAIL_SET_DMAC_INT_EN_ BIT(31)
+#define TX_TAIL_SET_TOP_INT_EN_ BIT(30)
+#define TX_TAIL_SET_TOP_INT_VEC_EN_ BIT(29)
+
+#define TX_CFG_C(channel) (0xD64 + ((channel) << 6))
+#define TX_CFG_C_TX_TOP_INT_EN_AUTO_CLR_ BIT(6)
+#define TX_CFG_C_TX_DMA_INT_EN_AUTO_CLR_ BIT(5)
+#define TX_CFG_C_TX_INT_EN_R2C_ BIT(4)
+#define TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_ BIT(3)
+#define TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_ (0x00000007)
+
+/* MAC statistics registers */
+#define STAT_RX_FCS_ERRORS (0x1200)
+#define STAT_RX_ALIGNMENT_ERRORS (0x1204)
+#define STAT_RX_JABBER_ERRORS (0x120C)
+#define STAT_RX_UNDERSIZE_FRAME_ERRORS (0x1210)
+#define STAT_RX_OVERSIZE_FRAME_ERRORS (0x1214)
+#define STAT_RX_DROPPED_FRAMES (0x1218)
+#define STAT_RX_UNICAST_BYTE_COUNT (0x121C)
+#define STAT_RX_BROADCAST_BYTE_COUNT (0x1220)
+#define STAT_RX_MULTICAST_BYTE_COUNT (0x1224)
+#define STAT_RX_MULTICAST_FRAMES (0x1230)
+#define STAT_RX_TOTAL_FRAMES (0x1254)
+
+#define STAT_TX_FCS_ERRORS (0x1280)
+#define STAT_TX_EXCESS_DEFERRAL_ERRORS (0x1284)
+#define STAT_TX_CARRIER_ERRORS (0x1288)
+#define STAT_TX_SINGLE_COLLISIONS (0x1290)
+#define STAT_TX_MULTIPLE_COLLISIONS (0x1294)
+#define STAT_TX_EXCESSIVE_COLLISION (0x1298)
+#define STAT_TX_LATE_COLLISIONS (0x129C)
+#define STAT_TX_UNICAST_BYTE_COUNT (0x12A0)
+#define STAT_TX_BROADCAST_BYTE_COUNT (0x12A4)
+#define STAT_TX_MULTICAST_BYTE_COUNT (0x12A8)
+#define STAT_TX_MULTICAST_FRAMES (0x12B4)
+#define STAT_TX_TOTAL_FRAMES (0x12D8)
+
+/* End of Register definitions */
+
+#define LAN743X_MAX_RX_CHANNELS (4)
+#define LAN743X_MAX_TX_CHANNELS (1)
+struct lan743x_adapter;
+
+#define LAN743X_USED_RX_CHANNELS (4)
+#define LAN743X_USED_TX_CHANNELS (1)
+#define LAN743X_INT_MOD (400)
+
+#if (LAN743X_USED_RX_CHANNELS > LAN743X_MAX_RX_CHANNELS)
+#error Invalid LAN743X_USED_RX_CHANNELS
+#endif
+#if (LAN743X_USED_TX_CHANNELS > LAN743X_MAX_TX_CHANNELS)
+#error Invalid LAN743X_USED_TX_CHANNELS
+#endif
+
+/* PCI */
+/* SMSC acquired EFAR in the late 1990s; MCHP acquired SMSC in 2012 */
+#define PCI_VENDOR_ID_SMSC PCI_VENDOR_ID_EFAR
+#define PCI_DEVICE_ID_SMSC_LAN7430 (0x7430)
+
+#define PCI_CONFIG_LENGTH (0x1000)
+
+/* CSR */
+#define CSR_LENGTH (0x2000)
+
+#define LAN743X_CSR_FLAG_IS_A0 BIT(0)
+#define LAN743X_CSR_FLAG_IS_B0 BIT(1)
+#define LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR BIT(8)
+
+struct lan743x_csr {
+ u32 flags;
+ u8 __iomem *csr_address;
+ u32 id_rev;
+ u32 fpga_rev;
+};
+
+/* INTERRUPTS */
+typedef void (*lan743x_vector_handler)(void *context, u32 int_sts, u32 flags);
+
+#define LAN743X_VECTOR_FLAG_IRQ_SHARED BIT(0)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ BIT(1)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C BIT(2)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C BIT(3)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK BIT(4)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR BIT(5)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C BIT(6)
+#define LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR BIT(7)
+#define LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET BIT(8)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR BIT(9)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET BIT(10)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR BIT(11)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET BIT(12)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR BIT(13)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET BIT(14)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR BIT(15)
+
+struct lan743x_vector {
+ int irq;
+ u32 flags;
+ struct lan743x_adapter *adapter;
+ int vector_index;
+ u32 int_mask;
+ lan743x_vector_handler handler;
+ void *context;
+};
+
+#define LAN743X_MAX_VECTOR_COUNT (8)
+
+struct lan743x_intr {
+ int flags;
+
+ unsigned int irq;
+
+ struct lan743x_vector vector_list[LAN743X_MAX_VECTOR_COUNT];
+ int number_of_vectors;
+ bool using_vectors;
+
+ int software_isr_flag;
+};
+
+#define LAN743X_MAX_FRAME_SIZE (9 * 1024)
+
+/* PHY */
+struct lan743x_phy {
+ bool fc_autoneg;
+ u8 fc_request_control;
+};
+
+/* TX */
+struct lan743x_tx_descriptor;
+struct lan743x_tx_buffer_info;
+
+#define GPIO_QUEUE_STARTED (0)
+#define GPIO_TX_FUNCTION (1)
+#define GPIO_TX_COMPLETION (2)
+#define GPIO_TX_FRAGMENT (3)
+
+#define TX_FRAME_FLAG_IN_PROGRESS BIT(0)
+
+struct lan743x_tx {
+ struct lan743x_adapter *adapter;
+ u32 vector_flags;
+ int channel_number;
+
+ int ring_size;
+ size_t ring_allocation_size;
+ struct lan743x_tx_descriptor *ring_cpu_ptr;
+ dma_addr_t ring_dma_ptr;
+ /* ring_lock: used to prevent concurrent access to tx ring */
+ spinlock_t ring_lock;
+ u32 frame_flags;
+ u32 frame_first;
+ u32 frame_data0;
+ u32 frame_tail;
+
+ struct lan743x_tx_buffer_info *buffer_info;
+
+ u32 *head_cpu_ptr;
+ dma_addr_t head_dma_ptr;
+ int last_head;
+ int last_tail;
+
+ struct napi_struct napi;
+
+ struct sk_buff *overflow_skb;
+};
+
+/* RX */
+struct lan743x_rx_descriptor;
+struct lan743x_rx_buffer_info;
+
+struct lan743x_rx {
+ struct lan743x_adapter *adapter;
+ u32 vector_flags;
+ int channel_number;
+
+ int ring_size;
+ size_t ring_allocation_size;
+ struct lan743x_rx_descriptor *ring_cpu_ptr;
+ dma_addr_t ring_dma_ptr;
+
+ struct lan743x_rx_buffer_info *buffer_info;
+
+ u32 *head_cpu_ptr;
+ dma_addr_t head_dma_ptr;
+ u32 last_head;
+ u32 last_tail;
+
+ struct napi_struct napi;
+
+ u32 frame_count;
+};
+
+struct lan743x_adapter {
+ struct net_device *netdev;
+ struct mii_bus *mdiobus;
+ int msg_enable;
+ struct pci_dev *pdev;
+ struct lan743x_csr csr;
+ struct lan743x_intr intr;
+
+ /* lock, used to prevent concurrent access to data port */
+ struct mutex dp_lock;
+
+ u8 mac_address[ETH_ALEN];
+
+ struct lan743x_phy phy;
+ struct lan743x_tx tx[LAN743X_MAX_TX_CHANNELS];
+ struct lan743x_rx rx[LAN743X_MAX_RX_CHANNELS];
+};
+
+#define LAN743X_COMPONENT_FLAG_RX(channel) BIT(20 + (channel))
+
+#define INTR_FLAG_IRQ_REQUESTED(vector_index) BIT(0 + (vector_index))
+#define INTR_FLAG_MSI_ENABLED BIT(8)
+#define INTR_FLAG_MSIX_ENABLED BIT(9)
+
+#define MAC_MII_READ 1
+#define MAC_MII_WRITE 0
+
+#define PHY_FLAG_OPENED BIT(0)
+#define PHY_FLAG_ATTACHED BIT(1)
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#define DMA_ADDR_HIGH32(dma_addr) ((u32)(((dma_addr) >> 32) & 0xFFFFFFFF))
+#else
+#define DMA_ADDR_HIGH32(dma_addr) ((u32)(0))
+#endif
+#define DMA_ADDR_LOW32(dma_addr) ((u32)((dma_addr) & 0xFFFFFFFF))
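+
+/* Usage sketch (illustrative; mirrors how lan743x_main.c programs the
+ * ring base registers): splitting a dma_addr_t into the two 32-bit
+ * register halves:
+ *   lan743x_csr_write(adapter, TX_BASE_ADDRH(0),
+ *                     DMA_ADDR_HIGH32(tx->ring_dma_ptr));
+ *   lan743x_csr_write(adapter, TX_BASE_ADDRL(0),
+ *                     DMA_ADDR_LOW32(tx->ring_dma_ptr));
+ */
+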
+#define DMA_DESCRIPTOR_SPACING_16 (16)
+#define DMA_DESCRIPTOR_SPACING_32 (32)
+#define DMA_DESCRIPTOR_SPACING_64 (64)
+#define DMA_DESCRIPTOR_SPACING_128 (128)
+#define DEFAULT_DMA_DESCRIPTOR_SPACING (L1_CACHE_BYTES)
+
+#define DMAC_CHANNEL_STATE_SET(start_bit, stop_bit) \
+ (((start_bit) ? 2 : 0) | ((stop_bit) ? 1 : 0))
+#define DMAC_CHANNEL_STATE_INITIAL DMAC_CHANNEL_STATE_SET(0, 0)
+#define DMAC_CHANNEL_STATE_STARTED DMAC_CHANNEL_STATE_SET(1, 0)
+#define DMAC_CHANNEL_STATE_STOP_PENDING DMAC_CHANNEL_STATE_SET(1, 1)
+#define DMAC_CHANNEL_STATE_STOPPED DMAC_CHANNEL_STATE_SET(0, 1)
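+
+/* The two-bit encoding above, spelled out (bit 1 = start, bit 0 = stop):
+ * INITIAL = 0b00, STOPPED = 0b01, STARTED = 0b10, STOP_PENDING = 0b11.
+ */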
+
+/* TX Descriptor bits */
+#define TX_DESC_DATA0_DTYPE_MASK_ (0xC0000000)
+#define TX_DESC_DATA0_DTYPE_DATA_ (0x00000000)
+#define TX_DESC_DATA0_DTYPE_EXT_ (0x40000000)
+#define TX_DESC_DATA0_FS_ (0x20000000)
+#define TX_DESC_DATA0_LS_ (0x10000000)
+#define TX_DESC_DATA0_EXT_ (0x08000000)
+#define TX_DESC_DATA0_IOC_ (0x04000000)
+#define TX_DESC_DATA0_ICE_ (0x00400000)
+#define TX_DESC_DATA0_IPE_ (0x00200000)
+#define TX_DESC_DATA0_TPE_ (0x00100000)
+#define TX_DESC_DATA0_FCS_ (0x00020000)
+#define TX_DESC_DATA0_BUF_LENGTH_MASK_ (0x0000FFFF)
+#define TX_DESC_DATA0_EXT_LSO_ (0x00200000)
+#define TX_DESC_DATA0_EXT_PAY_LENGTH_MASK_ (0x000FFFFF)
+#define TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_ (0x3FFF0000)
+
+struct lan743x_tx_descriptor {
+ u32 data0;
+ u32 data1;
+ u32 data2;
+ u32 data3;
+} __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING);
+
+#define TX_BUFFER_INFO_FLAG_ACTIVE BIT(0)
+#define TX_BUFFER_INFO_FLAG_IGNORE_SYNC BIT(2)
+#define TX_BUFFER_INFO_FLAG_SKB_FRAGMENT BIT(3)
+struct lan743x_tx_buffer_info {
+ int flags;
+ struct sk_buff *skb;
+ dma_addr_t dma_ptr;
+ unsigned int buffer_length;
+};
+
+#define LAN743X_TX_RING_SIZE (50)
+
+/* OWN bit set: the descriptor is owned by the RX DMAC; OWN bit clear:
+ * it is owned by the host.
+ */
+#define RX_DESC_DATA0_OWN_ (0x00008000)
+#define RX_DESC_DATA0_FS_ (0x80000000)
+#define RX_DESC_DATA0_LS_ (0x40000000)
+#define RX_DESC_DATA0_FRAME_LENGTH_MASK_ (0x3FFF0000)
+#define RX_DESC_DATA0_FRAME_LENGTH_GET_(data0) \
+ (((data0) & RX_DESC_DATA0_FRAME_LENGTH_MASK_) >> 16)
+#define RX_DESC_DATA0_EXT_ (0x00004000)
+#define RX_DESC_DATA0_BUF_LENGTH_MASK_ (0x00003FFF)
+#define RX_DESC_DATA2_TS_NS_MASK_ (0x3FFFFFFF)
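+
+/* RX completion sketch (illustrative only; the real descriptor
+ * handling lives in lan743x_main.c), for some ring index:
+ *   u32 data0 = rx->ring_cpu_ptr[index].data0;
+ *   if (!(data0 & RX_DESC_DATA0_OWN_)) {
+ *           unsigned int length =
+ *                   RX_DESC_DATA0_FRAME_LENGTH_GET_(data0);
+ *           bool first = data0 & RX_DESC_DATA0_FS_;
+ *           bool last = data0 & RX_DESC_DATA0_LS_;
+ *   }
+ */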
+
+#if ((NET_IP_ALIGN != 0) && (NET_IP_ALIGN != 2))
+#error NET_IP_ALIGN must be 0 or 2
+#endif
+
+#define RX_HEAD_PADDING NET_IP_ALIGN
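+
+/* With NET_IP_ALIGN == 2, reserving two bytes ahead of the 14-byte
+ * Ethernet header leaves the IP header 4-byte aligned within the skb
+ * data buffer.
+ */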
+
+struct lan743x_rx_descriptor {
+ u32 data0;
+ u32 data1;
+ u32 data2;
+ u32 data3;
+} __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING);
+
+#define RX_BUFFER_INFO_FLAG_ACTIVE BIT(0)
+struct lan743x_rx_buffer_info {
+ int flags;
+ struct sk_buff *skb;
+
+ dma_addr_t dma_ptr;
+ unsigned int buffer_length;
+};
+
+#define LAN743X_RX_RING_SIZE (65)
+
+#define RX_PROCESS_RESULT_NOTHING_TO_DO (0)
+#define RX_PROCESS_RESULT_PACKET_RECEIVED (1)
+#define RX_PROCESS_RESULT_PACKET_DROPPED (2)
+
+#endif /* _LAN743X_H */
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 2521c8c40015..b2d2ec8c11e2 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -266,7 +266,7 @@ MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");
/* Careful: must be accessed under kernel_param_lock() */
static char *myri10ge_fw_name = NULL;
-module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
+module_param(myri10ge_fw_name, charp, 0644);
MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
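+
+/* 0644 is the octal form of S_IRUGO | S_IWUSR (owner read/write,
+ * group/other read-only); the 0444 values below are S_IRUGO
+ * (read-only for everyone).
+ */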
#define MYRI10GE_MAX_BOARDS 8
@@ -277,49 +277,49 @@ module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL,
MODULE_PARM_DESC(myri10ge_fw_names, "Firmware image names per board");
static int myri10ge_ecrc_enable = 1;
-module_param(myri10ge_ecrc_enable, int, S_IRUGO);
+module_param(myri10ge_ecrc_enable, int, 0444);
MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E");
static int myri10ge_small_bytes = -1; /* -1 == auto */
-module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR);
+module_param(myri10ge_small_bytes, int, 0644);
MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets");
static int myri10ge_msi = 1; /* enable msi by default */
-module_param(myri10ge_msi, int, S_IRUGO | S_IWUSR);
+module_param(myri10ge_msi, int, 0644);
MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts");
static int myri10ge_intr_coal_delay = 75;
-module_param(myri10ge_intr_coal_delay, int, S_IRUGO);
+module_param(myri10ge_intr_coal_delay, int, 0444);
MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay");
static int myri10ge_flow_control = 1;
-module_param(myri10ge_flow_control, int, S_IRUGO);
+module_param(myri10ge_flow_control, int, 0444);
MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter");
static int myri10ge_deassert_wait = 1;
-module_param(myri10ge_deassert_wait, int, S_IRUGO | S_IWUSR);
+module_param(myri10ge_deassert_wait, int, 0644);
MODULE_PARM_DESC(myri10ge_deassert_wait,
"Wait when deasserting legacy interrupts");
static int myri10ge_force_firmware = 0;
-module_param(myri10ge_force_firmware, int, S_IRUGO);
+module_param(myri10ge_force_firmware, int, 0444);
MODULE_PARM_DESC(myri10ge_force_firmware,
"Force firmware to assume aligned completions");
static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
-module_param(myri10ge_initial_mtu, int, S_IRUGO);
+module_param(myri10ge_initial_mtu, int, 0444);
MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU");
static int myri10ge_napi_weight = 64;
-module_param(myri10ge_napi_weight, int, S_IRUGO);
+module_param(myri10ge_napi_weight, int, 0444);
MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight");
static int myri10ge_watchdog_timeout = 1;
-module_param(myri10ge_watchdog_timeout, int, S_IRUGO);
+module_param(myri10ge_watchdog_timeout, int, 0444);
MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout");
static int myri10ge_max_irq_loops = 1048576;
-module_param(myri10ge_max_irq_loops, int, S_IRUGO);
+module_param(myri10ge_max_irq_loops, int, 0444);
MODULE_PARM_DESC(myri10ge_max_irq_loops,
"Set stuck legacy IRQ detection threshold");
@@ -330,21 +330,21 @@ module_param(myri10ge_debug, int, 0);
MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)");
static int myri10ge_fill_thresh = 256;
-module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR);
+module_param(myri10ge_fill_thresh, int, 0644);
MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed");
static int myri10ge_reset_recover = 1;
static int myri10ge_max_slices = 1;
-module_param(myri10ge_max_slices, int, S_IRUGO);
+module_param(myri10ge_max_slices, int, 0444);
MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues");
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
-module_param(myri10ge_rss_hash, int, S_IRUGO);
+module_param(myri10ge_rss_hash, int, 0444);
MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do");
static int myri10ge_dca = 1;
-module_param(myri10ge_dca, int, S_IRUGO);
+module_param(myri10ge_dca, int, 0444);
MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible");
#define MYRI10GE_FW_OFFSET 1024*1024
diff --git a/drivers/net/ethernet/natsemi/Kconfig b/drivers/net/ethernet/natsemi/Kconfig
index a10ef50e4f12..017fb2322589 100644
--- a/drivers/net/ethernet/natsemi/Kconfig
+++ b/drivers/net/ethernet/natsemi/Kconfig
@@ -1,16 +1,16 @@
#
-# National Semi-conductor device configuration
+# National Semiconductor device configuration
#
config NET_VENDOR_NATSEMI
- bool "National Semi-conductor devices"
+ bool "National Semiconductor devices"
default y
---help---
If you have a network (Ethernet) card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
kernel: saying N will just cause the configurator to skip all
- the questions about National Semi-conductor devices. If you say Y,
+ the questions about National Semiconductor devices. If you say Y,
you will be asked for your specific card in the following questions.
if NET_VENDOR_NATSEMI
diff --git a/drivers/net/ethernet/natsemi/Makefile b/drivers/net/ethernet/natsemi/Makefile
index cc664977596e..a759aa09ef59 100644
--- a/drivers/net/ethernet/natsemi/Makefile
+++ b/drivers/net/ethernet/natsemi/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Makefile for the National Semi-conductor Sonic devices.
+# Makefile for the National Semiconductor Sonic devices.
#
obj-$(CONFIG_MACSONIC) += macsonic.o
diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index d5b28884e21e..51fa82b429a3 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -60,14 +60,6 @@ do { \
*((volatile unsigned int *)dev->base_addr+(reg)) = (val); \
} while (0)
-
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
/*
* We cannot use station (ethernet) address prefixes to detect the
* sonic controller since these are board manufacturer dependent.
@@ -117,7 +109,6 @@ static const struct net_device_ops sonic_netdev_ops = {
static int sonic_probe1(struct net_device *dev)
{
- static unsigned version_printed;
unsigned int silicon_revision;
unsigned int val;
struct sonic_local *lp = netdev_priv(dev);
@@ -133,26 +124,17 @@ static int sonic_probe1(struct net_device *dev)
* the expected location.
*/
silicon_revision = SONIC_READ(SONIC_SR);
- if (sonic_debug > 1)
- printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
i = 0;
while (known_revisions[i] != 0xffff &&
known_revisions[i] != silicon_revision)
i++;
if (known_revisions[i] == 0xffff) {
- printk("SONIC ethernet controller not found (0x%4x)\n",
- silicon_revision);
+ pr_info("SONIC ethernet controller not found (0x%4x)\n",
+ silicon_revision);
goto out;
}
- if (sonic_debug && version_printed++ == 0)
- printk(version);
-
- printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ",
- dev_name(lp->device), dev->base_addr);
-
/*
* Put the sonic into software reset, then
* retrieve and print the ethernet address.
@@ -245,12 +227,16 @@ static int jazz_sonic_probe(struct platform_device *pdev)
err = sonic_probe1(dev);
if (err)
goto out;
+
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
+ sonic_msg_init(dev);
+
err = register_netdev(dev);
if (err)
goto out1;
- printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
return 0;
out1:
@@ -262,8 +248,6 @@ out:
}
MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
MODULE_ALIAS("platform:jazzsonic");
#include "sonic.c"
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index b922ab5cedea..0937fc2a928e 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -60,8 +60,6 @@
#include <asm/macints.h>
#include <asm/mac_via.h>
-static char mac_sonic_string[] = "macsonic";
-
#include "sonic.h"
/* These should basically be bus-size and endian independent (since
@@ -72,15 +70,6 @@ static char mac_sonic_string[] = "macsonic";
#define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
+ lp->reg_offset))
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
-static int sonic_version_printed;
-
/* For onboard SONIC */
#define ONBOARD_SONIC_REGISTERS 0x50F0A000
#define ONBOARD_SONIC_PROM_BASE 0x50f08000
@@ -313,11 +302,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
int sr;
bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
/* Bogus probing, on the models which may or may not have
Ethernet (BTW, the Ethernet *is* always at the same
address, and nothing else lives there, at least if Apple's
@@ -327,13 +311,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS);
if (!card_present) {
- printk("none.\n");
+ pr_info("Onboard/comm-slot SONIC not found\n");
return -ENODEV;
}
}
- printk("yes\n");
-
/* Danger! My arms are flailing wildly! You *must* set lp->reg_offset
* and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
dev->base_addr = ONBOARD_SONIC_REGISTERS;
@@ -342,18 +324,10 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
else
dev->irq = IRQ_NUBUS_9;
- if (!sonic_version_printed) {
- printk(KERN_INFO "%s", version);
- sonic_version_printed = 1;
- }
- printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
- dev_name(lp->device), dev->base_addr);
-
/* The PowerBook's SONIC is 16 bit always. */
if (macintosh_config->ident == MAC_MODEL_PB520) {
lp->reg_offset = 0;
lp->dma_bitmode = SONIC_BITMODE16;
- sr = SONIC_READ(SONIC_SR);
} else if (commslot) {
/* Some of the comm-slot cards are 16 bit. But some
of them are not. The 32-bit cards use offset 2 and
@@ -370,22 +344,21 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
else {
lp->dma_bitmode = SONIC_BITMODE16;
lp->reg_offset = 0;
- sr = SONIC_READ(SONIC_SR);
}
} else {
/* All onboard cards are at offset 2 with 32 bit DMA. */
lp->reg_offset = 2;
lp->dma_bitmode = SONIC_BITMODE32;
- sr = SONIC_READ(SONIC_SR);
}
- printk(KERN_INFO
- "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
- dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset);
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
- printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
- SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+ pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n",
+ SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16,
+ lp->reg_offset);
+
+ /* This is sometimes useful to find out how MacOS configured the card */
+ pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+ SONIC_READ(SONIC_DCR) & 0xffff,
+ SONIC_READ(SONIC_DCR2) & 0xffff);
/* Software reset, then initialize control registers. */
SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -406,11 +379,14 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
/* Now look for the MAC address. */
mac_onboard_sonic_ethernet_addr(dev);
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
/* Shared init code */
return macsonic_init(dev);
}
-static int mac_nubus_sonic_ethernet_addr(struct net_device *dev,
+static int mac_sonic_nubus_ethernet_addr(struct net_device *dev,
unsigned long prom_addr, int id)
{
int i;
@@ -449,70 +425,49 @@ static int macsonic_ident(struct nubus_rsrc *fres)
return -1;
}
-static int mac_nubus_sonic_probe(struct net_device *dev)
+static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
+ struct net_device *dev)
{
- static int slots;
- struct nubus_rsrc *ndev = NULL;
struct sonic_local* lp = netdev_priv(dev);
unsigned long base_addr, prom_addr;
u16 sonic_dcr;
- int id = -1;
int reg_offset, dma_bitmode;
- /* Find the first SONIC that hasn't been initialized already */
- for_each_func_rsrc(ndev) {
- if (ndev->category != NUBUS_CAT_NETWORK ||
- ndev->type != NUBUS_TYPE_ETHERNET)
- continue;
-
- /* Have we seen it already? */
- if (slots & (1<<ndev->board->slot))
- continue;
- slots |= 1<<ndev->board->slot;
-
- /* Is it one of ours? */
- if ((id = macsonic_ident(ndev)) != -1)
- break;
- }
-
- if (ndev == NULL)
- return -ENODEV;
-
switch (id) {
case MACSONIC_DUODOCK:
- base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
SONIC_DCR_TFT0;
reg_offset = 2;
dma_bitmode = SONIC_BITMODE32;
break;
case MACSONIC_APPLE:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE32;
break;
case MACSONIC_APPLE16:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
SONIC_DCR_PO1 | SONIC_DCR_BMS;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE16;
break;
case MACSONIC_DAYNALINK:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DAYNALINK_PROM_BASE;
sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
SONIC_DCR_PO1 | SONIC_DCR_BMS;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE16;
break;
case MACSONIC_DAYNA:
- base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
+ base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR;
sonic_dcr = SONIC_DCR_BMS |
SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
reg_offset = 0;
@@ -528,21 +483,16 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
dev->base_addr = base_addr;
lp->reg_offset = reg_offset;
lp->dma_bitmode = dma_bitmode;
- dev->irq = SLOT2IRQ(ndev->board->slot);
+ dev->irq = SLOT2IRQ(board->slot);
- if (!sonic_version_printed) {
- printk(KERN_INFO "%s", version);
- sonic_version_printed = 1;
- }
- printk(KERN_INFO "%s: %s in slot %X\n",
- dev_name(lp->device), ndev->board->name, ndev->board->slot);
- printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
- dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
+ dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
+ board->name, SONIC_READ(SONIC_SR),
+ lp->dma_bitmode ? 32 : 16, lp->reg_offset);
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
- printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
- SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+ /* This is sometimes useful to find out how MacOS configured the card */
+ dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+ SONIC_READ(SONIC_DCR) & 0xffff,
+ SONIC_READ(SONIC_DCR2) & 0xffff);
/* Software reset, then initialize control registers. */
SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -557,14 +507,17 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
SONIC_WRITE(SONIC_ISR, 0x7fff);
/* Now look for the MAC address. */
- if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
+ if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0)
return -ENODEV;
+ dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
/* Shared init code */
return macsonic_init(dev);
}
-static int mac_sonic_probe(struct platform_device *pdev)
+static int mac_sonic_platform_probe(struct platform_device *pdev)
{
struct net_device *dev;
struct sonic_local *lp;
@@ -579,22 +532,16 @@ static int mac_sonic_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- /* This will catch fatal stuff like -ENOMEM as well as success */
err = mac_onboard_sonic_probe(dev);
- if (err == 0)
- goto found;
- if (err != -ENODEV)
- goto out;
- err = mac_nubus_sonic_probe(dev);
if (err)
goto out;
-found:
+
+ sonic_msg_init(dev);
+
err = register_netdev(dev);
if (err)
goto out;
- printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
return 0;
out:
@@ -604,13 +551,11 @@ out:
}
MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
MODULE_ALIAS("platform:macsonic");
#include "sonic.c"
-static int mac_sonic_device_remove(struct platform_device *pdev)
+static int mac_sonic_platform_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct sonic_local* lp = netdev_priv(dev);
@@ -623,12 +568,105 @@ static int mac_sonic_device_remove(struct platform_device *pdev)
return 0;
}
-static struct platform_driver mac_sonic_driver = {
- .probe = mac_sonic_probe,
- .remove = mac_sonic_device_remove,
- .driver = {
- .name = mac_sonic_string,
+static struct platform_driver mac_sonic_platform_driver = {
+ .probe = mac_sonic_platform_probe,
+ .remove = mac_sonic_platform_remove,
+ .driver = {
+ .name = "macsonic",
+ },
+};
+
+static int mac_sonic_nubus_probe(struct nubus_board *board)
+{
+ struct net_device *ndev;
+ struct sonic_local *lp;
+ struct nubus_rsrc *fres;
+ int id = -1;
+ int err;
+
+ /* The platform driver will handle a PDS or Comm Slot card (even if
+ * it has a pseudoslot declaration ROM).
+ */
+ if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ return -ENODEV;
+
+ for_each_board_func_rsrc(board, fres) {
+ if (fres->category != NUBUS_CAT_NETWORK ||
+ fres->type != NUBUS_TYPE_ETHERNET)
+ continue;
+
+ id = macsonic_ident(fres);
+ if (id != -1)
+ break;
+ }
+ if (!fres)
+ return -ENODEV;
+
+ ndev = alloc_etherdev(sizeof(struct sonic_local));
+ if (!ndev)
+ return -ENOMEM;
+
+ lp = netdev_priv(ndev);
+ lp->device = &board->dev;
+ SET_NETDEV_DEV(ndev, &board->dev);
+
+ err = mac_sonic_nubus_probe_board(board, id, ndev);
+ if (err)
+ goto out;
+
+ sonic_msg_init(ndev);
+
+ err = register_netdev(ndev);
+ if (err)
+ goto out;
+
+ nubus_set_drvdata(board, ndev);
+
+ return 0;
+
+out:
+ free_netdev(ndev);
+ return err;
+}
+
+static int mac_sonic_nubus_remove(struct nubus_board *board)
+{
+ struct net_device *ndev = nubus_get_drvdata(board);
+ struct sonic_local *lp = netdev_priv(ndev);
+
+ unregister_netdev(ndev);
+ dma_free_coherent(lp->device,
+ SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+ lp->descriptors, lp->descriptors_laddr);
+ free_netdev(ndev);
+
+ return 0;
+}
+
+static struct nubus_driver mac_sonic_nubus_driver = {
+ .probe = mac_sonic_nubus_probe,
+ .remove = mac_sonic_nubus_remove,
+ .driver = {
+ .name = "macsonic-nubus",
+ .owner = THIS_MODULE,
},
};
-module_platform_driver(mac_sonic_driver);
+static int perr, nerr;
+
+static int __init mac_sonic_init(void)
+{
+ perr = platform_driver_register(&mac_sonic_platform_driver);
+ nerr = nubus_driver_register(&mac_sonic_nubus_driver);
+ return 0;
+}
+module_init(mac_sonic_init);
+
+static void __exit mac_sonic_exit(void)
+{
+ if (!perr)
+ platform_driver_unregister(&mac_sonic_platform_driver);
+ if (!nerr)
+ nubus_driver_unregister(&mac_sonic_nubus_driver);
+}
+module_exit(mac_sonic_exit);
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index 612c7a44b26c..7ed08486ae23 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -33,7 +33,21 @@
* the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
*/
+static unsigned int version_printed;
+static int sonic_debug = -1;
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "debug message level");
+
+static void sonic_msg_init(struct net_device *dev)
+{
+ struct sonic_local *lp = netdev_priv(dev);
+
+ lp->msg_enable = netif_msg_init(sonic_debug, 0);
+
+ if (version_printed++ == 0)
+ netif_dbg(lp, drv, dev, "%s", version);
+}
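+
+/* Note: with the default sonic_debug of -1, netif_msg_init() falls
+ * back to its second argument (0), so all message classes stay off
+ * unless the module parameter supplies an explicit bitmap.
+ */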
/*
* Open/initialize the SONIC controller.
@@ -47,8 +61,7 @@ static int sonic_open(struct net_device *dev)
struct sonic_local *lp = netdev_priv(dev);
int i;
- if (sonic_debug > 2)
- printk("sonic_open: initializing sonic driver.\n");
+ netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
for (i = 0; i < SONIC_NUM_RRS; i++) {
struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2);
@@ -95,8 +108,7 @@ static int sonic_open(struct net_device *dev)
netif_start_queue(dev);
- if (sonic_debug > 2)
- printk("sonic_open: Initialization done.\n");
+ netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__);
return 0;
}
@@ -110,8 +122,7 @@ static int sonic_close(struct net_device *dev)
struct sonic_local *lp = netdev_priv(dev);
int i;
- if (sonic_debug > 2)
- printk("sonic_close\n");
+ netif_dbg(lp, ifdown, dev, "%s\n", __func__);
netif_stop_queue(dev);
@@ -205,8 +216,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
int length;
int entry = lp->next_tx;
- if (sonic_debug > 2)
- printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
+ netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
length = skb->len;
if (length < ETH_ZLEN) {
@@ -252,14 +262,12 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
if (lp->tx_skb[lp->next_tx] != NULL) {
/* The ring is full, the ISR has yet to process the next TD. */
- if (sonic_debug > 3)
- printk("%s: stopping queue\n", dev->name);
+ netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__);
netif_stop_queue(dev);
/* after this packet, wait for ISR to free up some TDAs */
} else netif_start_queue(dev);
- if (sonic_debug > 2)
- printk("sonic_send_packet: issuing Tx command\n");
+ netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
@@ -281,8 +289,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
do {
if (status & SONIC_INT_PKTRX) {
- if (sonic_debug > 2)
- printk("%s: packet rx\n", dev->name);
+ netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__);
sonic_rx(dev); /* got packet(s) */
SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
}
@@ -299,8 +306,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
* still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
*/
- if (sonic_debug > 2)
- printk("%s: tx done\n", dev->name);
+ netif_dbg(lp, intr, dev, "%s: tx done\n", __func__);
while (lp->tx_skb[entry] != NULL) {
if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
@@ -346,20 +352,20 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
* check error conditions
*/
if (status & SONIC_INT_RFO) {
- if (sonic_debug > 1)
- printk("%s: rx fifo overrun\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n",
+ __func__);
lp->stats.rx_fifo_errors++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
}
if (status & SONIC_INT_RDE) {
- if (sonic_debug > 1)
- printk("%s: rx descriptors exhausted\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n",
+ __func__);
lp->stats.rx_dropped++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
}
if (status & SONIC_INT_RBAE) {
- if (sonic_debug > 1)
- printk("%s: rx buffer area exceeded\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n",
+ __func__);
lp->stats.rx_dropped++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
}
@@ -380,8 +386,9 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
/* transmit error */
if (status & SONIC_INT_TXER) {
- if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
- printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+ if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU)
+ netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n",
+ __func__);
SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
}
@@ -475,8 +482,8 @@ static void sonic_rx(struct net_device *dev)
if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
- if (sonic_debug > 2)
- printk("%s: rx buffer exhausted\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n",
+ __func__);
SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
}
} else
@@ -542,9 +549,8 @@ static void sonic_multicast_list(struct net_device *dev)
(netdev_mc_count(dev) > 15)) {
rcr |= SONIC_RCR_AMC;
} else {
- if (sonic_debug > 2)
- printk("sonic_multicast_list: mc_count %d\n",
- netdev_mc_count(dev));
+ netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__,
+ netdev_mc_count(dev));
sonic_set_cam_enable(dev, 1); /* always enable our own address */
i = 1;
netdev_for_each_mc_addr(ha, dev) {
@@ -562,8 +568,7 @@ static void sonic_multicast_list(struct net_device *dev)
}
}
- if (sonic_debug > 2)
- printk("sonic_multicast_list: setting RCR=%x\n", rcr);
+ netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr);
SONIC_WRITE(SONIC_RCR, rcr);
}
@@ -596,8 +601,8 @@ static int sonic_init(struct net_device *dev)
/*
* initialize the receive resource area
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize receive resource area\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n",
+ __func__);
for (i = 0; i < SONIC_NUM_RRS; i++) {
u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
@@ -622,8 +627,7 @@ static int sonic_init(struct net_device *dev)
SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
/* load the resource pointers */
- if (sonic_debug > 3)
- printk("sonic_init: issuing RRRA command\n");
+ netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
i = 0;
@@ -632,16 +636,17 @@ static int sonic_init(struct net_device *dev)
break;
}
- if (sonic_debug > 2)
- printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+ netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__,
+ SONIC_READ(SONIC_CMD), i);
/*
* Initialize the receive descriptors so that they
* become a circular linked list, ie. let the last
* descriptor point to the first again.
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize receive descriptors\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n",
+ __func__);
+
for (i=0; i<SONIC_NUM_RDS; i++) {
sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
@@ -664,8 +669,9 @@ static int sonic_init(struct net_device *dev)
/*
* initialize transmit descriptors
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize transmit descriptors\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n",
+ __func__);
+
for (i = 0; i < SONIC_NUM_TDS; i++) {
sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
@@ -712,10 +718,8 @@ static int sonic_init(struct net_device *dev)
if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD)
break;
}
- if (sonic_debug > 2) {
- printk("sonic_init: CMD=%x, ISR=%x\n, i=%d",
- SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
- }
+ netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__,
+ SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
/*
* enable receiver, disable loopback
@@ -731,9 +735,8 @@ static int sonic_init(struct net_device *dev)
if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
- if (sonic_debug > 2)
- printk("sonic_init: new status=%x\n",
- SONIC_READ(SONIC_CMD));
+ netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__,
+ SONIC_READ(SONIC_CMD));
return 0;
}
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h
index 421b1a283fed..2b27f7049acb 100644
--- a/drivers/net/ethernet/natsemi/sonic.h
+++ b/drivers/net/ethernet/natsemi/sonic.h
@@ -319,6 +319,7 @@ struct sonic_local {
unsigned int eol_rx;
unsigned int eol_tx; /* last unacked transmit packet */
unsigned int next_tx; /* next free TD */
+ int msg_enable;
struct device *device; /* generic device */
struct net_device_stats stats;
};
@@ -336,6 +337,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev);
static void sonic_multicast_list(struct net_device *dev);
static int sonic_init(struct net_device *dev);
static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_msg_init(struct net_device *dev);
/* Internal inlines for reading/writing DMA buffers. Note that bus
size and endianness matter here, whereas they don't for registers,
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index 1817deea98a4..e1b886e87a76 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -73,14 +73,6 @@ extern void xtboard_get_ether_addr(unsigned char *buf);
#define SONIC_WRITE(reg,val) \
*((volatile unsigned int *)dev->base_addr+reg) = val
-
-/* Use 0 for production, 1 for verification, and >2 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
/*
* We cannot use station (ethernet) address prefixes to detect the
* sonic controller since these are board manufacturer dependent.
@@ -130,7 +122,6 @@ static const struct net_device_ops xtsonic_netdev_ops = {
static int __init sonic_probe1(struct net_device *dev)
{
- static unsigned version_printed = 0;
unsigned int silicon_revision;
struct sonic_local *lp = netdev_priv(dev);
unsigned int base_addr = dev->base_addr;
@@ -146,23 +137,17 @@ static int __init sonic_probe1(struct net_device *dev)
* the expected location.
*/
silicon_revision = SONIC_READ(SONIC_SR);
- if (sonic_debug > 1)
- printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
i = 0;
while ((known_revisions[i] != 0xffff) &&
(known_revisions[i] != silicon_revision))
i++;
if (known_revisions[i] == 0xffff) {
- printk("SONIC ethernet controller not found (0x%4x)\n",
- silicon_revision);
+ pr_info("SONIC ethernet controller not found (0x%4x)\n",
+ silicon_revision);
return -ENODEV;
}
- if (sonic_debug && version_printed++ == 0)
- printk(version);
-
/*
* Put the sonic into software reset, then retrieve ethernet address.
* Note: we are assuming that the boot-loader has initialized the cam.
@@ -273,12 +258,15 @@ int xtsonic_probe(struct platform_device *pdev)
if ((err = sonic_probe1(dev)))
goto out;
+
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
+ sonic_msg_init(dev);
+
if ((err = register_netdev(dev)))
goto out1;
- printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name,
- dev->base_addr, dev->dev_addr, dev->irq);
-
return 0;
out1:
@@ -290,8 +278,6 @@ out:
}
MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)");
#include "sonic.c"
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/Makefile b/drivers/net/ethernet/netronome/nfp/bpf/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/bpf/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/flower/Makefile b/drivers/net/ethernet/netronome/nfp/flower/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index adfe474c2cf0..3f46d836d1b8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -61,6 +61,16 @@
#define NFP_FLOWER_MASK_MPLS_BOS BIT(8)
#define NFP_FLOWER_MASK_MPLS_Q BIT(0)
+#define NFP_FL_IP_FRAG_FIRST BIT(7)
+#define NFP_FL_IP_FRAGMENTED BIT(6)
+
+/* Compressed HW representation of TCP Flags */
+#define NFP_FL_TCP_FLAG_URG BIT(4)
+#define NFP_FL_TCP_FLAG_PSH BIT(3)
+#define NFP_FL_TCP_FLAG_RST BIT(2)
+#define NFP_FL_TCP_FLAG_SYN BIT(1)
+#define NFP_FL_TCP_FLAG_FIN BIT(0)
+
#define NFP_FL_SC_ACT_DROP 0x80000000
#define NFP_FL_SC_ACT_USER 0x7D000000
#define NFP_FL_SC_ACT_POPV 0x6A000000
@@ -253,11 +263,18 @@ struct nfp_flower_tp_ports {
__be16 port_dst;
};
+struct nfp_flower_ip_ext {
+ u8 tos;
+ u8 proto;
+ u8 ttl;
+ u8 flags;
+};
+
/* L3 IPv4 details (3W/12B)
* 3 2 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | DSCP |ECN| protocol | reserved |
+ * | DSCP |ECN| protocol | ttl | flags |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | ipv4_addr_src |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -265,10 +282,7 @@ struct nfp_flower_tp_ports {
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct nfp_flower_ipv4 {
- u8 tos;
- u8 proto;
- u8 ttl;
- u8 reserved;
+ struct nfp_flower_ip_ext ip_ext;
__be32 ipv4_src;
__be32 ipv4_dst;
};
@@ -277,7 +291,7 @@ struct nfp_flower_ipv4 {
* 3 2 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | DSCP |ECN| protocol | reserved |
+ * | DSCP |ECN| protocol | ttl | flags |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | ipv6_exthdr | res | ipv6_flow_label |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -299,10 +313,7 @@ struct nfp_flower_ipv4 {
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct nfp_flower_ipv6 {
- u8 tos;
- u8 proto;
- u8 ttl;
- u8 reserved;
+ struct nfp_flower_ip_ext ip_ext;
__be32 ipv6_flow_label_exthdr;
struct in6_addr ipv6_src;
struct in6_addr ipv6_dst;
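
For reference, the compressed flags byte can be expanded back into TCPHDR_* bits; a hedged sketch of the inverse of the mapping added to match.c below (the helper name is illustrative and not part of this patch):

#include <net/tcp.h>
#include "cmsg.h"

/* Illustrative inverse of the TCPHDR_* -> NFP_FL_TCP_FLAG_* mapping */
static u32 nfp_example_expand_tcp_flags(u8 hw_flags)
{
	u32 flags = 0;

	if (hw_flags & NFP_FL_TCP_FLAG_FIN)
		flags |= TCPHDR_FIN;
	if (hw_flags & NFP_FL_TCP_FLAG_SYN)
		flags |= TCPHDR_SYN;
	if (hw_flags & NFP_FL_TCP_FLAG_RST)
		flags |= TCPHDR_RST;
	if (hw_flags & NFP_FL_TCP_FLAG_PSH)
		flags |= TCPHDR_PSH;
	if (hw_flags & NFP_FL_TCP_FLAG_URG)
		flags |= TCPHDR_URG;

	return flags;
}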
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 332ff0fdc038..c5cebf6fb1d3 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -41,6 +41,7 @@
#include <linux/time64.h>
#include <linux/types.h>
#include <net/pkt_cls.h>
+#include <net/tcp.h>
#include <linux/workqueue.h>
struct net_device;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 37c2ecae2a7a..91935405f586 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -146,26 +146,15 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame,
}
static void
-nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
- struct tc_cls_flower_offload *flow,
- bool mask_version)
+nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *frame,
+ struct tc_cls_flower_offload *flow,
+ bool mask_version)
{
struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
- struct flow_dissector_key_ipv4_addrs *addr;
- struct flow_dissector_key_basic *basic;
-
- memset(frame, 0, sizeof(struct nfp_flower_ipv4));
-
- if (dissector_uses_key(flow->dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
- addr = skb_flow_dissector_target(flow->dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS,
- target);
- frame->ipv4_src = addr->src;
- frame->ipv4_dst = addr->dst;
- }
if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *basic;
+
basic = skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_BASIC,
target);
@@ -181,6 +170,60 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
frame->tos = flow_ip->tos;
frame->ttl = flow_ip->ttl;
}
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_dissector_key_tcp *tcp;
+ u32 tcp_flags;
+
+ tcp = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_TCP, target);
+ tcp_flags = be16_to_cpu(tcp->flags);
+
+ if (tcp_flags & TCPHDR_FIN)
+ frame->flags |= NFP_FL_TCP_FLAG_FIN;
+ if (tcp_flags & TCPHDR_SYN)
+ frame->flags |= NFP_FL_TCP_FLAG_SYN;
+ if (tcp_flags & TCPHDR_RST)
+ frame->flags |= NFP_FL_TCP_FLAG_RST;
+ if (tcp_flags & TCPHDR_PSH)
+ frame->flags |= NFP_FL_TCP_FLAG_PSH;
+ if (tcp_flags & TCPHDR_URG)
+ frame->flags |= NFP_FL_TCP_FLAG_URG;
+ }
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key;
+
+ key = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ target);
+ if (key->flags & FLOW_DIS_IS_FRAGMENT)
+ frame->flags |= NFP_FL_IP_FRAGMENTED;
+ if (key->flags & FLOW_DIS_FIRST_FRAG)
+ frame->flags |= NFP_FL_IP_FRAG_FIRST;
+ }
+}
+
+static void
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
+ struct tc_cls_flower_offload *flow,
+ bool mask_version)
+{
+ struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+ struct flow_dissector_key_ipv4_addrs *addr;
+
+ memset(frame, 0, sizeof(struct nfp_flower_ipv4));
+
+ if (dissector_uses_key(flow->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+ addr = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ target);
+ frame->ipv4_src = addr->src;
+ frame->ipv4_dst = addr->dst;
+ }
+
+ nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
}
static void
@@ -190,7 +233,6 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
{
struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
struct flow_dissector_key_ipv6_addrs *addr;
- struct flow_dissector_key_basic *basic;
memset(frame, 0, sizeof(struct nfp_flower_ipv6));
@@ -203,22 +245,7 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
frame->ipv6_dst = addr->dst;
}
- if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
- basic = skb_flow_dissector_target(flow->dissector,
- FLOW_DISSECTOR_KEY_BASIC,
- target);
- frame->proto = basic->ip_proto;
- }
-
- if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) {
- struct flow_dissector_key_ip *flow_ip;
-
- flow_ip = skb_flow_dissector_target(flow->dissector,
- FLOW_DISSECTOR_KEY_IP,
- target);
- frame->tos = flow_ip->tos;
- frame->ttl = flow_ip->ttl;
- }
+ nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
}
static void
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index eb5c13dea8f5..114d2ab02a38 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -44,11 +44,20 @@
#include "../nfp_net.h"
#include "../nfp_port.h"
+#define NFP_FLOWER_SUPPORTED_TCPFLAGS \
+ (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
+ TCPHDR_PSH | TCPHDR_URG)
+
+#define NFP_FLOWER_SUPPORTED_CTLFLAGS \
+ (FLOW_DIS_IS_FRAGMENT | \
+ FLOW_DIS_FIRST_FRAG)
+
#define NFP_FLOWER_WHITELIST_DISSECTOR \
(BIT(FLOW_DISSECTOR_KEY_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_BASIC) | \
BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \
+ BIT(FLOW_DISSECTOR_KEY_TCP) | \
BIT(FLOW_DISSECTOR_KEY_PORTS) | \
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_VLAN) | \
@@ -288,6 +297,46 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
}
}
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_dissector_key_tcp *tcp;
+ u32 tcp_flags;
+
+ tcp = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ flow->key);
+ tcp_flags = be16_to_cpu(tcp->flags);
+
+ if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
+ return -EOPNOTSUPP;
+
+ /* We only support PSH and URG flags when either
+ * FIN, SYN or RST is present as well.
+ */
+ if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
+ !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
+ return -EOPNOTSUPP;
+
+ /* We need to store TCP flags in the IPv4 key space, thus
+ * we need to ensure we include an IPv4 key layer if we have
+ * not done so already.
+ */
+ if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) {
+ key_layer |= NFP_FLOWER_LAYER_IPV4;
+ key_size += sizeof(struct nfp_flower_ipv4);
+ }
+ }
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key_ctl;
+
+ key_ctl = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ flow->key);
+
+ if (key_ctl->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS)
+ return -EOPNOTSUPP;
+ }
+
ret_key_ls->key_layer = key_layer;
ret_key_ls->key_layer_two = key_layer_two;
ret_key_ls->key_size = key_size;
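
The two TCP-flags checks above implement a single offloadability rule. A hedged restatement as a standalone predicate, for illustration only (the function name is invented):

#include <linux/types.h>
#include <net/tcp.h>	/* TCPHDR_* */

/* Only FIN/SYN/RST/PSH/URG can be matched, and PSH/URG only in
 * combination with FIN, SYN or RST.
 */
static bool nfp_example_tcp_flags_offloadable(u32 tcp_flags)
{
	const u32 supported = TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST |
			      TCPHDR_PSH | TCPHDR_URG;

	if (tcp_flags & ~supported)
		return false;

	if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
	    !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
		return false;

	return true;
}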
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index ab301d56430b..c4b1f344b4da 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -645,6 +645,7 @@ MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_4x10_1x40.nffw");
MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_8x10.nffw");
MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x10.nffw");
MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x25.nffw");
+MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_1x10_1x25.nffw");
MODULE_AUTHOR("Netronome Systems <[email protected]>");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 4499a7333078..bb63c115537d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
+ * Copyright (C) 2015-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -51,12 +51,12 @@
* The configuration BAR is 8K in size, but due to
* THB-350, 32k needs to be reserved.
*/
-#define NFP_NET_CFG_BAR_SZ (32 * 1024)
+#define NFP_NET_CFG_BAR_SZ (32 * 1024)
/**
* Offset in Freelist buffer where packet starts on RX
*/
-#define NFP_NET_RX_OFFSET 32
+#define NFP_NET_RX_OFFSET 32
/**
* LSO parameters
@@ -75,65 +75,65 @@
#define NFP_NET_META_PORTID 5
#define NFP_NET_META_CSUM 6 /* checksum complete type */
-#define NFP_META_PORT_ID_CTRL ~0U
+#define NFP_META_PORT_ID_CTRL ~0U
/**
* Hash type pre-pended when a RSS hash was computed
*/
-#define NFP_NET_RSS_NONE 0
-#define NFP_NET_RSS_IPV4 1
-#define NFP_NET_RSS_IPV6 2
-#define NFP_NET_RSS_IPV6_EX 3
-#define NFP_NET_RSS_IPV4_TCP 4
-#define NFP_NET_RSS_IPV6_TCP 5
-#define NFP_NET_RSS_IPV6_EX_TCP 6
-#define NFP_NET_RSS_IPV4_UDP 7
-#define NFP_NET_RSS_IPV6_UDP 8
-#define NFP_NET_RSS_IPV6_EX_UDP 9
+#define NFP_NET_RSS_NONE 0
+#define NFP_NET_RSS_IPV4 1
+#define NFP_NET_RSS_IPV6 2
+#define NFP_NET_RSS_IPV6_EX 3
+#define NFP_NET_RSS_IPV4_TCP 4
+#define NFP_NET_RSS_IPV6_TCP 5
+#define NFP_NET_RSS_IPV6_EX_TCP 6
+#define NFP_NET_RSS_IPV4_UDP 7
+#define NFP_NET_RSS_IPV6_UDP 8
+#define NFP_NET_RSS_IPV6_EX_UDP 9
/**
* Ring counts
- * %NFP_NET_TXR_MAX: Maximum number of TX rings
- * %NFP_NET_RXR_MAX: Maximum number of RX rings
+ * %NFP_NET_TXR_MAX: Maximum number of TX rings
+ * %NFP_NET_RXR_MAX: Maximum number of RX rings
*/
-#define NFP_NET_TXR_MAX 64
-#define NFP_NET_RXR_MAX 64
+#define NFP_NET_TXR_MAX 64
+#define NFP_NET_RXR_MAX 64
/**
* Read/Write config words (0x0000 - 0x002c)
- * %NFP_NET_CFG_CTRL: Global control
+ * %NFP_NET_CFG_CTRL: Global control
* %NFP_NET_CFG_UPDATE: Indicate which fields are updated
* %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
* %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
- * %NFP_NET_CFG_MTU: Set MTU size
+ * %NFP_NET_CFG_MTU: Set MTU size
* %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU)
- * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions
- * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes
+ * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions
+ * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes
* %NFP_NET_CFG_MACADDR: MAC address
*
* TODO:
* - define Error details in UPDATE
*/
-#define NFP_NET_CFG_CTRL 0x0000
-#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */
-#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */
-#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */
-#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */
-#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */
-#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */
-#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */
-#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */
-#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */
-#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
-#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
+#define NFP_NET_CFG_CTRL 0x0000
+#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */
+#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */
+#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */
+#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */
+#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */
+#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */
+#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */
+#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */
+#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */
+#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
+#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
#define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */
-#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
+#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */
-#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */
-#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */
-#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */
-#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/
-#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */
+#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */
+#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */
+#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */
+#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/
+#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */
#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */
#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */
@@ -152,35 +152,35 @@
#define NFP_NET_CFG_CTRL_CHAIN_META (NFP_NET_CFG_CTRL_RSS2 | \
NFP_NET_CFG_CTRL_CSUM_COMPLETE)
-#define NFP_NET_CFG_UPDATE 0x0004
-#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
-#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
-#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */
-#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */
-#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */
-#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */
-#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */
-#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
-#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
+#define NFP_NET_CFG_UPDATE 0x0004
+#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
+#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
+#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */
+#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */
+#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */
+#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */
+#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */
+#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
+#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */
#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */
#define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */
#define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */
#define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */
-#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */
-#define NFP_NET_CFG_TXRS_ENABLE 0x0008
-#define NFP_NET_CFG_RXRS_ENABLE 0x0010
-#define NFP_NET_CFG_MTU 0x0018
-#define NFP_NET_CFG_FLBUFSZ 0x001c
-#define NFP_NET_CFG_EXN 0x001f
-#define NFP_NET_CFG_LSC 0x0020
-#define NFP_NET_CFG_MACADDR 0x0024
+#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* An error occurred */
+#define NFP_NET_CFG_TXRS_ENABLE 0x0008
+#define NFP_NET_CFG_RXRS_ENABLE 0x0010
+#define NFP_NET_CFG_MTU 0x0018
+#define NFP_NET_CFG_FLBUFSZ 0x001c
+#define NFP_NET_CFG_EXN 0x001f
+#define NFP_NET_CFG_LSC 0x0020
+#define NFP_NET_CFG_MACADDR 0x0024
/**
* Read-only words (0x0030 - 0x0050):
* %NFP_NET_CFG_VERSION: Firmware version number
- * %NFP_NET_CFG_STS: Status
- * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
+ * %NFP_NET_CFG_STS: Status
+ * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
* %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
* %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
* %NFP_NET_CFG_MAX_MTU: Maximum supported MTU
@@ -190,37 +190,37 @@
* TODO:
* - define more STS bits
*/
-#define NFP_NET_CFG_VERSION 0x0030
+#define NFP_NET_CFG_VERSION 0x0030
#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24)
#define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16)
-#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16)
+#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16)
#define NFP_NET_CFG_VERSION_CLASS_GENERIC 0
#define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8)
-#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8)
+#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8)
#define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0)
-#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0)
-#define NFP_NET_CFG_STS 0x0034
-#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */
+#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0)
+#define NFP_NET_CFG_STS 0x0034
+#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */
/* Link rate */
#define NFP_NET_CFG_STS_LINK_RATE_SHIFT 1
#define NFP_NET_CFG_STS_LINK_RATE_MASK 0xF
-#define NFP_NET_CFG_STS_LINK_RATE \
+#define NFP_NET_CFG_STS_LINK_RATE \
(NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT)
#define NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED 0
-#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1
-#define NFP_NET_CFG_STS_LINK_RATE_1G 2
-#define NFP_NET_CFG_STS_LINK_RATE_10G 3
-#define NFP_NET_CFG_STS_LINK_RATE_25G 4
-#define NFP_NET_CFG_STS_LINK_RATE_40G 5
-#define NFP_NET_CFG_STS_LINK_RATE_50G 6
-#define NFP_NET_CFG_STS_LINK_RATE_100G 7
-#define NFP_NET_CFG_CAP 0x0038
-#define NFP_NET_CFG_MAX_TXRINGS 0x003c
-#define NFP_NET_CFG_MAX_RXRINGS 0x0040
-#define NFP_NET_CFG_MAX_MTU 0x0044
+#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1
+#define NFP_NET_CFG_STS_LINK_RATE_1G 2
+#define NFP_NET_CFG_STS_LINK_RATE_10G 3
+#define NFP_NET_CFG_STS_LINK_RATE_25G 4
+#define NFP_NET_CFG_STS_LINK_RATE_40G 5
+#define NFP_NET_CFG_STS_LINK_RATE_50G 6
+#define NFP_NET_CFG_STS_LINK_RATE_100G 7
+#define NFP_NET_CFG_CAP 0x0038
+#define NFP_NET_CFG_MAX_TXRINGS 0x003c
+#define NFP_NET_CFG_MAX_RXRINGS 0x0040
+#define NFP_NET_CFG_MAX_MTU 0x0044
/* Next two words are used by VFs to solve the THB350 issue */
-#define NFP_NET_CFG_START_TXQ 0x0048
-#define NFP_NET_CFG_START_RXQ 0x004c
+#define NFP_NET_CFG_START_TXQ 0x0048
+#define NFP_NET_CFG_START_RXQ 0x004c
/**
* Prepend configuration
@@ -280,8 +280,8 @@
/**
* 40B reserved for future use (0x0098 - 0x00c0)
*/
-#define NFP_NET_CFG_RESERVED 0x0098
-#define NFP_NET_CFG_RESERVED_SZ 0x0028
+#define NFP_NET_CFG_RESERVED 0x0098
+#define NFP_NET_CFG_RESERVED_SZ 0x0028
/**
* RSS configuration (0x0100 - 0x01ac):
@@ -290,26 +290,26 @@
* %NFP_NET_CFG_RSS_KEY: RSS "secret" key
* %NFP_NET_CFG_RSS_ITBL: RSS indirection table
*/
-#define NFP_NET_CFG_RSS_BASE 0x0100
-#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE
-#define NFP_NET_CFG_RSS_MASK (0x7f)
-#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f)
-#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */
-#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */
-#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */
-#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */
-#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */
-#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */
+#define NFP_NET_CFG_RSS_BASE 0x0100
+#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE
+#define NFP_NET_CFG_RSS_MASK (0x7f)
+#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f)
+#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */
+#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */
+#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */
+#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */
+#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */
+#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */
#define NFP_NET_CFG_RSS_HFUNC 0xff000000
-#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */
+#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */
#define NFP_NET_CFG_RSS_XOR (1 << 25) /* Use XOR as hash */
#define NFP_NET_CFG_RSS_CRC32 (1 << 26) /* Use CRC32 as hash */
#define NFP_NET_CFG_RSS_HFUNCS 3
-#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4)
-#define NFP_NET_CFG_RSS_KEY_SZ 0x28
-#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \
+#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4)
+#define NFP_NET_CFG_RSS_KEY_SZ 0x28
+#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \
NFP_NET_CFG_RSS_KEY_SZ)
-#define NFP_NET_CFG_RSS_ITBL_SZ 0x80
+#define NFP_NET_CFG_RSS_ITBL_SZ 0x80
/**
* TX ring configuration (0x200 - 0x800)
@@ -321,13 +321,13 @@
* %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries)
* %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet
*/
-#define NFP_NET_CFG_TXR_BASE 0x0200
-#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \
+#define NFP_NET_CFG_TXR_BASE 0x0200
+#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \
((_x) * 0x8))
-#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
-#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
-#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
+#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
+#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
+#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \
((_x) * 0x4))
@@ -340,11 +340,11 @@
* %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries)
* %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
*/
-#define NFP_NET_CFG_RXR_BASE 0x0800
-#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
-#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
-#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
+#define NFP_NET_CFG_RXR_BASE 0x0800
+#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
+#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
+#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
#define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \
((_x) * 0x4))
@@ -358,36 +358,36 @@
* the MSI-X entry and the host driver must clear the register to
* re-enable the interrupt.
*/
-#define NFP_NET_CFG_ICR_BASE 0x0c00
-#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x))
-#define NFP_NET_CFG_ICR_UNMASKED 0x0
-#define NFP_NET_CFG_ICR_RXTX 0x1
-#define NFP_NET_CFG_ICR_LSC 0x2
+#define NFP_NET_CFG_ICR_BASE 0x0c00
+#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x))
+#define NFP_NET_CFG_ICR_UNMASKED 0x0
+#define NFP_NET_CFG_ICR_RXTX 0x1
+#define NFP_NET_CFG_ICR_LSC 0x2
/**
* General device stats (0x0d00 - 0x0d90)
* all counters are 64bit.
*/
-#define NFP_NET_CFG_STATS_BASE 0x0d00
-#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00)
-#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08)
-#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10)
-#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18)
-#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20)
-#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28)
-#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30)
-#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38)
-#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40)
-
-#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48)
-#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50)
-#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58)
-#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60)
-#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68)
-#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70)
-#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78)
-#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
-#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
+#define NFP_NET_CFG_STATS_BASE 0x0d00
+#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00)
+#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08)
+#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10)
+#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18)
+#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20)
+#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28)
+#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30)
+#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38)
+#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40)
+
+#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48)
+#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50)
+#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58)
+#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60)
+#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68)
+#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70)
+#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78)
+#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
+#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90)
#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98)
@@ -404,11 +404,11 @@
* %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count)
* %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count)
*/
-#define NFP_NET_CFG_TXR_STATS_BASE 0x1000
-#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \
+#define NFP_NET_CFG_TXR_STATS_BASE 0x1000
+#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \
((_x) * 0x10))
-#define NFP_NET_CFG_RXR_STATS_BASE 0x1400
-#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
+#define NFP_NET_CFG_RXR_STATS_BASE 0x1400
+#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
((_x) * 0x10))
/**
@@ -444,7 +444,7 @@
* %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
* %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
* %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV
- * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
+ * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
* %NFP_NET_CFG_TLV_VALUE: Offset of value within the TLV
*
* List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE.
@@ -457,12 +457,12 @@
* Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH.
*/
#define NFP_NET_CFG_TLV_TYPE 0x00
-#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000
+#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000
#define NFP_NET_CFG_TLV_LENGTH 0x02
#define NFP_NET_CFG_TLV_LENGTH_INC 4
#define NFP_NET_CFG_TLV_VALUE 0x04
-#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
+#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
#define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000
#define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff
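
The TLV layout documented above can be walked using these offsets and header masks. A hedged sketch, assuming nn_readl() and nn_dbg() from nfp_net.h and omitting all error handling (bad lengths, unknown required types):

#include <linux/bitfield.h>
#include "nfp_net.h"
#include "nfp_net_ctrl.h"

/* Illustrative walk of the config-BAR TLV list; 'off' and 'end' are
 * assumed to delimit the TLV area.
 */
static void nfp_example_walk_tlvs(struct nfp_net *nn, u32 off, u32 end)
{
	while (off + NFP_NET_CFG_TLV_VALUE <= end) {
		u32 hdr = nn_readl(nn, off);

		if (hdr & NFP_NET_CFG_TLV_HEADER_REQUIRED)
			nn_dbg(nn, "required TLV, type %lu\n",
			       (unsigned long)FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE,
							hdr));

		/* the 4 byte TLV header is not counted in the length */
		off += NFP_NET_CFG_TLV_VALUE +
		       FIELD_GET(NFP_NET_CFG_TLV_HEADER_LENGTH, hdr);
	}
}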
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index cf81cf95d1d8..67cdd8330c59 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -231,15 +231,15 @@ void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id)
for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) {
sprintf(name, "%d", i);
- debugfs_create_file(name, S_IRUSR, rx,
+ debugfs_create_file(name, 0400, rx,
&nn->r_vecs[i], &nfp_rx_q_fops);
- debugfs_create_file(name, S_IRUSR, xdp,
+ debugfs_create_file(name, 0400, xdp,
&nn->r_vecs[i], &nfp_xdp_q_fops);
}
for (i = 0; i < min(nn->max_tx_rings, nn->max_r_vecs); i++) {
sprintf(name, "%d", i);
- debugfs_create_file(name, S_IRUSR, tx,
+ debugfs_create_file(name, 0400, tx,
&nn->r_vecs[i], &nfp_tx_q_fops);
}
}
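
The octal literals are exact replacements for the symbolic mode constants used before. A hedged compile-time check of the equivalence (the wrapper function is illustrative; these conversions, here and in the sysfs changes below, are purely cosmetic):

#include <linux/build_bug.h>
#include <linux/stat.h>

static inline void example_check_mode_literals(void)
{
	BUILD_BUG_ON(0400 != S_IRUSR);
	BUILD_BUG_ON(0444 != S_IRUGO);
	BUILD_BUG_ON(0644 != (S_IRUGO | S_IWUSR));
}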
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nic/Makefile b/drivers/net/ethernet/netronome/nfp/nic/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nic/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index f2e8de607119..8259e8309320 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -2829,9 +2829,9 @@ netxen_show_bridged_mode(struct device *dev,
}
static const struct device_attribute dev_attr_bridged_mode = {
- .attr = {.name = "bridged_mode", .mode = (S_IRUGO | S_IWUSR)},
- .show = netxen_show_bridged_mode,
- .store = netxen_store_bridged_mode,
+ .attr = { .name = "bridged_mode", .mode = 0644 },
+ .show = netxen_show_bridged_mode,
+ .store = netxen_store_bridged_mode,
};
static ssize_t
@@ -2861,7 +2861,7 @@ netxen_show_diag_mode(struct device *dev,
}
static const struct device_attribute dev_attr_diag_mode = {
- .attr = {.name = "diag_mode", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "diag_mode", .mode = 0644 },
.show = netxen_show_diag_mode,
.store = netxen_store_diag_mode,
};
@@ -3006,14 +3006,14 @@ static ssize_t netxen_sysfs_write_mem(struct file *filp, struct kobject *kobj,
static const struct bin_attribute bin_attr_crb = {
- .attr = {.name = "crb", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "crb", .mode = 0644 },
.size = 0,
.read = netxen_sysfs_read_crb,
.write = netxen_sysfs_write_crb,
};
static const struct bin_attribute bin_attr_mem = {
- .attr = {.name = "mem", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "mem", .mode = 0644 },
.size = 0,
.read = netxen_sysfs_read_mem,
.write = netxen_sysfs_write_mem,
@@ -3142,7 +3142,7 @@ out:
}
static const struct bin_attribute bin_attr_dimm = {
- .attr = { .name = "dimm", .mode = (S_IRUGO | S_IWUSR) },
+ .attr = { .name = "dimm", .mode = 0644 },
.size = sizeof(struct netxen_dimm_cfg),
.read = netxen_sysfs_read_dimm,
};
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 6f546e869d8d..00f41c145d4d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2480,7 +2480,10 @@ int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto)
if (rc)
return rc;
- /* Free Task CXT */
+ /* Free Task CXT (intentionally RoCE, as the task-id is shared
+ * between RoCE and iWARP)
+ */
+ proto = PROTOCOLID_ROCE;
rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_TASK, 0,
qed_cxt_get_proto_tid_count(p_hwfn, proto));
if (rc)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 553a6d17260e..cdb3eec0f68c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -298,8 +298,8 @@ static void qed_init_qm_params(struct qed_hwfn *p_hwfn)
qm_info->start_vport = (u8) RESC_START(p_hwfn, QED_VPORT);
/* rate limiting and weighted fair queueing are always enabled */
- qm_info->vport_rl_en = 1;
- qm_info->vport_wfq_en = 1;
+ qm_info->vport_rl_en = true;
+ qm_info->vport_wfq_en = true;
/* TC config is different for AH 4 port */
four_port = p_hwfn->cdev->num_ports_in_engine == MAX_NUM_PORTS_K2;
@@ -1276,9 +1276,9 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
if (p_hwfn->mcp_info) {
if (p_hwfn->mcp_info->func_info.bandwidth_max)
- qm_info->pf_rl_en = 1;
+ qm_info->pf_rl_en = true;
if (p_hwfn->mcp_info->func_info.bandwidth_min)
- qm_info->pf_wfq_en = 1;
+ qm_info->pf_wfq_en = true;
}
memset(&params, 0, sizeof(params));
@@ -1630,7 +1630,7 @@ static int qed_vf_start(struct qed_hwfn *p_hwfn,
qed_vf_pf_tunnel_param_update(p_hwfn, p_params->p_tunn);
}
- p_hwfn->b_int_enabled = 1;
+ p_hwfn->b_int_enabled = true;
return 0;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ca4a81dc1ace..69051e98aff9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1703,6 +1703,13 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen);
if (eth_type == ETH_P_IP) {
+ if (iph->protocol != IPPROTO_TCP) {
+ DP_NOTICE(p_hwfn,
+ "Unexpected ip protocol on ll2 %x\n",
+ iph->protocol);
+ return -EINVAL;
+ }
+
cm_info->local_ip[0] = ntohl(iph->daddr);
cm_info->remote_ip[0] = ntohl(iph->saddr);
cm_info->ip_version = TCP_IPV4;
@@ -1711,6 +1718,14 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
*payload_len = ntohs(iph->tot_len) - ip_hlen;
} else if (eth_type == ETH_P_IPV6) {
ip6h = (struct ipv6hdr *)iph;
+
+ if (ip6h->nexthdr != IPPROTO_TCP) {
+ DP_NOTICE(p_hwfn,
+ "Unexpected ip protocol on ll2 %x\n",
+ ip6h->nexthdr);
+ return -EINVAL;
+ }
+
for (i = 0; i < 4; i++) {
cm_info->local_ip[i] =
ntohl(ip6h->daddr.in6_u.u6_addr32[i]);
@@ -1784,7 +1799,7 @@ enum qed_iwarp_mpa_pkt_type {
/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
#define QED_IWARP_MAX_BDS_PER_FPDU 3
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
"QED_IWARP_MPA_PKT_PACKED",
"QED_IWARP_MPA_PKT_PARTIAL",
"QED_IWARP_MPA_PKT_UNALIGNED"
@@ -1928,8 +1943,8 @@ qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn,
/* Missing lower byte is now available */
mpa_len = fpdu->fpdu_length | *mpa_data;
fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
- fpdu->mpa_frag_len = fpdu->fpdu_length;
/* one byte of hdr */
+ fpdu->mpa_frag_len = 1;
fpdu->incomplete_bytes = fpdu->fpdu_length - 1;
DP_VERBOSE(p_hwfn,
QED_MSG_RDMA,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 5d040b873137..a411f9c702a1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -379,6 +379,7 @@ static void qed_rdma_free(struct qed_hwfn *p_hwfn)
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n");
qed_rdma_free_reserved_lkey(p_hwfn);
+ qed_cxt_free_proto_ilt(p_hwfn, p_hwfn->p_rdma_info->proto);
qed_rdma_resc_free(p_hwfn);
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 2db70eabddfe..a01e7d6e5442 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -288,7 +288,7 @@ int __init qede_init(void)
}
/* Must register notifier before pci ops, since we might miss
- * interface rename after pci probe and netdev registeration.
+ * interface rename after pci probe and netdev registration.
*/
ret = register_netdevice_notifier(&qede_netdev_notifier);
if (ret) {
@@ -988,7 +988,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
if (rc)
goto err3;
- /* Prepare the lock prior to the registeration of the netdev,
+ /* Prepare the lock prior to the registration of the netdev,
* as once it's registered we might reach flows requiring it
* [it's even possible to reach a flow needing it directly
* from there, although it's unlikely].
@@ -2067,8 +2067,6 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
link_params.link_up = true;
edev->ops->common->set_link(edev->cdev, &link_params);
- qede_rdma_dev_event_open(edev);
-
edev->state = QEDE_STATE_OPEN;
DP_INFO(edev, "Ending successfully qede load\n");
@@ -2169,12 +2167,14 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
DP_NOTICE(edev, "Link is up\n");
netif_tx_start_all_queues(edev->ndev);
netif_carrier_on(edev->ndev);
+ qede_rdma_dev_event_open(edev);
}
} else {
if (netif_carrier_ok(edev->ndev)) {
DP_NOTICE(edev, "Link is down\n");
netif_tx_disable(edev->ndev);
netif_carrier_off(edev->ndev);
+ qede_rdma_dev_event_close(edev);
}
}
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
index 9b2280badaf7..02adb513f475 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
@@ -485,7 +485,7 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc)
ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev);
if (IS_ERR(ptp->clock)) {
rc = -EINVAL;
- DP_ERR(edev, "PTP clock registeration failed\n");
+ DP_ERR(edev, "PTP clock registration failed\n");
goto err2;
}
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 9e5264d8773b..b48f76182049 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -1858,8 +1858,9 @@ static void ql_update_small_bufq_prod_index(struct ql3_adapter *qdev)
qdev->small_buf_release_cnt -= 8;
}
wmb();
- writel(qdev->small_buf_q_producer_index,
- &port_regs->CommonRegs.rxSmallQProducerIndex);
+ writel_relaxed(qdev->small_buf_q_producer_index,
+ &port_regs->CommonRegs.rxSmallQProducerIndex);
+ mmiowb();
}
}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 46b0372dd032..97c146e7698a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -478,7 +478,7 @@ irqreturn_t qlcnic_83xx_clear_legacy_intr(struct qlcnic_adapter *adapter)
wmb();
/* clear the interrupt trigger control register */
- writel(0, adapter->isr_int_vec);
+ writel_relaxed(0, adapter->isr_int_vec);
intr_val = readl(adapter->isr_int_vec);
do {
intr_val = readl(adapter->tgt_status_reg);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 287d89dd086f..891f03a7a33d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -1175,81 +1175,81 @@ static ssize_t qlcnic_83xx_sysfs_flash_write_handler(struct file *filp,
}
static const struct device_attribute dev_attr_bridged_mode = {
- .attr = {.name = "bridged_mode", .mode = (S_IRUGO | S_IWUSR)},
- .show = qlcnic_show_bridged_mode,
- .store = qlcnic_store_bridged_mode,
+ .attr = { .name = "bridged_mode", .mode = 0644 },
+ .show = qlcnic_show_bridged_mode,
+ .store = qlcnic_store_bridged_mode,
};
static const struct device_attribute dev_attr_diag_mode = {
- .attr = {.name = "diag_mode", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "diag_mode", .mode = 0644 },
.show = qlcnic_show_diag_mode,
.store = qlcnic_store_diag_mode,
};
static const struct device_attribute dev_attr_beacon = {
- .attr = {.name = "beacon", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "beacon", .mode = 0644 },
.show = qlcnic_show_beacon,
.store = qlcnic_store_beacon,
};
static const struct bin_attribute bin_attr_crb = {
- .attr = {.name = "crb", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "crb", .mode = 0644 },
.size = 0,
.read = qlcnic_sysfs_read_crb,
.write = qlcnic_sysfs_write_crb,
};
static const struct bin_attribute bin_attr_mem = {
- .attr = {.name = "mem", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "mem", .mode = 0644 },
.size = 0,
.read = qlcnic_sysfs_read_mem,
.write = qlcnic_sysfs_write_mem,
};
static const struct bin_attribute bin_attr_npar_config = {
- .attr = {.name = "npar_config", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "npar_config", .mode = 0644 },
.size = 0,
.read = qlcnic_sysfs_read_npar_config,
.write = qlcnic_sysfs_write_npar_config,
};
static const struct bin_attribute bin_attr_pci_config = {
- .attr = {.name = "pci_config", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "pci_config", .mode = 0644 },
.size = 0,
.read = qlcnic_sysfs_read_pci_config,
.write = NULL,
};
static const struct bin_attribute bin_attr_port_stats = {
- .attr = {.name = "port_stats", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "port_stats", .mode = 0644 },
.size = 0,
.read = qlcnic_sysfs_get_port_stats,
.write = qlcnic_sysfs_clear_port_stats,
};
static const struct bin_attribute bin_attr_esw_stats = {
- .attr = {.name = "esw_stats", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "esw_stats", .mode = 0644 },
.size = 0,
.read = qlcnic_sysfs_get_esw_stats,
.write = qlcnic_sysfs_clear_esw_stats,
};
static const struct bin_attribute bin_attr_esw_config = {
- .attr = {.name = "esw_config", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "esw_config", .mode = 0644 },
.size = 0,
.read = qlcnic_sysfs_read_esw_config,
.write = qlcnic_sysfs_write_esw_config,
};
static const struct bin_attribute bin_attr_pm_config = {
- .attr = {.name = "pm_config", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "pm_config", .mode = 0644 },
.size = 0,
.read = qlcnic_sysfs_read_pm_config,
.write = qlcnic_sysfs_write_pm_config,
};
static const struct bin_attribute bin_attr_flash = {
- .attr = {.name = "flash", .mode = (S_IRUGO | S_IWUSR)},
+ .attr = { .name = "flash", .mode = 0644 },
.size = 0,
.read = qlcnic_83xx_sysfs_flash_read_handler,
.write = qlcnic_83xx_sysfs_flash_write_handler,
@@ -1276,7 +1276,7 @@ static ssize_t qlcnic_hwmon_show_temp(struct device *dev,
}
/* hwmon-sysfs attributes */
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+static SENSOR_DEVICE_ATTR(temp1_input, 0444,
qlcnic_hwmon_show_temp, NULL, 1);
static struct attribute *qlcnic_hwmon_attrs[] = {
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index 84ac50f92c9c..3e71b65a9546 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -2185,6 +2185,22 @@ static inline void ql_write_db_reg(u32 val, void __iomem *addr)
}
/*
+ * Doorbell Registers:
+ * Doorbell registers are virtual registers in the PCI memory space.
+ * The space is allocated by the chip during PCI initialization. The
+ * device driver finds the doorbell address in BAR 3 in PCI config space.
+ * The registers are used to control outbound and inbound queues. For
+ * example, the producer index for an outbound queue. Each queue uses
+ * one 4 KB chunk of memory. The lower half of the space is for outbound
+ * queues. The upper half is for inbound queues.
+ * Caller has to guarantee ordering.
+ */
+static inline void ql_write_db_reg_relaxed(u32 val, void __iomem *addr)
+{
+ writel_relaxed(val, addr);
+}
+
+/*
* Shadow Registers:
* Outbound queues have a consumer index that is maintained by the chip.
* Inbound queues have a producer index that is maintained by the chip.
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 50038d946ced..8293c2028002 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2700,7 +2700,8 @@ static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev)
tx_ring->prod_idx = 0;
wmb();
- ql_write_db_reg(tx_ring->prod_idx, tx_ring->prod_idx_db_reg);
+ ql_write_db_reg_relaxed(tx_ring->prod_idx, tx_ring->prod_idx_db_reg);
+ mmiowb();
netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev,
"tx queued, slot %d, len %d\n",
tx_ring->prod_idx, skb->len);
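
ql_write_db_reg_relaxed() moves the ordering burden to the caller, as the new qlge.h comment notes. A hedged sketch of the resulting barrier pairing, with placeholder names:

#include <linux/io.h>

/* 'prod_idx' and 'db' are placeholders; the point is the barrier
 * pairing around a relaxed doorbell write.
 */
static void example_ring_doorbell(u32 prod_idx, void __iomem *db)
{
	/* make queued descriptors visible before the doorbell write */
	wmb();
	writel_relaxed(prod_idx, db);
	/* order the MMIO write ahead of a subsequent spin_unlock() */
	mmiowb();
}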
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 9cbb27263742..d5a32b7c7dc5 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -1194,9 +1194,9 @@ void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q)
while (tx_q->tpd.consume_idx != hw_consume_idx) {
tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx);
if (tpbuf->dma_addr) {
- dma_unmap_single(adpt->netdev->dev.parent,
- tpbuf->dma_addr, tpbuf->length,
- DMA_TO_DEVICE);
+ dma_unmap_page(adpt->netdev->dev.parent,
+ tpbuf->dma_addr, tpbuf->length,
+ DMA_TO_DEVICE);
tpbuf->dma_addr = 0;
}
@@ -1353,9 +1353,11 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt,
tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
tpbuf->length = mapped_len;
- tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
- skb->data, tpbuf->length,
- DMA_TO_DEVICE);
+ tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
+ virt_to_page(skb->data),
+ offset_in_page(skb->data),
+ tpbuf->length,
+ DMA_TO_DEVICE);
ret = dma_mapping_error(adpt->netdev->dev.parent,
tpbuf->dma_addr);
if (ret)
@@ -1371,9 +1373,12 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt,
if (mapped_len < len) {
tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
tpbuf->length = len - mapped_len;
- tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
- skb->data + mapped_len,
- tpbuf->length, DMA_TO_DEVICE);
+ tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
+ virt_to_page(skb->data +
+ mapped_len),
+ offset_in_page(skb->data +
+ mapped_len),
+ tpbuf->length, DMA_TO_DEVICE);
ret = dma_mapping_error(adpt->netdev->dev.parent,
tpbuf->dma_addr);
if (ret)
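
For lowmem kernel-virtual buffers such as skb->data, the dma_map_page() form used above yields the same mapping as the dma_map_single() call it replaces. A hedged sketch of the correspondence, with placeholder names:

#include <linux/dma-mapping.h>
#include <linux/mm.h>

/* Equivalent to dma_map_single(dev, buf, len, DMA_TO_DEVICE) for a
 * lowmem buffer; 'dev', 'buf' and 'len' are placeholders.
 */
static dma_addr_t example_map_page(struct device *dev, void *buf,
				   size_t len)
{
	return dma_map_page(dev, virt_to_page(buf),
			    offset_in_page(buf), len, DMA_TO_DEVICE);
}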
diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c
index 92b6be9c4429..51d89c86e60f 100644
--- a/drivers/net/ethernet/qualcomm/qca_debug.c
+++ b/drivers/net/ethernet/qualcomm/qca_debug.c
@@ -151,7 +151,7 @@ qcaspi_init_device_debugfs(struct qcaspi *qca)
dev_name(&qca->net_dev->dev));
return;
}
- debugfs_create_file("info", S_IFREG | S_IRUGO, device_root, qca,
+ debugfs_create_file("info", S_IFREG | 0444, device_root, qca,
&qcaspi_info_ops);
}
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 9c236298fe21..5803cd6db406 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -705,7 +705,6 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev)
tskb = skb_copy_expand(skb, QCAFRM_HEADER_LEN,
QCAFRM_FOOTER_LEN + pad_len, GFP_ATOMIC);
if (!tskb) {
- netdev_dbg(qca->net_dev, "could not allocate tx_buff\n");
qca->stats.out_of_mem++;
return NETDEV_TX_BUSY;
}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 7e7704daf5f1..d33988570217 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -43,10 +43,9 @@
/* Local Definitions and Declarations */
-struct rmnet_walk_data {
- struct net_device *real_dev;
- struct list_head *head;
- struct rmnet_port *port;
+static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = {
+ [IFLA_RMNET_MUX_ID] = { .type = NLA_U16 },
+ [IFLA_RMNET_FLAGS] = { .len = sizeof(struct ifla_rmnet_flags) },
};
static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
@@ -112,17 +111,14 @@ static int rmnet_register_real_device(struct net_device *real_dev)
static void rmnet_unregister_bridge(struct net_device *dev,
struct rmnet_port *port)
{
- struct net_device *rmnet_dev, *bridge_dev;
struct rmnet_port *bridge_port;
+ struct net_device *bridge_dev;
if (port->rmnet_mode != RMNET_EPMODE_BRIDGE)
return;
/* bridge slave handling */
if (!port->nr_rmnet_devs) {
- rmnet_dev = netdev_master_upper_dev_get_rcu(dev);
- netdev_upper_dev_unlink(dev, rmnet_dev);
-
bridge_dev = port->bridge_ep;
bridge_port = rmnet_get_port_rtnl(bridge_dev);
@@ -132,9 +128,6 @@ static void rmnet_unregister_bridge(struct net_device *dev,
bridge_dev = port->bridge_ep;
bridge_port = rmnet_get_port_rtnl(bridge_dev);
- rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev);
- netdev_upper_dev_unlink(bridge_dev, rmnet_dev);
-
rmnet_unregister_real_device(bridge_dev, bridge_port);
}
}
@@ -143,7 +136,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- u32 data_format = RMNET_INGRESS_FORMAT_DEAGGREGATION;
+ u32 data_format = RMNET_FLAGS_INGRESS_DEAGGREGATION;
struct net_device *real_dev;
int mode = RMNET_EPMODE_VND;
struct rmnet_endpoint *ep;
@@ -155,14 +148,14 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
if (!real_dev || !dev)
return -ENODEV;
- if (!data[IFLA_VLAN_ID])
+ if (!data[IFLA_RMNET_MUX_ID])
return -EINVAL;
ep = kzalloc(sizeof(*ep), GFP_ATOMIC);
if (!ep)
return -ENOMEM;
- mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
err = rmnet_register_real_device(real_dev);
if (err)
@@ -173,18 +166,14 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
if (err)
goto err1;
- err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack);
- if (err)
- goto err2;
-
port->rmnet_mode = mode;
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
- if (data[IFLA_VLAN_FLAGS]) {
- struct ifla_vlan_flags *flags;
+ if (data[IFLA_RMNET_FLAGS]) {
+ struct ifla_rmnet_flags *flags;
- flags = nla_data(data[IFLA_VLAN_FLAGS]);
+ flags = nla_data(data[IFLA_RMNET_FLAGS]);
data_format = flags->flags & flags->mask;
}
@@ -193,8 +182,6 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
return 0;
-err2:
- rmnet_vnd_dellink(mux_id, port, ep);
err1:
rmnet_unregister_real_device(real_dev, port);
err0:
@@ -204,14 +191,13 @@ err0:
static void rmnet_dellink(struct net_device *dev, struct list_head *head)
{
+ struct rmnet_priv *priv = netdev_priv(dev);
struct net_device *real_dev;
struct rmnet_endpoint *ep;
struct rmnet_port *port;
u8 mux_id;
- rcu_read_lock();
- real_dev = netdev_master_upper_dev_get_rcu(dev);
- rcu_read_unlock();
+ real_dev = priv->real_dev;
if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
return;
@@ -219,7 +205,6 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
port = rmnet_get_port_rtnl(real_dev);
mux_id = rmnet_vnd_get_mux(dev);
- netdev_upper_dev_unlink(dev, real_dev);
ep = rmnet_get_endpoint(port, mux_id);
if (ep) {
@@ -233,30 +218,13 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
unregister_netdevice_queue(dev, head);
}
-static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data)
-{
- struct rmnet_walk_data *d = data;
- struct rmnet_endpoint *ep;
- u8 mux_id;
-
- mux_id = rmnet_vnd_get_mux(rmnet_dev);
- ep = rmnet_get_endpoint(d->port, mux_id);
- if (ep) {
- hlist_del_init_rcu(&ep->hlnode);
- rmnet_vnd_dellink(mux_id, d->port, ep);
- kfree(ep);
- }
- netdev_upper_dev_unlink(rmnet_dev, d->real_dev);
- unregister_netdevice_queue(rmnet_dev, d->head);
-
- return 0;
-}
-
static void rmnet_force_unassociate_device(struct net_device *dev)
{
struct net_device *real_dev = dev;
- struct rmnet_walk_data d;
+ struct hlist_node *tmp_ep;
+ struct rmnet_endpoint *ep;
struct rmnet_port *port;
+ unsigned long bkt_ep;
LIST_HEAD(list);
if (!rmnet_is_real_dev_registered(real_dev))
@@ -264,16 +232,19 @@ static void rmnet_force_unassociate_device(struct net_device *dev)
ASSERT_RTNL();
- d.real_dev = real_dev;
- d.head = &list;
-
port = rmnet_get_port_rtnl(dev);
- d.port = port;
rcu_read_lock();
rmnet_unregister_bridge(dev, port);
- netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d);
+ hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
+ unregister_netdevice_queue(ep->egress_dev, &list);
+ rmnet_vnd_dellink(ep->mux_id, port, ep);
+
+ hlist_del_init_rcu(&ep->hlnode);
+ kfree(ep);
+ }
+
rcu_read_unlock();
unregister_netdevice_many(&list);
@@ -310,10 +281,10 @@ static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
{
u16 mux_id;
- if (!data || !data[IFLA_VLAN_ID])
+ if (!data || !data[IFLA_RMNET_MUX_ID])
return -EINVAL;
- mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
if (mux_id > (RMNET_MAX_LOGICAL_EP - 1))
return -ERANGE;
@@ -338,9 +309,11 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
port = rmnet_get_port_rtnl(real_dev);
- if (data[IFLA_VLAN_ID]) {
- mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ if (data[IFLA_RMNET_MUX_ID]) {
+ mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
ep = rmnet_get_endpoint(port, priv->mux_id);
+ if (!ep)
+ return -ENODEV;
hlist_del_init_rcu(&ep->hlnode);
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
@@ -349,10 +322,10 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
priv->mux_id = mux_id;
}
- if (data[IFLA_VLAN_FLAGS]) {
- struct ifla_vlan_flags *flags;
+ if (data[IFLA_RMNET_FLAGS]) {
+ struct ifla_rmnet_flags *flags;
- flags = nla_data(data[IFLA_VLAN_FLAGS]);
+ flags = nla_data(data[IFLA_RMNET_FLAGS]);
port->data_format = flags->flags & flags->mask;
}
@@ -361,13 +334,45 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
static size_t rmnet_get_size(const struct net_device *dev)
{
- return nla_total_size(2) /* IFLA_VLAN_ID */ +
- nla_total_size(sizeof(struct ifla_vlan_flags)); /* IFLA_VLAN_FLAGS */
+ return
+ /* IFLA_RMNET_MUX_ID */
+ nla_total_size(2) +
+ /* IFLA_RMNET_FLAGS */
+ nla_total_size(sizeof(struct ifla_rmnet_flags));
+}
+
+static int rmnet_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct rmnet_priv *priv = netdev_priv(dev);
+ struct net_device *real_dev;
+ struct ifla_rmnet_flags f;
+ struct rmnet_port *port;
+
+ real_dev = priv->real_dev;
+
+ if (!rmnet_is_real_dev_registered(real_dev))
+ return -ENODEV;
+
+ if (nla_put_u16(skb, IFLA_RMNET_MUX_ID, priv->mux_id))
+ goto nla_put_failure;
+
+ port = rmnet_get_port_rtnl(real_dev);
+
+ f.flags = port->data_format;
+ f.mask = ~0;
+
+ if (nla_put(skb, IFLA_RMNET_FLAGS, sizeof(f), &f))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
}
struct rtnl_link_ops rmnet_link_ops __read_mostly = {
.kind = "rmnet",
- .maxtype = __IFLA_VLAN_MAX,
+ .maxtype = __IFLA_RMNET_MAX,
.priv_size = sizeof(struct rmnet_priv),
.setup = rmnet_vnd_setup,
.validate = rmnet_rtnl_validate,
@@ -375,6 +380,8 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = {
.dellink = rmnet_dellink,
.get_size = rmnet_get_size,
.changelink = rmnet_changelink,
+ .policy = rmnet_policy,
+ .fill_info = rmnet_fill_info,
};
/* Needs either rcu_read_lock() or rtnl lock */
@@ -422,11 +429,6 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
if (err)
return -EBUSY;
- err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL,
- extack);
- if (err)
- return -EINVAL;
-
slave_port = rmnet_get_port(slave_dev);
slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE;
slave_port->bridge_ep = real_dev;
@@ -449,7 +451,6 @@ int rmnet_del_bridge(struct net_device *rmnet_dev,
port->rmnet_mode = RMNET_EPMODE_VND;
port->bridge_ep = NULL;
- netdev_upper_dev_unlink(slave_dev, rmnet_dev);
slave_port = rmnet_get_port(slave_dev);
rmnet_unregister_real_device(slave_dev, slave_port);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 00e4634100d3..0b5b5da80198 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 601edec28c5f..6fcd586e9804 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -70,7 +70,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
u8 mux_id;
if (RMNET_MAP_GET_CD_BIT(skb)) {
- if (port->data_format & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
+ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_COMMANDS)
return rmnet_map_command(skb, port);
goto free_skb;
@@ -93,7 +93,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
skb_pull(skb, sizeof(struct rmnet_map_header));
rmnet_set_skb_proto(skb);
- if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
if (!rmnet_map_checksum_downlink_packet(skb, len + pad))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -121,7 +121,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb,
skb_push(skb, ETH_HLEN);
}
- if (port->data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
+ if (port->data_format & RMNET_FLAGS_INGRESS_DEAGGREGATION) {
while ((skbn = rmnet_map_deaggregate(skb, port)) != NULL)
__rmnet_map_ingress_handler(skbn, port);
@@ -141,7 +141,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
additional_header_len = 0;
required_headroom = sizeof(struct rmnet_map_header);
- if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4) {
+ if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4) {
additional_header_len = sizeof(struct rmnet_map_ul_csum_header);
required_headroom += additional_header_len;
}
@@ -151,7 +151,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
goto fail;
}
- if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4)
+ if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4)
rmnet_map_checksum_uplink_packet(skb, orig_dev);
map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 6ce31e29136d..884f1f52dcc2 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -23,8 +23,8 @@ struct rmnet_map_control_command {
struct {
u16 ip_family:2;
u16 reserved:14;
- u16 flow_control_seq_num;
- u32 qos_id;
+ __be16 flow_control_seq_num;
+ __be32 qos_id;
} flow_control;
u8 data[0];
};
@@ -44,7 +44,7 @@ struct rmnet_map_header {
u8 reserved_bit:1;
u8 cd_bit:1;
u8 mux_id;
- u16 pkt_len;
+ __be16 pkt_len;
} __aligned(1);
struct rmnet_map_dl_csum_trailer {
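Note: flow_control_seq_num, qos_id and pkt_len hold network byte order on
the wire, so the __be16/__be32 annotations let sparse (make C=1) catch any
access that skips the conversion helpers:

    u16 packet_len = ntohs(maph->pkt_len);  /* ok: explicit conversion */
    u16 wrong      = maph->pkt_len;         /* sparse warns: restricted __be16 */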
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index 6bc328fb88e1..78fdad0c6f76 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -38,6 +38,11 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
}
ep = rmnet_get_endpoint(port, mux_id);
+ if (!ep) {
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
+ }
+
vnd = ep->egress_dev;
ip_family = cmd->flow_control.ip_family;
@@ -64,7 +69,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
struct rmnet_map_control_command *cmd;
int xmit_status;
- if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
if (skb->len < sizeof(struct rmnet_map_header) +
RMNET_MAP_GET_LENGTH(skb) +
sizeof(struct rmnet_map_dl_csum_trailer)) {
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index c74a6c56d315..a6ea09416f8d 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -309,7 +309,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
maph = (struct rmnet_map_header *)skb->data;
packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
- if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4)
+ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4)
packet_len += sizeof(struct rmnet_map_dl_csum_trailer);
if (((int)skb->len - (int)packet_len) < 0)
@@ -323,7 +323,6 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
if (!skbn)
return NULL;
- skbn->dev = skb->dev;
skb_reserve(skbn, RMNET_MAP_DEAGGR_HEADROOM);
skb_put(skbn, packet_len);
memcpy(skbn->data, skb->data, packet_len);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index de0143eaa05a..b9cc4f85f229 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -18,12 +18,6 @@
#define RMNET_NEEDED_HEADROOM 16
#define RMNET_TX_QUEUE_LEN 1000
-/* Constants */
-#define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(0)
-#define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(1)
-#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4 BIT(2)
-#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4 BIT(3)
-
/* Replace skb->dev to a virtual rmnet device and pass up the stack */
#define RMNET_EPMODE_VND (1)
/* Pass the frame directly to another device with dev_queue_xmit() */
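Note: these BIT() constants are not deleted outright; they move to the uapi
header so userspace can request them through IFLA_RMNET_FLAGS. Their
replacements, assuming the if_link.h hunk of the same series:

    #define RMNET_FLAGS_INGRESS_DEAGGREGATION  (1U << 0)
    #define RMNET_FLAGS_INGRESS_MAP_COMMANDS   (1U << 1)
    #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4    (1U << 2)
    #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4     (1U << 3)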
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 570a227acdd8..2ea16a088de8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -121,7 +121,7 @@ static void rmnet_get_stats64(struct net_device *dev,
memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
for_each_possible_cpu(cpu) {
- pcpu_ptr = this_cpu_ptr(priv->pcpu_stats);
+ pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
do {
start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
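Note: this one-liner is a genuine bug fix. this_cpu_ptr() always resolves
to the calling CPU's instance, so the old loop added the local CPU's
counters once per possible CPU; per_cpu_ptr(ptr, cpu) indexes the instance
the loop variable names. The corrected aggregation in full; a sketch, with
field names assumed from rmnet's pcpu stats structs:

    for_each_possible_cpu(cpu) {
            struct rmnet_pcpu_stats *pcpu_ptr =
                    per_cpu_ptr(priv->pcpu_stats, cpu);
            struct rmnet_vnd_stats snapshot;
            unsigned int start;

            do {
                    start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
                    snapshot = pcpu_ptr->stats;   /* consistent copy */
            } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));

            total_stats.rx_pkts += snapshot.rx_pkts;
            total_stats.tx_pkts += snapshot.tx_pkts;
    }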
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0bf7d1759250..630409e0337f 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -99,12 +99,12 @@ static const int multicast_filter_limit = 32;
#define RTL8169_PHY_TIMEOUT (10*HZ)
/* write/read MMIO register */
-#define RTL_W8(reg, val8) writeb ((val8), ioaddr + (reg))
-#define RTL_W16(reg, val16) writew ((val16), ioaddr + (reg))
-#define RTL_W32(reg, val32) writel ((val32), ioaddr + (reg))
-#define RTL_R8(reg) readb (ioaddr + (reg))
-#define RTL_R16(reg) readw (ioaddr + (reg))
-#define RTL_R32(reg) readl (ioaddr + (reg))
+#define RTL_W8(tp, reg, val8) writeb((val8), tp->mmio_addr + (reg))
+#define RTL_W16(tp, reg, val16) writew((val16), tp->mmio_addr + (reg))
+#define RTL_W32(tp, reg, val32) writel((val32), tp->mmio_addr + (reg))
+#define RTL_R8(tp, reg) readb(tp->mmio_addr + (reg))
+#define RTL_R16(tp, reg) readw(tp->mmio_addr + (reg))
+#define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg))
enum mac_version {
RTL_GIGA_MAC_VER_01 = 0,
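Note: the accessor macros now take tp and resolve tp->mmio_addr themselves.
This is the change that lets every "void __iomem *ioaddr = tp->mmio_addr;"
local in the rest of this file go away; call sites convert mechanically:

    /* before: each function needed the mapping cookie in scope */
    void __iomem *ioaddr = tp->mmio_addr;
    RTL_W8(Cfg9346, Cfg9346_Unlock);

    /* after: the private struct is the only handle */
    RTL_W8(tp, Cfg9346, Cfg9346_Unlock);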
@@ -735,12 +735,6 @@ struct ring_info {
u8 __pad[sizeof(void *) - sizeof(u32)];
};
-enum features {
- RTL_FEATURE_WOL = (1 << 0),
- RTL_FEATURE_MSI = (1 << 1),
- RTL_FEATURE_GMII = (1 << 2),
-};
-
struct rtl8169_counters {
__le64 tx_packets;
__le64 rx_packets;
@@ -829,7 +823,7 @@ struct rtl8169_private {
void (*phy_reset_enable)(struct rtl8169_private *tp);
void (*hw_start)(struct net_device *);
unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
- unsigned int (*link_ok)(void __iomem *);
+ unsigned int (*link_ok)(struct rtl8169_private *tp);
int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *);
@@ -893,6 +887,11 @@ MODULE_FIRMWARE(FIRMWARE_8168H_2);
MODULE_FIRMWARE(FIRMWARE_8107E_1);
MODULE_FIRMWARE(FIRMWARE_8107E_2);
+static inline struct device *tp_to_dev(struct rtl8169_private *tp)
+{
+ return &tp->pci_dev->dev;
+}
+
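Note: tp_to_dev() names the pci_dev-to-generic-device hop once, replacing
the repeated "&tp->pci_dev->dev" below. Typical use, as in the runtime-PM
bracketing later in this diff:

    struct device *d = tp_to_dev(tp);

    pm_runtime_get_noresume(d);
    /* ... touch registers only while the device is resumed ... */
    pm_runtime_put_noidle(d);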
static void rtl_lock_work(struct rtl8169_private *tp)
{
mutex_lock(&tp->wk.mutex);
@@ -903,9 +902,9 @@ static void rtl_unlock_work(struct rtl8169_private *tp)
mutex_unlock(&tp->wk.mutex);
}
-static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force)
+static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
{
- pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
+ pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_READRQ, force);
}
@@ -984,56 +983,46 @@ static bool rtl_ocp_reg_failure(struct rtl8169_private *tp, u32 reg)
DECLARE_RTL_COND(rtl_ocp_gphy_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(GPHY_OCP) & OCPAR_FLAG;
+ return RTL_R32(tp, GPHY_OCP) & OCPAR_FLAG;
}
static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return;
- RTL_W32(GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
+ RTL_W32(tp, GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
}
static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return 0;
- RTL_W32(GPHY_OCP, reg << 15);
+ RTL_W32(tp, GPHY_OCP, reg << 15);
return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
- (RTL_R32(GPHY_OCP) & 0xffff) : ~0;
+ (RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0;
}
static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return;
- RTL_W32(OCPDR, OCPAR_FLAG | (reg << 15) | data);
+ RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data);
}
static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return 0;
- RTL_W32(OCPDR, reg << 15);
+ RTL_W32(tp, OCPDR, reg << 15);
- return RTL_R32(OCPDR);
+ return RTL_R32(tp, OCPDR);
}
#define OCP_STD_PHY_BASE 0xa400
@@ -1076,16 +1065,12 @@ static int mac_mcu_read(struct rtl8169_private *tp, int reg)
DECLARE_RTL_COND(rtl_phyar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(PHYAR) & 0x80000000;
+ return RTL_R32(tp, PHYAR) & 0x80000000;
}
static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
+ RTL_W32(tp, PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
rtl_udelay_loop_wait_low(tp, &rtl_phyar_cond, 25, 20);
/*
@@ -1097,13 +1082,12 @@ static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
int value;
- RTL_W32(PHYAR, 0x0 | (reg & 0x1f) << 16);
+ RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16);
value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
- RTL_R32(PHYAR) & 0xffff : ~0;
+ RTL_R32(tp, PHYAR) & 0xffff : ~0;
/*
* According to hardware specs a 20us delay is required after read
@@ -1116,18 +1100,14 @@ static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
DECLARE_RTL_COND(rtl_ocpar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(OCPAR) & OCPAR_FLAG;
+ return RTL_R32(tp, OCPAR) & OCPAR_FLAG;
}
static void r8168dp_1_mdio_access(struct rtl8169_private *tp, int reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
- RTL_W32(OCPAR, OCPAR_GPHY_WRITE_CMD);
- RTL_W32(EPHY_RXER_NUM, 0);
+ RTL_W32(tp, OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
+ RTL_W32(tp, OCPAR, OCPAR_GPHY_WRITE_CMD);
+ RTL_W32(tp, EPHY_RXER_NUM, 0);
rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 1000, 100);
}
@@ -1140,51 +1120,46 @@ static void r8168dp_1_mdio_write(struct rtl8169_private *tp, int reg, int value)
static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168dp_1_mdio_access(tp, reg, OCPDR_READ_CMD);
mdelay(1);
- RTL_W32(OCPAR, OCPAR_GPHY_READ_CMD);
- RTL_W32(EPHY_RXER_NUM, 0);
+ RTL_W32(tp, OCPAR, OCPAR_GPHY_READ_CMD);
+ RTL_W32(tp, EPHY_RXER_NUM, 0);
return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
- RTL_R32(OCPDR) & OCPDR_DATA_MASK : ~0;
+ RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0;
}
#define R8168DP_1_MDIO_ACCESS_BIT 0x00020000
-static void r8168dp_2_mdio_start(void __iomem *ioaddr)
+static void r8168dp_2_mdio_start(struct rtl8169_private *tp)
{
- RTL_W32(0xd0, RTL_R32(0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
+ RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
}
-static void r8168dp_2_mdio_stop(void __iomem *ioaddr)
+static void r8168dp_2_mdio_stop(struct rtl8169_private *tp)
{
- RTL_W32(0xd0, RTL_R32(0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
+ RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
}
static void r8168dp_2_mdio_write(struct rtl8169_private *tp, int reg, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- r8168dp_2_mdio_start(ioaddr);
+ r8168dp_2_mdio_start(tp);
r8169_mdio_write(tp, reg, value);
- r8168dp_2_mdio_stop(ioaddr);
+ r8168dp_2_mdio_stop(tp);
}
static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
int value;
- r8168dp_2_mdio_start(ioaddr);
+ r8168dp_2_mdio_start(tp);
value = r8169_mdio_read(tp, reg);
- r8168dp_2_mdio_stop(ioaddr);
+ r8168dp_2_mdio_stop(tp);
return value;
}
@@ -1229,16 +1204,12 @@ static int rtl_mdio_read(struct net_device *dev, int phy_id, int location)
DECLARE_RTL_COND(rtl_ephyar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(EPHYAR) & EPHYAR_FLAG;
+ return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG;
}
static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
+ RTL_W32(tp, EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
(reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
rtl_udelay_loop_wait_low(tp, &rtl_ephyar_cond, 10, 100);
@@ -1248,41 +1219,33 @@ static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
static u16 rtl_ephy_read(struct rtl8169_private *tp, int reg_addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
+ RTL_W32(tp, EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_ephyar_cond, 10, 100) ?
- RTL_R32(EPHYAR) & EPHYAR_DATA_MASK : ~0;
+ RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0;
}
DECLARE_RTL_COND(rtl_eriar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(ERIAR) & ERIAR_FLAG;
+ return RTL_R32(tp, ERIAR) & ERIAR_FLAG;
}
static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
u32 val, int type)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
BUG_ON((addr & 3) || (mask == 0));
- RTL_W32(ERIDR, val);
- RTL_W32(ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
+ RTL_W32(tp, ERIDR, val);
+ RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
}
static u32 rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
+ RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
return rtl_udelay_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ?
- RTL_R32(ERIDR) : ~0;
+ RTL_R32(tp, ERIDR) : ~0;
}
static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
@@ -1296,11 +1259,9 @@ static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+ RTL_W32(tp, OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
- RTL_R32(OCPDR) : ~0;
+ RTL_R32(tp, OCPDR) : ~0;
}
static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
@@ -1328,10 +1289,8 @@ static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
static void r8168dp_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPDR, data);
- RTL_W32(OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+ RTL_W32(tp, OCPDR, data);
+ RTL_W32(tp, OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 100, 20);
}
@@ -1393,19 +1352,15 @@ DECLARE_RTL_COND(rtl_ep_ocp_read_cond)
DECLARE_RTL_COND(rtl_ocp_tx_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(IBISR0) & 0x20;
+ return RTL_R8(tp, IBISR0) & 0x20;
}
static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
+ RTL_W8(tp, IBCR2, RTL_R8(tp, IBCR2) & ~0x01);
rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
- RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
- RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
+ RTL_W8(tp, IBISR0, RTL_R8(tp, IBISR0) | 0x20);
+ RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
}
static void rtl8168dp_driver_start(struct rtl8169_private *tp)
@@ -1473,19 +1428,19 @@ static void rtl8168_driver_stop(struct rtl8169_private *tp)
}
}
-static int r8168dp_check_dash(struct rtl8169_private *tp)
+static bool r8168dp_check_dash(struct rtl8169_private *tp)
{
u16 reg = rtl8168_get_ocp_reg(tp);
- return (ocp_read(tp, 0x0f, reg) & 0x00008000) ? 1 : 0;
+ return !!(ocp_read(tp, 0x0f, reg) & 0x00008000);
}
-static int r8168ep_check_dash(struct rtl8169_private *tp)
+static bool r8168ep_check_dash(struct rtl8169_private *tp)
{
- return (ocp_read(tp, 0x0f, 0x128) & 0x00000001) ? 1 : 0;
+ return !!(ocp_read(tp, 0x0f, 0x128) & 0x00000001);
}
-static int r8168_check_dash(struct rtl8169_private *tp)
+static bool r8168_check_dash(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_27:
@@ -1497,7 +1452,7 @@ static int r8168_check_dash(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_51:
return r8168ep_check_dash(tp);
default:
- return 0;
+ return false;
}
}
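Note: returning bool and normalizing with !! makes the DASH probes
self-documenting; a masked read such as (x & 0x00008000) yields 0 or
0x00008000, and !! collapses that to a strict 0/1:

    bool dash_enabled = !!(ocp_read(tp, 0x0f, reg) & 0x00008000);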
@@ -1518,49 +1473,37 @@ static void rtl_write_exgmac_batch(struct rtl8169_private *tp,
DECLARE_RTL_COND(rtl_efusear_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(EFUSEAR) & EFUSEAR_FLAG;
+ return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG;
}
static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
+ RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_efusear_cond, 100, 300) ?
- RTL_R32(EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
+ RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
}
static u16 rtl_get_events(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R16(IntrStatus);
+ return RTL_R16(tp, IntrStatus);
}
static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrStatus, bits);
+ RTL_W16(tp, IntrStatus, bits);
mmiowb();
}
static void rtl_irq_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrMask, 0);
+ RTL_W16(tp, IntrMask, 0);
mmiowb();
}
static void rtl_irq_enable(struct rtl8169_private *tp, u16 bits)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrMask, bits);
+ RTL_W16(tp, IntrMask, bits);
}
#define RTL_EVENT_NAPI_RX (RxOK | RxErr)
@@ -1574,18 +1517,14 @@ static void rtl_irq_enable_all(struct rtl8169_private *tp)
static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_irq_disable(tp);
rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow);
- RTL_R8(ChipCmd);
+ RTL_R8(tp, ChipCmd);
}
static unsigned int rtl8169_tbi_reset_pending(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(TBICSR) & TBIReset;
+ return RTL_R32(tp, TBICSR) & TBIReset;
}
static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
@@ -1593,21 +1532,19 @@ static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
return rtl_readphy(tp, MII_BMCR) & BMCR_RESET;
}
-static unsigned int rtl8169_tbi_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_tbi_link_ok(struct rtl8169_private *tp)
{
- return RTL_R32(TBICSR) & TBILinkOk;
+ return RTL_R32(tp, TBICSR) & TBILinkOk;
}
-static unsigned int rtl8169_xmii_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_xmii_link_ok(struct rtl8169_private *tp)
{
- return RTL_R8(PHYstatus) & LinkStatus;
+ return RTL_R8(tp, PHYstatus) & LinkStatus;
}
static void rtl8169_tbi_reset_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(TBICSR, RTL_R32(TBICSR) | TBIReset);
+ RTL_W32(tp, TBICSR, RTL_R32(tp, TBICSR) | TBIReset);
}
static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
@@ -1620,7 +1557,6 @@ static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
static void rtl_link_chg_patch(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct net_device *dev = tp->dev;
if (!netif_running(dev))
@@ -1628,12 +1564,12 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
tp->mac_version == RTL_GIGA_MAC_VER_38) {
- if (RTL_R8(PHYstatus) & _1000bpsF) {
+ if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
ERIAR_EXGMAC);
- } else if (RTL_R8(PHYstatus) & _100bps) {
+ } else if (RTL_R8(tp, PHYstatus) & _100bps) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1651,7 +1587,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
ERIAR_EXGMAC);
} else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
tp->mac_version == RTL_GIGA_MAC_VER_36) {
- if (RTL_R8(PHYstatus) & _1000bpsF) {
+ if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1663,7 +1599,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
ERIAR_EXGMAC);
}
} else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
- if (RTL_R8(PHYstatus) & _10bps) {
+ if (RTL_R8(tp, PHYstatus) & _10bps) {
rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
@@ -1676,20 +1612,21 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
}
static void rtl8169_check_link_status(struct net_device *dev,
- struct rtl8169_private *tp,
- void __iomem *ioaddr)
+ struct rtl8169_private *tp)
{
- if (tp->link_ok(ioaddr)) {
+ struct device *d = tp_to_dev(tp);
+
+ if (tp->link_ok(tp)) {
rtl_link_chg_patch(tp);
/* This is to cancel a scheduled suspend if there's one. */
- pm_request_resume(&tp->pci_dev->dev);
+ pm_request_resume(d);
netif_carrier_on(dev);
if (net_ratelimit())
netif_info(tp, ifup, dev, "link up\n");
} else {
netif_carrier_off(dev);
netif_info(tp, ifdown, dev, "link down\n");
- pm_runtime_idle(&tp->pci_dev->dev);
+ pm_runtime_idle(d);
}
}
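Note: with ioaddr gone, link_ok() now takes tp (see the function-pointer
member change near the top of this file's diff). The runtime-PM pairing is
the part worth reading twice: pm_request_resume() cancels any scheduled
autosuspend when the link comes up, and pm_runtime_idle() lets the core
suspend a link-down NIC. In outline:

    if (tp->link_ok(tp)) {
            pm_request_resume(tp_to_dev(tp)); /* cancel pending suspend */
            netif_carrier_on(dev);
    } else {
            netif_carrier_off(dev);
            pm_runtime_idle(tp_to_dev(tp));   /* may suspend now */
    }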
@@ -1697,15 +1634,14 @@ static void rtl8169_check_link_status(struct net_device *dev,
static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u8 options;
u32 wolopts = 0;
- options = RTL_R8(Config1);
+ options = RTL_R8(tp, Config1);
if (!(options & PMEnable))
return 0;
- options = RTL_R8(Config3);
+ options = RTL_R8(tp, Config3);
if (options & LinkUp)
wolopts |= WAKE_PHY;
switch (tp->mac_version) {
@@ -1735,7 +1671,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
break;
}
- options = RTL_R8(Config5);
+ options = RTL_R8(tp, Config5);
if (options & UWF)
wolopts |= WAKE_UCAST;
if (options & BWF)
@@ -1749,7 +1685,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
pm_runtime_get_noresume(d);
@@ -1768,7 +1704,6 @@ static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
{
- void __iomem *ioaddr = tp->mmio_addr;
unsigned int i, tmp;
static const struct {
u32 opt;
@@ -1784,7 +1719,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
};
u8 options;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_34:
@@ -1826,43 +1761,39 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
}
for (i = 0; i < tmp; i++) {
- options = RTL_R8(cfg[i].reg) & ~cfg[i].mask;
+ options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
if (wolopts & cfg[i].opt)
options |= cfg[i].mask;
- RTL_W8(cfg[i].reg, options);
+ RTL_W8(tp, cfg[i].reg, options);
}
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
- options = RTL_R8(Config1) & ~PMEnable;
+ options = RTL_R8(tp, Config1) & ~PMEnable;
if (wolopts)
options |= PMEnable;
- RTL_W8(Config1, options);
+ RTL_W8(tp, Config1, options);
break;
default:
- options = RTL_R8(Config2) & ~PME_SIGNAL;
+ options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
if (wolopts)
options |= PME_SIGNAL;
- RTL_W8(Config2, options);
+ RTL_W8(tp, Config2, options);
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
pm_runtime_get_noresume(d);
rtl_lock_work(tp);
- if (wol->wolopts)
- tp->features |= RTL_FEATURE_WOL;
- else
- tp->features &= ~RTL_FEATURE_WOL;
if (pm_runtime_active(d))
__rtl8169_set_wol(tp, wol->wolopts);
else
@@ -1870,7 +1801,7 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
rtl_unlock_work(tp);
- device_set_wakeup_enable(&tp->pci_dev->dev, wol->wolopts);
+ device_set_wakeup_enable(d, wol->wolopts);
pm_runtime_put_noidle(d);
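Note: the RTL_FEATURE_WOL cache (its enum is deleted near the top of this
file's diff) is redundant because device_set_wakeup_enable() above already
records the state on the struct device. Anywhere the driver needs it back,
the PM core can be asked directly; a hedged sketch, the call site being
hypothetical:

    /* instead of: if (tp->features & RTL_FEATURE_WOL) */
    if (device_may_wakeup(tp_to_dev(tp)))
            rtl_speed_down(tp);    /* hypothetical call site */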
@@ -1906,16 +1837,15 @@ static int rtl8169_set_speed_tbi(struct net_device *dev,
u8 autoneg, u16 speed, u8 duplex, u32 ignored)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
int ret = 0;
u32 reg;
- reg = RTL_R32(TBICSR);
+ reg = RTL_R32(tp, TBICSR);
if ((autoneg == AUTONEG_DISABLE) && (speed == SPEED_1000) &&
(duplex == DUPLEX_FULL)) {
- RTL_W32(TBICSR, reg & ~(TBINwEnable | TBINwRestart));
+ RTL_W32(tp, TBICSR, reg & ~(TBINwEnable | TBINwRestart));
} else if (autoneg == AUTONEG_ENABLE)
- RTL_W32(TBICSR, reg | TBINwEnable | TBINwRestart);
+ RTL_W32(tp, TBICSR, reg | TBINwEnable | TBINwRestart);
else {
netif_warn(tp, link, dev,
"incorrect speed setting refused in TBI mode\n");
@@ -2040,16 +1970,15 @@ static void __rtl8169_set_features(struct net_device *dev,
netdev_features_t features)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 rx_config;
- rx_config = RTL_R32(RxConfig);
+ rx_config = RTL_R32(tp, RxConfig);
if (features & NETIF_F_RXALL)
rx_config |= (AcceptErr | AcceptRunt);
else
rx_config &= ~(AcceptErr | AcceptRunt);
- RTL_W32(RxConfig, rx_config);
+ RTL_W32(tp, RxConfig, rx_config);
if (features & NETIF_F_RXCSUM)
tp->cp_cmd |= RxChkSum;
@@ -2061,10 +1990,10 @@ static void __rtl8169_set_features(struct net_device *dev,
else
tp->cp_cmd &= ~RxVlan;
- tp->cp_cmd |= RTL_R16(CPlusCmd) & ~(RxVlan | RxChkSum);
+ tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum);
- RTL_W16(CPlusCmd, tp->cp_cmd);
- RTL_R16(CPlusCmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+ RTL_R16(tp, CPlusCmd);
}
static int rtl8169_set_features(struct net_device *dev,
@@ -2101,7 +2030,6 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 status;
u32 supported, advertising;
@@ -2109,7 +2037,7 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE;
cmd->base.port = PORT_FIBRE;
- status = RTL_R32(TBICSR);
+ status = RTL_R32(tp, TBICSR);
advertising = (status & TBINwEnable) ? ADVERTISED_Autoneg : 0;
cmd->base.autoneg = !!(status & TBINwEnable);
@@ -2224,23 +2152,20 @@ static int rtl8169_get_sset_count(struct net_device *dev, int sset)
DECLARE_RTL_COND(rtl_counters_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(CounterAddrLow) & (CounterReset | CounterDump);
+ return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
}
static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
dma_addr_t paddr = tp->counters_phys_addr;
u32 cmd;
- RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
- RTL_R32(CounterAddrHigh);
+ RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32);
+ RTL_R32(tp, CounterAddrHigh);
cmd = (u64)paddr & DMA_BIT_MASK(32);
- RTL_W32(CounterAddrLow, cmd);
- RTL_W32(CounterAddrLow, cmd | counter_cmd);
+ RTL_W32(tp, CounterAddrLow, cmd);
+ RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
}
@@ -2262,13 +2187,12 @@ static bool rtl8169_reset_counters(struct net_device *dev)
static bool rtl8169_update_counters(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
/*
* Some chips are unable to dump tally counters when the receiver
* is disabled.
*/
- if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
+ if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
return true;
return rtl8169_do_counters(dev, CounterDump);
@@ -2317,7 +2241,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
struct rtl8169_counters *counters = tp->counters;
ASSERT_RTNL();
@@ -2448,7 +2372,6 @@ static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
const struct rtl_coalesce_info *ci;
const struct rtl_coalesce_scale *scale;
struct {
@@ -2468,10 +2391,10 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
if (IS_ERR(ci))
return PTR_ERR(ci);
- scale = &ci->scalev[RTL_R16(CPlusCmd) & 3];
+ scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3];
/* read IntrMitigate and adjust according to scale */
- for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
+ for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
*p->max_frames = (w & RTL_COALESCE_MASK) << 2;
w >>= RTL_COALESCE_SHIFT;
*p->usecs = w & RTL_COALESCE_MASK;
@@ -2518,7 +2441,6 @@ static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale(
static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
const struct rtl_coalesce_scale *scale;
struct {
u32 frames;
@@ -2566,11 +2488,11 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
rtl_lock_work(tp);
- RTL_W16(IntrMitigate, swab16(w));
+ RTL_W16(tp, IntrMitigate, swab16(w));
tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
- RTL_W16(CPlusCmd, tp->cp_cmd);
- RTL_R16(CPlusCmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+ RTL_R16(tp, CPlusCmd);
rtl_unlock_work(tp);
@@ -2600,17 +2522,16 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
static void rtl8169_get_mac_version(struct rtl8169_private *tp,
struct net_device *dev, u8 default_version)
{
- void __iomem *ioaddr = tp->mmio_addr;
/*
* The driver currently handles the 8168Bf and the 8168Be identically
* but they can be identified more specifically through the test below
* if needed:
*
- * (RTL_R32(TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
+ * (RTL_R32(tp, TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
*
* Same thing for the 8101Eb and the 8101Ec:
*
- * (RTL_R32(TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
+ * (RTL_R32(tp, TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
*/
static const struct rtl_mac_info {
u32 mask;
@@ -2708,7 +2629,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
const struct rtl_mac_info *p = mac_info;
u32 reg;
- reg = RTL_R32(TxConfig);
+ reg = RTL_R32(tp, TxConfig);
while ((reg & p->mask) != p->val)
p++;
tp->mac_version = p->mac_version;
@@ -3805,8 +3726,6 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x1f, 0x0005);
rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000);
rtl_writephy(tp, 0x1f, 0x0000);
- /* soft-reset phy */
- rtl_writephy(tp, MII_BMCR, BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
/* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
@@ -4591,7 +4510,6 @@ static void rtl_hw_phy_config(struct net_device *dev)
static void rtl_phy_work(struct rtl8169_private *tp)
{
struct timer_list *timer = &tp->timer;
- void __iomem *ioaddr = tp->mmio_addr;
unsigned long timeout = RTL8169_PHY_TIMEOUT;
assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
@@ -4605,7 +4523,7 @@ static void rtl_phy_work(struct rtl8169_private *tp)
goto out_mod_timer;
}
- if (tp->link_ok(ioaddr))
+ if (tp->link_ok(tp))
return;
netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
@@ -4643,21 +4561,17 @@ static void rtl8169_phy_reset(struct net_device *dev,
static bool rtl_tbi_enabled(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
- (RTL_R8(PHYstatus) & TBI_Enable);
+ (RTL_R8(tp, PHYstatus) & TBI_Enable);
}
static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_hw_phy_config(dev);
if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
- RTL_W8(0x82, 0x01);
+ RTL_W8(tp, 0x82, 0x01);
}
pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
@@ -4667,7 +4581,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
- RTL_W8(0x82, 0x01);
+ RTL_W8(tp, 0x82, 0x01);
dprintk("Set PHY Reg 0x0bh = 0x00h\n");
rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
}
@@ -4687,22 +4601,20 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_lock_work(tp);
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W32(MAC4, addr[4] | addr[5] << 8);
- RTL_R32(MAC4);
+ RTL_W32(tp, MAC4, addr[4] | addr[5] << 8);
+ RTL_R32(tp, MAC4);
- RTL_W32(MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
- RTL_R32(MAC0);
+ RTL_W32(tp, MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
+ RTL_R32(tp, MAC0);
if (tp->mac_version == RTL_GIGA_MAC_VER_34)
rtl_rar_exgmac_set(tp, addr);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
rtl_unlock_work(tp);
}
@@ -4710,13 +4622,12 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
static int rtl_set_mac_address(struct net_device *dev, void *p)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct device *d = &tp->pci_dev->dev;
- struct sockaddr *addr = p;
-
- if (!is_valid_ether_addr(addr->sa_data))
- return -EADDRNOTAVAIL;
+ struct device *d = tp_to_dev(tp);
+ int ret;
- memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ ret = eth_mac_addr(dev, p);
+ if (ret)
+ return ret;
pm_runtime_get_noresume(d);
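Note: eth_mac_addr() subsumes the open-coded validation and copy, and adds
one behavior the old code lacked: unless the device sets
IFF_LIVE_ADDR_CHANGE, it refuses to change the address while the interface
is running. Roughly what the helper does (a sketch of net/ethernet/eth.c
semantics, not the literal implementation):

    int eth_mac_addr(struct net_device *dev, void *p)
    {
            struct sockaddr *addr = p;

            if (netif_running(dev) &&
                !(dev->priv_flags & IFF_LIVE_ADDR_CHANGE))
                    return -EBUSY;
            if (!is_valid_ether_addr(addr->sa_data))
                    return -EADDRNOTAVAIL;

            memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
            return 0;
    }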
@@ -4822,8 +4733,6 @@ static void rtl_speed_down(struct rtl8169_private *tp)
static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25:
case RTL_GIGA_MAC_VER_26:
@@ -4847,7 +4756,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_49:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W32(RxConfig, RTL_R32(RxConfig) |
+ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
break;
default:
@@ -4880,8 +4789,6 @@ static void r810x_phy_power_up(struct rtl8169_private *tp)
static void r810x_pll_power_down(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_wol_pll_power_down(tp))
return;
@@ -4896,15 +4803,13 @@ static void r810x_pll_power_down(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_16:
break;
default:
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
}
}
static void r810x_pll_power_up(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r810x_phy_power_up(tp);
switch (tp->mac_version) {
@@ -4917,10 +4822,10 @@ static void r810x_pll_power_up(struct rtl8169_private *tp)
break;
case RTL_GIGA_MAC_VER_47:
case RTL_GIGA_MAC_VER_48:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
break;
default:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
break;
}
}
@@ -4987,21 +4892,12 @@ static void r8168_phy_power_down(struct rtl8169_private *tp)
static void r8168_pll_power_down(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
return;
- }
if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
tp->mac_version == RTL_GIGA_MAC_VER_24) &&
- (RTL_R16(CPlusCmd) & ASF)) {
+ (RTL_R16(tp, CPlusCmd) & ASF)) {
return;
}
@@ -5027,22 +4923,20 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_46:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_49:
rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0x00000000,
0xfc000000, ERIAR_EXGMAC);
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
}
}
static void r8168_pll_power_up(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25:
case RTL_GIGA_MAC_VER_26:
@@ -5051,19 +4945,19 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_31:
case RTL_GIGA_MAC_VER_32:
case RTL_GIGA_MAC_VER_33:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
break;
case RTL_GIGA_MAC_VER_44:
case RTL_GIGA_MAC_VER_45:
case RTL_GIGA_MAC_VER_46:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_49:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000,
0x00000000, ERIAR_EXGMAC);
break;
@@ -5153,8 +5047,6 @@ static void rtl_init_pll_power_ops(struct rtl8169_private *tp)
static void rtl_init_rxcfg(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_01:
case RTL_GIGA_MAC_VER_02:
@@ -5170,7 +5062,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_15:
case RTL_GIGA_MAC_VER_16:
case RTL_GIGA_MAC_VER_17:
- RTL_W32(RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_18:
case RTL_GIGA_MAC_VER_19:
@@ -5181,7 +5073,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_24:
case RTL_GIGA_MAC_VER_34:
case RTL_GIGA_MAC_VER_35:
- RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
@@ -5195,10 +5087,10 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_49:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
break;
default:
- RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
break;
}
}
@@ -5210,102 +5102,82 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
rtl_generic_op(tp, tp->jumbo_ops.enable);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
rtl_generic_op(tp, tp->jumbo_ops.disable);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) | Jumbo_En1);
- rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
}
static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) & ~Jumbo_En1);
- rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
}
static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
}
static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
}
static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(MaxTxPacketSize, 0x3f);
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) | 0x01);
- rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
+ RTL_W8(tp, MaxTxPacketSize, 0x3f);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
}
static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(MaxTxPacketSize, 0x0c);
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) & ~0x01);
- rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ RTL_W8(tp, MaxTxPacketSize, 0x0c);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
}
static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
{
- rtl_tx_performance_tweak(tp->pci_dev,
+ rtl_tx_performance_tweak(tp,
PCI_EXP_DEVCTL_READRQ_512B | PCI_EXP_DEVCTL_NOSNOOP_EN);
}
static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
{
- rtl_tx_performance_tweak(tp->pci_dev,
+ rtl_tx_performance_tweak(tp,
(0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN);
}
static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168b_0_hw_jumbo_enable(tp);
- RTL_W8(Config4, RTL_R8(Config4) | (1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0));
}
static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168b_0_hw_jumbo_disable(tp);
- RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
}
static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
@@ -5372,16 +5244,12 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
DECLARE_RTL_COND(rtl_chipcmd_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(ChipCmd) & CmdReset;
+ return RTL_R8(tp, ChipCmd) & CmdReset;
}
static void rtl_hw_reset(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(ChipCmd, CmdReset);
+ RTL_W8(tp, ChipCmd, CmdReset);
rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
}
@@ -5400,7 +5268,7 @@ static void rtl_request_uncached_firmware(struct rtl8169_private *tp)
if (!rtl_fw)
goto err_warn;
- rc = request_firmware(&rtl_fw->fw, name, &tp->pci_dev->dev);
+ rc = request_firmware(&rtl_fw->fw, name, tp_to_dev(tp));
if (rc < 0)
goto err_free;
@@ -5432,29 +5300,21 @@ static void rtl_request_firmware(struct rtl8169_private *tp)
static void rtl_rx_close(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(RxConfig, RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
+ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
}
DECLARE_RTL_COND(rtl_npq_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(TxPoll) & NPQ;
+ return RTL_R8(tp, TxPoll) & NPQ;
}
DECLARE_RTL_COND(rtl_txcfg_empty_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(TxConfig) & TXCFG_EMPTY;
+ return RTL_R32(tp, TxConfig) & TXCFG_EMPTY;
}
static void rtl8169_hw_reset(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Disable interrupts */
rtl8169_irq_mask_and_ack(tp);
@@ -5481,10 +5341,10 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
tp->mac_version == RTL_GIGA_MAC_VER_49 ||
tp->mac_version == RTL_GIGA_MAC_VER_50 ||
tp->mac_version == RTL_GIGA_MAC_VER_51) {
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
} else {
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
udelay(100);
}
@@ -5493,10 +5353,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Set DMA burst size and Interframe Gap Time */
- RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) |
+ RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
(InterFrameGap << TxInterFrameGapShift));
}
@@ -5509,36 +5367,35 @@ static void rtl_hw_start(struct net_device *dev)
rtl_irq_enable_all(tp);
}
-static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp,
- void __iomem *ioaddr)
+static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
{
/*
* Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
* register to be written before TxDescAddrLow to work.
* Switching from MMIO to I/O access fixes the issue as well.
*/
- RTL_W32(TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
- RTL_W32(TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
- RTL_W32(RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
- RTL_W32(RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
+ RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
+ RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
+ RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
+ RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
}
-static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
+static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
{
u16 cmd;
- cmd = RTL_R16(CPlusCmd);
- RTL_W16(CPlusCmd, cmd);
+ cmd = RTL_R16(tp, CPlusCmd);
+ RTL_W16(tp, CPlusCmd, cmd);
return cmd;
}
-static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz)
{
/* Low hurts. Let's disable the filtering. */
- RTL_W16(RxMaxSize, rx_buf_sz + 1);
+ RTL_W16(tp, RxMaxSize, rx_buf_sz + 1);
}
-static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
+static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
{
static const struct rtl_cfg2_info {
u32 mac_version;
@@ -5554,10 +5411,10 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
unsigned int i;
u32 clk;
- clk = RTL_R8(Config2) & PCI_Clock_66MHz;
+ clk = RTL_R8(tp, Config2) & PCI_Clock_66MHz;
for (i = 0; i < ARRAY_SIZE(cfg2_info); i++, p++) {
if ((p->mac_version == mac_version) && (p->clk == clk)) {
- RTL_W32(0x7c, p->val);
+ RTL_W32(tp, 0x7c, p->val);
break;
}
}
@@ -5566,7 +5423,6 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
static void rtl_set_rx_mode(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 mc_filter[2]; /* Multicast hash filter */
int rx_mode;
u32 tmp = 0;
@@ -5598,7 +5454,7 @@ static void rtl_set_rx_mode(struct net_device *dev)
if (dev->features & NETIF_F_RXALL)
rx_mode |= (AcceptErr | AcceptRunt);
- tmp = (RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
+ tmp = (RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
u32 data = mc_filter[0];
@@ -5610,35 +5466,34 @@ static void rtl_set_rx_mode(struct net_device *dev)
if (tp->mac_version == RTL_GIGA_MAC_VER_35)
mc_filter[1] = mc_filter[0] = 0xffffffff;
- RTL_W32(MAR0 + 4, mc_filter[1]);
- RTL_W32(MAR0 + 0, mc_filter[0]);
+ RTL_W32(tp, MAR0 + 4, mc_filter[1]);
+ RTL_W32(tp, MAR0 + 0, mc_filter[0]);
- RTL_W32(RxConfig, tmp);
+ RTL_W32(tp, RxConfig, tmp);
}
static void rtl_hw_start_8169(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | PCIMulRW);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08);
}
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
tp->mac_version == RTL_GIGA_MAC_VER_02 ||
tp->mac_version == RTL_GIGA_MAC_VER_03 ||
tp->mac_version == RTL_GIGA_MAC_VER_04)
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_init_rxcfg(tp);
- RTL_W8(EarlyTxThres, NoEarlyTx);
+ RTL_W8(tp, EarlyTxThres, NoEarlyTx);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
tp->mac_version == RTL_GIGA_MAC_VER_02 ||
@@ -5646,7 +5501,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
tp->mac_version == RTL_GIGA_MAC_VER_04)
rtl_set_rx_tx_config_registers(tp);
- tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
+ tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW;
if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
tp->mac_version == RTL_GIGA_MAC_VER_03) {
@@ -5655,37 +5510,37 @@ static void rtl_hw_start_8169(struct net_device *dev)
tp->cp_cmd |= (1 << 14);
}
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- rtl8169_set_magic_reg(ioaddr, tp->mac_version);
+ rtl8169_set_magic_reg(tp, tp->mac_version);
/*
* Undocumented corner. Supposedly:
* (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets
*/
- RTL_W16(IntrMitigate, 0x0000);
+ RTL_W16(tp, IntrMitigate, 0x0000);
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
if (tp->mac_version != RTL_GIGA_MAC_VER_01 &&
tp->mac_version != RTL_GIGA_MAC_VER_02 &&
tp->mac_version != RTL_GIGA_MAC_VER_03 &&
tp->mac_version != RTL_GIGA_MAC_VER_04) {
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_tx_config_registers(tp);
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
- RTL_W32(RxMissed, 0);
+ RTL_W32(tp, RxMissed, 0);
rtl_set_rx_mode(dev);
/* no early-rx interrupts */
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
@@ -5719,17 +5574,13 @@ static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
DECLARE_RTL_COND(rtl_csiar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(CSIAR) & CSIAR_FLAG;
+ return RTL_R32(tp, CSIAR) & CSIAR_FLAG;
}
static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
@@ -5737,21 +5588,17 @@ static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
static u32 r8169_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
CSIAR_FUNC_NIC);
@@ -5760,21 +5607,17 @@ static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
static u32 r8402_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
CSIAR_FUNC_NIC2);
@@ -5783,13 +5626,11 @@ static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
static u32 r8411_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void rtl_init_csi_ops(struct rtl8169_private *tp)
@@ -5851,31 +5692,30 @@ static void rtl_ephy_init(struct rtl8169_private *tp, const struct ephy_info *e,
}
}
-static void rtl_disable_clock_request(struct pci_dev *pdev)
+static void rtl_disable_clock_request(struct rtl8169_private *tp)
{
- pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL,
+ pcie_capability_clear_word(tp->pci_dev, PCI_EXP_LNKCTL,
PCI_EXP_LNKCTL_CLKREQ_EN);
}
-static void rtl_enable_clock_request(struct pci_dev *pdev)
+static void rtl_enable_clock_request(struct rtl8169_private *tp)
{
- pcie_capability_set_word(pdev, PCI_EXP_LNKCTL,
+ pcie_capability_set_word(tp->pci_dev, PCI_EXP_LNKCTL,
PCI_EXP_LNKCTL_CLKREQ_EN);
}
static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
{
- void __iomem *ioaddr = tp->mmio_addr;
u8 data;
- data = RTL_R8(Config3);
+ data = RTL_R8(tp, Config3);
if (enable)
data |= Rdy_to_L23;
else
data &= ~Rdy_to_L23;
- RTL_W8(Config3, data);
+ RTL_W8(tp, Config3, data);
}
#define R8168_CPCMD_QUIRK_MASK (\
@@ -5891,45 +5731,37 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
if (tp->dev->mtu <= ETH_DATA_LEN) {
- rtl_tx_performance_tweak(pdev, (0x5 << MAX_READ_REQUEST_SHIFT) |
+ rtl_tx_performance_tweak(tp, (0x5 << MAX_READ_REQUEST_SHIFT) |
PCI_EXP_DEVCTL_NOSNOOP_EN);
}
}
static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_hw_start_8168bb(tp);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
}
static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
- RTL_W8(Config1, RTL_R8(Config1) | Speed_down);
+ RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
@@ -5951,42 +5783,35 @@ static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
/* Magic. */
- RTL_W8(DBG_REG, 0x20);
+ RTL_W8(tp, DBG_REG, 0x20);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168c_1[] = {
{ 0x02, 0x0800, 0x1000 },
{ 0x03, 0, 0x0002 },
@@ -5995,7 +5820,7 @@ static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
rtl_csi_access_enable_2(tp);
- RTL_W8(DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
+ RTL_W8(tp, DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
rtl_ephy_init(tp, e_info_8168c_1, ARRAY_SIZE(e_info_8168c_1));
@@ -6030,40 +5855,32 @@ static void rtl_hw_start_8168c_4(struct rtl8169_private *tp)
static void rtl_hw_start_8168d(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_1(tp);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
}
static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168d_4[] = {
{ 0x0b, 0x0000, 0x0048 },
{ 0x19, 0x0020, 0x0050 },
@@ -6072,19 +5889,17 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
rtl_ephy_init(tp, e_info_8168d_4, ARRAY_SIZE(e_info_8168d_4));
- rtl_enable_clock_request(pdev);
+ rtl_enable_clock_request(tp);
}
static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168e_1[] = {
{ 0x00, 0x0200, 0x0100 },
{ 0x00, 0x0000, 0x0004 },
@@ -6106,23 +5921,21 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1));
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
/* Reset tx FIFO pointer */
- RTL_W32(MISC, RTL_R32(MISC) | TXPLA_RST);
- RTL_W32(MISC, RTL_R32(MISC) & ~TXPLA_RST);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168e_2[] = {
{ 0x09, 0x0000, 0x0080 },
{ 0x19, 0x0000, 0x0224 }
@@ -6133,7 +5946,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2));
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6144,29 +5957,26 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
- RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168f(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6179,20 +5989,19 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060, ERIAR_EXGMAC);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
- RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168f_1[] = {
{ 0x06, 0x00c0, 0x0020 },
{ 0x08, 0x0001, 0x0002 },
@@ -6207,7 +6016,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
}
static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -6229,10 +6038,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
static void rtl_hw_start_8168g(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6241,20 +6047,20 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
@@ -6264,7 +6070,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168g_1[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x37d0, 0x0820 },
@@ -6275,14 +6080,13 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
}
static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168g_2[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x3df0, 0x0200 },
@@ -6293,14 +6097,13 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168g_2, ARRAY_SIZE(e_info_8168g_2));
}
static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8411_2[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x3df0, 0x0200 },
@@ -6312,15 +6115,13 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
}
static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
int rg_saw_cnt;
u32 data;
static const struct ephy_info e_info_8168h_1[] = {
@@ -6333,11 +6134,11 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6346,7 +6147,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6357,19 +6158,19 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
@@ -6417,12 +6218,9 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl8168ep_stop_cmac(tp);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
@@ -6431,7 +6229,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6440,25 +6238,24 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
rtl_pcie_state_l2l3_enable(tp, false);
}
static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168ep_1[] = {
{ 0x00, 0xffff, 0x10ab },
{ 0x06, 0xffff, 0xf030 },
@@ -6468,8 +6265,8 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
rtl_hw_start_8168ep(tp);
@@ -6477,7 +6274,6 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168ep_2[] = {
{ 0x00, 0xffff, 0x10a3 },
{ 0x19, 0xffff, 0xfc00 },
@@ -6485,19 +6281,18 @@ static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
rtl_hw_start_8168ep(tp);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
}
static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u32 data;
static const struct ephy_info e_info_8168ep_3[] = {
{ 0x00, 0xffff, 0x10a3 },
@@ -6507,14 +6302,14 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
rtl_hw_start_8168ep(tp);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
data = r8168_mac_ocp_read(tp, 0xd3e2);
data &= 0xf000;
@@ -6533,19 +6328,18 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
static void rtl_hw_start_8168(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
- tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
+ tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- RTL_W16(IntrMitigate, 0x5151);
+ RTL_W16(tp, IntrMitigate, 0x5151);
/* Work around for RxFIFO overflow. */
if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
@@ -6553,11 +6347,11 @@ static void rtl_hw_start_8168(struct net_device *dev)
tp->event_slow &= ~RxOverflow;
}
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
rtl_set_rx_tx_config_registers(tp);
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_11:
@@ -6663,13 +6457,13 @@ static void rtl_hw_start_8168(struct net_device *dev)
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_mode(dev);
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
#define R810X_CPCMD_QUIRK_MASK (\
@@ -6685,8 +6479,6 @@ static void rtl_hw_start_8168(struct net_device *dev)
static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8102e_1[] = {
{ 0x01, 0, 0x6e65 },
{ 0x02, 0, 0x091f },
@@ -6701,32 +6493,29 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
rtl_csi_access_enable_2(tp);
- RTL_W8(DBG_REG, FIX_NAK_1);
+ RTL_W8(tp, DBG_REG, FIX_NAK_1);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(Config1,
+ RTL_W8(tp, Config1,
LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- cfg1 = RTL_R8(Config1);
+ cfg1 = RTL_R8(tp, Config1);
if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
- RTL_W8(Config1, cfg1 & ~LEDS0);
+ RTL_W8(tp, Config1, cfg1 & ~LEDS0);
rtl_ephy_init(tp, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
}
static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
}
static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
@@ -6738,7 +6527,6 @@ static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8105e_1[] = {
{ 0x07, 0, 0x4000 },
{ 0x19, 0, 0x0200 },
@@ -6751,13 +6539,13 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
};
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
/* Disable Early Tally Counter */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) & ~0x010000);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) & ~0x010000);
- RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
@@ -6772,7 +6560,6 @@ static void rtl_hw_start_8105e_2(struct rtl8169_private *tp)
static void rtl_hw_start_8402(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8402[] = {
{ 0x19, 0xffff, 0xff64 },
{ 0x1e, 0, 0x4000 }
@@ -6781,14 +6568,14 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
rtl_csi_access_enable_2(tp);
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
- rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00000002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00000006, ERIAR_EXGMAC);
@@ -6803,14 +6590,12 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
static void rtl_hw_start_8106(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
- RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
- RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
+ RTL_W32(tp, MISC, (RTL_R32(tp, MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
rtl_pcie_state_l2l3_enable(tp, false);
}
@@ -6818,7 +6603,6 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
static void rtl_hw_start_8101(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
@@ -6829,16 +6613,16 @@ static void rtl_hw_start_8101(struct net_device *dev)
pcie_capability_set_word(pdev, PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_NOSNOOP_EN);
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
rtl_set_rx_tx_config_registers(tp);
@@ -6878,17 +6662,17 @@ static void rtl_hw_start_8101(struct net_device *dev)
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
- RTL_W16(IntrMitigate, 0x0000);
+ RTL_W16(tp, IntrMitigate, 0x0000);
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_mode(dev);
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -6915,7 +6699,7 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
void **data_buff, struct RxDesc *desc)
{
- dma_unmap_single(&tp->pci_dev->dev, le64_to_cpu(desc->addr), rx_buf_sz,
+ dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), rx_buf_sz,
DMA_FROM_DEVICE);
kfree(*data_buff);
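(tp_to_dev() is a small helper introduced by this series; presumably just:

	static inline struct device *tp_to_dev(struct rtl8169_private *tp)
	{
		return &tp->pci_dev->dev;
	}

which makes the &tp->pci_dev->dev conversions in this and the following hunks
mechanical.)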
@@ -6950,7 +6734,7 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
{
void *data;
dma_addr_t mapping;
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
struct net_device *dev = tp->dev;
int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
@@ -7062,7 +6846,7 @@ static void rtl8169_tx_clear_range(struct rtl8169_private *tp, u32 start,
if (len) {
struct sk_buff *skb = tx_skb->skb;
- rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
+ rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
tp->TxDescArray + entry);
if (skb) {
dev_consume_skb_any(skb);
@@ -7098,7 +6882,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
napi_enable(&tp->napi);
rtl_hw_start(dev);
netif_wake_queue(dev);
- rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+ rtl8169_check_link_status(dev, tp);
}
static void rtl8169_tx_timeout(struct net_device *dev)
@@ -7114,7 +6898,7 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb,
struct skb_shared_info *info = skb_shinfo(skb);
unsigned int cur_frag, entry;
struct TxDesc *uninitialized_var(txd);
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
entry = tp->cur_tx;
for (cur_frag = 0; cur_frag < info->nr_frags; cur_frag++) {
@@ -7346,8 +7130,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
struct rtl8169_private *tp = netdev_priv(dev);
unsigned int entry = tp->cur_tx % NUM_TX_DESC;
struct TxDesc *txd = tp->TxDescArray + entry;
- void __iomem *ioaddr = tp->mmio_addr;
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
dma_addr_t mapping;
u32 status, len;
u32 opts[2];
@@ -7406,7 +7189,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
tp->cur_tx += frags + 1;
- RTL_W8(TxPoll, NPQ);
+ RTL_W8(tp, TxPoll, NPQ);
mmiowb();
@@ -7477,11 +7260,9 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
/* The infamous DAC f*ckup only happens at boot time */
if ((tp->cp_cmd & PCIDAC) && !tp->cur_rx) {
- void __iomem *ioaddr = tp->mmio_addr;
-
netif_info(tp, intr, dev, "disabling PCI DAC\n");
tp->cp_cmd &= ~PCIDAC;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
dev->features &= ~NETIF_F_HIGHDMA;
}
@@ -7513,7 +7294,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
*/
dma_rmb();
- rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
+ rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
tp->TxDescArray + entry);
if (status & LastFrag) {
u64_stats_update_begin(&tp->tx_stats.syncp);
@@ -7547,11 +7328,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
* of start_xmit activity is detected (if it is not detected,
* it is slow enough). -- FR
*/
- if (tp->cur_tx != dirty_tx) {
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(TxPoll, NPQ);
- }
+ if (tp->cur_tx != dirty_tx)
+ RTL_W8(tp, TxPoll, NPQ);
}
}
@@ -7577,7 +7355,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
dma_addr_t addr)
{
struct sk_buff *skb;
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
data = rtl8169_align(data);
dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
@@ -7732,7 +7510,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp)
rtl8169_pcierr_interrupt(dev);
if (status & LinkChg)
- rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+ rtl8169_check_link_status(dev, tp);
rtl_irq_enable_all(tp);
}
@@ -7804,21 +7582,20 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
return work_done;
}
-static void rtl8169_rx_missed(struct net_device *dev, void __iomem *ioaddr)
+static void rtl8169_rx_missed(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
if (tp->mac_version > RTL_GIGA_MAC_VER_06)
return;
- dev->stats.rx_missed_errors += (RTL_R32(RxMissed) & 0xffffff);
- RTL_W32(RxMissed, 0);
+ dev->stats.rx_missed_errors += RTL_R32(tp, RxMissed) & 0xffffff;
+ RTL_W32(tp, RxMissed, 0);
}
static void rtl8169_down(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
del_timer_sync(&tp->timer);
@@ -7831,7 +7608,7 @@ static void rtl8169_down(struct net_device *dev)
* as netif_running is not true (rtl8169_interrupt, rtl8169_reset_task)
* and napi is disabled (rtl8169_poll).
*/
- rtl8169_rx_missed(dev, ioaddr);
+ rtl8169_rx_missed(dev);
/* Give a racing hard_start_xmit a few cycles to complete. */
synchronize_sched();
@@ -7861,7 +7638,7 @@ static int rtl8169_close(struct net_device *dev)
cancel_work_sync(&tp->wk.work);
- free_irq(pdev->irq, dev);
+ pci_free_irq(pdev, 0, dev);
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
@@ -7880,14 +7657,13 @@ static void rtl8169_netpoll(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- rtl8169_interrupt(tp->pci_dev->irq, dev);
+ rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
}
#endif
static int rtl_open(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
int retval = -ENOMEM;
@@ -7917,9 +7693,8 @@ static int rtl_open(struct net_device *dev)
rtl_request_firmware(tp);
- retval = request_irq(pdev->irq, rtl8169_interrupt,
- (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
- dev->name, dev);
+ retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+ dev->name);
if (retval < 0)
goto err_release_fw_2;
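(pci_request_irq() is the vector-based counterpart of request_irq(), pairing with
the pci_free_irq() call in rtl8169_close() above; the NULL argument is the
threaded-handler slot. Its signature, as declared in <linux/pci.h>:

	int pci_request_irq(struct pci_dev *dev, unsigned int nr,
			    irq_handler_t handler, irq_handler_t thread_fn,
			    void *dev_id, const char *fmt, ...);

Because the vector is allocated with PCI_IRQ_LEGACY or PCI_IRQ_ALL_TYPES in
rtl_alloc_irq() below, the caller no longer has to pick between IRQF_SHARED and
MSI-specific flags itself.)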
@@ -7947,7 +7722,7 @@ static int rtl_open(struct net_device *dev)
tp->saved_wolopts = 0;
pm_runtime_put_sync(&pdev->dev);
- rtl8169_check_link_status(dev, tp, ioaddr);
+ rtl8169_check_link_status(dev, tp);
out:
return retval;
@@ -7971,7 +7746,6 @@ static void
rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
struct rtl8169_counters *counters = tp->counters;
unsigned int start;
@@ -7979,7 +7753,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
pm_runtime_get_noresume(&pdev->dev);
if (netif_running(dev) && pm_runtime_active(&pdev->dev))
- rtl8169_rx_missed(dev, ioaddr);
+ rtl8169_rx_missed(dev);
do {
start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp);
@@ -8102,7 +7876,7 @@ static int rtl8169_runtime_suspend(struct device *device)
rtl8169_net_suspend(dev);
/* Update counters before going runtime suspend */
- rtl8169_rx_missed(dev, tp->mmio_addr);
+ rtl8169_rx_missed(dev);
rtl8169_update_counters(dev);
return 0;
@@ -8163,8 +7937,6 @@ static const struct dev_pm_ops rtl8169_pm_ops = {
static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* WoL fails with 8168b when the receiver is disabled. */
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_11:
@@ -8172,9 +7944,9 @@ static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_17:
pci_clear_master(tp->pci_dev);
- RTL_W8(ChipCmd, CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdRxEnb);
/* PCI commit */
- RTL_R8(ChipCmd);
+ RTL_R8(tp, ChipCmd);
break;
default:
break;
@@ -8209,15 +7981,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
struct net_device *dev = pci_get_drvdata(pdev);
struct rtl8169_private *tp = netdev_priv(dev);
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
rtl8168_driver_stop(tp);
- }
netif_napi_del(&tp->napi);
@@ -8256,7 +8021,7 @@ static const struct rtl_cfg_info {
unsigned int region;
unsigned int align;
u16 event_slow;
- unsigned features;
+ unsigned int has_gmii:1;
const struct rtl_coalesce_info *coalesce_info;
u8 default_ver;
} rtl_cfg_infos [] = {
@@ -8265,7 +8030,7 @@ static const struct rtl_cfg_info {
.region = 1,
.align = 0,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
- .features = RTL_FEATURE_GMII,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8169,
.default_ver = RTL_GIGA_MAC_VER_01,
},
@@ -8274,7 +8039,7 @@ static const struct rtl_cfg_info {
.region = 2,
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow,
- .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_11,
},
@@ -8284,56 +8049,44 @@ static const struct rtl_cfg_info {
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
PCSTimeout,
- .features = RTL_FEATURE_MSI,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_13,
}
};
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
- const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- unsigned msi = 0;
- u8 cfg2;
+ unsigned int flags;
- cfg2 = RTL_R8(Config2) & ~MSIEnable;
- if (cfg->features & RTL_FEATURE_MSI) {
- if (pci_enable_msi(tp->pci_dev)) {
- netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
- } else {
- cfg2 |= MSIEnable;
- msi = RTL_FEATURE_MSI;
- }
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ flags = PCI_IRQ_LEGACY;
+ } else {
+ flags = PCI_IRQ_ALL_TYPES;
}
- if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
- RTL_W8(Config2, cfg2);
- return msi;
+
+ return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
}
DECLARE_RTL_COND(rtl_link_list_ready_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(MCU) & LINK_LIST_RDY;
+ return RTL_R8(tp, MCU) & LINK_LIST_RDY;
}
DECLARE_RTL_COND(rtl_rxtx_empty_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return (RTL_R8(MCU) & RXTX_EMPTY) == RXTX_EMPTY;
+ return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
}
static void rtl_hw_init_8168g(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u32 data;
tp->ocp_base = OCP_STD_PHY_BASE;
- RTL_W32(MISC, RTL_R32(MISC) | RXDV_GATED_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
if (!rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
return;
@@ -8341,9 +8094,9 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
return;
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
msleep(1);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
data = r8168_mac_ocp_read(tp, 0xe8de);
data &= ~(1 << 14);
@@ -8397,7 +8150,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
struct rtl8169_private *tp;
struct mii_if_info *mii;
struct net_device *dev;
- void __iomem *ioaddr;
int chipset, i;
int rc;
@@ -8423,7 +8175,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
mii->mdio_write = rtl_mdio_write;
mii->phy_id_mask = 0x1f;
mii->reg_num_mask = 0x1f;
- mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+ mii->supports_gmii = cfg->has_gmii;
/* disable ASPM completely as that cause random device stop working
* problems as well as full system hangs for some PCIe devices users */
@@ -8455,20 +8207,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return -ENODEV;
}
- rc = pci_request_regions(pdev, MODULENAME);
+ rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME);
if (rc < 0) {
- netif_err(tp, probe, dev, "could not request regions\n");
+ netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
return rc;
}
- /* ioremap MMIO region */
- ioaddr = devm_ioremap(&pdev->dev, pci_resource_start(pdev, region),
- R8169_REGS_SIZE);
- if (!ioaddr) {
- netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
- return -EIO;
- }
- tp->mmio_addr = ioaddr;
+ tp->mmio_addr = pcim_iomap_table(pdev)[region];
if (!pci_is_pcie(pdev))
netif_info(tp, probe, dev, "not PCI Express\n");
@@ -8518,41 +8263,14 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
chipset = tp->mac_version;
tp->txd_version = rtl_chip_infos[chipset].txd_version;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
- RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
- RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_38:
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
- if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
- tp->features |= RTL_FEATURE_WOL;
- if ((RTL_R8(Config3) & LinkUp) != 0)
- tp->features |= RTL_FEATURE_WOL;
- break;
- default:
- if ((RTL_R8(Config3) & (LinkUp | MagicPacket)) != 0)
- tp->features |= RTL_FEATURE_WOL;
- break;
+ rc = rtl_alloc_irq(tp);
+ if (rc < 0) {
+ netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+ return rc;
}
- if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
- tp->features |= RTL_FEATURE_WOL;
- tp->features |= rtl_try_msi(tp, cfg);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+
+ /* override BIOS settings, use userspace tools to enable WOL */
+ __rtl8169_set_wol(tp, 0);
if (rtl_tbi_enabled(tp)) {
tp->set_speed = rtl8169_set_speed_tbi;
@@ -8600,7 +8318,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rtl_rar_set(tp, (u8 *)mac_addr);
}
for (i = 0; i < ETH_ALEN; i++)
- dev->dev_addr[i] = RTL_R8(MAC0 + i);
+ dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
dev->ethtool_ops = &rtl8169_ethtool_ops;
dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
@@ -8667,8 +8385,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, dev);
netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
- rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
- (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+ rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr,
+ (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
+ pci_irq_vector(pdev, 0));
if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
"tx checksumming: %s]\n",
@@ -8676,15 +8395,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko");
}
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
rtl8168_driver_start(tp);
- }
netif_carrier_off(dev);
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 96a27b00c90e..b81f4faf7b10 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -1018,6 +1018,7 @@ struct ravb_private {
u32 dirty_rx[NUM_RX_QUEUE]; /* Producer ring indices */
u32 cur_tx[NUM_TX_QUEUE];
u32 dirty_tx[NUM_TX_QUEUE];
+ u32 rx_buf_sz; /* Based on MTU+slack. */
struct napi_struct napi[NUM_RX_QUEUE];
struct work_struct work;
/* MII transceiver section. */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index c87f57ca4437..68f122140966 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -238,7 +238,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
le32_to_cpu(desc->dptr)))
dma_unmap_single(ndev->dev.parent,
le32_to_cpu(desc->dptr),
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
}
ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -300,9 +300,9 @@ static void ravb_ring_format(struct net_device *ndev, int q)
for (i = 0; i < priv->num_rx_ring[q]; i++) {
/* RX descriptor */
rx_desc = &priv->rx_ring[q][i];
- rx_desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+ rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
/* We just set the data size to 0 for a failed mapping which
* should prevent DMA from happening...
@@ -346,6 +346,9 @@ static int ravb_ring_init(struct net_device *ndev, int q)
int ring_size;
int i;
+ priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+ ETH_HLEN + VLAN_HLEN;
+
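(A worked example of the sizing above, assuming the driver's historical 1538-byte
PKT_BUF_SZ from ravb.h:

	/* MTU <= 1492:       rx_buf_sz = 1538 + 14 (ETH_HLEN) + 4 (VLAN_HLEN) = 1556 */
	/* MTU == 1500:       rx_buf_sz = 1500 + 14 + 4 = 1518                        */
	/* MTU == 2026 (max): rx_buf_sz = 2026 + 14 + 4 = 2044                        */

The 1492-byte cut-over keeps small-MTU setups on the legacy PKT_BUF_SZ-derived
size, and the maximum case plus the 4-byte FCS exactly fills the 2048-byte limit
used in ravb_probe() below.)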
/* Allocate RX and TX skb rings */
priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -355,7 +358,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
goto error;
for (i = 0; i < priv->num_rx_ring[q]; i++) {
- skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1);
+ skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
if (!skb)
goto error;
ravb_set_buffer_align(skb);
@@ -586,7 +589,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
skb = priv->rx_skb[q][entry];
priv->rx_skb[q][entry] = NULL;
dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
get_ts &= (q == RAVB_NC) ?
RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -619,11 +622,12 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
desc = &priv->rx_ring[q][entry];
- desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+ desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
if (!priv->rx_skb[q][entry]) {
skb = netdev_alloc_skb(ndev,
- PKT_BUF_SZ + RAVB_ALIGN - 1);
+ priv->rx_buf_sz +
+ RAVB_ALIGN - 1);
if (!skb)
break; /* Better luck next round. */
ravb_set_buffer_align(skb);
@@ -1854,6 +1858,17 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
return phy_mii_ioctl(phydev, req, cmd);
}
+static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
+{
+ if (netif_running(ndev))
+ return -EBUSY;
+
+ ndev->mtu = new_mtu;
+ netdev_update_features(ndev);
+
+ return 0;
+}
+
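(The -EBUSY above reflects that rx_buf_sz is only recomputed in ravb_ring_init(),
i.e. at open time. A hypothetical live-resize variant -- sketch only, not proposed
by this patch -- would have to cycle the interface itself:

	static int ravb_change_mtu_live(struct net_device *ndev, int new_mtu)
	{
		bool running = netif_running(ndev);
		int error = 0;

		if (running)
			ravb_close(ndev);	/* frees rings sized for the old MTU */

		ndev->mtu = new_mtu;
		netdev_update_features(ndev);

		if (running)
			error = ravb_open(ndev); /* reallocates with the new rx_buf_sz */

		return error;
	}

Returning -EBUSY instead leaves that down/up cycle to userspace.)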
static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
{
struct ravb_private *priv = netdev_priv(ndev);
@@ -1895,6 +1910,7 @@ static const struct net_device_ops ravb_netdev_ops = {
.ndo_set_rx_mode = ravb_set_rx_mode,
.ndo_tx_timeout = ravb_tx_timeout,
.ndo_do_ioctl = ravb_do_ioctl,
+ .ndo_change_mtu = ravb_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_set_features = ravb_set_features,
@@ -2117,6 +2133,9 @@ static int ravb_probe(struct platform_device *pdev)
goto out_release;
}
+ ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+ ndev->min_mtu = ETH_MIN_MTU;
+
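(Spelling out the macros in the max_mtu line above:

	/* max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
	 *         = 2048 - (14 + 4 + 4)
	 *         = 2026 bytes
	 */

The 2048-byte ceiling is taken from the line itself; attributing it to a hardware
buffer limit is an assumption.)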
/* Set function */
ndev->netdev_ops = &ravb_netdev_ops;
ndev->ethtool_ops = &ravb_ethtool_ops;
@@ -2255,9 +2274,6 @@ static int ravb_wol_setup(struct net_device *ndev)
/* Enable MagicPacket */
ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
- /* Increased clock usage so device won't be suspended */
- clk_enable(priv->clk);
-
return enable_irq_wake(priv->emac_irq);
}
@@ -2276,9 +2292,6 @@ static int ravb_wol_restore(struct net_device *ndev)
if (ret < 0)
return ret;
- /* Restore clock usage count */
- clk_disable(priv->clk);
-
return disable_irq_wake(priv->emac_irq);
}
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index a197e11f3a56..306558ef36b5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -40,7 +40,6 @@
#include <linux/slab.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
-#include <linux/clk.h>
#include <linux/sh_eth.h>
#include <linux/of_mdio.h>
@@ -124,8 +123,8 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
[TSU_FWSL0] = 0x0030,
[TSU_FWSL1] = 0x0034,
[TSU_FWSLC] = 0x0038,
- [TSU_QTAG0] = 0x0040,
- [TSU_QTAG1] = 0x0044,
+ [TSU_QTAGM0] = 0x0040,
+ [TSU_QTAGM1] = 0x0044,
[TSU_FWSR] = 0x0050,
[TSU_FWINMK] = 0x0054,
[TSU_ADQT0] = 0x0048,
@@ -440,14 +439,15 @@ static void sh_eth_modify(struct net_device *ndev, int enum_index, u32 clear,
enum_index);
}
-static bool sh_eth_is_gether(struct sh_eth_private *mdp)
+static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
+ int enum_index)
{
- return mdp->reg_offset == sh_eth_offset_gigabit;
+ iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]);
}
-static bool sh_eth_is_rz_fast_ether(struct sh_eth_private *mdp)
+static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
{
- return mdp->reg_offset == sh_eth_offset_fast_rz;
+ return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]);
}
static void sh_eth_select_mii(struct net_device *ndev)
@@ -491,6 +491,62 @@ static void sh_eth_chip_reset(struct net_device *ndev)
mdelay(1);
}
+static int sh_eth_soft_reset(struct net_device *ndev)
+{
+ sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, EDMR_SRST_ETHER);
+ mdelay(3);
+ sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, 0);
+
+ return 0;
+}
+
+static int sh_eth_check_soft_reset(struct net_device *ndev)
+{
+ int cnt;
+
+ for (cnt = 100; cnt > 0; cnt--) {
+ if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
+ return 0;
+ mdelay(1);
+ }
+
+ netdev_err(ndev, "Device reset failed\n");
+ return -ETIMEDOUT;
+}
+
+static int sh_eth_soft_reset_gether(struct net_device *ndev)
+{
+ struct sh_eth_private *mdp = netdev_priv(ndev);
+ int ret;
+
+ sh_eth_write(ndev, EDSR_ENALL, EDSR);
+ sh_eth_modify(ndev, EDMR, EDMR_SRST_GETHER, EDMR_SRST_GETHER);
+
+ ret = sh_eth_check_soft_reset(ndev);
+ if (ret)
+ return ret;
+
+ /* Table Init */
+ sh_eth_write(ndev, 0, TDLAR);
+ sh_eth_write(ndev, 0, TDFAR);
+ sh_eth_write(ndev, 0, TDFXR);
+ sh_eth_write(ndev, 0, TDFFR);
+ sh_eth_write(ndev, 0, RDLAR);
+ sh_eth_write(ndev, 0, RDFAR);
+ sh_eth_write(ndev, 0, RDFXR);
+ sh_eth_write(ndev, 0, RDFFR);
+
+ /* Reset HW CRC register */
+ if (mdp->cd->hw_checksum)
+ sh_eth_write(ndev, 0, CSMR);
+
+ /* Select MII mode */
+ if (mdp->cd->select_mii)
+ sh_eth_select_mii(ndev);
+
+ return ret;
+}
+
static void sh_eth_set_rate_gether(struct net_device *ndev)
{
struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -511,11 +567,14 @@ static void sh_eth_set_rate_gether(struct net_device *ndev)
#ifdef CONFIG_OF
/* R7S72100 */
static struct sh_eth_cpu_data r7s72100_data = {
+ .soft_reset = sh_eth_soft_reset_gether,
+
.chip_reset = sh_eth_chip_reset,
.set_duplex = sh_eth_set_duplex,
.register_type = SH_ETH_REG_FAST_RZ,
+ .edtrr_trns = EDTRR_TRNS_GETHER,
.ecsr_value = ECSR_ICD,
.ecsipr_value = ECSIPR_ICDIP,
.eesipr_value = EESIPR_TWB1IP | EESIPR_TWBIP | EESIPR_TC1IP |
@@ -542,8 +601,10 @@ static struct sh_eth_cpu_data r7s72100_data = {
.rpadir_value = 2 << 16,
.no_trimd = 1,
.no_ade = 1,
+ .xdfar_rw = 1,
.hw_checksum = 1,
.tsu = 1,
+ .no_tx_cntrs = 1,
};
static void sh_eth_chip_reset_r8a7740(struct net_device *ndev)
@@ -555,12 +616,15 @@ static void sh_eth_chip_reset_r8a7740(struct net_device *ndev)
/* R8A7740 */
static struct sh_eth_cpu_data r8a7740_data = {
+ .soft_reset = sh_eth_soft_reset_gether,
+
.chip_reset = sh_eth_chip_reset_r8a7740,
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_gether,
.register_type = SH_ETH_REG_GIGABIT,
+ .edtrr_trns = EDTRR_TRNS_GETHER,
.ecsr_value = ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -587,10 +651,12 @@ static struct sh_eth_cpu_data r8a7740_data = {
.rpadir_value = 2 << 16,
.no_trimd = 1,
.no_ade = 1,
+ .xdfar_rw = 1,
.hw_checksum = 1,
.tsu = 1,
.select_mii = 1,
.magic = 1,
+ .cexcr = 1,
};
/* There is CPU dependent code */
@@ -610,11 +676,14 @@ static void sh_eth_set_rate_rcar(struct net_device *ndev)
/* R-Car Gen1 */
static struct sh_eth_cpu_data rcar_gen1_data = {
+ .soft_reset = sh_eth_soft_reset,
+
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_rcar,
.register_type = SH_ETH_REG_FAST_RCAR,
+ .edtrr_trns = EDTRR_TRNS_ETHER,
.ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
.ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ADEIP | EESIPR_ECIIP |
@@ -637,11 +706,14 @@ static struct sh_eth_cpu_data rcar_gen1_data = {
/* R-Car Gen2 and RZ/G1 */
static struct sh_eth_cpu_data rcar_gen2_data = {
+ .soft_reset = sh_eth_soft_reset,
+
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_rcar,
.register_type = SH_ETH_REG_FAST_RCAR,
+ .edtrr_trns = EDTRR_TRNS_ETHER,
.ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP |
ECSIPR_MPDIP,
@@ -684,11 +756,14 @@ static void sh_eth_set_rate_sh7724(struct net_device *ndev)
/* SH7724 */
static struct sh_eth_cpu_data sh7724_data = {
+ .soft_reset = sh_eth_soft_reset,
+
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_sh7724,
.register_type = SH_ETH_REG_FAST_SH4,
+ .edtrr_trns = EDTRR_TRNS_ETHER,
.ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
.ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ADEIP | EESIPR_ECIIP |
@@ -726,11 +801,14 @@ static void sh_eth_set_rate_sh7757(struct net_device *ndev)
/* SH7757 */
static struct sh_eth_cpu_data sh7757_data = {
+ .soft_reset = sh_eth_soft_reset,
+
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_sh7757,
.register_type = SH_ETH_REG_FAST_SH4,
+ .edtrr_trns = EDTRR_TRNS_ETHER,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ECIIP |
EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -753,6 +831,7 @@ static struct sh_eth_cpu_data sh7757_data = {
.rpadir = 1,
.rpadir_value = 2 << 16,
.rtrate = 1,
+ .dual_port = 1,
};
#define SH_GIGA_ETH_BASE 0xfee00000UL
@@ -797,12 +876,15 @@ static void sh_eth_set_rate_giga(struct net_device *ndev)
/* SH7757(GETHERC) */
static struct sh_eth_cpu_data sh7757_data_giga = {
+ .soft_reset = sh_eth_soft_reset_gether,
+
.chip_reset = sh_eth_chip_reset_giga,
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_giga,
.register_type = SH_ETH_REG_GIGABIT,
+ .edtrr_trns = EDTRR_TRNS_GETHER,
.ecsr_value = ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -830,17 +912,23 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
.rpadir_value = 2 << 16,
.no_trimd = 1,
.no_ade = 1,
+ .xdfar_rw = 1,
.tsu = 1,
+ .cexcr = 1,
+ .dual_port = 1,
};
/* SH7734 */
static struct sh_eth_cpu_data sh7734_data = {
+ .soft_reset = sh_eth_soft_reset_gether,
+
.chip_reset = sh_eth_chip_reset,
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_gether,
.register_type = SH_ETH_REG_GIGABIT,
+ .edtrr_trns = EDTRR_TRNS_GETHER,
.ecsr_value = ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -863,20 +951,25 @@ static struct sh_eth_cpu_data sh7734_data = {
.hw_swap = 1,
.no_trimd = 1,
.no_ade = 1,
+ .xdfar_rw = 1,
.tsu = 1,
.hw_checksum = 1,
.select_mii = 1,
.magic = 1,
+ .cexcr = 1,
};
/* SH7763 */
static struct sh_eth_cpu_data sh7763_data = {
+ .soft_reset = sh_eth_soft_reset_gether,
+
.chip_reset = sh_eth_chip_reset,
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_gether,
.register_type = SH_ETH_REG_GIGABIT,
+ .edtrr_trns = EDTRR_TRNS_GETHER,
.ecsr_value = ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -898,14 +991,20 @@ static struct sh_eth_cpu_data sh7763_data = {
.hw_swap = 1,
.no_trimd = 1,
.no_ade = 1,
+ .xdfar_rw = 1,
.tsu = 1,
.irq_flags = IRQF_SHARED,
.magic = 1,
+ .cexcr = 1,
+ .dual_port = 1,
};
static struct sh_eth_cpu_data sh7619_data = {
+ .soft_reset = sh_eth_soft_reset,
+
.register_type = SH_ETH_REG_FAST_SH3_SH2,
+ .edtrr_trns = EDTRR_TRNS_ETHER,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ECIIP |
EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -922,8 +1021,11 @@ static struct sh_eth_cpu_data sh7619_data = {
};
static struct sh_eth_cpu_data sh771x_data = {
+ .soft_reset = sh_eth_soft_reset,
+
.register_type = SH_ETH_REG_FAST_SH3_SH2,
+ .edtrr_trns = EDTRR_TRNS_ETHER,
.eesipr_value = EESIPR_RFCOFIP | EESIPR_ECIIP |
EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -933,6 +1035,7 @@ static struct sh_eth_cpu_data sh771x_data = {
EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
EESIPR_PREIP | EESIPR_CERFIP,
.tsu = 1,
+ .dual_port = 1,
};
static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
@@ -960,63 +1063,6 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
cd->trscer_err_mask = DEFAULT_TRSCER_ERR_MASK;
}
-static int sh_eth_check_reset(struct net_device *ndev)
-{
- int ret = 0;
- int cnt = 100;
-
- while (cnt > 0) {
- if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
- break;
- mdelay(1);
- cnt--;
- }
- if (cnt <= 0) {
- netdev_err(ndev, "Device reset failed\n");
- ret = -ETIMEDOUT;
- }
- return ret;
-}
-
-static int sh_eth_reset(struct net_device *ndev)
-{
- struct sh_eth_private *mdp = netdev_priv(ndev);
- int ret = 0;
-
- if (sh_eth_is_gether(mdp) || sh_eth_is_rz_fast_ether(mdp)) {
- sh_eth_write(ndev, EDSR_ENALL, EDSR);
- sh_eth_modify(ndev, EDMR, EDMR_SRST_GETHER, EDMR_SRST_GETHER);
-
- ret = sh_eth_check_reset(ndev);
- if (ret)
- return ret;
-
- /* Table Init */
- sh_eth_write(ndev, 0x0, TDLAR);
- sh_eth_write(ndev, 0x0, TDFAR);
- sh_eth_write(ndev, 0x0, TDFXR);
- sh_eth_write(ndev, 0x0, TDFFR);
- sh_eth_write(ndev, 0x0, RDLAR);
- sh_eth_write(ndev, 0x0, RDFAR);
- sh_eth_write(ndev, 0x0, RDFXR);
- sh_eth_write(ndev, 0x0, RDFFR);
-
- /* Reset HW CRC register */
- if (mdp->cd->hw_checksum)
- sh_eth_write(ndev, 0x0, CSMR);
-
- /* Select MII mode */
- if (mdp->cd->select_mii)
- sh_eth_select_mii(ndev);
- } else {
- sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, EDMR_SRST_ETHER);
- mdelay(3);
- sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, 0);
- }
-
- return ret;
-}
-
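[Annotation: the sh_eth_reset()/sh_eth_check_reset() pair removed above is replaced by the per-SoC soft_reset() callbacks installed in each sh_eth_cpu_data earlier in this patch. Their definitions fall outside this excerpt; below is a minimal sketch reconstructed from the removed code. The helper name sh_eth_check_soft_reset() is an assumption.]

	static int sh_eth_check_soft_reset(struct net_device *ndev)
	{
		int cnt;

		/* Wait up to 100 ms for the self-clearing GETHER reset bit */
		for (cnt = 100; cnt > 0; cnt--) {
			if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
				return 0;
			mdelay(1);
		}

		netdev_err(ndev, "Device reset failed\n");
		return -ETIMEDOUT;
	}

	/* Fast Ethernet variant: pulse the reset bit, then fixed delay */
	static int sh_eth_soft_reset(struct net_device *ndev)
	{
		sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, EDMR_SRST_ETHER);
		mdelay(3);
		sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, 0);

		return 0;
	}

	/* GETHER variant: poll for completion, then re-init the tables */
	static int sh_eth_soft_reset_gether(struct net_device *ndev)
	{
		struct sh_eth_private *mdp = netdev_priv(ndev);
		int ret;

		sh_eth_write(ndev, EDSR_ENALL, EDSR);
		sh_eth_modify(ndev, EDMR, EDMR_SRST_GETHER, EDMR_SRST_GETHER);

		ret = sh_eth_check_soft_reset(ndev);
		if (ret)
			return ret;

		/* Table Init */
		sh_eth_write(ndev, 0, TDLAR);
		sh_eth_write(ndev, 0, TDFAR);
		sh_eth_write(ndev, 0, TDFXR);
		sh_eth_write(ndev, 0, TDFFR);
		sh_eth_write(ndev, 0, RDLAR);
		sh_eth_write(ndev, 0, RDFAR);
		sh_eth_write(ndev, 0, RDFXR);
		sh_eth_write(ndev, 0, RDFFR);

		/* Reset HW CRC register */
		if (mdp->cd->hw_checksum)
			sh_eth_write(ndev, 0, CSMR);

		/* Select MII mode */
		if (mdp->cd->select_mii)
			sh_eth_select_mii(ndev);

		return 0;
	}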
static void sh_eth_set_receive_align(struct sk_buff *skb)
{
uintptr_t reserve = (uintptr_t)skb->data & (SH_ETH_RX_ALIGN - 1);
@@ -1059,14 +1105,6 @@ static void read_mac_address(struct net_device *ndev, unsigned char *mac)
}
}
-static u32 sh_eth_get_edtrr_trns(struct sh_eth_private *mdp)
-{
- if (sh_eth_is_gether(mdp) || sh_eth_is_rz_fast_ether(mdp))
- return EDTRR_TRNS_GETHER;
- else
- return EDTRR_TRNS_ETHER;
-}
-
struct bb_info {
void (*set_gate)(void *addr);
struct mdiobb_ctrl ctrl;
@@ -1263,8 +1301,7 @@ static void sh_eth_ring_format(struct net_device *ndev)
/* Rx descriptor address set */
if (i == 0) {
sh_eth_write(ndev, mdp->rx_desc_dma, RDLAR);
- if (sh_eth_is_gether(mdp) ||
- sh_eth_is_rz_fast_ether(mdp))
+ if (mdp->cd->xdfar_rw)
sh_eth_write(ndev, mdp->rx_desc_dma, RDFAR);
}
}
@@ -1286,8 +1323,7 @@ static void sh_eth_ring_format(struct net_device *ndev)
if (i == 0) {
/* Tx descriptor address set */
sh_eth_write(ndev, mdp->tx_desc_dma, TDLAR);
- if (sh_eth_is_gether(mdp) ||
- sh_eth_is_rz_fast_ether(mdp))
+ if (mdp->cd->xdfar_rw)
sh_eth_write(ndev, mdp->tx_desc_dma, TDFAR);
}
}
@@ -1352,7 +1388,7 @@ static int sh_eth_dev_init(struct net_device *ndev)
int ret;
/* Soft Reset */
- ret = sh_eth_reset(ndev);
+ ret = mdp->cd->soft_reset(ndev);
if (ret)
return ret;
@@ -1453,7 +1489,7 @@ static void sh_eth_dev_exit(struct net_device *ndev)
*/
msleep(2); /* max frame time at 10 Mbps < 1250 us */
sh_eth_get_stats(ndev);
- sh_eth_reset(ndev);
+ mdp->cd->soft_reset(ndev);
/* Set MAC address again */
update_mac_address(ndev);
@@ -1706,9 +1742,9 @@ static void sh_eth_error(struct net_device *ndev, u32 intr_status)
sh_eth_tx_free(ndev, true);
/* SH7712 BUG */
- if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) {
+ if (edtrr ^ mdp->cd->edtrr_trns) {
/* tx dma start */
- sh_eth_write(ndev, sh_eth_get_edtrr_trns(mdp), EDTRR);
+ sh_eth_write(ndev, mdp->cd->edtrr_trns, EDTRR);
}
/* wakeup */
netif_wake_queue(ndev);
@@ -2102,8 +2138,6 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
add_tsu_reg(TSU_FWSL0);
add_tsu_reg(TSU_FWSL1);
add_tsu_reg(TSU_FWSLC);
- add_tsu_reg(TSU_QTAG0);
- add_tsu_reg(TSU_QTAG1);
add_tsu_reg(TSU_QTAGM0);
add_tsu_reg(TSU_QTAGM1);
add_tsu_reg(TSU_FWSR);
@@ -2304,7 +2338,7 @@ static void sh_eth_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
wol->supported = 0;
wol->wolopts = 0;
- if (mdp->cd->magic && mdp->clk) {
+ if (mdp->cd->magic) {
wol->supported = WAKE_MAGIC;
wol->wolopts = mdp->wol_enabled ? WAKE_MAGIC : 0;
}
@@ -2314,7 +2348,7 @@ static int sh_eth_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
{
struct sh_eth_private *mdp = netdev_priv(ndev);
- if (!mdp->cd->magic || !mdp->clk || wol->wolopts & ~WAKE_MAGIC)
+ if (!mdp->cd->magic || wol->wolopts & ~WAKE_MAGIC)
return -EOPNOTSUPP;
mdp->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
@@ -2469,8 +2503,8 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
mdp->cur_tx++;
- if (!(sh_eth_read(ndev, EDTRR) & sh_eth_get_edtrr_trns(mdp)))
- sh_eth_write(ndev, sh_eth_get_edtrr_trns(mdp), EDTRR);
+ if (!(sh_eth_read(ndev, EDTRR) & mdp->cd->edtrr_trns))
+ sh_eth_write(ndev, mdp->cd->edtrr_trns, EDTRR);
return NETDEV_TX_OK;
}
@@ -2495,7 +2529,7 @@ static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev)
{
struct sh_eth_private *mdp = netdev_priv(ndev);
- if (sh_eth_is_rz_fast_ether(mdp))
+ if (mdp->cd->no_tx_cntrs)
return &ndev->stats;
if (!mdp->is_opened)
@@ -2505,7 +2539,7 @@ static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev)
sh_eth_update_stat(ndev, &ndev->stats.collisions, CDCR);
sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors, LCCR);
- if (sh_eth_is_gether(mdp)) {
+ if (mdp->cd->cexcr) {
sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors,
CERCR);
sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors,
@@ -2922,7 +2956,7 @@ static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev,
/* SuperH's TSU register init function */
static void sh_eth_tsu_init(struct sh_eth_private *mdp)
{
- if (sh_eth_is_rz_fast_ether(mdp)) {
+ if (!mdp->cd->dual_port) {
sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
TSU_FWSLC); /* Enable POST registers */
@@ -2939,13 +2973,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp)
sh_eth_tsu_write(mdp, 0, TSU_FWSL0);
sh_eth_tsu_write(mdp, 0, TSU_FWSL1);
sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC);
- if (sh_eth_is_gether(mdp)) {
- sh_eth_tsu_write(mdp, 0, TSU_QTAG0); /* Disable QTAG(0->1) */
- sh_eth_tsu_write(mdp, 0, TSU_QTAG1); /* Disable QTAG(1->0) */
- } else {
- sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */
- sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */
- }
+ sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */
+ sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */
sh_eth_tsu_write(mdp, 0, TSU_FWSR); /* all interrupt status clear */
sh_eth_tsu_write(mdp, 0, TSU_FWINMK); /* Disable all interrupt */
sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
@@ -3153,11 +3182,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
goto out_release;
}
- /* Get clock, if not found that's OK but Wake-On-Lan is unavailable */
- mdp->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(mdp->clk))
- mdp->clk = NULL;
-
ndev->base_addr = res->start;
spin_lock_init(&mdp->lock);
@@ -3278,7 +3302,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
if (ret)
goto out_napi_del;
- if (mdp->cd->magic && mdp->clk)
+ if (mdp->cd->magic)
device_set_wakeup_capable(&pdev->dev, 1);
/* print device information */
@@ -3331,9 +3355,6 @@ static int sh_eth_wol_setup(struct net_device *ndev)
/* Enable MagicPacket */
sh_eth_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
- /* Increased clock usage so device won't be suspended */
- clk_enable(mdp->clk);
-
return enable_irq_wake(ndev->irq);
}
@@ -3359,9 +3380,6 @@ static int sh_eth_wol_restore(struct net_device *ndev)
if (ret < 0)
return ret;
- /* Restore clock usage count */
- clk_disable(mdp->clk);
-
return disable_irq_wake(ndev->irq);
}
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index a6753ccba711..a0416e04306a 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -118,8 +118,8 @@ enum {
TSU_FWSL0,
TSU_FWSL1,
TSU_FWSLC,
- TSU_QTAG0,
- TSU_QTAG1,
+ TSU_QTAG0, /* Same as TSU_QTAGM0 */
+ TSU_QTAG1, /* Same as TSU_QTAGM1 */
TSU_QTAGM0,
TSU_QTAGM1,
TSU_FWSR,
@@ -469,6 +469,9 @@ struct sh_eth_rxdesc {
/* This structure is used by each CPU dependency handling. */
struct sh_eth_cpu_data {
+ /* mandatory functions */
+ int (*soft_reset)(struct net_device *ndev);
+
/* optional functions */
void (*chip_reset)(struct net_device *ndev);
void (*set_duplex)(struct net_device *ndev);
@@ -476,6 +479,7 @@ struct sh_eth_cpu_data {
/* mandatory initialize value */
int register_type;
+ u32 edtrr_trns;
u32 eesipr_value;
/* optional initialize value */
@@ -504,11 +508,15 @@ struct sh_eth_cpu_data {
unsigned rpadir:1; /* E-DMAC have RPADIR */
unsigned no_trimd:1; /* E-DMAC DO NOT have TRIMD */
unsigned no_ade:1; /* E-DMAC DO NOT have ADE bit in EESR */
+ unsigned xdfar_rw:1; /* E-DMAC has writeable RDFAR/TDFAR */
unsigned hw_checksum:1; /* E-DMAC has CSMR */
unsigned select_mii:1; /* EtherC have RMII_MII (MII select register) */
unsigned rmiimode:1; /* EtherC has RMIIMODE register */
unsigned rtrate:1; /* EtherC has RTRATE register */
unsigned magic:1; /* EtherC has ECMR.MPDE and ECSR.MPD */
+ unsigned no_tx_cntrs:1; /* EtherC DOES NOT have TX error counters */
+ unsigned cexcr:1; /* EtherC has CERCR/CEECR */
+ unsigned dual_port:1; /* Dual EtherC/E-DMAC */
};
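[Annotation: each new bit turns a register_type comparison into a data-driven capability test, so the sh_eth_is_gether()/sh_eth_is_rz_fast_ether() helpers can eventually go away. The pattern, in miniature, taken from the ring-format hunks above:]

	/* Before: behaviour inferred from the register layout */
	if (sh_eth_is_gether(mdp) || sh_eth_is_rz_fast_ether(mdp))
		sh_eth_write(ndev, mdp->tx_desc_dma, TDFAR);

	/* After: behaviour stated explicitly per SoC */
	if (mdp->cd->xdfar_rw)
		sh_eth_write(ndev, mdp->tx_desc_dma, TDFAR);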
struct sh_eth_private {
@@ -567,15 +575,4 @@ static inline void *sh_eth_tsu_get_offset(struct sh_eth_private *mdp,
return mdp->tsu_addr + mdp->reg_offset[enum_index];
}
-static inline void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
- int enum_index)
-{
- iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]);
-}
-
-static inline u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
-{
- return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]);
-}
-
#endif /* #ifndef __SH_ETH_H__ */
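[Annotation: the inline TSU accessors are dropped from the header; presumably they move into sh_eth.c, where they can also sanity-check per-chip register maps in which an offset may be invalid. A sketch under that assumption, reusing the driver's existing SH_ETH_OFFSET_INVALID marker:]

	static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
				     int enum_index)
	{
		u16 offset = mdp->reg_offset[enum_index];

		if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
			return;

		iowrite32(data, mdp->tsu_addr + offset);
	}

	static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
	{
		u16 offset = mdp->reg_offset[enum_index];

		if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
			return ~0U;

		return ioread32(mdp->tsu_addr + offset);
	}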
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index fd35d8004a78..a9da1ad4b4f2 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -57,9 +57,9 @@
static int debug = -1;
static int eee_timer = SXGBE_DEFAULT_LPI_TIMER;
-module_param(eee_timer, int, S_IRUGO | S_IWUSR);
+module_param(eee_timer, int, 0644);
-module_param(debug, int, S_IRUGO | S_IWUSR);
+module_param(debug, int, 0644);
static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
NETIF_MSG_IFDOWN | NETIF_MSG_TIMER);
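[Annotation: the sxgbe change is purely cosmetic. checkpatch prefers octal permission literals over S_IRUGO | S_IWUSR, which expand to the same 0644 mode bits. A stand-alone check of the equivalence, as userspace C for illustration only:]

	#include <assert.h>
	#include <sys/stat.h>

	int main(void)
	{
		/* S_IRUSR|S_IRGRP|S_IROTH (the kernel's S_IRUGO) is 0444,
		 * S_IWUSR is 0200; ORed together they are exactly 0644.
		 */
		assert((S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR) == 0644);
		return 0;
	}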
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 75fbf58e421c..c4c45c94da77 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -28,9 +28,6 @@ enum {
EFX_EF10_TEST = 1,
EFX_EF10_REFILL,
};
-
-/* The reserved RSS context value */
-#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff
/* The maximum size of a shared RSS context */
/* TODO: this should really be from the mcdi protocol export */
#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL
@@ -99,17 +96,15 @@ struct efx_ef10_filter_table {
MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2];
unsigned int rx_match_count;
+ struct rw_semaphore lock; /* Protects entries */
struct {
unsigned long spec; /* pointer to spec plus flag bits */
-/* BUSY flag indicates that an update is in progress. AUTO_OLD is
- * used to mark and sweep MAC filters for the device address lists.
- */
-#define EFX_EF10_FILTER_FLAG_BUSY 1UL
+/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. */
+/* unused flag 1UL */
#define EFX_EF10_FILTER_FLAG_AUTO_OLD 2UL
#define EFX_EF10_FILTER_FLAGS 3UL
u64 handle; /* firmware handle */
} *entry;
- wait_queue_head_t waitq;
/* Shadow of net_device address lists, guarded by mac_lock */
struct efx_ef10_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX];
struct efx_ef10_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX];
@@ -697,7 +692,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
}
nic_data->warm_boot_count = rc;
- nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
@@ -1489,8 +1484,8 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
}
/* don't fail init if RSS setup doesn't work */
- rc = efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table, NULL);
- efx->rss_active = (rc == 0);
+ rc = efx->type->rx_push_rss_config(efx, false,
+ efx->rss_context.rx_indir_table, NULL);
return 0;
}
@@ -1504,10 +1499,11 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
/* All our allocations have been reset */
nic_data->must_realloc_vis = true;
+ nic_data->must_restore_rss_contexts = true;
nic_data->must_restore_filters = true;
nic_data->must_restore_piobufs = true;
efx_ef10_forget_old_piobufs(efx);
- nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
/* Driver-created vswitches and vports must be re-created */
nic_data->must_probe_vswitching = true;
@@ -2703,27 +2699,30 @@ static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
* Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
* just need to set the UDP ports flags (for both IP versions).
*/
-static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context)
+static void efx_ef10_set_rss_flags(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
u32 flags;
BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
- if (efx_ef10_get_rss_flags(efx, context, &flags) != 0)
+ if (efx_ef10_get_rss_flags(efx, ctx->context_id, &flags) != 0)
return;
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context);
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
+ ctx->context_id);
flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
NULL, 0, NULL))
/* Succeeded, so UDP 4-tuple is now enabled */
- efx->rx_hash_udp_4tuple = true;
+ ctx->rx_hash_udp_4tuple = true;
}
-static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
- bool exclusive, unsigned *context_size)
+static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive,
+ struct efx_rss_context *ctx,
+ unsigned *context_size)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
@@ -2739,7 +2738,7 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
if (!exclusive && rss_spread == 1) {
- *context = EFX_EF10_RSS_CONTEXT_INVALID;
+ ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
if (context_size)
*context_size = 1;
return 0;
@@ -2762,29 +2761,26 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
return -EIO;
- *context = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
+ ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
if (context_size)
*context_size = rss_spread;
if (nic_data->datapath_caps &
1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
- efx_ef10_set_rss_flags(efx, *context);
+ efx_ef10_set_rss_flags(efx, ctx);
return 0;
}
-static void efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
+static int efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
- int rc;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID,
context);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
+ return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
NULL, 0, NULL);
- WARN_ON(rc != 0);
}
static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
@@ -2796,15 +2792,15 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
- /* This iterates over the length of efx->rx_indir_table, but copies
- * bytes from rx_indir_table. That's because the latter is a pointer
- * rather than an array, but should have the same length.
- * The efx->rx_hash_key loop below is similar.
+ /* This iterates over the length of efx->rss_context.rx_indir_table, but
+ * copies bytes from rx_indir_table. That's because the latter is a
+ * pointer rather than an array, but should have the same length.
+ * The efx->rss_context.rx_hash_key loop below is similar.
*/
- for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); ++i)
+ for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i)
MCDI_PTR(tablebuf,
RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] =
(u8) rx_indir_table[i];
@@ -2816,9 +2812,9 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) !=
MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
- for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
+ for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i)
MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i];
return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
@@ -2827,27 +2823,27 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
static void efx_ef10_rx_free_indir_table(struct efx_nic *efx)
{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ int rc;
- if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
- efx_ef10_free_rss_context(efx, nic_data->rx_rss_context);
- nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+ if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) {
+ rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id);
+ WARN_ON(rc != 0);
+ }
+ efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
}
static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx,
unsigned *context_size)
{
- u32 new_rx_rss_context;
struct efx_ef10_nic_data *nic_data = efx->nic_data;
- int rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context,
- false, context_size);
+ int rc = efx_ef10_alloc_rss_context(efx, false, &efx->rss_context,
+ context_size);
if (rc != 0)
return rc;
- nic_data->rx_rss_context = new_rx_rss_context;
nic_data->rx_rss_context_exclusive = false;
- efx_set_default_rx_indir_table(efx);
+ efx_set_default_rx_indir_table(efx, &efx->rss_context);
return 0;
}
@@ -2855,65 +2851,98 @@ static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx,
const u32 *rx_indir_table,
const u8 *key)
{
+ u32 old_rx_rss_context = efx->rss_context.context_id;
struct efx_ef10_nic_data *nic_data = efx->nic_data;
int rc;
- u32 new_rx_rss_context;
- if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID ||
+ if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID ||
!nic_data->rx_rss_context_exclusive) {
- rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context,
- true, NULL);
+ rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context,
+ NULL);
if (rc == -EOPNOTSUPP)
return rc;
else if (rc != 0)
goto fail1;
- } else {
- new_rx_rss_context = nic_data->rx_rss_context;
}
- rc = efx_ef10_populate_rss_table(efx, new_rx_rss_context,
+ rc = efx_ef10_populate_rss_table(efx, efx->rss_context.context_id,
rx_indir_table, key);
if (rc != 0)
goto fail2;
- if (nic_data->rx_rss_context != new_rx_rss_context)
- efx_ef10_rx_free_indir_table(efx);
- nic_data->rx_rss_context = new_rx_rss_context;
+ if (efx->rss_context.context_id != old_rx_rss_context &&
+ old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+ WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0);
nic_data->rx_rss_context_exclusive = true;
- if (rx_indir_table != efx->rx_indir_table)
- memcpy(efx->rx_indir_table, rx_indir_table,
- sizeof(efx->rx_indir_table));
- if (key != efx->rx_hash_key)
- memcpy(efx->rx_hash_key, key, efx->type->rx_hash_key_size);
+ if (rx_indir_table != efx->rss_context.rx_indir_table)
+ memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
+ if (key != efx->rss_context.rx_hash_key)
+ memcpy(efx->rss_context.rx_hash_key, key,
+ efx->type->rx_hash_key_size);
return 0;
fail2:
- if (new_rx_rss_context != nic_data->rx_rss_context)
- efx_ef10_free_rss_context(efx, new_rx_rss_context);
+ if (old_rx_rss_context != efx->rss_context.context_id) {
+ WARN_ON(efx_ef10_free_rss_context(efx, efx->rss_context.context_id) != 0);
+ efx->rss_context.context_id = old_rx_rss_context;
+ }
fail1:
netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
return rc;
}
-static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
+static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx,
+ struct efx_rss_context *ctx,
+ const u32 *rx_indir_table,
+ const u8 *key)
+{
+ int rc;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+ rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL);
+ if (rc)
+ return rc;
+ }
+
+ if (!rx_indir_table) /* Delete this context */
+ return efx_ef10_free_rss_context(efx, ctx->context_id);
+
+ rc = efx_ef10_populate_rss_table(efx, ctx->context_id,
+ rx_indir_table, key);
+ if (rc)
+ return rc;
+
+ memcpy(ctx->rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
+ memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size);
+
+ return 0;
+}
+
+static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN);
MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN);
MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN);
size_t outlen;
int rc, i;
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
- if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID)
+ if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
return -ENOENT;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
- nic_data->rx_rss_context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+ ctx->context_id);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
tablebuf, sizeof(tablebuf), &outlen);
@@ -2923,13 +2952,13 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN))
return -EIO;
- for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
- efx->rx_indir_table[i] = MCDI_PTR(tablebuf,
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+ ctx->rx_indir_table[i] = MCDI_PTR(tablebuf,
RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
- nic_data->rx_rss_context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
+ ctx->context_id);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
keybuf, sizeof(keybuf), &outlen);
@@ -2939,13 +2968,50 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN))
return -EIO;
- for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
- efx->rx_hash_key[i] = MCDI_PTR(
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i)
+ ctx->rx_hash_key[i] = MCDI_PTR(
keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i];
return 0;
}
+static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
+{
+ int rc;
+
+ mutex_lock(&efx->rss_lock);
+ rc = efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context);
+ mutex_unlock(&efx->rss_lock);
+ return rc;
+}
+
+static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ struct efx_rss_context *ctx;
+ int rc;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ if (!nic_data->must_restore_rss_contexts)
+ return;
+
+ list_for_each_entry(ctx, &efx->rss_context.list, list) {
+ /* previous NIC RSS context is gone */
+ ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ /* so try to allocate a new one */
+ rc = efx_ef10_rx_push_rss_context_config(efx, ctx,
+ ctx->rx_indir_table,
+ ctx->rx_hash_key);
+ if (rc)
+ netif_warn(efx, probe, efx->net_dev,
+ "failed to restore RSS context %u, rc=%d"
+ "; RSS filters may fail to be applied\n",
+ ctx->user_id, rc);
+ }
+ nic_data->must_restore_rss_contexts = false;
+}
+
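[Annotation: the new push/pull/restore trio operates on struct efx_rss_context objects kept on a list at efx->rss_context.list and guarded by efx->rss_lock. The type and the efx_find_rss_context_entry() helper used above are added elsewhere in this series; a plausible sketch follows, with the array sizes inferred from the MCDI table/key lengths used in this file.]

	/* Sketch (assumption): declared in net_driver.h in this series */
	struct efx_rss_context {
		struct list_head list;	/* on efx->rss_context.list */
		u32 context_id;		/* firmware ID, or ..._INVALID */
		u32 user_id;		/* ID as seen by ethtool */
		bool rx_hash_udp_4tuple;
		u32 rx_indir_table[128];
		u8 rx_hash_key[40];
	};

	/* Look up a context by user ID; caller holds efx->rss_lock */
	struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx,
							   u32 id)
	{
		struct efx_rss_context *ctx;

		WARN_ON(!mutex_is_locked(&efx->rss_lock));

		list_for_each_entry(ctx, &efx->rss_context.list, list)
			if (ctx->user_id == id)
				return ctx;
		return NULL;
	}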
static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
const u32 *rx_indir_table,
const u8 *key)
@@ -2956,7 +3022,7 @@ static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
return 0;
if (!key)
- key = efx->rx_hash_key;
+ key = efx->rss_context.rx_hash_key;
rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table, key);
@@ -2965,7 +3031,8 @@ static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
bool mismatch = false;
size_t i;
- for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table) && !mismatch;
+ for (i = 0;
+ i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch;
i++)
mismatch = rx_indir_table[i] !=
ethtool_rxfh_indir_default(i, efx->rss_spread);
@@ -3000,11 +3067,9 @@ static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
const u8 *key
__attribute__ ((unused)))
{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
-
if (user)
return -EOPNOTSUPP;
- if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+ if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID)
return 0;
return efx_ef10_rx_push_shared_rss_config(efx, NULL);
}
@@ -4109,6 +4174,7 @@ efx_ef10_filter_push_prep_set_match_fields(struct efx_nic *efx,
static void efx_ef10_filter_push_prep(struct efx_nic *efx,
const struct efx_filter_spec *spec,
efx_dword_t *inbuf, u64 handle,
+ struct efx_rss_context *ctx,
bool replacing)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
@@ -4116,11 +4182,16 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);
- /* Remove RSS flag if we don't have an RSS context. */
- if (flags & EFX_FILTER_FLAG_RX_RSS &&
- spec->rss_context == EFX_FILTER_RSS_CONTEXT_DEFAULT &&
- nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID)
- flags &= ~EFX_FILTER_FLAG_RX_RSS;
+ /* If RSS filter, caller better have given us an RSS context */
+ if (flags & EFX_FILTER_FLAG_RX_RSS) {
+ /* We don't have the ability to return an error, so we'll just
+ * log a warning and disable RSS for the filter.
+ */
+ if (WARN_ON_ONCE(!ctx))
+ flags &= ~EFX_FILTER_FLAG_RX_RSS;
+ else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID))
+ flags &= ~EFX_FILTER_FLAG_RX_RSS;
+ }
if (replacing) {
MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
@@ -4146,21 +4217,18 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
if (flags & EFX_FILTER_FLAG_RX_RSS)
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT,
- spec->rss_context !=
- EFX_FILTER_RSS_CONTEXT_DEFAULT ?
- spec->rss_context : nic_data->rx_rss_context);
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id);
}
static int efx_ef10_filter_push(struct efx_nic *efx,
- const struct efx_filter_spec *spec,
- u64 *handle, bool replacing)
+ const struct efx_filter_spec *spec, u64 *handle,
+ struct efx_rss_context *ctx, bool replacing)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
int rc;
- efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, replacing);
+ efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
outbuf, sizeof(outbuf), NULL);
if (rc == 0)
@@ -4249,25 +4317,35 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
struct efx_filter_spec *spec,
bool replace_equal)
{
- struct efx_ef10_filter_table *table = efx->filter_state;
DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ struct efx_ef10_filter_table *table;
struct efx_filter_spec *saved_spec;
+ struct efx_rss_context *ctx = NULL;
unsigned int match_pri, hash;
unsigned int priv_flags;
+ bool rss_locked = false;
bool replacing = false;
+ unsigned int depth, i;
int ins_index = -1;
DEFINE_WAIT(wait);
bool is_mc_recip;
s32 rc;
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_write(&table->lock);
+
/* For now, only support RX filters */
if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) !=
- EFX_FILTER_FLAG_RX)
- return -EINVAL;
+ EFX_FILTER_FLAG_RX) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
rc = efx_ef10_filter_pri(table, spec);
if (rc < 0)
- return rc;
+ goto out_unlock;
match_pri = rc;
hash = efx_ef10_filter_hash(spec);
@@ -4275,80 +4353,71 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
if (is_mc_recip)
bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
+ if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+ mutex_lock(&efx->rss_lock);
+ rss_locked = true;
+ if (spec->rss_context)
+ ctx = efx_find_rss_context_entry(efx, spec->rss_context);
+ else
+ ctx = &efx->rss_context;
+ if (!ctx) {
+ rc = -ENOENT;
+ goto out_unlock;
+ }
+ if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+ rc = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+ }
+
/* Find any existing filters with the same match tuple or
- * else a free slot to insert at. If any of them are busy,
- * we have to wait and retry.
+ * else a free slot to insert at.
*/
- for (;;) {
- unsigned int depth = 1;
- unsigned int i;
-
- spin_lock_bh(&efx->filter_lock);
-
- for (;;) {
- i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
- saved_spec = efx_ef10_filter_entry_spec(table, i);
+ for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
+ i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
+ saved_spec = efx_ef10_filter_entry_spec(table, i);
- if (!saved_spec) {
- if (ins_index < 0)
- ins_index = i;
- } else if (efx_ef10_filter_equal(spec, saved_spec)) {
- if (table->entry[i].spec &
- EFX_EF10_FILTER_FLAG_BUSY)
- break;
- if (spec->priority < saved_spec->priority &&
- spec->priority != EFX_FILTER_PRI_AUTO) {
- rc = -EPERM;
- goto out_unlock;
- }
- if (!is_mc_recip) {
- /* This is the only one */
- if (spec->priority ==
- saved_spec->priority &&
- !replace_equal) {
- rc = -EEXIST;
- goto out_unlock;
- }
- ins_index = i;
- goto found;
- } else if (spec->priority >
- saved_spec->priority ||
- (spec->priority ==
- saved_spec->priority &&
- replace_equal)) {
- if (ins_index < 0)
- ins_index = i;
- else
- __set_bit(depth, mc_rem_map);
- }
+ if (!saved_spec) {
+ if (ins_index < 0)
+ ins_index = i;
+ } else if (efx_ef10_filter_equal(spec, saved_spec)) {
+ if (spec->priority < saved_spec->priority &&
+ spec->priority != EFX_FILTER_PRI_AUTO) {
+ rc = -EPERM;
+ goto out_unlock;
}
-
- /* Once we reach the maximum search depth, use
- * the first suitable slot or return -EBUSY if
- * there was none
- */
- if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) {
- if (ins_index < 0) {
- rc = -EBUSY;
+ if (!is_mc_recip) {
+ /* This is the only one */
+ if (spec->priority ==
+ saved_spec->priority &&
+ !replace_equal) {
+ rc = -EEXIST;
goto out_unlock;
}
- goto found;
+ ins_index = i;
+ break;
+ } else if (spec->priority >
+ saved_spec->priority ||
+ (spec->priority ==
+ saved_spec->priority &&
+ replace_equal)) {
+ if (ins_index < 0)
+ ins_index = i;
+ else
+ __set_bit(depth, mc_rem_map);
}
-
- ++depth;
}
-
- prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock_bh(&efx->filter_lock);
- schedule();
}
-found:
- /* Create a software table entry if necessary, and mark it
- * busy. We might yet fail to insert, but any attempt to
- * insert a conflicting filter while we're waiting for the
- * firmware must find the busy entry.
+ /* Once we reach the maximum search depth, use the first suitable
+ * slot, or return -EBUSY if there was none
*/
+ if (ins_index < 0) {
+ rc = -EBUSY;
+ goto out_unlock;
+ }
+
+ /* Create a software table entry if necessary. */
saved_spec = efx_ef10_filter_entry_spec(table, ins_index);
if (saved_spec) {
if (spec->priority == EFX_FILTER_PRI_AUTO &&
@@ -4372,28 +4441,19 @@ found:
*saved_spec = *spec;
priv_flags = 0;
}
- efx_ef10_filter_set_entry(table, ins_index, saved_spec,
- priv_flags | EFX_EF10_FILTER_FLAG_BUSY);
-
- /* Mark lower-priority multicast recipients busy prior to removal */
- if (is_mc_recip) {
- unsigned int depth, i;
-
- for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
- i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
- if (test_bit(depth, mc_rem_map))
- table->entry[i].spec |=
- EFX_EF10_FILTER_FLAG_BUSY;
- }
- }
-
- spin_unlock_bh(&efx->filter_lock);
+ efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags);
+ /* Actually insert the filter on the HW */
rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle,
- replacing);
+ ctx, replacing);
+
+ if (rc == -EINVAL && nic_data->must_realloc_vis)
+ /* The MC rebooted under us, causing it to reject our filter
+ * insertion as pointing to an invalid VI (spec->dmaq_id).
+ */
+ rc = -EAGAIN;
/* Finalise the software table entry */
- spin_lock_bh(&efx->filter_lock);
if (rc == 0) {
if (replacing) {
/* Update the fields that may differ */
@@ -4409,6 +4469,12 @@ found:
} else if (!replacing) {
kfree(saved_spec);
saved_spec = NULL;
+ } else {
+ /* We failed to replace, so the old filter is still present.
+ * Roll back the software table to reflect this. In fact the
+ * efx_ef10_filter_set_entry() call below will do the right
+ * thing, so nothing extra is needed here.
+ */
}
efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags);
@@ -4430,7 +4496,6 @@ found:
priv_flags = efx_ef10_filter_entry_flags(table, i);
if (rc == 0) {
- spin_unlock_bh(&efx->filter_lock);
MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
@@ -4438,15 +4503,12 @@ found:
rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP,
inbuf, sizeof(inbuf),
NULL, 0, NULL);
- spin_lock_bh(&efx->filter_lock);
}
if (rc == 0) {
kfree(saved_spec);
saved_spec = NULL;
priv_flags = 0;
- } else {
- priv_flags &= ~EFX_EF10_FILTER_FLAG_BUSY;
}
efx_ef10_filter_set_entry(table, i, saved_spec,
priv_flags);
@@ -4457,10 +4519,11 @@ found:
if (rc == 0)
rc = efx_ef10_make_filter_id(match_pri, ins_index);
- wake_up_all(&table->waitq);
out_unlock:
- spin_unlock_bh(&efx->filter_lock);
- finish_wait(&table->waitq, &wait);
+ if (rss_locked)
+ mutex_unlock(&efx->rss_lock);
+ up_write(&table->lock);
+ up_read(&efx->filter_sem);
return rc;
}
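[Annotation: note the new -EAGAIN translation above. If the MC rebooted underneath the insert, the filter points at a stale VI and the caller is expected to retry once the reset work has re-allocated VIs. A hypothetical caller-side sketch of that contract; the backoff is illustrative, not from the patch:]

	/* Retry an insert that raced with an MC reboot */
	do {
		rc = efx_ef10_filter_insert(efx, &spec, false);
		if (rc == -EAGAIN)
			msleep(20);	/* let reset work re-alloc VIs */
	} while (rc == -EAGAIN);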
@@ -4473,6 +4536,8 @@ static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx)
* If !by_index, remove by ID
* If by_index, remove by index
* Filter ID may come from userland and must be range-checked.
+ * Caller must hold efx->filter_sem for read, and efx->filter_state->lock
+ * for write.
*/
static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
unsigned int priority_mask,
@@ -4487,45 +4552,23 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
DEFINE_WAIT(wait);
int rc;
- /* Find the software table entry and mark it busy. Don't
- * remove it yet; any attempt to update while we're waiting
- * for the firmware must find the busy entry.
- */
- for (;;) {
- spin_lock_bh(&efx->filter_lock);
- if (!(table->entry[filter_idx].spec &
- EFX_EF10_FILTER_FLAG_BUSY))
- break;
- prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock_bh(&efx->filter_lock);
- schedule();
- }
-
spec = efx_ef10_filter_entry_spec(table, filter_idx);
if (!spec ||
(!by_index &&
efx_ef10_filter_pri(table, spec) !=
- efx_ef10_filter_get_unsafe_pri(filter_id))) {
- rc = -ENOENT;
- goto out_unlock;
- }
+ efx_ef10_filter_get_unsafe_pri(filter_id)))
+ return -ENOENT;
if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO &&
priority_mask == (1U << EFX_FILTER_PRI_AUTO)) {
/* Just remove flags */
spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO;
table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD;
- rc = 0;
- goto out_unlock;
- }
-
- if (!(priority_mask & (1U << spec->priority))) {
- rc = -ENOENT;
- goto out_unlock;
+ return 0;
}
- table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
- spin_unlock_bh(&efx->filter_lock);
+ if (!(priority_mask & (1U << spec->priority)))
+ return -ENOENT;
if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) {
/* Reset to an automatic filter */
@@ -4534,15 +4577,15 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
new_spec.priority = EFX_FILTER_PRI_AUTO;
new_spec.flags = (EFX_FILTER_FLAG_RX |
- (efx_rss_enabled(efx) ?
+ (efx_rss_active(&efx->rss_context) ?
EFX_FILTER_FLAG_RX_RSS : 0));
new_spec.dmaq_id = 0;
- new_spec.rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
+ new_spec.rss_context = 0;
rc = efx_ef10_filter_push(efx, &new_spec,
&table->entry[filter_idx].handle,
+ &efx->rss_context,
true);
- spin_lock_bh(&efx->filter_lock);
if (rc == 0)
*spec = new_spec;
} else {
@@ -4557,7 +4600,6 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP,
inbuf, sizeof(inbuf), NULL, 0, NULL);
- spin_lock_bh(&efx->filter_lock);
if ((rc == 0) || (rc == -ENOENT)) {
/* Filter removed OK or didn't actually exist */
kfree(spec);
@@ -4569,11 +4611,6 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
}
}
- table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY;
- wake_up_all(&table->waitq);
-out_unlock:
- spin_unlock_bh(&efx->filter_lock);
- finish_wait(&table->waitq, &wait);
return rc;
}
@@ -4581,17 +4618,33 @@ static int efx_ef10_filter_remove_safe(struct efx_nic *efx,
enum efx_filter_priority priority,
u32 filter_id)
{
- return efx_ef10_filter_remove_internal(efx, 1U << priority,
- filter_id, false);
+ struct efx_ef10_filter_table *table;
+ int rc;
+
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_write(&table->lock);
+ rc = efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id,
+ false);
+ up_write(&table->lock);
+ up_read(&efx->filter_sem);
+ return rc;
}
+/* Caller must hold efx->filter_sem for read */
static void efx_ef10_filter_remove_unsafe(struct efx_nic *efx,
enum efx_filter_priority priority,
u32 filter_id)
{
+ struct efx_ef10_filter_table *table = efx->filter_state;
+
if (filter_id == EFX_EF10_FILTER_ID_INVALID)
return;
- efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, true);
+
+ down_write(&table->lock);
+ efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id,
+ true);
+ up_write(&table->lock);
}
static int efx_ef10_filter_get_safe(struct efx_nic *efx,
@@ -4599,11 +4652,13 @@ static int efx_ef10_filter_get_safe(struct efx_nic *efx,
u32 filter_id, struct efx_filter_spec *spec)
{
unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id);
- struct efx_ef10_filter_table *table = efx->filter_state;
const struct efx_filter_spec *saved_spec;
+ struct efx_ef10_filter_table *table;
int rc;
- spin_lock_bh(&efx->filter_lock);
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_read(&table->lock);
saved_spec = efx_ef10_filter_entry_spec(table, filter_idx);
if (saved_spec && saved_spec->priority == priority &&
efx_ef10_filter_pri(table, saved_spec) ==
@@ -4613,13 +4668,15 @@ static int efx_ef10_filter_get_safe(struct efx_nic *efx,
} else {
rc = -ENOENT;
}
- spin_unlock_bh(&efx->filter_lock);
+ up_read(&table->lock);
+ up_read(&efx->filter_sem);
return rc;
}
static int efx_ef10_filter_clear_rx(struct efx_nic *efx,
- enum efx_filter_priority priority)
+ enum efx_filter_priority priority)
{
+ struct efx_ef10_filter_table *table;
unsigned int priority_mask;
unsigned int i;
int rc;
@@ -4627,31 +4684,40 @@ static int efx_ef10_filter_clear_rx(struct efx_nic *efx,
priority_mask = (((1U << (priority + 1)) - 1) &
~(1U << EFX_FILTER_PRI_AUTO));
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_write(&table->lock);
for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
rc = efx_ef10_filter_remove_internal(efx, priority_mask,
i, true);
if (rc && rc != -ENOENT)
- return rc;
+ break;
+ rc = 0;
}
- return 0;
+ up_write(&table->lock);
+ up_read(&efx->filter_sem);
+ return rc;
}
static u32 efx_ef10_filter_count_rx_used(struct efx_nic *efx,
enum efx_filter_priority priority)
{
- struct efx_ef10_filter_table *table = efx->filter_state;
+ struct efx_ef10_filter_table *table;
unsigned int filter_idx;
s32 count = 0;
- spin_lock_bh(&efx->filter_lock);
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_read(&table->lock);
for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
if (table->entry[filter_idx].spec &&
efx_ef10_filter_entry_spec(table, filter_idx)->priority ==
priority)
++count;
}
- spin_unlock_bh(&efx->filter_lock);
+ up_read(&table->lock);
+ up_read(&efx->filter_sem);
return count;
}
@@ -4666,12 +4732,15 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
enum efx_filter_priority priority,
u32 *buf, u32 size)
{
- struct efx_ef10_filter_table *table = efx->filter_state;
+ struct efx_ef10_filter_table *table;
struct efx_filter_spec *spec;
unsigned int filter_idx;
s32 count = 0;
- spin_lock_bh(&efx->filter_lock);
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_read(&table->lock);
+
for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
spec = efx_ef10_filter_entry_spec(table, filter_idx);
if (spec && spec->priority == priority) {
@@ -4685,201 +4754,42 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
filter_idx);
}
}
- spin_unlock_bh(&efx->filter_lock);
+ up_read(&table->lock);
+ up_read(&efx->filter_sem);
return count;
}
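[Annotation: across these filter-op hunks the old scheme, the efx->filter_lock spinlock plus per-entry BUSY flags and a waitqueue, is replaced by sleeping locks: efx->filter_sem pins the filter_state pointer, table->lock serialises the entries, and efx->rss_lock covers RSS contexts when a filter references one. The resulting nesting, as a sketch:]

	/* Lock nesting after this patch (outermost first):
	 *   efx->filter_sem (read)   - pins efx->filter_state
	 *   table->lock              - guards table->entry[] (read for
	 *                              lookups, write for updates)
	 *   efx->rss_lock            - guards the RSS context list
	 */
	static int example_filter_op(struct efx_nic *efx)
	{
		struct efx_ef10_filter_table *table;
		int rc = 0;

		down_read(&efx->filter_sem);
		table = efx->filter_state;
		down_write(&table->lock);

		/* ... modify table->entry[] and push to firmware;
		 * MCDI calls may now sleep under these locks ...
		 */

		up_write(&table->lock);
		up_read(&efx->filter_sem);
		return rc;
	}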
#ifdef CONFIG_RFS_ACCEL
-static efx_mcdi_async_completer efx_ef10_filter_rfs_insert_complete;
-
-static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
- struct efx_filter_spec *spec)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
- struct efx_filter_spec *saved_spec;
- unsigned int hash, i, depth = 1;
- bool replacing = false;
- int ins_index = -1;
- u64 cookie;
- s32 rc;
-
- /* Must be an RX filter without RSS and not for a multicast
- * destination address (RFS only works for connected sockets).
- * These restrictions allow us to pass only a tiny amount of
- * data through to the completion function.
- */
- EFX_WARN_ON_PARANOID(spec->flags !=
- (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_SCATTER));
- EFX_WARN_ON_PARANOID(spec->priority != EFX_FILTER_PRI_HINT);
- EFX_WARN_ON_PARANOID(efx_filter_is_mc_recipient(spec));
-
- hash = efx_ef10_filter_hash(spec);
-
- spin_lock_bh(&efx->filter_lock);
-
- /* Find any existing filter with the same match tuple or else
- * a free slot to insert at. If an existing filter is busy,
- * we have to give up.
- */
- for (;;) {
- i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
- saved_spec = efx_ef10_filter_entry_spec(table, i);
-
- if (!saved_spec) {
- if (ins_index < 0)
- ins_index = i;
- } else if (efx_ef10_filter_equal(spec, saved_spec)) {
- if (table->entry[i].spec & EFX_EF10_FILTER_FLAG_BUSY) {
- rc = -EBUSY;
- goto fail_unlock;
- }
- if (spec->priority < saved_spec->priority) {
- rc = -EPERM;
- goto fail_unlock;
- }
- ins_index = i;
- break;
- }
-
- /* Once we reach the maximum search depth, use the
- * first suitable slot or return -EBUSY if there was
- * none
- */
- if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) {
- if (ins_index < 0) {
- rc = -EBUSY;
- goto fail_unlock;
- }
- break;
- }
-
- ++depth;
- }
-
- /* Create a software table entry if necessary, and mark it
- * busy. We might yet fail to insert, but any attempt to
- * insert a conflicting filter while we're waiting for the
- * firmware must find the busy entry.
- */
- saved_spec = efx_ef10_filter_entry_spec(table, ins_index);
- if (saved_spec) {
- replacing = true;
- } else {
- saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC);
- if (!saved_spec) {
- rc = -ENOMEM;
- goto fail_unlock;
- }
- *saved_spec = *spec;
- }
- efx_ef10_filter_set_entry(table, ins_index, saved_spec,
- EFX_EF10_FILTER_FLAG_BUSY);
-
- spin_unlock_bh(&efx->filter_lock);
-
- /* Pack up the variables needed on completion */
- cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id;
-
- efx_ef10_filter_push_prep(efx, spec, inbuf,
- table->entry[ins_index].handle, replacing);
- efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
- MC_CMD_FILTER_OP_OUT_LEN,
- efx_ef10_filter_rfs_insert_complete, cookie);
-
- return ins_index;
-
-fail_unlock:
- spin_unlock_bh(&efx->filter_lock);
- return rc;
-}
-
-static void
-efx_ef10_filter_rfs_insert_complete(struct efx_nic *efx, unsigned long cookie,
- int rc, efx_dword_t *outbuf,
- size_t outlen_actual)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- unsigned int ins_index, dmaq_id;
- struct efx_filter_spec *spec;
- bool replacing;
-
- /* Unpack the cookie */
- replacing = cookie >> 31;
- ins_index = (cookie >> 16) & (HUNT_FILTER_TBL_ROWS - 1);
- dmaq_id = cookie & 0xffff;
-
- spin_lock_bh(&efx->filter_lock);
- spec = efx_ef10_filter_entry_spec(table, ins_index);
- if (rc == 0) {
- table->entry[ins_index].handle =
- MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
- if (replacing)
- spec->dmaq_id = dmaq_id;
- } else if (!replacing) {
- kfree(spec);
- spec = NULL;
- }
- efx_ef10_filter_set_entry(table, ins_index, spec, 0);
- spin_unlock_bh(&efx->filter_lock);
-
- wake_up_all(&table->waitq);
-}
-
-static void
-efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx,
- unsigned long filter_idx,
- int rc, efx_dword_t *outbuf,
- size_t outlen_actual);
-
static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
unsigned int filter_idx)
{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_filter_spec *spec =
- efx_ef10_filter_entry_spec(table, filter_idx);
- MCDI_DECLARE_BUF(inbuf,
- MC_CMD_FILTER_OP_IN_HANDLE_OFST +
- MC_CMD_FILTER_OP_IN_HANDLE_LEN);
-
- if (!spec ||
- (table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAG_BUSY) ||
- spec->priority != EFX_FILTER_PRI_HINT ||
- !rps_may_expire_flow(efx->net_dev, spec->dmaq_id,
- flow_id, filter_idx))
- return false;
-
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
- MC_CMD_FILTER_OP_IN_OP_REMOVE);
- MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
- table->entry[filter_idx].handle);
- if (efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), 0,
- efx_ef10_filter_rfs_expire_complete, filter_idx))
- return false;
+ struct efx_ef10_filter_table *table;
+ struct efx_filter_spec *spec;
+ bool ret;
- table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
- return true;
-}
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_write(&table->lock);
+ spec = efx_ef10_filter_entry_spec(table, filter_idx);
-static void
-efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx,
- unsigned long filter_idx,
- int rc, efx_dword_t *outbuf,
- size_t outlen_actual)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_filter_spec *spec =
- efx_ef10_filter_entry_spec(table, filter_idx);
+ if (!spec || spec->priority != EFX_FILTER_PRI_HINT) {
+ ret = true;
+ goto out_unlock;
+ }
- spin_lock_bh(&efx->filter_lock);
- if (rc == 0) {
- kfree(spec);
- efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
+ if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id,
+ flow_id, filter_idx)) {
+ ret = false;
+ goto out_unlock;
}
- table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY;
- wake_up_all(&table->waitq);
- spin_unlock_bh(&efx->filter_lock);
+
+ ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority,
+ filter_idx, true) == 0;
+out_unlock:
+ up_write(&table->lock);
+ up_read(&efx->filter_sem);
+ return ret;
}
#endif /* CONFIG_RFS_ACCEL */
@@ -5074,9 +4984,9 @@ static int efx_ef10_filter_table_probe(struct efx_nic *efx)
table->vlan_filter =
!!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
INIT_LIST_HEAD(&table->vlan_list);
+ init_rwsem(&table->lock);
efx->filter_state = table;
- init_waitqueue_head(&table->waitq);
list_for_each_entry(vlan, &nic_data->vlan_list, list) {
rc = efx_ef10_filter_add_vlan(efx, vlan->vid);
@@ -5104,6 +5014,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
unsigned int invalid_filters = 0, failed = 0;
struct efx_ef10_filter_vlan *vlan;
struct efx_filter_spec *spec;
+ struct efx_rss_context *ctx;
unsigned int filter_idx;
u32 mcdi_flags;
int match_pri;
@@ -5117,7 +5028,8 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
if (!table)
return;
- spin_lock_bh(&efx->filter_lock);
+ down_write(&table->lock);
+ mutex_lock(&efx->rss_lock);
for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
spec = efx_ef10_filter_entry_spec(table, filter_idx);
@@ -5133,20 +5045,32 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
invalid_filters++;
goto not_restored;
}
- if (spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT &&
- spec->rss_context != nic_data->rx_rss_context)
- netif_warn(efx, drv, efx->net_dev,
- "Warning: unable to restore a filter with specific RSS context.\n");
-
- table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
- spin_unlock_bh(&efx->filter_lock);
+ if (spec->rss_context)
+ ctx = efx_find_rss_context_entry(efx, spec->rss_context);
+ else
+ ctx = &efx->rss_context;
+ if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+ if (!ctx) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Warning: unable to restore a filter with nonexistent RSS context %u.\n",
+ spec->rss_context);
+ invalid_filters++;
+ goto not_restored;
+ }
+ if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Warning: unable to restore a filter with RSS context %u as it was not created.\n",
+ spec->rss_context);
+ invalid_filters++;
+ goto not_restored;
+ }
+ }
rc = efx_ef10_filter_push(efx, spec,
&table->entry[filter_idx].handle,
- false);
+ ctx, false);
if (rc)
failed++;
- spin_lock_bh(&efx->filter_lock);
if (rc) {
not_restored:
@@ -5158,13 +5082,11 @@ not_restored:
kfree(spec);
efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
- } else {
- table->entry[filter_idx].spec &=
- ~EFX_EF10_FILTER_FLAG_BUSY;
}
}
- spin_unlock_bh(&efx->filter_lock);
+ mutex_unlock(&efx->rss_lock);
+ up_write(&table->lock);
/* This can happen validly if the MC's capabilities have changed, so
* is not an error.
@@ -5232,6 +5154,8 @@ static void efx_ef10_filter_mark_one_old(struct efx_nic *efx, uint16_t *id)
struct efx_ef10_filter_table *table = efx->filter_state;
unsigned int filter_idx;
+ efx_rwsem_assert_write_locked(&table->lock);
+
if (*id != EFX_EF10_FILTER_ID_INVALID) {
filter_idx = efx_ef10_filter_get_unsafe_id(*id);
if (!table->entry[filter_idx].spec)
@@ -5267,10 +5191,10 @@ static void efx_ef10_filter_mark_old(struct efx_nic *efx)
struct efx_ef10_filter_table *table = efx->filter_state;
struct efx_ef10_filter_vlan *vlan;
- spin_lock_bh(&efx->filter_lock);
+ down_write(&table->lock);
list_for_each_entry(vlan, &table->vlan_list, list)
_efx_ef10_filter_vlan_mark_old(efx, vlan);
- spin_unlock_bh(&efx->filter_lock);
+ up_write(&table->lock);
}
static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx)
@@ -5547,10 +5471,7 @@ static int efx_ef10_filter_insert_def(struct efx_nic *efx,
return rc;
}
-/* Remove filters that weren't renewed. Since nothing else changes the AUTO_OLD
- * flag or removes these filters, we don't need to hold the filter_lock while
- * scanning for these filters.
- */
+/* Remove filters that weren't renewed. */
static void efx_ef10_filter_remove_old(struct efx_nic *efx)
{
struct efx_ef10_filter_table *table = efx->filter_state;
@@ -5559,6 +5480,7 @@ static void efx_ef10_filter_remove_old(struct efx_nic *efx)
int rc;
int i;
+ down_write(&table->lock);
for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
if (READ_ONCE(table->entry[i].spec) &
EFX_EF10_FILTER_FLAG_AUTO_OLD) {
@@ -5570,6 +5492,7 @@ static void efx_ef10_filter_remove_old(struct efx_nic *efx)
remove_failed++;
}
}
+ up_write(&table->lock);
if (remove_failed)
netif_info(efx, drv, efx->net_dev,
@@ -6698,7 +6621,6 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
.filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
- .filter_rfs_insert = efx_ef10_filter_rfs_insert,
.filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD
@@ -6784,6 +6706,9 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.tx_limit_len = efx_ef10_tx_limit_len,
.rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
.rx_pull_rss_config = efx_ef10_rx_pull_rss_config,
+ .rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config,
+ .rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config,
+ .rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts,
.rx_probe = efx_ef10_rx_probe,
.rx_init = efx_ef10_rx_init,
.rx_remove = efx_ef10_rx_remove,
@@ -6808,7 +6733,6 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
.filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
- .filter_rfs_insert = efx_ef10_filter_rfs_insert,
.filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 16757cfc5b29..692dd729ee2a 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -340,7 +340,10 @@ static int efx_poll(struct napi_struct *napi, int budget)
efx_update_irq_mod(efx, channel);
}
- efx_filter_rfs_expire(channel);
+#ifdef CONFIG_RFS_ACCEL
+ /* Perhaps expire some ARFS filters */
+ schedule_work(&channel->filter_work);
+#endif
/* There is no race here; although napi_disable() will
* only wait for napi_complete(), this isn't a problem
@@ -470,6 +473,10 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
tx_queue->channel = channel;
}
+#ifdef CONFIG_RFS_ACCEL
+ INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
+#endif
+
rx_queue = &channel->rx_queue;
rx_queue->efx = efx;
timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
@@ -512,6 +519,9 @@ efx_copy_channel(const struct efx_channel *old_channel)
rx_queue->buffer = NULL;
memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
+#ifdef CONFIG_RFS_ACCEL
+ INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
+#endif
return channel;
}
@@ -1353,12 +1363,13 @@ static void efx_fini_io(struct efx_nic *efx)
pci_disable_device(efx->pci_dev);
}
-void efx_set_default_rx_indir_table(struct efx_nic *efx)
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
{
size_t i;
- for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
- efx->rx_indir_table[i] =
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+ ctx->rx_indir_table[i] =
ethtool_rxfh_indir_default(i, efx->rss_spread);
}
@@ -1739,9 +1750,9 @@ static int efx_probe_nic(struct efx_nic *efx)
} while (rc == -EAGAIN);
if (efx->n_channels > 1)
- netdev_rss_key_fill(&efx->rx_hash_key,
- sizeof(efx->rx_hash_key));
- efx_set_default_rx_indir_table(efx);
+ netdev_rss_key_fill(efx->rss_context.rx_hash_key,
+ sizeof(efx->rss_context.rx_hash_key));
+ efx_set_default_rx_indir_table(efx, &efx->rss_context);
netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
@@ -1772,7 +1783,6 @@ static int efx_probe_filters(struct efx_nic *efx)
{
int rc;
- spin_lock_init(&efx->filter_lock);
init_rwsem(&efx->filter_sem);
mutex_lock(&efx->mac_lock);
down_write(&efx->filter_sem);
@@ -2647,6 +2657,7 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
efx_disable_interrupts(efx);
mutex_lock(&efx->mac_lock);
+ mutex_lock(&efx->rss_lock);
if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
method != RESET_TYPE_DATAPATH)
efx->phy_op->fini(efx);
@@ -2700,6 +2711,9 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
" VFs may not function\n", rc);
#endif
+ if (efx->type->rx_restore_rss_contexts)
+ efx->type->rx_restore_rss_contexts(efx);
+ mutex_unlock(&efx->rss_lock);
down_read(&efx->filter_sem);
efx_restore_filters(efx);
up_read(&efx->filter_sem);
@@ -2718,6 +2732,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
fail:
efx->port_initialized = false;
+ mutex_unlock(&efx->rss_lock);
mutex_unlock(&efx->mac_lock);
return rc;
@@ -3003,11 +3018,16 @@ static int efx_init_struct(struct efx_nic *efx,
efx->type->rx_hash_offset - efx->type->rx_prefix_size;
efx->rx_packet_ts_offset =
efx->type->rx_ts_offset - efx->type->rx_prefix_size;
+ INIT_LIST_HEAD(&efx->rss_context.list);
+ mutex_init(&efx->rss_lock);
spin_lock_init(&efx->stats_lock);
efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
efx->num_mac_stats = MC_CMD_MAC_NSTATS;
BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
mutex_init(&efx->mac_lock);
+#ifdef CONFIG_RFS_ACCEL
+ mutex_init(&efx->rps_mutex);
+#endif
efx->phy_op = &efx_dummy_phy_operations;
efx->mdio.dev = net_dev;
INIT_WORK(&efx->mac_work, efx_mac_work);
@@ -3072,6 +3092,61 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
}
+/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
+ * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
+ */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
+{
+ struct list_head *head = &efx->rss_context.list;
+ struct efx_rss_context *ctx, *new;
+ u32 id = 1; /* Don't use zero, that refers to the master RSS context */
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ /* Search for first gap in the numbering */
+ list_for_each_entry(ctx, head, list) {
+ if (ctx->user_id != id)
+ break;
+ id++;
+ /* Check for wrap. If this happens, we have nearly 2^32
+ * allocated RSS contexts, which seems unlikely.
+ */
+ if (WARN_ON_ONCE(!id))
+ return NULL;
+ }
+
+ /* Create the new entry */
+ new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ new->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ new->rx_hash_udp_4tuple = false;
+
+ /* Insert the new entry into the gap */
+ new->user_id = id;
+ list_add_tail(&new->list, &ctx->list);
+ return new;
+}
+
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
+{
+ struct list_head *head = &efx->rss_context.list;
+ struct efx_rss_context *ctx;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ list_for_each_entry(ctx, head, list)
+ if (ctx->user_id == id)
+ return ctx;
+ return NULL;
+}
+
+void efx_free_rss_context_entry(struct efx_rss_context *ctx)
+{
+ list_del(&ctx->list);
+ kfree(ctx);
+}
+
/**************************************************************************
*
* PCI interface
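
The allocator above keeps the context list sorted by user_id and walks it until the next expected ID is missing, so freed IDs are reused before new ones are minted. A standalone sketch of the same first-gap search over a sorted singly-linked list (names and the demo flow are illustrative, not the driver's):

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	unsigned int user_id;
	struct ctx *next;
};

/* Insert a new node at the first gap in a list kept sorted by user_id.
 * IDs start at 1; 0 is reserved for the default context, as in the driver.
 */
static struct ctx *alloc_ctx(struct ctx **head)
{
	struct ctx **pos = head, *new;
	unsigned int id = 1;

	while (*pos && (*pos)->user_id == id) {
		id++;
		pos = &(*pos)->next;
	}
	new = malloc(sizeof(*new));
	if (!new)
		return NULL;
	new->user_id = id;
	new->next = *pos;	/* splice into the gap */
	*pos = new;
	return new;
}

int main(void)
{
	struct ctx *head = NULL, *a, *b;

	a = alloc_ctx(&head);	/* user_id 1 */
	b = alloc_ctx(&head);	/* user_id 2 */
	printf("%u %u\n", a->user_id, b->user_id);
	head->next = b->next;	/* drop ID 2 (sketch: no free-list bookkeeping) */
	free(b);
	printf("%u\n", alloc_ctx(&head)->user_id);	/* 2 again: gap reused */
	return 0;
}
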
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 0cddc5ad77b1..a3140e16fcef 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -34,7 +34,8 @@ extern unsigned int efx_piobuf_size;
extern bool efx_separate_tx_channels;
/* RX */
-void efx_set_default_rx_indir_table(struct efx_nic *efx);
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
void efx_rx_config_page_split(struct efx_nic *efx);
int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
@@ -169,19 +170,31 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx,
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned quota);
-static inline void efx_filter_rfs_expire(struct efx_channel *channel)
+static inline void efx_filter_rfs_expire(struct work_struct *data)
{
+ struct efx_channel *channel = container_of(data, struct efx_channel,
+ filter_work);
+
if (channel->rfs_filters_added >= 60 &&
__efx_filter_rfs_expire(channel->efx, 100))
channel->rfs_filters_added -= 60;
}
#define efx_filter_rfs_enabled() 1
#else
-static inline void efx_filter_rfs_expire(struct efx_channel *channel) {}
+static inline void efx_filter_rfs_expire(struct work_struct *data) {}
#define efx_filter_rfs_enabled() 0
#endif
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
+/* RSS contexts */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
+void efx_free_rss_context_entry(struct efx_rss_context *ctx);
+static inline bool efx_rss_active(struct efx_rss_context *ctx)
+{
+ return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID;
+}
+
/* Channels */
int efx_channel_dummy_op_int(struct efx_channel *channel);
void efx_channel_dummy_op_void(struct efx_channel *channel);
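
efx_filter_rfs_expire() now receives the work_struct embedded in the channel and recovers the channel with container_of(). That macro is plain pointer arithmetic; a minimal userspace rendition of the same pattern, using offsetof from <stddef.h>:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work { int pending; };

struct channel {
	int index;
	struct work filter_work;	/* embedded, as in struct efx_channel */
};

static void expire(struct work *data)
{
	struct channel *ch = container_of(data, struct channel, filter_work);

	printf("expiring filters on channel %d\n", ch->index);
}

int main(void)
{
	struct channel ch = { .index = 3 };

	expire(&ch.filter_work);	/* prints "channel 3" */
	return 0;
}
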
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 4db2dc2bf52f..3143588ffd77 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -808,7 +808,8 @@ static inline void ip6_fill_mask(__be32 *mask)
}
static int efx_ethtool_get_class_rule(struct efx_nic *efx,
- struct ethtool_rx_flow_spec *rule)
+ struct ethtool_rx_flow_spec *rule,
+ u32 *rss_context)
{
struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
@@ -964,6 +965,11 @@ static int efx_ethtool_get_class_rule(struct efx_nic *efx,
rule->m_ext.vlan_tci = htons(0xfff);
}
+ if (spec.flags & EFX_FILTER_FLAG_RX_RSS) {
+ rule->flow_type |= FLOW_RSS;
+ *rss_context = spec.rss_context;
+ }
+
return rc;
}
@@ -972,6 +978,8 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
struct ethtool_rxnfc *info, u32 *rule_locs)
{
struct efx_nic *efx = netdev_priv(net_dev);
+ u32 rss_context = 0;
+ s32 rc = 0;
switch (info->cmd) {
case ETHTOOL_GRXRINGS:
@@ -979,12 +987,22 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
return 0;
case ETHTOOL_GRXFH: {
+ struct efx_rss_context *ctx = &efx->rss_context;
+
+ mutex_lock(&efx->rss_lock);
+ if (info->flow_type & FLOW_RSS && info->rss_context) {
+ ctx = efx_find_rss_context_entry(efx, info->rss_context);
+ if (!ctx) {
+ rc = -ENOENT;
+ goto out_unlock;
+ }
+ }
info->data = 0;
- if (!efx->rss_active) /* No RSS */
- return 0;
- switch (info->flow_type) {
+ if (!efx_rss_active(ctx)) /* No RSS */
+ goto out_unlock;
+ switch (info->flow_type & ~FLOW_RSS) {
case UDP_V4_FLOW:
- if (efx->rx_hash_udp_4tuple)
+ if (ctx->rx_hash_udp_4tuple)
/* fall through */
case TCP_V4_FLOW:
info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
@@ -995,7 +1013,7 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
info->data |= RXH_IP_SRC | RXH_IP_DST;
break;
case UDP_V6_FLOW:
- if (efx->rx_hash_udp_4tuple)
+ if (ctx->rx_hash_udp_4tuple)
/* fall through */
case TCP_V6_FLOW:
info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
@@ -1008,7 +1026,9 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
default:
break;
}
- return 0;
+out_unlock:
+ mutex_unlock(&efx->rss_lock);
+ return rc;
}
case ETHTOOL_GRXCLSRLCNT:
@@ -1023,10 +1043,14 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
case ETHTOOL_GRXCLSRULE:
if (efx_filter_get_rx_id_limit(efx) == 0)
return -EOPNOTSUPP;
- return efx_ethtool_get_class_rule(efx, &info->fs);
+ rc = efx_ethtool_get_class_rule(efx, &info->fs, &rss_context);
+ if (rc < 0)
+ return rc;
+ if (info->fs.flow_type & FLOW_RSS)
+ info->rss_context = rss_context;
+ return 0;
- case ETHTOOL_GRXCLSRLALL: {
- s32 rc;
+ case ETHTOOL_GRXCLSRLALL:
info->data = efx_filter_get_rx_id_limit(efx);
if (info->data == 0)
return -EOPNOTSUPP;
@@ -1036,7 +1060,6 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
return rc;
info->rule_cnt = rc;
return 0;
- }
default:
return -EOPNOTSUPP;
@@ -1054,7 +1077,8 @@ static inline bool ip6_mask_is_empty(__be32 mask[4])
}
static int efx_ethtool_set_class_rule(struct efx_nic *efx,
- struct ethtool_rx_flow_spec *rule)
+ struct ethtool_rx_flow_spec *rule,
+ u32 rss_context)
{
struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
@@ -1064,8 +1088,10 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec;
struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec;
struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
+ u32 flow_type = rule->flow_type & ~(FLOW_EXT | FLOW_RSS);
struct ethhdr *mac_entry = &rule->h_u.ether_spec;
struct ethhdr *mac_mask = &rule->m_u.ether_spec;
+ enum efx_filter_flags flags = 0;
struct efx_filter_spec spec;
int rc;
@@ -1084,19 +1110,26 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
rule->m_ext.data[1]))
return -EINVAL;
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
- efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
+ if (efx->rx_scatter)
+ flags |= EFX_FILTER_FLAG_RX_SCATTER;
+ if (rule->flow_type & FLOW_RSS)
+ flags |= EFX_FILTER_FLAG_RX_RSS;
+
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, flags,
(rule->ring_cookie == RX_CLS_FLOW_DISC) ?
EFX_FILTER_RX_DMAQ_ID_DROP : rule->ring_cookie);
- switch (rule->flow_type & ~FLOW_EXT) {
+ if (rule->flow_type & FLOW_RSS)
+ spec.rss_context = rss_context;
+
+ switch (flow_type) {
case TCP_V4_FLOW:
case UDP_V4_FLOW:
spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
EFX_FILTER_MATCH_IP_PROTO);
spec.ether_type = htons(ETH_P_IP);
- spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V4_FLOW ?
- IPPROTO_TCP : IPPROTO_UDP);
+ spec.ip_proto = flow_type == TCP_V4_FLOW ? IPPROTO_TCP
+ : IPPROTO_UDP;
if (ip_mask->ip4dst) {
if (ip_mask->ip4dst != IP4_ADDR_FULL_MASK)
return -EINVAL;
@@ -1130,8 +1163,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
EFX_FILTER_MATCH_IP_PROTO);
spec.ether_type = htons(ETH_P_IPV6);
- spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V6_FLOW ?
- IPPROTO_TCP : IPPROTO_UDP);
+ spec.ip_proto = flow_type == TCP_V6_FLOW ? IPPROTO_TCP
+ : IPPROTO_UDP;
if (!ip6_mask_is_empty(ip6_mask->ip6dst)) {
if (!ip6_mask_is_full(ip6_mask->ip6dst))
return -EINVAL;
@@ -1265,7 +1298,8 @@ static int efx_ethtool_set_rxnfc(struct net_device *net_dev,
switch (info->cmd) {
case ETHTOOL_SRXCLSRLINS:
- return efx_ethtool_set_class_rule(efx, &info->fs);
+ return efx_ethtool_set_class_rule(efx, &info->fs,
+ info->rss_context);
case ETHTOOL_SRXCLSRLDEL:
return efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_MANUAL,
@@ -1280,7 +1314,9 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
{
struct efx_nic *efx = netdev_priv(net_dev);
- return (efx->n_rx_channels == 1) ? 0 : ARRAY_SIZE(efx->rx_indir_table);
+ if (efx->n_rx_channels == 1)
+ return 0;
+ return ARRAY_SIZE(efx->rss_context.rx_indir_table);
}
static u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev)
@@ -1303,9 +1339,11 @@ static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
if (hfunc)
*hfunc = ETH_RSS_HASH_TOP;
if (indir)
- memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table));
+ memcpy(indir, efx->rss_context.rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
if (key)
- memcpy(key, efx->rx_hash_key, efx->type->rx_hash_key_size);
+ memcpy(key, efx->rss_context.rx_hash_key,
+ efx->type->rx_hash_key_size);
return 0;
}
@@ -1321,13 +1359,109 @@ static int efx_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir,
return 0;
if (!key)
- key = efx->rx_hash_key;
+ key = efx->rss_context.rx_hash_key;
if (!indir)
- indir = efx->rx_indir_table;
+ indir = efx->rss_context.rx_indir_table;
return efx->type->rx_push_rss_config(efx, true, indir, key);
}
+static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
+ u8 *key, u8 *hfunc, u32 rss_context)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ struct efx_rss_context *ctx;
+ int rc = 0;
+
+ if (!efx->type->rx_pull_rss_context_config)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&efx->rss_lock);
+ ctx = efx_find_rss_context_entry(efx, rss_context);
+ if (!ctx) {
+ rc = -ENOENT;
+ goto out_unlock;
+ }
+ rc = efx->type->rx_pull_rss_context_config(efx, ctx);
+ if (rc)
+ goto out_unlock;
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+ if (indir)
+ memcpy(indir, ctx->rx_indir_table, sizeof(ctx->rx_indir_table));
+ if (key)
+ memcpy(key, ctx->rx_hash_key, efx->type->rx_hash_key_size);
+out_unlock:
+ mutex_unlock(&efx->rss_lock);
+ return rc;
+}
+
+static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
+ const u32 *indir, const u8 *key,
+ const u8 hfunc, u32 *rss_context,
+ bool delete)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ struct efx_rss_context *ctx;
+ bool allocated = false;
+ int rc;
+
+ if (!efx->type->rx_push_rss_context_config)
+ return -EOPNOTSUPP;
+ /* Hash function is Toeplitz, cannot be changed */
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&efx->rss_lock);
+
+ if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+ if (delete) {
+ /* alloc + delete == Nothing to do */
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+ ctx = efx_alloc_rss_context_entry(efx);
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+ ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ /* Initialise indir table and key to defaults */
+ efx_set_default_rx_indir_table(efx, ctx);
+ netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
+ allocated = true;
+ } else {
+ ctx = efx_find_rss_context_entry(efx, *rss_context);
+ if (!ctx) {
+ rc = -ENOENT;
+ goto out_unlock;
+ }
+ }
+
+ if (delete) {
+ /* delete this context */
+ rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL);
+ if (!rc)
+ efx_free_rss_context_entry(ctx);
+ goto out_unlock;
+ }
+
+ if (!key)
+ key = ctx->rx_hash_key;
+ if (!indir)
+ indir = ctx->rx_indir_table;
+
+ rc = efx->type->rx_push_rss_context_config(efx, ctx, indir, key);
+ if (rc && allocated)
+ efx_free_rss_context_entry(ctx);
+ else
+ *rss_context = ctx->user_id;
+out_unlock:
+ mutex_unlock(&efx->rss_lock);
+ return rc;
+}
+
static int efx_ethtool_get_ts_info(struct net_device *net_dev,
struct ethtool_ts_info *ts_info)
{
@@ -1375,6 +1509,36 @@ static int efx_ethtool_get_module_info(struct net_device *net_dev,
return ret;
}
+static int efx_ethtool_get_fecparam(struct net_device *net_dev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ int rc;
+
+ if (!efx->phy_op || !efx->phy_op->get_fecparam)
+ return -EOPNOTSUPP;
+ mutex_lock(&efx->mac_lock);
+ rc = efx->phy_op->get_fecparam(efx, fecparam);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
+static int efx_ethtool_set_fecparam(struct net_device *net_dev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ int rc;
+
+ if (!efx->phy_op || !efx->phy_op->set_fecparam)
+ return -EOPNOTSUPP;
+ mutex_lock(&efx->mac_lock);
+ rc = efx->phy_op->set_fecparam(efx, fecparam);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
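
With these ops wired up, userspace can query and set the FEC mode through the standard ETHTOOL_GFECPARAM/ETHTOOL_SFECPARAM ioctls. A hedged sketch of the read side; "eth0" is an illustrative interface name, and it assumes a kernel new enough to have struct ethtool_fecparam:

#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct ethtool_fecparam fec = { .cmd = ETHTOOL_GFECPARAM };
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* illustrative name */
	ifr.ifr_data = (void *)&fec;
	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("fec bits 0x%x, active 0x%x\n", fec.fec, fec.active_fec);
	else
		perror("ETHTOOL_GFECPARAM");
	close(fd);
	return 0;
}
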
const struct ethtool_ops efx_ethtool_ops = {
.get_drvinfo = efx_ethtool_get_drvinfo,
.get_regs_len = efx_ethtool_get_regs_len,
@@ -1403,9 +1567,13 @@ const struct ethtool_ops efx_ethtool_ops = {
.get_rxfh_key_size = efx_ethtool_get_rxfh_key_size,
.get_rxfh = efx_ethtool_get_rxfh,
.set_rxfh = efx_ethtool_set_rxfh,
+ .get_rxfh_context = efx_ethtool_get_rxfh_context,
+ .set_rxfh_context = efx_ethtool_set_rxfh_context,
.get_ts_info = efx_ethtool_get_ts_info,
.get_module_info = efx_ethtool_get_module_info,
.get_module_eeprom = efx_ethtool_get_module_eeprom,
.get_link_ksettings = efx_ethtool_get_link_ksettings,
.set_link_ksettings = efx_ethtool_set_link_ksettings,
+ .get_fecparam = efx_ethtool_get_fecparam,
+ .set_fecparam = efx_ethtool_set_fecparam,
};
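
For the RSS-context path, allocation is driven entirely from userspace: passing rss_context == ETH_RXFH_CONTEXT_ALLOC through ETHTOOL_SRSSH asks the driver to mint a context, and the allocated user_id comes back in the same field. A hedged sketch, assuming a kernel with this series applied, CAP_NET_ADMIN, and an sfc interface named eth0 (both illustrative):

#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct ethtool_rxfh rxfh = {
		.cmd = ETHTOOL_SRSSH,
		.rss_context = ETH_RXFH_CONTEXT_ALLOC,	/* request a fresh context */
		.hfunc = ETH_RSS_HASH_TOP,		/* the only hash sfc accepts */
		/* indir_size/key_size left 0: driver fills in defaults */
	};
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* illustrative name */
	ifr.ifr_data = (void *)&rxfh;
	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("allocated RSS context %u\n", rxfh.rss_context);
	else
		perror("ETHTOOL_SRSSH");
	close(fd);
	return 0;
}

The returned user_id can then be attached to ntuple filters via FLOW_RSS, as handled in efx_ethtool_set_class_rule() above.
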
diff --git a/drivers/net/ethernet/sfc/falcon/enum.h b/drivers/net/ethernet/sfc/falcon/enum.h
index 30a1136fc909..4824fcf5c3d4 100644
--- a/drivers/net/ethernet/sfc/falcon/enum.h
+++ b/drivers/net/ethernet/sfc/falcon/enum.h
@@ -81,7 +81,6 @@ enum ef4_loopback_mode {
(1 << LOOPBACK_XAUI) | \
(1 << LOOPBACK_GMII) | \
(1 << LOOPBACK_SGMII) | \
- (1 << LOOPBACK_SGMII) | \
(1 << LOOPBACK_XGBR) | \
(1 << LOOPBACK_XFI) | \
(1 << LOOPBACK_XAUI_FAR) | \
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 266b9bee1f3a..4a19c7efdf8d 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -1630,12 +1630,12 @@ void efx_farch_rx_push_indir_table(struct efx_nic *efx)
size_t i = 0;
efx_dword_t dword;
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
FR_BZ_RX_INDIRECTION_TBL_ROWS);
for (i = 0; i < FR_BZ_RX_INDIRECTION_TBL_ROWS; i++) {
EFX_POPULATE_DWORD_1(dword, FRF_BZ_IT_QUEUE,
- efx->rx_indir_table[i]);
+ efx->rss_context.rx_indir_table[i]);
efx_writed(efx, &dword,
FR_BZ_RX_INDIRECTION_TBL +
FR_BZ_RX_INDIRECTION_TBL_STEP * i);
@@ -1647,14 +1647,14 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
size_t i = 0;
efx_dword_t dword;
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
FR_BZ_RX_INDIRECTION_TBL_ROWS);
for (i = 0; i < FR_BZ_RX_INDIRECTION_TBL_ROWS; i++) {
efx_readd(efx, &dword,
FR_BZ_RX_INDIRECTION_TBL +
FR_BZ_RX_INDIRECTION_TBL_STEP * i);
- efx->rx_indir_table[i] = EFX_DWORD_FIELD(dword, FRF_BZ_IT_QUEUE);
+ efx->rss_context.rx_indir_table[i] = EFX_DWORD_FIELD(dword, FRF_BZ_IT_QUEUE);
}
}
@@ -1878,6 +1878,7 @@ struct efx_farch_filter_table {
};
struct efx_farch_filter_state {
+ struct rw_semaphore lock; /* Protects table contents */
struct efx_farch_filter_table table[EFX_FARCH_FILTER_TABLE_COUNT];
};
@@ -2032,8 +2033,7 @@ efx_farch_filter_from_gen_spec(struct efx_farch_filter_spec *spec,
{
bool is_full = false;
- if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) &&
- gen_spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT)
+ if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) && gen_spec->rss_context)
return -EINVAL;
spec->priority = gen_spec->priority;
@@ -2398,9 +2398,13 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
if (rc)
return rc;
+ down_write(&state->lock);
+
table = &state->table[efx_farch_filter_spec_table_id(&spec)];
- if (table->size == 0)
- return -EINVAL;
+ if (table->size == 0) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
netif_vdbg(efx, hw, efx->net_dev,
"%s: type %d search_limit=%d", __func__, spec.type,
@@ -2413,8 +2417,6 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
EFX_FARCH_FILTER_MC_DEF - EFX_FARCH_FILTER_UC_DEF);
rep_index = spec.type - EFX_FARCH_FILTER_UC_DEF;
ins_index = rep_index;
-
- spin_lock_bh(&efx->filter_lock);
} else {
/* Search concurrently for
* (1) a filter to be replaced (rep_index): any filter
@@ -2444,8 +2446,6 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
ins_index = -1;
depth = 1;
- spin_lock_bh(&efx->filter_lock);
-
for (;;) {
if (!test_bit(i, table->used_bitmap)) {
if (ins_index < 0)
@@ -2464,7 +2464,7 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
/* Case (b) */
if (ins_index < 0) {
rc = -EBUSY;
- goto out;
+ goto out_unlock;
}
rep_index = -1;
break;
@@ -2484,11 +2484,11 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
if (spec.priority == saved_spec->priority && !replace_equal) {
rc = -EEXIST;
- goto out;
+ goto out_unlock;
}
if (spec.priority < saved_spec->priority) {
rc = -EPERM;
- goto out;
+ goto out_unlock;
}
if (saved_spec->priority == EFX_FILTER_PRI_AUTO ||
saved_spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO)
@@ -2529,8 +2529,8 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
__func__, spec.type, ins_index, spec.dmaq_id);
rc = efx_farch_filter_make_id(&spec, ins_index);
-out:
- spin_unlock_bh(&efx->filter_lock);
+out_unlock:
+ up_write(&state->lock);
return rc;
}
@@ -2605,11 +2605,11 @@ int efx_farch_filter_remove_safe(struct efx_nic *efx,
filter_idx = efx_farch_filter_id_index(filter_id);
if (filter_idx >= table->size)
return -ENOENT;
+ down_write(&state->lock);
spec = &table->spec[filter_idx];
- spin_lock_bh(&efx->filter_lock);
rc = efx_farch_filter_remove(efx, table, filter_idx, priority);
- spin_unlock_bh(&efx->filter_lock);
+ up_write(&state->lock);
return rc;
}
@@ -2623,30 +2623,28 @@ int efx_farch_filter_get_safe(struct efx_nic *efx,
struct efx_farch_filter_table *table;
struct efx_farch_filter_spec *spec;
unsigned int filter_idx;
- int rc;
+ int rc = -ENOENT;
+
+ down_read(&state->lock);
table_id = efx_farch_filter_id_table_id(filter_id);
if ((unsigned int)table_id >= EFX_FARCH_FILTER_TABLE_COUNT)
- return -ENOENT;
+ goto out_unlock;
table = &state->table[table_id];
filter_idx = efx_farch_filter_id_index(filter_id);
if (filter_idx >= table->size)
- return -ENOENT;
+ goto out_unlock;
spec = &table->spec[filter_idx];
- spin_lock_bh(&efx->filter_lock);
-
if (test_bit(filter_idx, table->used_bitmap) &&
spec->priority == priority) {
efx_farch_filter_to_gen_spec(spec_buf, spec);
rc = 0;
- } else {
- rc = -ENOENT;
}
- spin_unlock_bh(&efx->filter_lock);
-
+out_unlock:
+ up_read(&state->lock);
return rc;
}
@@ -2659,13 +2657,13 @@ efx_farch_filter_table_clear(struct efx_nic *efx,
struct efx_farch_filter_table *table = &state->table[table_id];
unsigned int filter_idx;
- spin_lock_bh(&efx->filter_lock);
+ down_write(&state->lock);
for (filter_idx = 0; filter_idx < table->size; ++filter_idx) {
if (table->spec[filter_idx].priority != EFX_FILTER_PRI_AUTO)
efx_farch_filter_remove(efx, table,
filter_idx, priority);
}
- spin_unlock_bh(&efx->filter_lock);
+ up_write(&state->lock);
}
int efx_farch_filter_clear_rx(struct efx_nic *efx,
@@ -2689,7 +2687,7 @@ u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
unsigned int filter_idx;
u32 count = 0;
- spin_lock_bh(&efx->filter_lock);
+ down_read(&state->lock);
for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
@@ -2702,7 +2700,7 @@ u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
}
}
- spin_unlock_bh(&efx->filter_lock);
+ up_read(&state->lock);
return count;
}
@@ -2717,7 +2715,7 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
unsigned int filter_idx;
s32 count = 0;
- spin_lock_bh(&efx->filter_lock);
+ down_read(&state->lock);
for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
@@ -2736,7 +2734,7 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
}
}
out:
- spin_unlock_bh(&efx->filter_lock);
+ up_read(&state->lock);
return count;
}
@@ -2750,7 +2748,7 @@ void efx_farch_filter_table_restore(struct efx_nic *efx)
efx_oword_t filter;
unsigned int filter_idx;
- spin_lock_bh(&efx->filter_lock);
+ down_write(&state->lock);
for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) {
table = &state->table[table_id];
@@ -2771,7 +2769,7 @@ void efx_farch_filter_table_restore(struct efx_nic *efx)
efx_farch_filter_push_rx_config(efx);
efx_farch_filter_push_tx_limits(efx);
- spin_unlock_bh(&efx->filter_lock);
+ up_write(&state->lock);
}
void efx_farch_filter_table_remove(struct efx_nic *efx)
@@ -2865,7 +2863,7 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
efx_oword_t filter;
unsigned int filter_idx;
- spin_lock_bh(&efx->filter_lock);
+ down_write(&state->lock);
for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
@@ -2897,33 +2895,30 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
efx_farch_filter_push_rx_config(efx);
- spin_unlock_bh(&efx->filter_lock);
+ up_write(&state->lock);
}
#ifdef CONFIG_RFS_ACCEL
-s32 efx_farch_filter_rfs_insert(struct efx_nic *efx,
- struct efx_filter_spec *gen_spec)
-{
- return efx_farch_filter_insert(efx, gen_spec, true);
-}
-
bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
unsigned int index)
{
struct efx_farch_filter_state *state = efx->filter_state;
- struct efx_farch_filter_table *table =
- &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
+ struct efx_farch_filter_table *table;
+ bool ret = false;
+ down_write(&state->lock);
+ table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
if (test_bit(index, table->used_bitmap) &&
table->spec[index].priority == EFX_FILTER_PRI_HINT &&
rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id,
flow_id, index)) {
efx_farch_filter_table_clear_entry(efx, table, index);
- return true;
+ ret = true;
}
- return false;
+ up_write(&state->lock);
+ return ret;
}
#endif /* CONFIG_RFS_ACCEL */
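
The conversion from the single bottom-half spinlock to a per-state rw_semaphore lets lookups (get, count, get_rx_ids) run concurrently while inserts, removals and restores still serialise as writers, and it permits sleeping with the lock held. A userspace analogy with POSIX rwlocks (compile with -pthread; the table and operations are invented for illustration):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static int filter_count;

/* Readers may run in parallel, like down_read(&state->lock). */
static void *count_filters(void *arg)
{
	(void)arg;
	pthread_rwlock_rdlock(&table_lock);
	printf("count: %d\n", filter_count);
	pthread_rwlock_unlock(&table_lock);
	return NULL;
}

/* Writers are exclusive, like down_write(&state->lock). */
static void *insert_filter(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&table_lock);
	filter_count++;
	pthread_rwlock_unlock(&table_lock);
	return NULL;
}

int main(void)
{
	pthread_t t[3];

	pthread_create(&t[0], NULL, insert_filter, NULL);
	pthread_create(&t[1], NULL, count_filters, NULL);
	pthread_create(&t[2], NULL, count_filters, NULL);
	for (int i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}
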
diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h
index 8189a1cd973f..59021ad6d98d 100644
--- a/drivers/net/ethernet/sfc/filter.h
+++ b/drivers/net/ethernet/sfc/filter.h
@@ -125,7 +125,9 @@ enum efx_encap_type {
* @match_flags: Match type flags, from &enum efx_filter_match_flags
* @priority: Priority of the filter, from &enum efx_filter_priority
* @flags: Miscellaneous flags, from &enum efx_filter_flags
- * @rss_context: RSS context to use, if %EFX_FILTER_FLAG_RX_RSS is set
+ * @rss_context: RSS context to use, if %EFX_FILTER_FLAG_RX_RSS is set. This
+ * is a user_id (with 0 meaning the driver/default RSS context), not an
+ * MCFW context_id.
* @dmaq_id: Source/target queue index, or %EFX_FILTER_RX_DMAQ_ID_DROP for
* an RX drop filter
* @outer_vid: Outer VLAN ID to match, if %EFX_FILTER_MATCH_OUTER_VID is set
@@ -173,7 +175,6 @@ struct efx_filter_spec {
};
enum {
- EFX_FILTER_RSS_CONTEXT_DEFAULT = 0xffffffff,
EFX_FILTER_RX_DMAQ_ID_DROP = 0xfff
};
@@ -185,7 +186,7 @@ static inline void efx_filter_init_rx(struct efx_filter_spec *spec,
memset(spec, 0, sizeof(*spec));
spec->priority = priority;
spec->flags = EFX_FILTER_FLAG_RX | flags;
- spec->rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
+ spec->rss_context = 0;
spec->dmaq_id = rxq_id;
}
diff --git a/drivers/net/ethernet/sfc/mcdi_mon.c b/drivers/net/ethernet/sfc/mcdi_mon.c
index f97da05952c7..f17751559ccc 100644
--- a/drivers/net/ethernet/sfc/mcdi_mon.c
+++ b/drivers/net/ethernet/sfc/mcdi_mon.c
@@ -298,7 +298,7 @@ efx_mcdi_mon_add_attr(struct efx_nic *efx, const char *name,
attr->limit_value = limit_value;
sysfs_attr_init(&attr->dev_attr.attr);
attr->dev_attr.attr.name = attr->name;
- attr->dev_attr.attr.mode = S_IRUGO;
+ attr->dev_attr.attr.mode = 0444;
attr->dev_attr.show = reader;
hwmon->group.attrs[hwmon->n_attrs++] = &attr->dev_attr.attr;
}
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index 869d76f8f589..3839eec783ea 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -273,7 +273,8 @@
#define MC_CMD_ERR_NO_PRIVILEGE 0x1013
/* Workaround 26807 could not be turned on/off because some functions
* have already installed filters. See the comment at
- * MC_CMD_WORKAROUND_BUG26807. */
+ * MC_CMD_WORKAROUND_BUG26807.
+ * May also be returned for other operations such as sub-variant switching. */
#define MC_CMD_ERR_FILTERS_PRESENT 0x1014
/* The clock whose frequency you've attempted to set
* doesn't exist on this NIC */
@@ -292,6 +293,10 @@
* away. This is distinct from MC_CMD_ERR_DATAPATH_DISABLED in that the
* datapath absence may be temporary. */
#define MC_CMD_ERR_NO_DATAPATH 0x1019
+/* The operation could not complete because some VIs are allocated */
+#define MC_CMD_ERR_VIS_PRESENT 0x101a
+/* The operation could not complete because some PIO buffers are allocated */
+#define MC_CMD_ERR_PIOBUFS_PRESENT 0x101b
#define MC_CMD_ERR_CODE_OFST 0
@@ -312,10 +317,17 @@
#define SIENA_MC_BOOTROM_COPYCODE_VEC (0x800 - 3 * 0x4)
#define HUNT_MC_BOOTROM_COPYCODE_VEC (0x8000 - 3 * 0x4)
#define MEDFORD_MC_BOOTROM_COPYCODE_VEC (0x10000 - 3 * 0x4)
-/* Points to the recovery mode entry point. */
+/* Points to the recovery mode entry point. Misnamed but kept for compatibility. */
#define SIENA_MC_BOOTROM_NOFLASH_VEC (0x800 - 2 * 0x4)
#define HUNT_MC_BOOTROM_NOFLASH_VEC (0x8000 - 2 * 0x4)
#define MEDFORD_MC_BOOTROM_NOFLASH_VEC (0x10000 - 2 * 0x4)
+/* Points to the recovery mode entry point. Same as above, but the right name. */
+#define SIENA_MC_BOOTROM_RECOVERY_VEC (0x800 - 2 * 0x4)
+#define HUNT_MC_BOOTROM_RECOVERY_VEC (0x8000 - 2 * 0x4)
+#define MEDFORD_MC_BOOTROM_RECOVERY_VEC (0x10000 - 2 * 0x4)
+
+/* Points to noflash mode entry point. */
+#define MEDFORD_MC_BOOTROM_REAL_NOFLASH_VEC (0x10000 - 4 * 0x4)
/* The command set exported by the boot ROM (MCDI v0) */
#define MC_CMD_GET_VERSION_V0_SUPPORTED_FUNCS { \
@@ -365,7 +377,7 @@
#define MCDI_EVENT_LEVEL_LBN 33
#define MCDI_EVENT_LEVEL_WIDTH 3
/* enum: Info. */
-#define MCDI_EVENT_LEVEL_INFO 0x0
+#define MCDI_EVENT_LEVEL_INFO 0x0
/* enum: Warning. */
#define MCDI_EVENT_LEVEL_WARN 0x1
/* enum: Error. */
@@ -385,21 +397,21 @@
#define MCDI_EVENT_LINKCHANGE_SPEED_LBN 16
#define MCDI_EVENT_LINKCHANGE_SPEED_WIDTH 4
/* enum: Link is down or link speed could not be determined */
-#define MCDI_EVENT_LINKCHANGE_SPEED_UNKNOWN 0x0
+#define MCDI_EVENT_LINKCHANGE_SPEED_UNKNOWN 0x0
/* enum: 100Mbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_100M 0x1
+#define MCDI_EVENT_LINKCHANGE_SPEED_100M 0x1
/* enum: 1Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_1G 0x2
+#define MCDI_EVENT_LINKCHANGE_SPEED_1G 0x2
/* enum: 10Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_10G 0x3
+#define MCDI_EVENT_LINKCHANGE_SPEED_10G 0x3
/* enum: 40Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_40G 0x4
+#define MCDI_EVENT_LINKCHANGE_SPEED_40G 0x4
/* enum: 25Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_25G 0x5
+#define MCDI_EVENT_LINKCHANGE_SPEED_25G 0x5
/* enum: 50Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_50G 0x6
+#define MCDI_EVENT_LINKCHANGE_SPEED_50G 0x6
/* enum: 100Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_100G 0x7
+#define MCDI_EVENT_LINKCHANGE_SPEED_100G 0x7
#define MCDI_EVENT_LINKCHANGE_FCNTL_LBN 20
#define MCDI_EVENT_LINKCHANGE_FCNTL_WIDTH 4
#define MCDI_EVENT_LINKCHANGE_LINK_FLAGS_LBN 24
@@ -606,23 +618,23 @@
/* enum: Transmit error */
#define MCDI_EVENT_CODE_TX_ERR 0xb
/* enum: Tx flush has completed */
-#define MCDI_EVENT_CODE_TX_FLUSH 0xc
+#define MCDI_EVENT_CODE_TX_FLUSH 0xc
/* enum: PTP packet received timestamp */
-#define MCDI_EVENT_CODE_PTP_RX 0xd
+#define MCDI_EVENT_CODE_PTP_RX 0xd
/* enum: PTP NIC failure */
-#define MCDI_EVENT_CODE_PTP_FAULT 0xe
+#define MCDI_EVENT_CODE_PTP_FAULT 0xe
/* enum: PTP PPS event */
-#define MCDI_EVENT_CODE_PTP_PPS 0xf
+#define MCDI_EVENT_CODE_PTP_PPS 0xf
/* enum: Rx flush has completed */
-#define MCDI_EVENT_CODE_RX_FLUSH 0x10
+#define MCDI_EVENT_CODE_RX_FLUSH 0x10
/* enum: Receive error */
#define MCDI_EVENT_CODE_RX_ERR 0x11
/* enum: AOE fault */
-#define MCDI_EVENT_CODE_AOE 0x12
+#define MCDI_EVENT_CODE_AOE 0x12
/* enum: Network port calibration failed (VCAL). */
-#define MCDI_EVENT_CODE_VCAL_FAIL 0x13
+#define MCDI_EVENT_CODE_VCAL_FAIL 0x13
/* enum: HW PPS event */
-#define MCDI_EVENT_CODE_HW_PPS 0x14
+#define MCDI_EVENT_CODE_HW_PPS 0x14
/* enum: The MC has rebooted (huntington and later, siena uses CODE_REBOOT and
* a different format)
*/
@@ -654,7 +666,7 @@
/* enum: Artificial event generated by host and posted via MC for test
* purposes.
*/
-#define MCDI_EVENT_CODE_TESTGEN 0xfa
+#define MCDI_EVENT_CODE_TESTGEN 0xfa
#define MCDI_EVENT_CMDDONE_DATA_OFST 0
#define MCDI_EVENT_CMDDONE_DATA_LEN 4
#define MCDI_EVENT_CMDDONE_DATA_LBN 0
@@ -784,7 +796,7 @@
#define FCDI_EVENT_LEVEL_LBN 33
#define FCDI_EVENT_LEVEL_WIDTH 3
/* enum: Info. */
-#define FCDI_EVENT_LEVEL_INFO 0x0
+#define FCDI_EVENT_LEVEL_INFO 0x0
/* enum: Warning. */
#define FCDI_EVENT_LEVEL_WARN 0x1
/* enum: Error. */
@@ -916,7 +928,7 @@
#define MUM_EVENT_LEVEL_LBN 33
#define MUM_EVENT_LEVEL_WIDTH 3
/* enum: Info. */
-#define MUM_EVENT_LEVEL_INFO 0x0
+#define MUM_EVENT_LEVEL_INFO 0x0
/* enum: Warning. */
#define MUM_EVENT_LEVEL_WARN 0x1
/* enum: Error. */
@@ -1002,7 +1014,9 @@
/***********************************/
/* MC_CMD_READ32
- * Read multiple 32byte words from MC memory.
+ * Read multiple 32byte words from MC memory. Note - this command really
+ * belongs to the INSECURE category but is required by shmboot. The command handler
+ * has additional checks to reject insecure calls.
*/
#define MC_CMD_READ32 0x1
@@ -1050,7 +1064,9 @@
/***********************************/
/* MC_CMD_COPYCODE
- * Copy MC code between two locations and jump.
+ * Copy MC code between two locations and jump. Note - this command really
+ * belongs to the INSECURE category but is required by shmboot. The command handler
+ * has additional checks to reject insecure calls.
*/
#define MC_CMD_COPYCODE 0x3
@@ -1139,7 +1155,7 @@
#define MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_OFST 0
#define MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_LEN 4
/* enum: indicates that the MC wasn't flash booted */
-#define MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_NULL 0xdeadbeef
+#define MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_NULL 0xdeadbeef
#define MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_OFST 4
#define MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_LEN 4
#define MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_WATCHDOG_LBN 0
@@ -1555,11 +1571,10 @@
#define MC_CMD_PTP_IN_MANFTEST_PACKET_TEST_ENABLE_OFST 8
#define MC_CMD_PTP_IN_MANFTEST_PACKET_TEST_ENABLE_LEN 4
-/* MC_CMD_PTP_IN_RESET_STATS msgrequest */
+/* MC_CMD_PTP_IN_RESET_STATS msgrequest: Reset PTP statistics */
#define MC_CMD_PTP_IN_RESET_STATS_LEN 8
/* MC_CMD_PTP_IN_CMD_OFST 0 */
/* MC_CMD_PTP_IN_CMD_LEN 4 */
-/* Reset PTP statistics */
/* MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
/* MC_CMD_PTP_IN_PERIPH_ID_LEN 4 */
@@ -1710,11 +1725,10 @@
/* enum: External. */
#define MC_CMD_PTP_CLK_SRC_EXTERNAL 0x1
-/* MC_CMD_PTP_IN_RST_CLK msgrequest */
+/* MC_CMD_PTP_IN_RST_CLK msgrequest: Reset value of Timer Reg. */
#define MC_CMD_PTP_IN_RST_CLK_LEN 8
/* MC_CMD_PTP_IN_CMD_OFST 0 */
/* MC_CMD_PTP_IN_CMD_LEN 4 */
-/* Reset value of Timer Reg. */
/* MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
/* MC_CMD_PTP_IN_PERIPH_ID_LEN 4 */
@@ -2687,8 +2701,16 @@
#define MC_CMD_DRV_ATTACH_IN_NEW_STATE_LEN 4
#define MC_CMD_DRV_ATTACH_LBN 0
#define MC_CMD_DRV_ATTACH_WIDTH 1
+#define MC_CMD_DRV_ATTACH_IN_ATTACH_LBN 0
+#define MC_CMD_DRV_ATTACH_IN_ATTACH_WIDTH 1
#define MC_CMD_DRV_PREBOOT_LBN 1
#define MC_CMD_DRV_PREBOOT_WIDTH 1
+#define MC_CMD_DRV_ATTACH_IN_PREBOOT_LBN 1
+#define MC_CMD_DRV_ATTACH_IN_PREBOOT_WIDTH 1
+#define MC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_LBN 2
+#define MC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_WIDTH 1
+#define MC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_LBN 3
+#define MC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_WIDTH 1
/* 1 to set new state, or 0 to just report the existing state */
#define MC_CMD_DRV_ATTACH_IN_UPDATE_OFST 4
#define MC_CMD_DRV_ATTACH_IN_UPDATE_LEN 4
@@ -2711,8 +2733,14 @@
* support
*/
#define MC_CMD_FW_RULES_ENGINE 0x5
+/* enum: Prefer to use firmware with additional DPDK support */
+#define MC_CMD_FW_DPDK 0x6
+/* enum: Prefer to use "l3xudp" custom datapath firmware (see SF-119495-PD and
+ * bug69716)
+ */
+#define MC_CMD_FW_L3XUDP 0x7
/* enum: Only this option is allowed for non-admin functions */
-#define MC_CMD_FW_DONT_CARE 0xffffffff
+#define MC_CMD_FW_DONT_CARE 0xffffffff
/* MC_CMD_DRV_ATTACH_OUT msgresponse */
#define MC_CMD_DRV_ATTACH_OUT_LEN 4
@@ -2740,6 +2768,11 @@
* refers to the Sorrento external FPGA port.
*/
#define MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_NO_ACTIVE_PORT 0x3
+/* enum: If set, indicates that VI spreading is currently enabled. Will always
+ * indicate the current state, regardless of the value in the WANT_VI_SPREADING
+ * input.
+ */
+#define MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_VI_SPREADING_ENABLED 0x4
/***********************************/
@@ -3294,83 +3327,83 @@
#define MC_CMD_GET_LOOPBACK_MODES_OUT_100M_LO_OFST 0
#define MC_CMD_GET_LOOPBACK_MODES_OUT_100M_HI_OFST 4
/* enum: None. */
-#define MC_CMD_LOOPBACK_NONE 0x0
+#define MC_CMD_LOOPBACK_NONE 0x0
/* enum: Data. */
-#define MC_CMD_LOOPBACK_DATA 0x1
+#define MC_CMD_LOOPBACK_DATA 0x1
/* enum: GMAC. */
-#define MC_CMD_LOOPBACK_GMAC 0x2
+#define MC_CMD_LOOPBACK_GMAC 0x2
/* enum: XGMII. */
#define MC_CMD_LOOPBACK_XGMII 0x3
/* enum: XGXS. */
-#define MC_CMD_LOOPBACK_XGXS 0x4
+#define MC_CMD_LOOPBACK_XGXS 0x4
/* enum: XAUI. */
-#define MC_CMD_LOOPBACK_XAUI 0x5
+#define MC_CMD_LOOPBACK_XAUI 0x5
/* enum: GMII. */
-#define MC_CMD_LOOPBACK_GMII 0x6
+#define MC_CMD_LOOPBACK_GMII 0x6
/* enum: SGMII. */
-#define MC_CMD_LOOPBACK_SGMII 0x7
+#define MC_CMD_LOOPBACK_SGMII 0x7
/* enum: XGBR. */
-#define MC_CMD_LOOPBACK_XGBR 0x8
+#define MC_CMD_LOOPBACK_XGBR 0x8
/* enum: XFI. */
-#define MC_CMD_LOOPBACK_XFI 0x9
+#define MC_CMD_LOOPBACK_XFI 0x9
/* enum: XAUI Far. */
-#define MC_CMD_LOOPBACK_XAUI_FAR 0xa
+#define MC_CMD_LOOPBACK_XAUI_FAR 0xa
/* enum: GMII Far. */
-#define MC_CMD_LOOPBACK_GMII_FAR 0xb
+#define MC_CMD_LOOPBACK_GMII_FAR 0xb
/* enum: SGMII Far. */
-#define MC_CMD_LOOPBACK_SGMII_FAR 0xc
+#define MC_CMD_LOOPBACK_SGMII_FAR 0xc
/* enum: XFI Far. */
-#define MC_CMD_LOOPBACK_XFI_FAR 0xd
+#define MC_CMD_LOOPBACK_XFI_FAR 0xd
/* enum: GPhy. */
-#define MC_CMD_LOOPBACK_GPHY 0xe
+#define MC_CMD_LOOPBACK_GPHY 0xe
/* enum: PhyXS. */
-#define MC_CMD_LOOPBACK_PHYXS 0xf
+#define MC_CMD_LOOPBACK_PHYXS 0xf
/* enum: PCS. */
-#define MC_CMD_LOOPBACK_PCS 0x10
+#define MC_CMD_LOOPBACK_PCS 0x10
/* enum: PMA-PMD. */
-#define MC_CMD_LOOPBACK_PMAPMD 0x11
+#define MC_CMD_LOOPBACK_PMAPMD 0x11
/* enum: Cross-Port. */
-#define MC_CMD_LOOPBACK_XPORT 0x12
+#define MC_CMD_LOOPBACK_XPORT 0x12
/* enum: XGMII-Wireside. */
-#define MC_CMD_LOOPBACK_XGMII_WS 0x13
+#define MC_CMD_LOOPBACK_XGMII_WS 0x13
/* enum: XAUI Wireside. */
-#define MC_CMD_LOOPBACK_XAUI_WS 0x14
+#define MC_CMD_LOOPBACK_XAUI_WS 0x14
/* enum: XAUI Wireside Far. */
-#define MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15
+#define MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15
/* enum: XAUI Wireside near. */
-#define MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16
+#define MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16
/* enum: GMII Wireside. */
-#define MC_CMD_LOOPBACK_GMII_WS 0x17
+#define MC_CMD_LOOPBACK_GMII_WS 0x17
/* enum: XFI Wireside. */
-#define MC_CMD_LOOPBACK_XFI_WS 0x18
+#define MC_CMD_LOOPBACK_XFI_WS 0x18
/* enum: XFI Wireside Far. */
-#define MC_CMD_LOOPBACK_XFI_WS_FAR 0x19
+#define MC_CMD_LOOPBACK_XFI_WS_FAR 0x19
/* enum: PhyXS Wireside. */
-#define MC_CMD_LOOPBACK_PHYXS_WS 0x1a
+#define MC_CMD_LOOPBACK_PHYXS_WS 0x1a
/* enum: PMA lanes MAC-Serdes. */
-#define MC_CMD_LOOPBACK_PMA_INT 0x1b
+#define MC_CMD_LOOPBACK_PMA_INT 0x1b
/* enum: KR Serdes Parallel (Encoder). */
-#define MC_CMD_LOOPBACK_SD_NEAR 0x1c
+#define MC_CMD_LOOPBACK_SD_NEAR 0x1c
/* enum: KR Serdes Serial. */
-#define MC_CMD_LOOPBACK_SD_FAR 0x1d
+#define MC_CMD_LOOPBACK_SD_FAR 0x1d
/* enum: PMA lanes MAC-Serdes Wireside. */
-#define MC_CMD_LOOPBACK_PMA_INT_WS 0x1e
+#define MC_CMD_LOOPBACK_PMA_INT_WS 0x1e
/* enum: KR Serdes Parallel Wireside (Full PCS). */
-#define MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f
+#define MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f
/* enum: KR Serdes Parallel Wireside (Sym Aligner to TX). */
-#define MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20
+#define MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20
/* enum: KR Serdes Parallel Wireside (Deserializer to Serializer). */
-#define MC_CMD_LOOPBACK_SD_FEP_WS 0x21
+#define MC_CMD_LOOPBACK_SD_FEP_WS 0x21
/* enum: KR Serdes Serial Wireside. */
-#define MC_CMD_LOOPBACK_SD_FES_WS 0x22
+#define MC_CMD_LOOPBACK_SD_FES_WS 0x22
/* enum: Near side of AOE Siena side port */
-#define MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23
+#define MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23
/* enum: Medford Wireside datapath loopback */
-#define MC_CMD_LOOPBACK_DATA_WS 0x24
+#define MC_CMD_LOOPBACK_DATA_WS 0x24
/* enum: Force link up without setting up any physical loopback (snapper use
* only)
*/
-#define MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25
+#define MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25
/* Supported loopbacks. */
#define MC_CMD_GET_LOOPBACK_MODES_OUT_1G_OFST 8
#define MC_CMD_GET_LOOPBACK_MODES_OUT_1G_LEN 8
@@ -3410,83 +3443,83 @@
#define MC_CMD_GET_LOOPBACK_MODES_OUT_V2_100M_LO_OFST 0
#define MC_CMD_GET_LOOPBACK_MODES_OUT_V2_100M_HI_OFST 4
/* enum: None. */
-/* MC_CMD_LOOPBACK_NONE 0x0 */
+/* MC_CMD_LOOPBACK_NONE 0x0 */
/* enum: Data. */
-/* MC_CMD_LOOPBACK_DATA 0x1 */
+/* MC_CMD_LOOPBACK_DATA 0x1 */
/* enum: GMAC. */
-/* MC_CMD_LOOPBACK_GMAC 0x2 */
+/* MC_CMD_LOOPBACK_GMAC 0x2 */
/* enum: XGMII. */
/* MC_CMD_LOOPBACK_XGMII 0x3 */
/* enum: XGXS. */
-/* MC_CMD_LOOPBACK_XGXS 0x4 */
+/* MC_CMD_LOOPBACK_XGXS 0x4 */
/* enum: XAUI. */
-/* MC_CMD_LOOPBACK_XAUI 0x5 */
+/* MC_CMD_LOOPBACK_XAUI 0x5 */
/* enum: GMII. */
-/* MC_CMD_LOOPBACK_GMII 0x6 */
+/* MC_CMD_LOOPBACK_GMII 0x6 */
/* enum: SGMII. */
-/* MC_CMD_LOOPBACK_SGMII 0x7 */
+/* MC_CMD_LOOPBACK_SGMII 0x7 */
/* enum: XGBR. */
-/* MC_CMD_LOOPBACK_XGBR 0x8 */
+/* MC_CMD_LOOPBACK_XGBR 0x8 */
/* enum: XFI. */
-/* MC_CMD_LOOPBACK_XFI 0x9 */
+/* MC_CMD_LOOPBACK_XFI 0x9 */
/* enum: XAUI Far. */
-/* MC_CMD_LOOPBACK_XAUI_FAR 0xa */
+/* MC_CMD_LOOPBACK_XAUI_FAR 0xa */
/* enum: GMII Far. */
-/* MC_CMD_LOOPBACK_GMII_FAR 0xb */
+/* MC_CMD_LOOPBACK_GMII_FAR 0xb */
/* enum: SGMII Far. */
-/* MC_CMD_LOOPBACK_SGMII_FAR 0xc */
+/* MC_CMD_LOOPBACK_SGMII_FAR 0xc */
/* enum: XFI Far. */
-/* MC_CMD_LOOPBACK_XFI_FAR 0xd */
+/* MC_CMD_LOOPBACK_XFI_FAR 0xd */
/* enum: GPhy. */
-/* MC_CMD_LOOPBACK_GPHY 0xe */
+/* MC_CMD_LOOPBACK_GPHY 0xe */
/* enum: PhyXS. */
-/* MC_CMD_LOOPBACK_PHYXS 0xf */
+/* MC_CMD_LOOPBACK_PHYXS 0xf */
/* enum: PCS. */
-/* MC_CMD_LOOPBACK_PCS 0x10 */
+/* MC_CMD_LOOPBACK_PCS 0x10 */
/* enum: PMA-PMD. */
-/* MC_CMD_LOOPBACK_PMAPMD 0x11 */
+/* MC_CMD_LOOPBACK_PMAPMD 0x11 */
/* enum: Cross-Port. */
-/* MC_CMD_LOOPBACK_XPORT 0x12 */
+/* MC_CMD_LOOPBACK_XPORT 0x12 */
/* enum: XGMII-Wireside. */
-/* MC_CMD_LOOPBACK_XGMII_WS 0x13 */
+/* MC_CMD_LOOPBACK_XGMII_WS 0x13 */
/* enum: XAUI Wireside. */
-/* MC_CMD_LOOPBACK_XAUI_WS 0x14 */
+/* MC_CMD_LOOPBACK_XAUI_WS 0x14 */
/* enum: XAUI Wireside Far. */
-/* MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15 */
+/* MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15 */
/* enum: XAUI Wireside near. */
-/* MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16 */
+/* MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16 */
/* enum: GMII Wireside. */
-/* MC_CMD_LOOPBACK_GMII_WS 0x17 */
+/* MC_CMD_LOOPBACK_GMII_WS 0x17 */
/* enum: XFI Wireside. */
-/* MC_CMD_LOOPBACK_XFI_WS 0x18 */
+/* MC_CMD_LOOPBACK_XFI_WS 0x18 */
/* enum: XFI Wireside Far. */
-/* MC_CMD_LOOPBACK_XFI_WS_FAR 0x19 */
+/* MC_CMD_LOOPBACK_XFI_WS_FAR 0x19 */
/* enum: PhyXS Wireside. */
-/* MC_CMD_LOOPBACK_PHYXS_WS 0x1a */
+/* MC_CMD_LOOPBACK_PHYXS_WS 0x1a */
/* enum: PMA lanes MAC-Serdes. */
-/* MC_CMD_LOOPBACK_PMA_INT 0x1b */
+/* MC_CMD_LOOPBACK_PMA_INT 0x1b */
/* enum: KR Serdes Parallel (Encoder). */
-/* MC_CMD_LOOPBACK_SD_NEAR 0x1c */
+/* MC_CMD_LOOPBACK_SD_NEAR 0x1c */
/* enum: KR Serdes Serial. */
-/* MC_CMD_LOOPBACK_SD_FAR 0x1d */
+/* MC_CMD_LOOPBACK_SD_FAR 0x1d */
/* enum: PMA lanes MAC-Serdes Wireside. */
-/* MC_CMD_LOOPBACK_PMA_INT_WS 0x1e */
+/* MC_CMD_LOOPBACK_PMA_INT_WS 0x1e */
/* enum: KR Serdes Parallel Wireside (Full PCS). */
-/* MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f */
+/* MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f */
/* enum: KR Serdes Parallel Wireside (Sym Aligner to TX). */
-/* MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20 */
+/* MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20 */
/* enum: KR Serdes Parallel Wireside (Deserializer to Serializer). */
-/* MC_CMD_LOOPBACK_SD_FEP_WS 0x21 */
+/* MC_CMD_LOOPBACK_SD_FEP_WS 0x21 */
/* enum: KR Serdes Serial Wireside. */
-/* MC_CMD_LOOPBACK_SD_FES_WS 0x22 */
+/* MC_CMD_LOOPBACK_SD_FES_WS 0x22 */
/* enum: Near side of AOE Siena side port */
-/* MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23 */
+/* MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23 */
/* enum: Medford Wireside datapath loopback */
-/* MC_CMD_LOOPBACK_DATA_WS 0x24 */
+/* MC_CMD_LOOPBACK_DATA_WS 0x24 */
/* enum: Force link up without setting up any physical loopback (snapper use
* only)
*/
-/* MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25 */
+/* MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25 */
/* Supported loopbacks. */
#define MC_CMD_GET_LOOPBACK_MODES_OUT_V2_1G_OFST 8
#define MC_CMD_GET_LOOPBACK_MODES_OUT_V2_1G_LEN 8
@@ -3537,6 +3570,37 @@
/* Enum values, see field(s): */
/* 100M */
+/* AN_TYPE structuredef: Auto-negotiation types defined in IEEE802.3 */
+#define AN_TYPE_LEN 4
+#define AN_TYPE_TYPE_OFST 0
+#define AN_TYPE_TYPE_LEN 4
+/* enum: None, AN disabled or not supported */
+#define MC_CMD_AN_NONE 0x0
+/* enum: Clause 28 - BASE-T */
+#define MC_CMD_AN_CLAUSE28 0x1
+/* enum: Clause 37 - BASE-X */
+#define MC_CMD_AN_CLAUSE37 0x2
+/* enum: Clause 73 - BASE-R startup protocol for backplane and copper cable
+ * assemblies. Includes Clause 72/Clause 92 link-training.
+ */
+#define MC_CMD_AN_CLAUSE73 0x3
+#define AN_TYPE_TYPE_LBN 0
+#define AN_TYPE_TYPE_WIDTH 32
+
+/* FEC_TYPE structuredef: Forward error correction types defined in IEEE802.3
+ */
+#define FEC_TYPE_LEN 4
+#define FEC_TYPE_TYPE_OFST 0
+#define FEC_TYPE_TYPE_LEN 4
+/* enum: No FEC */
+#define MC_CMD_FEC_NONE 0x0
+/* enum: Clause 74 BASE-R FEC (a.k.a Firecode) */
+#define MC_CMD_FEC_BASER 0x1
+/* enum: Clause 91/Clause 108 Reed-Solomon FEC */
+#define MC_CMD_FEC_RS 0x2
+#define FEC_TYPE_TYPE_LBN 0
+#define FEC_TYPE_TYPE_WIDTH 32
+
/***********************************/
/* MC_CMD_GET_LINK
@@ -3552,10 +3616,14 @@
/* MC_CMD_GET_LINK_OUT msgresponse */
#define MC_CMD_GET_LINK_OUT_LEN 28
-/* near-side advertised capabilities */
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
#define MC_CMD_GET_LINK_OUT_CAP_OFST 0
#define MC_CMD_GET_LINK_OUT_CAP_LEN 4
-/* link-partner advertised capabilities */
+/* Link-partner advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
#define MC_CMD_GET_LINK_OUT_LP_CAP_OFST 4
#define MC_CMD_GET_LINK_OUT_LP_CAP_LEN 4
/* Autonegotiated speed in mbit/s. The link may still be down even if this
@@ -3598,6 +3666,97 @@
#define MC_CMD_MAC_FAULT_PENDING_RECONFIG_LBN 3
#define MC_CMD_MAC_FAULT_PENDING_RECONFIG_WIDTH 1
+/* MC_CMD_GET_LINK_OUT_V2 msgresponse: Extended link state information */
+#define MC_CMD_GET_LINK_OUT_V2_LEN 44
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define MC_CMD_GET_LINK_OUT_V2_CAP_OFST 0
+#define MC_CMD_GET_LINK_OUT_V2_CAP_LEN 4
+/* Link-partner advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define MC_CMD_GET_LINK_OUT_V2_LP_CAP_OFST 4
+#define MC_CMD_GET_LINK_OUT_V2_LP_CAP_LEN 4
+/* Autonegotiated speed in mbit/s. The link may still be down even if this
+ * reads non-zero.
+ */
+#define MC_CMD_GET_LINK_OUT_V2_LINK_SPEED_OFST 8
+#define MC_CMD_GET_LINK_OUT_V2_LINK_SPEED_LEN 4
+/* Current loopback setting. */
+#define MC_CMD_GET_LINK_OUT_V2_LOOPBACK_MODE_OFST 12
+#define MC_CMD_GET_LINK_OUT_V2_LOOPBACK_MODE_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_GET_LOOPBACK_MODES/MC_CMD_GET_LOOPBACK_MODES_OUT/100M */
+#define MC_CMD_GET_LINK_OUT_V2_FLAGS_OFST 16
+#define MC_CMD_GET_LINK_OUT_V2_FLAGS_LEN 4
+#define MC_CMD_GET_LINK_OUT_V2_LINK_UP_LBN 0
+#define MC_CMD_GET_LINK_OUT_V2_LINK_UP_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_FULL_DUPLEX_LBN 1
+#define MC_CMD_GET_LINK_OUT_V2_FULL_DUPLEX_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_BPX_LINK_LBN 2
+#define MC_CMD_GET_LINK_OUT_V2_BPX_LINK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PHY_LINK_LBN 3
+#define MC_CMD_GET_LINK_OUT_V2_PHY_LINK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_RX_LBN 6
+#define MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_RX_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_TX_LBN 7
+#define MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_TX_WIDTH 1
+/* This returns the negotiated flow control value. */
+#define MC_CMD_GET_LINK_OUT_V2_FCNTL_OFST 20
+#define MC_CMD_GET_LINK_OUT_V2_FCNTL_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_SET_MAC/MC_CMD_SET_MAC_IN/FCNTL */
+#define MC_CMD_GET_LINK_OUT_V2_MAC_FAULT_OFST 24
+#define MC_CMD_GET_LINK_OUT_V2_MAC_FAULT_LEN 4
+/* MC_CMD_MAC_FAULT_XGMII_LOCAL_LBN 0 */
+/* MC_CMD_MAC_FAULT_XGMII_LOCAL_WIDTH 1 */
+/* MC_CMD_MAC_FAULT_XGMII_REMOTE_LBN 1 */
+/* MC_CMD_MAC_FAULT_XGMII_REMOTE_WIDTH 1 */
+/* MC_CMD_MAC_FAULT_SGMII_REMOTE_LBN 2 */
+/* MC_CMD_MAC_FAULT_SGMII_REMOTE_WIDTH 1 */
+/* MC_CMD_MAC_FAULT_PENDING_RECONFIG_LBN 3 */
+/* MC_CMD_MAC_FAULT_PENDING_RECONFIG_WIDTH 1 */
+/* True local device capabilities (taking into account currently used PMD/MDI,
+ * e.g. plugged-in module). In general, subset of
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP, but may include extra _FEC_REQUEST
+ * bits, if the PMD requires FEC. 0 if unknown (e.g. module unplugged). Equal
+ * to SUPPORTED_CAP for non-pluggable PMDs. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define MC_CMD_GET_LINK_OUT_V2_LD_CAP_OFST 28
+#define MC_CMD_GET_LINK_OUT_V2_LD_CAP_LEN 4
+/* Auto-negotiation type used on the link */
+#define MC_CMD_GET_LINK_OUT_V2_AN_TYPE_OFST 32
+#define MC_CMD_GET_LINK_OUT_V2_AN_TYPE_LEN 4
+/* Enum values, see field(s): */
+/* AN_TYPE/TYPE */
+/* Forward error correction used on the link */
+#define MC_CMD_GET_LINK_OUT_V2_FEC_TYPE_OFST 36
+#define MC_CMD_GET_LINK_OUT_V2_FEC_TYPE_LEN 4
+/* Enum values, see field(s): */
+/* FEC_TYPE/TYPE */
+#define MC_CMD_GET_LINK_OUT_V2_EXT_FLAGS_OFST 40
+#define MC_CMD_GET_LINK_OUT_V2_EXT_FLAGS_LEN 4
+#define MC_CMD_GET_LINK_OUT_V2_PMD_MDI_CONNECTED_LBN 0
+#define MC_CMD_GET_LINK_OUT_V2_PMD_MDI_CONNECTED_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PMD_READY_LBN 1
+#define MC_CMD_GET_LINK_OUT_V2_PMD_READY_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PMD_LINK_UP_LBN 2
+#define MC_CMD_GET_LINK_OUT_V2_PMD_LINK_UP_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PMA_LINK_UP_LBN 3
+#define MC_CMD_GET_LINK_OUT_V2_PMA_LINK_UP_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PCS_LOCK_LBN 4
+#define MC_CMD_GET_LINK_OUT_V2_PCS_LOCK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_ALIGN_LOCK_LBN 5
+#define MC_CMD_GET_LINK_OUT_V2_ALIGN_LOCK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_HI_BER_LBN 6
+#define MC_CMD_GET_LINK_OUT_V2_HI_BER_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_FEC_LOCK_LBN 7
+#define MC_CMD_GET_LINK_OUT_V2_FEC_LOCK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_AN_DONE_LBN 8
+#define MC_CMD_GET_LINK_OUT_V2_AN_DONE_WIDTH 1
+
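
MCDI fields are described by a byte offset (OFST) into the response plus a bit position (LBN) and width (WIDTH) within that dword; the driver's MCDI/EFX dword-field macros decode them. A minimal sketch of the same extraction for the V2 flags word (the sample value is made up):

#include <stdint.h>
#include <stdio.h>

/* Extract WIDTH bits starting at bit LBN from a host-order dword.
 * (Widths of 32 would need a 64-bit shift; single-bit flags suffice here.)
 */
static uint32_t mcdi_field(uint32_t dword, unsigned int lbn, unsigned int width)
{
	return (dword >> lbn) & ((1u << width) - 1);
}

int main(void)
{
	uint32_t flags = 0x0000000b;	/* made-up FLAGS dword: bits 0, 1 and 3 set */

	/* LINK_UP is LBN 0 WIDTH 1, FULL_DUPLEX LBN 1, PHY_LINK LBN 3 */
	printf("link up:     %u\n", mcdi_field(flags, 0, 1));
	printf("full duplex: %u\n", mcdi_field(flags, 1, 1));
	printf("phy link:    %u\n", mcdi_field(flags, 3, 1));
	return 0;
}
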
/***********************************/
/* MC_CMD_SET_LINK
@@ -3610,7 +3769,9 @@
/* MC_CMD_SET_LINK_IN msgrequest */
#define MC_CMD_SET_LINK_IN_LEN 16
-/* ??? */
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
#define MC_CMD_SET_LINK_IN_CAP_OFST 0
#define MC_CMD_SET_LINK_IN_CAP_LEN 4
/* Flags */
@@ -3650,9 +3811,9 @@
/* Set LED state. */
#define MC_CMD_SET_ID_LED_IN_STATE_OFST 0
#define MC_CMD_SET_ID_LED_IN_STATE_LEN 4
-#define MC_CMD_LED_OFF 0x0 /* enum */
-#define MC_CMD_LED_ON 0x1 /* enum */
-#define MC_CMD_LED_DEFAULT 0x2 /* enum */
+#define MC_CMD_LED_OFF 0x0 /* enum */
+#define MC_CMD_LED_ON 0x1 /* enum */
+#define MC_CMD_LED_DEFAULT 0x2 /* enum */
/* MC_CMD_SET_ID_LED_OUT msgresponse */
#define MC_CMD_SET_ID_LED_OUT_LEN 0
@@ -3802,53 +3963,53 @@
#define MC_CMD_PHY_STATS_OUT_NO_DMA_STATISTICS_LEN 4
#define MC_CMD_PHY_STATS_OUT_NO_DMA_STATISTICS_NUM MC_CMD_PHY_NSTATS
/* enum: OUI. */
-#define MC_CMD_OUI 0x0
+#define MC_CMD_OUI 0x0
/* enum: PMA-PMD Link Up. */
-#define MC_CMD_PMA_PMD_LINK_UP 0x1
+#define MC_CMD_PMA_PMD_LINK_UP 0x1
/* enum: PMA-PMD RX Fault. */
-#define MC_CMD_PMA_PMD_RX_FAULT 0x2
+#define MC_CMD_PMA_PMD_RX_FAULT 0x2
/* enum: PMA-PMD TX Fault. */
-#define MC_CMD_PMA_PMD_TX_FAULT 0x3
+#define MC_CMD_PMA_PMD_TX_FAULT 0x3
/* enum: PMA-PMD Signal */
-#define MC_CMD_PMA_PMD_SIGNAL 0x4
+#define MC_CMD_PMA_PMD_SIGNAL 0x4
/* enum: PMA-PMD SNR A. */
-#define MC_CMD_PMA_PMD_SNR_A 0x5
+#define MC_CMD_PMA_PMD_SNR_A 0x5
/* enum: PMA-PMD SNR B. */
-#define MC_CMD_PMA_PMD_SNR_B 0x6
+#define MC_CMD_PMA_PMD_SNR_B 0x6
/* enum: PMA-PMD SNR C. */
-#define MC_CMD_PMA_PMD_SNR_C 0x7
+#define MC_CMD_PMA_PMD_SNR_C 0x7
/* enum: PMA-PMD SNR D. */
-#define MC_CMD_PMA_PMD_SNR_D 0x8
+#define MC_CMD_PMA_PMD_SNR_D 0x8
/* enum: PCS Link Up. */
-#define MC_CMD_PCS_LINK_UP 0x9
+#define MC_CMD_PCS_LINK_UP 0x9
/* enum: PCS RX Fault. */
-#define MC_CMD_PCS_RX_FAULT 0xa
+#define MC_CMD_PCS_RX_FAULT 0xa
/* enum: PCS TX Fault. */
-#define MC_CMD_PCS_TX_FAULT 0xb
+#define MC_CMD_PCS_TX_FAULT 0xb
/* enum: PCS BER. */
-#define MC_CMD_PCS_BER 0xc
+#define MC_CMD_PCS_BER 0xc
/* enum: PCS Block Errors. */
-#define MC_CMD_PCS_BLOCK_ERRORS 0xd
+#define MC_CMD_PCS_BLOCK_ERRORS 0xd
/* enum: PhyXS Link Up. */
-#define MC_CMD_PHYXS_LINK_UP 0xe
+#define MC_CMD_PHYXS_LINK_UP 0xe
/* enum: PhyXS RX Fault. */
-#define MC_CMD_PHYXS_RX_FAULT 0xf
+#define MC_CMD_PHYXS_RX_FAULT 0xf
/* enum: PhyXS TX Fault. */
-#define MC_CMD_PHYXS_TX_FAULT 0x10
+#define MC_CMD_PHYXS_TX_FAULT 0x10
/* enum: PhyXS Align. */
-#define MC_CMD_PHYXS_ALIGN 0x11
+#define MC_CMD_PHYXS_ALIGN 0x11
/* enum: PhyXS Sync. */
-#define MC_CMD_PHYXS_SYNC 0x12
+#define MC_CMD_PHYXS_SYNC 0x12
/* enum: AN link-up. */
-#define MC_CMD_AN_LINK_UP 0x13
+#define MC_CMD_AN_LINK_UP 0x13
/* enum: AN Complete. */
-#define MC_CMD_AN_COMPLETE 0x14
+#define MC_CMD_AN_COMPLETE 0x14
/* enum: AN 10GBaseT Status. */
-#define MC_CMD_AN_10GBT_STATUS 0x15
+#define MC_CMD_AN_10GBT_STATUS 0x15
/* enum: Clause 22 Link-Up. */
-#define MC_CMD_CL22_LINK_UP 0x16
+#define MC_CMD_CL22_LINK_UP 0x16
/* enum: (Last entry) */
-#define MC_CMD_PHY_NSTATS 0x17
+#define MC_CMD_PHY_NSTATS 0x17
/***********************************/
@@ -3910,139 +4071,139 @@
#define MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_LO_OFST 0
#define MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_HI_OFST 4
#define MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS
-#define MC_CMD_MAC_GENERATION_START 0x0 /* enum */
-#define MC_CMD_MAC_DMABUF_START 0x1 /* enum */
-#define MC_CMD_MAC_TX_PKTS 0x1 /* enum */
-#define MC_CMD_MAC_TX_PAUSE_PKTS 0x2 /* enum */
-#define MC_CMD_MAC_TX_CONTROL_PKTS 0x3 /* enum */
-#define MC_CMD_MAC_TX_UNICAST_PKTS 0x4 /* enum */
-#define MC_CMD_MAC_TX_MULTICAST_PKTS 0x5 /* enum */
-#define MC_CMD_MAC_TX_BROADCAST_PKTS 0x6 /* enum */
-#define MC_CMD_MAC_TX_BYTES 0x7 /* enum */
-#define MC_CMD_MAC_TX_BAD_BYTES 0x8 /* enum */
-#define MC_CMD_MAC_TX_LT64_PKTS 0x9 /* enum */
-#define MC_CMD_MAC_TX_64_PKTS 0xa /* enum */
-#define MC_CMD_MAC_TX_65_TO_127_PKTS 0xb /* enum */
-#define MC_CMD_MAC_TX_128_TO_255_PKTS 0xc /* enum */
-#define MC_CMD_MAC_TX_256_TO_511_PKTS 0xd /* enum */
-#define MC_CMD_MAC_TX_512_TO_1023_PKTS 0xe /* enum */
-#define MC_CMD_MAC_TX_1024_TO_15XX_PKTS 0xf /* enum */
-#define MC_CMD_MAC_TX_15XX_TO_JUMBO_PKTS 0x10 /* enum */
-#define MC_CMD_MAC_TX_GTJUMBO_PKTS 0x11 /* enum */
-#define MC_CMD_MAC_TX_BAD_FCS_PKTS 0x12 /* enum */
-#define MC_CMD_MAC_TX_SINGLE_COLLISION_PKTS 0x13 /* enum */
-#define MC_CMD_MAC_TX_MULTIPLE_COLLISION_PKTS 0x14 /* enum */
-#define MC_CMD_MAC_TX_EXCESSIVE_COLLISION_PKTS 0x15 /* enum */
-#define MC_CMD_MAC_TX_LATE_COLLISION_PKTS 0x16 /* enum */
-#define MC_CMD_MAC_TX_DEFERRED_PKTS 0x17 /* enum */
-#define MC_CMD_MAC_TX_EXCESSIVE_DEFERRED_PKTS 0x18 /* enum */
-#define MC_CMD_MAC_TX_NON_TCPUDP_PKTS 0x19 /* enum */
-#define MC_CMD_MAC_TX_MAC_SRC_ERR_PKTS 0x1a /* enum */
-#define MC_CMD_MAC_TX_IP_SRC_ERR_PKTS 0x1b /* enum */
-#define MC_CMD_MAC_RX_PKTS 0x1c /* enum */
-#define MC_CMD_MAC_RX_PAUSE_PKTS 0x1d /* enum */
-#define MC_CMD_MAC_RX_GOOD_PKTS 0x1e /* enum */
-#define MC_CMD_MAC_RX_CONTROL_PKTS 0x1f /* enum */
-#define MC_CMD_MAC_RX_UNICAST_PKTS 0x20 /* enum */
-#define MC_CMD_MAC_RX_MULTICAST_PKTS 0x21 /* enum */
-#define MC_CMD_MAC_RX_BROADCAST_PKTS 0x22 /* enum */
-#define MC_CMD_MAC_RX_BYTES 0x23 /* enum */
-#define MC_CMD_MAC_RX_BAD_BYTES 0x24 /* enum */
-#define MC_CMD_MAC_RX_64_PKTS 0x25 /* enum */
-#define MC_CMD_MAC_RX_65_TO_127_PKTS 0x26 /* enum */
-#define MC_CMD_MAC_RX_128_TO_255_PKTS 0x27 /* enum */
-#define MC_CMD_MAC_RX_256_TO_511_PKTS 0x28 /* enum */
-#define MC_CMD_MAC_RX_512_TO_1023_PKTS 0x29 /* enum */
-#define MC_CMD_MAC_RX_1024_TO_15XX_PKTS 0x2a /* enum */
-#define MC_CMD_MAC_RX_15XX_TO_JUMBO_PKTS 0x2b /* enum */
-#define MC_CMD_MAC_RX_GTJUMBO_PKTS 0x2c /* enum */
-#define MC_CMD_MAC_RX_UNDERSIZE_PKTS 0x2d /* enum */
-#define MC_CMD_MAC_RX_BAD_FCS_PKTS 0x2e /* enum */
-#define MC_CMD_MAC_RX_OVERFLOW_PKTS 0x2f /* enum */
-#define MC_CMD_MAC_RX_FALSE_CARRIER_PKTS 0x30 /* enum */
-#define MC_CMD_MAC_RX_SYMBOL_ERROR_PKTS 0x31 /* enum */
-#define MC_CMD_MAC_RX_ALIGN_ERROR_PKTS 0x32 /* enum */
-#define MC_CMD_MAC_RX_LENGTH_ERROR_PKTS 0x33 /* enum */
-#define MC_CMD_MAC_RX_INTERNAL_ERROR_PKTS 0x34 /* enum */
-#define MC_CMD_MAC_RX_JABBER_PKTS 0x35 /* enum */
-#define MC_CMD_MAC_RX_NODESC_DROPS 0x36 /* enum */
-#define MC_CMD_MAC_RX_LANES01_CHAR_ERR 0x37 /* enum */
-#define MC_CMD_MAC_RX_LANES23_CHAR_ERR 0x38 /* enum */
-#define MC_CMD_MAC_RX_LANES01_DISP_ERR 0x39 /* enum */
-#define MC_CMD_MAC_RX_LANES23_DISP_ERR 0x3a /* enum */
-#define MC_CMD_MAC_RX_MATCH_FAULT 0x3b /* enum */
+#define MC_CMD_MAC_GENERATION_START 0x0 /* enum */
+#define MC_CMD_MAC_DMABUF_START 0x1 /* enum */
+#define MC_CMD_MAC_TX_PKTS 0x1 /* enum */
+#define MC_CMD_MAC_TX_PAUSE_PKTS 0x2 /* enum */
+#define MC_CMD_MAC_TX_CONTROL_PKTS 0x3 /* enum */
+#define MC_CMD_MAC_TX_UNICAST_PKTS 0x4 /* enum */
+#define MC_CMD_MAC_TX_MULTICAST_PKTS 0x5 /* enum */
+#define MC_CMD_MAC_TX_BROADCAST_PKTS 0x6 /* enum */
+#define MC_CMD_MAC_TX_BYTES 0x7 /* enum */
+#define MC_CMD_MAC_TX_BAD_BYTES 0x8 /* enum */
+#define MC_CMD_MAC_TX_LT64_PKTS 0x9 /* enum */
+#define MC_CMD_MAC_TX_64_PKTS 0xa /* enum */
+#define MC_CMD_MAC_TX_65_TO_127_PKTS 0xb /* enum */
+#define MC_CMD_MAC_TX_128_TO_255_PKTS 0xc /* enum */
+#define MC_CMD_MAC_TX_256_TO_511_PKTS 0xd /* enum */
+#define MC_CMD_MAC_TX_512_TO_1023_PKTS 0xe /* enum */
+#define MC_CMD_MAC_TX_1024_TO_15XX_PKTS 0xf /* enum */
+#define MC_CMD_MAC_TX_15XX_TO_JUMBO_PKTS 0x10 /* enum */
+#define MC_CMD_MAC_TX_GTJUMBO_PKTS 0x11 /* enum */
+#define MC_CMD_MAC_TX_BAD_FCS_PKTS 0x12 /* enum */
+#define MC_CMD_MAC_TX_SINGLE_COLLISION_PKTS 0x13 /* enum */
+#define MC_CMD_MAC_TX_MULTIPLE_COLLISION_PKTS 0x14 /* enum */
+#define MC_CMD_MAC_TX_EXCESSIVE_COLLISION_PKTS 0x15 /* enum */
+#define MC_CMD_MAC_TX_LATE_COLLISION_PKTS 0x16 /* enum */
+#define MC_CMD_MAC_TX_DEFERRED_PKTS 0x17 /* enum */
+#define MC_CMD_MAC_TX_EXCESSIVE_DEFERRED_PKTS 0x18 /* enum */
+#define MC_CMD_MAC_TX_NON_TCPUDP_PKTS 0x19 /* enum */
+#define MC_CMD_MAC_TX_MAC_SRC_ERR_PKTS 0x1a /* enum */
+#define MC_CMD_MAC_TX_IP_SRC_ERR_PKTS 0x1b /* enum */
+#define MC_CMD_MAC_RX_PKTS 0x1c /* enum */
+#define MC_CMD_MAC_RX_PAUSE_PKTS 0x1d /* enum */
+#define MC_CMD_MAC_RX_GOOD_PKTS 0x1e /* enum */
+#define MC_CMD_MAC_RX_CONTROL_PKTS 0x1f /* enum */
+#define MC_CMD_MAC_RX_UNICAST_PKTS 0x20 /* enum */
+#define MC_CMD_MAC_RX_MULTICAST_PKTS 0x21 /* enum */
+#define MC_CMD_MAC_RX_BROADCAST_PKTS 0x22 /* enum */
+#define MC_CMD_MAC_RX_BYTES 0x23 /* enum */
+#define MC_CMD_MAC_RX_BAD_BYTES 0x24 /* enum */
+#define MC_CMD_MAC_RX_64_PKTS 0x25 /* enum */
+#define MC_CMD_MAC_RX_65_TO_127_PKTS 0x26 /* enum */
+#define MC_CMD_MAC_RX_128_TO_255_PKTS 0x27 /* enum */
+#define MC_CMD_MAC_RX_256_TO_511_PKTS 0x28 /* enum */
+#define MC_CMD_MAC_RX_512_TO_1023_PKTS 0x29 /* enum */
+#define MC_CMD_MAC_RX_1024_TO_15XX_PKTS 0x2a /* enum */
+#define MC_CMD_MAC_RX_15XX_TO_JUMBO_PKTS 0x2b /* enum */
+#define MC_CMD_MAC_RX_GTJUMBO_PKTS 0x2c /* enum */
+#define MC_CMD_MAC_RX_UNDERSIZE_PKTS 0x2d /* enum */
+#define MC_CMD_MAC_RX_BAD_FCS_PKTS 0x2e /* enum */
+#define MC_CMD_MAC_RX_OVERFLOW_PKTS 0x2f /* enum */
+#define MC_CMD_MAC_RX_FALSE_CARRIER_PKTS 0x30 /* enum */
+#define MC_CMD_MAC_RX_SYMBOL_ERROR_PKTS 0x31 /* enum */
+#define MC_CMD_MAC_RX_ALIGN_ERROR_PKTS 0x32 /* enum */
+#define MC_CMD_MAC_RX_LENGTH_ERROR_PKTS 0x33 /* enum */
+#define MC_CMD_MAC_RX_INTERNAL_ERROR_PKTS 0x34 /* enum */
+#define MC_CMD_MAC_RX_JABBER_PKTS 0x35 /* enum */
+#define MC_CMD_MAC_RX_NODESC_DROPS 0x36 /* enum */
+#define MC_CMD_MAC_RX_LANES01_CHAR_ERR 0x37 /* enum */
+#define MC_CMD_MAC_RX_LANES23_CHAR_ERR 0x38 /* enum */
+#define MC_CMD_MAC_RX_LANES01_DISP_ERR 0x39 /* enum */
+#define MC_CMD_MAC_RX_LANES23_DISP_ERR 0x3a /* enum */
+#define MC_CMD_MAC_RX_MATCH_FAULT 0x3b /* enum */
/* enum: PM trunc_bb_overflow counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW 0x3c
+#define MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW 0x3c
/* enum: PM discard_bb_overflow counter. Valid for EF10 with
* PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW 0x3d
+#define MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW 0x3d
/* enum: PM trunc_vfifo_full counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_TRUNC_VFIFO_FULL 0x3e
+#define MC_CMD_MAC_PM_TRUNC_VFIFO_FULL 0x3e
/* enum: PM discard_vfifo_full counter. Valid for EF10 with
* PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_PM_DISCARD_VFIFO_FULL 0x3f
+#define MC_CMD_MAC_PM_DISCARD_VFIFO_FULL 0x3f
/* enum: PM trunc_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_TRUNC_QBB 0x40
+#define MC_CMD_MAC_PM_TRUNC_QBB 0x40
/* enum: PM discard_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_DISCARD_QBB 0x41
+#define MC_CMD_MAC_PM_DISCARD_QBB 0x41
/* enum: PM discard_mapping counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_DISCARD_MAPPING 0x42
+#define MC_CMD_MAC_PM_DISCARD_MAPPING 0x42
/* enum: RXDP counter: Number of packets dropped due to the queue being
* disabled. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_Q_DISABLED_PKTS 0x43
+#define MC_CMD_MAC_RXDP_Q_DISABLED_PKTS 0x43
/* enum: RXDP counter: Number of packets dropped by the DICPU. Valid for EF10
* with PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_DI_DROPPED_PKTS 0x45
+#define MC_CMD_MAC_RXDP_DI_DROPPED_PKTS 0x45
/* enum: RXDP counter: Number of non-host packets. Valid for EF10 with
* PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_STREAMING_PKTS 0x46
+#define MC_CMD_MAC_RXDP_STREAMING_PKTS 0x46
/* enum: RXDP counter: Number of times an hlb descriptor fetch was performed.
* Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_HLB_FETCH_CONDITIONS 0x47
+#define MC_CMD_MAC_RXDP_HLB_FETCH_CONDITIONS 0x47
/* enum: RXDP counter: Number of times the DPCPU waited for an existing
* descriptor fetch. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_HLB_WAIT_CONDITIONS 0x48
-#define MC_CMD_MAC_VADAPTER_RX_DMABUF_START 0x4c /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_UNICAST_PACKETS 0x4c /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_UNICAST_BYTES 0x4d /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_MULTICAST_PACKETS 0x4e /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_MULTICAST_BYTES 0x4f /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_BROADCAST_PACKETS 0x50 /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_BROADCAST_BYTES 0x51 /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_BAD_PACKETS 0x52 /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_BAD_BYTES 0x53 /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_OVERFLOW 0x54 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_DMABUF_START 0x57 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_UNICAST_PACKETS 0x57 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_UNICAST_BYTES 0x58 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_MULTICAST_PACKETS 0x59 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_MULTICAST_BYTES 0x5a /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_BROADCAST_PACKETS 0x5b /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_BROADCAST_BYTES 0x5c /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_BAD_PACKETS 0x5d /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_BAD_BYTES 0x5e /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_OVERFLOW 0x5f /* enum */
+#define MC_CMD_MAC_RXDP_HLB_WAIT_CONDITIONS 0x48
+#define MC_CMD_MAC_VADAPTER_RX_DMABUF_START 0x4c /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_UNICAST_PACKETS 0x4c /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_UNICAST_BYTES 0x4d /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_MULTICAST_PACKETS 0x4e /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_MULTICAST_BYTES 0x4f /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_BROADCAST_PACKETS 0x50 /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_BROADCAST_BYTES 0x51 /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_BAD_PACKETS 0x52 /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_BAD_BYTES 0x53 /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_OVERFLOW 0x54 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_DMABUF_START 0x57 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_UNICAST_PACKETS 0x57 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_UNICAST_BYTES 0x58 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_MULTICAST_PACKETS 0x59 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_MULTICAST_BYTES 0x5a /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_BROADCAST_PACKETS 0x5b /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_BROADCAST_BYTES 0x5c /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_BAD_PACKETS 0x5d /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_BAD_BYTES 0x5e /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_OVERFLOW 0x5f /* enum */
/* enum: Start of GMAC stats buffer space, for Siena only. */
-#define MC_CMD_GMAC_DMABUF_START 0x40
+#define MC_CMD_GMAC_DMABUF_START 0x40
/* enum: End of GMAC stats buffer space, for Siena only. */
-#define MC_CMD_GMAC_DMABUF_END 0x5f
+#define MC_CMD_GMAC_DMABUF_END 0x5f
/* enum: GENERATION_END value, used together with GENERATION_START to verify
* consistency of DMAd data. For legacy firmware / drivers without extended
* stats (more precisely, when DMA_LEN == MC_CMD_MAC_NSTATS *
@@ -4054,7 +4215,7 @@
* sizeof(uint64_t). See SF-109306-TC, Section 9.2 for details.
*/
#define MC_CMD_MAC_GENERATION_END 0x60
-#define MC_CMD_MAC_NSTATS 0x61 /* enum */
+#define MC_CMD_MAC_NSTATS 0x61 /* enum */
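
A minimal sketch of the GENERATION_START/GENERATION_END consistency check
described above, assuming the usual writer ordering (firmware bumps
GENERATION_START before rewriting the buffer and writes GENERATION_END last);
the helper name is illustrative and not part of this patch.

/* Matching markers mean the statistics between them were not torn by a
 * concurrent firmware update; on mismatch the caller should re-read.
 */
static bool mac_stats_snapshot_consistent(const __le64 *dma_stats)
{
        u64 gen_end = le64_to_cpu(dma_stats[MC_CMD_MAC_GENERATION_END]);

        rmb();          /* order the END read before START and the payload */
        return le64_to_cpu(dma_stats[MC_CMD_MAC_GENERATION_START]) == gen_end;
}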
/* MC_CMD_MAC_STATS_V2_OUT_DMA msgresponse */
#define MC_CMD_MAC_STATS_V2_OUT_DMA_LEN 0
@@ -4067,25 +4228,25 @@
#define MC_CMD_MAC_STATS_V2_OUT_NO_DMA_STATISTICS_HI_OFST 4
#define MC_CMD_MAC_STATS_V2_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS_V2
/* enum: Start of FEC stats buffer space, Medford2 and up */
-#define MC_CMD_MAC_FEC_DMABUF_START 0x61
+#define MC_CMD_MAC_FEC_DMABUF_START 0x61
/* enum: Number of uncorrected FEC codewords on link (RS-FEC only for Medford2)
*/
-#define MC_CMD_MAC_FEC_UNCORRECTED_ERRORS 0x61
+#define MC_CMD_MAC_FEC_UNCORRECTED_ERRORS 0x61
/* enum: Number of corrected FEC codewords on link (RS-FEC only for Medford2)
*/
-#define MC_CMD_MAC_FEC_CORRECTED_ERRORS 0x62
+#define MC_CMD_MAC_FEC_CORRECTED_ERRORS 0x62
/* enum: Number of corrected 10-bit symbol errors, lane 0 (RS-FEC only) */
-#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE0 0x63
+#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE0 0x63
/* enum: Number of corrected 10-bit symbol errors, lane 1 (RS-FEC only) */
-#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE1 0x64
+#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE1 0x64
/* enum: Number of corrected 10-bit symbol errors, lane 2 (RS-FEC only) */
-#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE2 0x65
+#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE2 0x65
/* enum: Number of corrected 10-bit symbol errors, lane 3 (RS-FEC only) */
-#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE3 0x66
+#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE3 0x66
/* enum: This includes the space at offset 103 which is the final
* GENERATION_END in a MAC_STATS_V2 response and otherwise unused.
*/
-#define MC_CMD_MAC_NSTATS_V2 0x68
+#define MC_CMD_MAC_NSTATS_V2 0x68
/* Other enum values, see field(s): */
/* MC_CMD_MAC_STATS_OUT_NO_DMA/STATISTICS */
@@ -4100,66 +4261,66 @@
#define MC_CMD_MAC_STATS_V3_OUT_NO_DMA_STATISTICS_HI_OFST 4
#define MC_CMD_MAC_STATS_V3_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS_V3
/* enum: Start of CTPIO stats buffer space, Medford2 and up */
-#define MC_CMD_MAC_CTPIO_DMABUF_START 0x68
+#define MC_CMD_MAC_CTPIO_DMABUF_START 0x68
/* enum: Number of CTPIO fallbacks because a DMA packet was in progress on the
* target VI
*/
-#define MC_CMD_MAC_CTPIO_VI_BUSY_FALLBACK 0x68
+#define MC_CMD_MAC_CTPIO_VI_BUSY_FALLBACK 0x68
/* enum: Number of times a CTPIO send wrote beyond frame end (informational
* only)
*/
-#define MC_CMD_MAC_CTPIO_LONG_WRITE_SUCCESS 0x69
+#define MC_CMD_MAC_CTPIO_LONG_WRITE_SUCCESS 0x69
/* enum: Number of CTPIO failures because the TX doorbell was written before
* the end of the frame data
*/
-#define MC_CMD_MAC_CTPIO_MISSING_DBELL_FAIL 0x6a
+#define MC_CMD_MAC_CTPIO_MISSING_DBELL_FAIL 0x6a
/* enum: Number of CTPIO failures because the internal FIFO overflowed */
-#define MC_CMD_MAC_CTPIO_OVERFLOW_FAIL 0x6b
+#define MC_CMD_MAC_CTPIO_OVERFLOW_FAIL 0x6b
/* enum: Number of CTPIO failures because the host did not deliver data fast
* enough to avoid MAC underflow
*/
-#define MC_CMD_MAC_CTPIO_UNDERFLOW_FAIL 0x6c
+#define MC_CMD_MAC_CTPIO_UNDERFLOW_FAIL 0x6c
/* enum: Number of CTPIO failures because the host did not deliver all the
* frame data within the timeout
*/
-#define MC_CMD_MAC_CTPIO_TIMEOUT_FAIL 0x6d
+#define MC_CMD_MAC_CTPIO_TIMEOUT_FAIL 0x6d
/* enum: Number of CTPIO failures because the frame data arrived out of order
* or with gaps
*/
-#define MC_CMD_MAC_CTPIO_NONCONTIG_WR_FAIL 0x6e
+#define MC_CMD_MAC_CTPIO_NONCONTIG_WR_FAIL 0x6e
/* enum: Number of CTPIO failures because the host started a new frame before
* completing the previous one
*/
-#define MC_CMD_MAC_CTPIO_FRM_CLOBBER_FAIL 0x6f
+#define MC_CMD_MAC_CTPIO_FRM_CLOBBER_FAIL 0x6f
/* enum: Number of CTPIO failures because a write was not a multiple of 32 bits
* or not 32-bit aligned
*/
-#define MC_CMD_MAC_CTPIO_INVALID_WR_FAIL 0x70
+#define MC_CMD_MAC_CTPIO_INVALID_WR_FAIL 0x70
/* enum: Number of CTPIO fallbacks because another VI on the same port was
* sending a CTPIO frame
*/
-#define MC_CMD_MAC_CTPIO_VI_CLOBBER_FALLBACK 0x71
+#define MC_CMD_MAC_CTPIO_VI_CLOBBER_FALLBACK 0x71
/* enum: Number of CTPIO fallbacks because target VI did not have CTPIO enabled
*/
-#define MC_CMD_MAC_CTPIO_UNQUALIFIED_FALLBACK 0x72
+#define MC_CMD_MAC_CTPIO_UNQUALIFIED_FALLBACK 0x72
/* enum: Number of CTPIO fallbacks because length in header was less than 29
* bytes
*/
-#define MC_CMD_MAC_CTPIO_RUNT_FALLBACK 0x73
+#define MC_CMD_MAC_CTPIO_RUNT_FALLBACK 0x73
/* enum: Total number of successful CTPIO sends on this port */
-#define MC_CMD_MAC_CTPIO_SUCCESS 0x74
+#define MC_CMD_MAC_CTPIO_SUCCESS 0x74
/* enum: Total number of CTPIO fallbacks on this port */
-#define MC_CMD_MAC_CTPIO_FALLBACK 0x75
+#define MC_CMD_MAC_CTPIO_FALLBACK 0x75
/* enum: Total number of CTPIO poisoned frames on this port, whether erased or
* not
*/
-#define MC_CMD_MAC_CTPIO_POISON 0x76
+#define MC_CMD_MAC_CTPIO_POISON 0x76
/* enum: Total number of CTPIO erased frames on this port */
-#define MC_CMD_MAC_CTPIO_ERASE 0x77
+#define MC_CMD_MAC_CTPIO_ERASE 0x77
/* enum: This includes the space at offset 120 which is the final
* GENERATION_END in a MAC_STATS_V3 response and otherwise unused.
*/
-#define MC_CMD_MAC_NSTATS_V3 0x79
+#define MC_CMD_MAC_NSTATS_V3 0x79
/* Other enum values, see field(s): */
/* MC_CMD_MAC_STATS_V2_OUT_NO_DMA/STATISTICS */
@@ -4268,25 +4429,25 @@
#define MC_CMD_WOL_FILTER_SET_IN_LEN 192
#define MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_OFST 0
#define MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_LEN 4
-#define MC_CMD_FILTER_MODE_SIMPLE 0x0 /* enum */
+#define MC_CMD_FILTER_MODE_SIMPLE 0x0 /* enum */
#define MC_CMD_FILTER_MODE_STRUCTURED 0xffffffff /* enum */
/* A type value of 1 is unused. */
#define MC_CMD_WOL_FILTER_SET_IN_WOL_TYPE_OFST 4
#define MC_CMD_WOL_FILTER_SET_IN_WOL_TYPE_LEN 4
/* enum: Magic */
-#define MC_CMD_WOL_TYPE_MAGIC 0x0
+#define MC_CMD_WOL_TYPE_MAGIC 0x0
/* enum: MS Windows Magic */
#define MC_CMD_WOL_TYPE_WIN_MAGIC 0x2
/* enum: IPv4 Syn */
-#define MC_CMD_WOL_TYPE_IPV4_SYN 0x3
+#define MC_CMD_WOL_TYPE_IPV4_SYN 0x3
/* enum: IPv6 Syn */
-#define MC_CMD_WOL_TYPE_IPV6_SYN 0x4
+#define MC_CMD_WOL_TYPE_IPV6_SYN 0x4
/* enum: Bitmap */
-#define MC_CMD_WOL_TYPE_BITMAP 0x5
+#define MC_CMD_WOL_TYPE_BITMAP 0x5
/* enum: Link */
-#define MC_CMD_WOL_TYPE_LINK 0x6
+#define MC_CMD_WOL_TYPE_LINK 0x6
/* enum: (Above this for future use) */
-#define MC_CMD_WOL_TYPE_MAX 0x7
+#define MC_CMD_WOL_TYPE_MAX 0x7
#define MC_CMD_WOL_FILTER_SET_IN_DATA_OFST 8
#define MC_CMD_WOL_FILTER_SET_IN_DATA_LEN 4
#define MC_CMD_WOL_FILTER_SET_IN_DATA_NUM 46
@@ -4515,6 +4676,8 @@
#define MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1
#define MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1
#define MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1
+#define MC_CMD_NVRAM_INFO_OUT_READ_ONLY_IF_TSA_BOUND_LBN 2
+#define MC_CMD_NVRAM_INFO_OUT_READ_ONLY_IF_TSA_BOUND_WIDTH 1
#define MC_CMD_NVRAM_INFO_OUT_READ_ONLY_LBN 5
#define MC_CMD_NVRAM_INFO_OUT_READ_ONLY_WIDTH 1
#define MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6
@@ -4542,6 +4705,8 @@
#define MC_CMD_NVRAM_INFO_V2_OUT_PROTECTED_WIDTH 1
#define MC_CMD_NVRAM_INFO_V2_OUT_TLV_LBN 1
#define MC_CMD_NVRAM_INFO_V2_OUT_TLV_WIDTH 1
+#define MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_IF_TSA_BOUND_LBN 2
+#define MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_IF_TSA_BOUND_WIDTH 1
#define MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_LBN 5
#define MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_WIDTH 1
#define MC_CMD_NVRAM_INFO_V2_OUT_A_B_LBN 7
@@ -4560,7 +4725,11 @@
/* MC_CMD_NVRAM_UPDATE_START
* Start a group of update operations on a virtual NVRAM partition. Locks
* required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad type), EACCES (if
- * PHY_LOCK required and not held).
+ * PHY_LOCK required and not held). In an adapter bound to a TSA controller,
+ * MC_CMD_NVRAM_UPDATE_START can only be used on a subset of partition types
+ * i.e. static config, dynamic config and expansion ROM config. Attempting to
+ * perform this operation on a restricted partition will return the error
+ * EPERM.
*/
#define MC_CMD_NVRAM_UPDATE_START 0x38
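
To make the new TSA restriction concrete, a sketch of issuing this command in
the style of the sfc driver's MCDI helpers (MCDI_DECLARE_BUF, MCDI_SET_DWORD,
efx_mcdi_rpc()); the wrapper name is illustrative.

static int nvram_update_start(struct efx_nic *efx, unsigned int type)
{
        MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_IN_LEN);

        MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_START_IN_TYPE, type);
        /* On a TSA-bound adapter this returns -EPERM unless TYPE is one of
         * the permitted partitions (static config, dynamic config or
         * expansion ROM config); MC_CMD_NVRAM_UPDATE_FINISH below is
         * restricted the same way.
         */
        return efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_START, inbuf,
                            sizeof(inbuf), NULL, 0, NULL);
}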
@@ -4720,8 +4889,12 @@
/***********************************/
/* MC_CMD_NVRAM_UPDATE_FINISH
* Finish a group of update operations on a virtual NVRAM partition. Locks
- * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad
- * type/offset/length), EACCES (if PHY_LOCK required and not held)
+ * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad type/offset/
+ * length), EACCES (if PHY_LOCK required and not held). In an adapter bound to
+ * a TSA controller, MC_CMD_NVRAM_UPDATE_FINISH can only be used on a subset of
+ * partition types i.e. static config, dynamic config and expansion ROM config.
+ * Attempting to perform this operation on a restricted partition will return
+ * the error EPERM.
*/
#define MC_CMD_NVRAM_UPDATE_FINISH 0x3c
@@ -4958,181 +5131,181 @@
#define MC_CMD_SENSOR_INFO_OUT_MASK_OFST 0
#define MC_CMD_SENSOR_INFO_OUT_MASK_LEN 4
/* enum: Controller temperature: degC */
-#define MC_CMD_SENSOR_CONTROLLER_TEMP 0x0
+#define MC_CMD_SENSOR_CONTROLLER_TEMP 0x0
/* enum: Phy common temperature: degC */
-#define MC_CMD_SENSOR_PHY_COMMON_TEMP 0x1
+#define MC_CMD_SENSOR_PHY_COMMON_TEMP 0x1
/* enum: Controller cooling: bool */
-#define MC_CMD_SENSOR_CONTROLLER_COOLING 0x2
+#define MC_CMD_SENSOR_CONTROLLER_COOLING 0x2
/* enum: Phy 0 temperature: degC */
-#define MC_CMD_SENSOR_PHY0_TEMP 0x3
+#define MC_CMD_SENSOR_PHY0_TEMP 0x3
/* enum: Phy 0 cooling: bool */
-#define MC_CMD_SENSOR_PHY0_COOLING 0x4
+#define MC_CMD_SENSOR_PHY0_COOLING 0x4
/* enum: Phy 1 temperature: degC */
-#define MC_CMD_SENSOR_PHY1_TEMP 0x5
+#define MC_CMD_SENSOR_PHY1_TEMP 0x5
/* enum: Phy 1 cooling: bool */
-#define MC_CMD_SENSOR_PHY1_COOLING 0x6
+#define MC_CMD_SENSOR_PHY1_COOLING 0x6
/* enum: 1.0v power: mV */
-#define MC_CMD_SENSOR_IN_1V0 0x7
+#define MC_CMD_SENSOR_IN_1V0 0x7
/* enum: 1.2v power: mV */
-#define MC_CMD_SENSOR_IN_1V2 0x8
+#define MC_CMD_SENSOR_IN_1V2 0x8
/* enum: 1.8v power: mV */
-#define MC_CMD_SENSOR_IN_1V8 0x9
+#define MC_CMD_SENSOR_IN_1V8 0x9
/* enum: 2.5v power: mV */
-#define MC_CMD_SENSOR_IN_2V5 0xa
+#define MC_CMD_SENSOR_IN_2V5 0xa
/* enum: 3.3v power: mV */
-#define MC_CMD_SENSOR_IN_3V3 0xb
+#define MC_CMD_SENSOR_IN_3V3 0xb
/* enum: 12v power: mV */
-#define MC_CMD_SENSOR_IN_12V0 0xc
+#define MC_CMD_SENSOR_IN_12V0 0xc
/* enum: 1.2v analogue power: mV */
-#define MC_CMD_SENSOR_IN_1V2A 0xd
+#define MC_CMD_SENSOR_IN_1V2A 0xd
/* enum: reference voltage: mV */
-#define MC_CMD_SENSOR_IN_VREF 0xe
+#define MC_CMD_SENSOR_IN_VREF 0xe
/* enum: AOE FPGA power: mV */
-#define MC_CMD_SENSOR_OUT_VAOE 0xf
+#define MC_CMD_SENSOR_OUT_VAOE 0xf
/* enum: AOE FPGA temperature: degC */
-#define MC_CMD_SENSOR_AOE_TEMP 0x10
+#define MC_CMD_SENSOR_AOE_TEMP 0x10
/* enum: AOE FPGA PSU temperature: degC */
-#define MC_CMD_SENSOR_PSU_AOE_TEMP 0x11
+#define MC_CMD_SENSOR_PSU_AOE_TEMP 0x11
/* enum: AOE PSU temperature: degC */
-#define MC_CMD_SENSOR_PSU_TEMP 0x12
+#define MC_CMD_SENSOR_PSU_TEMP 0x12
/* enum: Fan 0 speed: RPM */
-#define MC_CMD_SENSOR_FAN_0 0x13
+#define MC_CMD_SENSOR_FAN_0 0x13
/* enum: Fan 1 speed: RPM */
-#define MC_CMD_SENSOR_FAN_1 0x14
+#define MC_CMD_SENSOR_FAN_1 0x14
/* enum: Fan 2 speed: RPM */
-#define MC_CMD_SENSOR_FAN_2 0x15
+#define MC_CMD_SENSOR_FAN_2 0x15
/* enum: Fan 3 speed: RPM */
-#define MC_CMD_SENSOR_FAN_3 0x16
+#define MC_CMD_SENSOR_FAN_3 0x16
/* enum: Fan 4 speed: RPM */
-#define MC_CMD_SENSOR_FAN_4 0x17
+#define MC_CMD_SENSOR_FAN_4 0x17
/* enum: AOE FPGA input power: mV */
-#define MC_CMD_SENSOR_IN_VAOE 0x18
+#define MC_CMD_SENSOR_IN_VAOE 0x18
/* enum: AOE FPGA current: mA */
-#define MC_CMD_SENSOR_OUT_IAOE 0x19
+#define MC_CMD_SENSOR_OUT_IAOE 0x19
/* enum: AOE FPGA input current: mA */
-#define MC_CMD_SENSOR_IN_IAOE 0x1a
+#define MC_CMD_SENSOR_IN_IAOE 0x1a
/* enum: NIC power consumption: W */
-#define MC_CMD_SENSOR_NIC_POWER 0x1b
+#define MC_CMD_SENSOR_NIC_POWER 0x1b
/* enum: 0.9v power voltage: mV */
-#define MC_CMD_SENSOR_IN_0V9 0x1c
+#define MC_CMD_SENSOR_IN_0V9 0x1c
/* enum: 0.9v power current: mA */
-#define MC_CMD_SENSOR_IN_I0V9 0x1d
+#define MC_CMD_SENSOR_IN_I0V9 0x1d
/* enum: 1.2v power current: mA */
-#define MC_CMD_SENSOR_IN_I1V2 0x1e
+#define MC_CMD_SENSOR_IN_I1V2 0x1e
/* enum: Not a sensor: reserved for the next page flag */
-#define MC_CMD_SENSOR_PAGE0_NEXT 0x1f
+#define MC_CMD_SENSOR_PAGE0_NEXT 0x1f
/* enum: 0.9v power voltage (at ADC): mV */
-#define MC_CMD_SENSOR_IN_0V9_ADC 0x20
+#define MC_CMD_SENSOR_IN_0V9_ADC 0x20
/* enum: Controller temperature 2: degC */
-#define MC_CMD_SENSOR_CONTROLLER_2_TEMP 0x21
+#define MC_CMD_SENSOR_CONTROLLER_2_TEMP 0x21
/* enum: Voltage regulator internal temperature: degC */
-#define MC_CMD_SENSOR_VREG_INTERNAL_TEMP 0x22
+#define MC_CMD_SENSOR_VREG_INTERNAL_TEMP 0x22
/* enum: 0.9V voltage regulator temperature: degC */
-#define MC_CMD_SENSOR_VREG_0V9_TEMP 0x23
+#define MC_CMD_SENSOR_VREG_0V9_TEMP 0x23
/* enum: 1.2V voltage regulator temperature: degC */
-#define MC_CMD_SENSOR_VREG_1V2_TEMP 0x24
+#define MC_CMD_SENSOR_VREG_1V2_TEMP 0x24
/* enum: controller internal temperature sensor voltage (internal ADC): mV */
-#define MC_CMD_SENSOR_CONTROLLER_VPTAT 0x25
+#define MC_CMD_SENSOR_CONTROLLER_VPTAT 0x25
/* enum: controller internal temperature (internal ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP 0x26
+#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP 0x26
/* enum: controller internal temperature sensor voltage (external ADC): mV */
-#define MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC 0x27
+#define MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC 0x27
/* enum: controller internal temperature (external ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC 0x28
+#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC 0x28
/* enum: ambient temperature: degC */
-#define MC_CMD_SENSOR_AMBIENT_TEMP 0x29
+#define MC_CMD_SENSOR_AMBIENT_TEMP 0x29
/* enum: air flow: bool */
-#define MC_CMD_SENSOR_AIRFLOW 0x2a
+#define MC_CMD_SENSOR_AIRFLOW 0x2a
/* enum: voltage between VDD08D and VSS08D at CSR: mV */
-#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR 0x2b
+#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR 0x2b
/* enum: voltage between VDD08D and VSS08D at CSR (external ADC): mV */
-#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC 0x2c
+#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC 0x2c
/* enum: Hotpoint temperature: degC */
-#define MC_CMD_SENSOR_HOTPOINT_TEMP 0x2d
+#define MC_CMD_SENSOR_HOTPOINT_TEMP 0x2d
/* enum: Port 0 PHY power switch over-current: bool */
-#define MC_CMD_SENSOR_PHY_POWER_PORT0 0x2e
+#define MC_CMD_SENSOR_PHY_POWER_PORT0 0x2e
/* enum: Port 1 PHY power switch over-current: bool */
-#define MC_CMD_SENSOR_PHY_POWER_PORT1 0x2f
-/* enum: Mop-up microcontroller reference voltage (millivolts) */
-#define MC_CMD_SENSOR_MUM_VCC 0x30
+#define MC_CMD_SENSOR_PHY_POWER_PORT1 0x2f
+/* enum: Mop-up microcontroller reference voltage: mV */
+#define MC_CMD_SENSOR_MUM_VCC 0x30
/* enum: 0.9v power phase A voltage: mV */
-#define MC_CMD_SENSOR_IN_0V9_A 0x31
+#define MC_CMD_SENSOR_IN_0V9_A 0x31
/* enum: 0.9v power phase A current: mA */
-#define MC_CMD_SENSOR_IN_I0V9_A 0x32
+#define MC_CMD_SENSOR_IN_I0V9_A 0x32
/* enum: 0.9V voltage regulator phase A temperature: degC */
-#define MC_CMD_SENSOR_VREG_0V9_A_TEMP 0x33
+#define MC_CMD_SENSOR_VREG_0V9_A_TEMP 0x33
/* enum: 0.9v power phase B voltage: mV */
-#define MC_CMD_SENSOR_IN_0V9_B 0x34
+#define MC_CMD_SENSOR_IN_0V9_B 0x34
/* enum: 0.9v power phase B current: mA */
-#define MC_CMD_SENSOR_IN_I0V9_B 0x35
+#define MC_CMD_SENSOR_IN_I0V9_B 0x35
/* enum: 0.9V voltage regulator phase B temperature: degC */
-#define MC_CMD_SENSOR_VREG_0V9_B_TEMP 0x36
+#define MC_CMD_SENSOR_VREG_0V9_B_TEMP 0x36
/* enum: CCOM AVREG 1v2 supply (internal ADC): mV */
-#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY 0x37
+#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY 0x37
/* enum: CCOM AVREG 1v2 supply (external ADC): mV */
-#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC 0x38
+#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC 0x38
/* enum: CCOM AVREG 1v8 supply (internal ADC): mV */
-#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY 0x39
+#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY 0x39
/* enum: CCOM AVREG 1v8 supply (external ADC): mV */
-#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC 0x3a
+#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC 0x3a
/* enum: CCOM RTS temperature: degC */
-#define MC_CMD_SENSOR_CONTROLLER_RTS 0x3b
+#define MC_CMD_SENSOR_CONTROLLER_RTS 0x3b
/* enum: Not a sensor: reserved for the next page flag */
-#define MC_CMD_SENSOR_PAGE1_NEXT 0x3f
+#define MC_CMD_SENSOR_PAGE1_NEXT 0x3f
/* enum: controller internal temperature sensor voltage on master core
* (internal ADC): mV
*/
-#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT 0x40
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT 0x40
/* enum: controller internal temperature on master core (internal ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP 0x41
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP 0x41
/* enum: controller internal temperature sensor voltage on master core
* (external ADC): mV
*/
-#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC 0x42
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC 0x42
/* enum: controller internal temperature on master core (external ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC 0x43
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC 0x43
/* enum: controller internal temperature sensor voltage on slave core
 * (internal ADC): mV
 */
-#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT 0x44
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT 0x44
/* enum: controller internal temperature on slave core (internal ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP 0x45
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP 0x45
/* enum: controller internal temperature sensor voltage on slave core
 * (external ADC): mV
 */
-#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC 0x46
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC 0x46
/* enum: controller internal temperature on slave core (external ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC 0x47
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC 0x47
/* enum: Voltage supplied to the SODIMMs from their power supply: mV */
-#define MC_CMD_SENSOR_SODIMM_VOUT 0x49
+#define MC_CMD_SENSOR_SODIMM_VOUT 0x49
/* enum: Temperature of SODIMM 0 (if installed): degC */
-#define MC_CMD_SENSOR_SODIMM_0_TEMP 0x4a
+#define MC_CMD_SENSOR_SODIMM_0_TEMP 0x4a
/* enum: Temperature of SODIMM 1 (if installed): degC */
-#define MC_CMD_SENSOR_SODIMM_1_TEMP 0x4b
+#define MC_CMD_SENSOR_SODIMM_1_TEMP 0x4b
/* enum: Voltage supplied to QSFP #0 from its power supply: mV */
-#define MC_CMD_SENSOR_PHY0_VCC 0x4c
+#define MC_CMD_SENSOR_PHY0_VCC 0x4c
/* enum: Voltage supplied to QSFP #1 from its power supply: mV */
-#define MC_CMD_SENSOR_PHY1_VCC 0x4d
+#define MC_CMD_SENSOR_PHY1_VCC 0x4d
/* enum: Controller die temperature (TDIODE): degC */
-#define MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP 0x4e
+#define MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP 0x4e
/* enum: Board temperature (front): degC */
-#define MC_CMD_SENSOR_BOARD_FRONT_TEMP 0x4f
+#define MC_CMD_SENSOR_BOARD_FRONT_TEMP 0x4f
/* enum: Board temperature (back): degC */
-#define MC_CMD_SENSOR_BOARD_BACK_TEMP 0x50
+#define MC_CMD_SENSOR_BOARD_BACK_TEMP 0x50
/* enum: 1.8v power current: mA */
-#define MC_CMD_SENSOR_IN_I1V8 0x51
+#define MC_CMD_SENSOR_IN_I1V8 0x51
/* enum: 2.5v power current: mA */
-#define MC_CMD_SENSOR_IN_I2V5 0x52
+#define MC_CMD_SENSOR_IN_I2V5 0x52
/* enum: 3.3v power current: mA */
-#define MC_CMD_SENSOR_IN_I3V3 0x53
+#define MC_CMD_SENSOR_IN_I3V3 0x53
/* enum: 12v power current: mA */
-#define MC_CMD_SENSOR_IN_I12V0 0x54
+#define MC_CMD_SENSOR_IN_I12V0 0x54
/* enum: 1.3v power: mV */
-#define MC_CMD_SENSOR_IN_1V3 0x55
+#define MC_CMD_SENSOR_IN_1V3 0x55
/* enum: 1.3v power current: mA */
-#define MC_CMD_SENSOR_IN_I1V3 0x56
+#define MC_CMD_SENSOR_IN_I1V3 0x56
/* enum: Not a sensor: reserved for the next page flag */
-#define MC_CMD_SENSOR_PAGE2_NEXT 0x5f
+#define MC_CMD_SENSOR_PAGE2_NEXT 0x5f
/* MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF */
#define MC_CMD_SENSOR_ENTRY_OFST 4
#define MC_CMD_SENSOR_ENTRY_LEN 8
@@ -5234,17 +5407,17 @@
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_OFST 2
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_LEN 1
/* enum: Ok. */
-#define MC_CMD_SENSOR_STATE_OK 0x0
+#define MC_CMD_SENSOR_STATE_OK 0x0
/* enum: Breached warning threshold. */
-#define MC_CMD_SENSOR_STATE_WARNING 0x1
+#define MC_CMD_SENSOR_STATE_WARNING 0x1
/* enum: Breached fatal threshold. */
-#define MC_CMD_SENSOR_STATE_FATAL 0x2
+#define MC_CMD_SENSOR_STATE_FATAL 0x2
/* enum: Fault with sensor. */
-#define MC_CMD_SENSOR_STATE_BROKEN 0x3
+#define MC_CMD_SENSOR_STATE_BROKEN 0x3
/* enum: Sensor is working but does not currently have a reading. */
-#define MC_CMD_SENSOR_STATE_NO_READING 0x4
+#define MC_CMD_SENSOR_STATE_NO_READING 0x4
/* enum: Sensor initialisation failed. */
-#define MC_CMD_SENSOR_STATE_INIT_FAILED 0x5
+#define MC_CMD_SENSOR_STATE_INIT_FAILED 0x5
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_LBN 16
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_WIDTH 8
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_TYPE_OFST 3
@@ -5327,7 +5500,7 @@
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_OFST 0
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_LEN 4
#define MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_ARP 0x1 /* enum */
-#define MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_NS 0x2 /* enum */
+#define MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_NS 0x2 /* enum */
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_OFST 4
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_LEN 4
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_MINNUM 1
@@ -5416,17 +5589,17 @@
/* enum: Assert using the FAIL_ASSERTION_WITH_USEFUL_VALUES macro. Unless
* you're testing firmware, this is what you want.
*/
-#define MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES 0x0
+#define MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES 0x0
/* enum: Assert using assert(0); */
-#define MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE 0x1
+#define MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE 0x1
/* enum: Deliberately trigger a watchdog */
-#define MC_CMD_TESTASSERT_V2_IN_WATCHDOG 0x2
+#define MC_CMD_TESTASSERT_V2_IN_WATCHDOG 0x2
/* enum: Deliberately trigger a trap by loading from an invalid address */
-#define MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP 0x3
+#define MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP 0x3
/* enum: Deliberately trigger a trap by storing to an invalid address */
-#define MC_CMD_TESTASSERT_V2_IN_STORE_TRAP 0x4
+#define MC_CMD_TESTASSERT_V2_IN_STORE_TRAP 0x4
/* enum: Jump to an invalid address */
-#define MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP 0x5
+#define MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP 0x5
/* MC_CMD_TESTASSERT_V2_OUT msgresponse */
#define MC_CMD_TESTASSERT_V2_OUT_LEN 0
@@ -5969,7 +6142,7 @@
/* MC_CMD_MUM_IN_CMD_LEN 4 */
#define MC_CMD_MUM_IN_LOG_OP_OFST 4
#define MC_CMD_MUM_IN_LOG_OP_LEN 4
-#define MC_CMD_MUM_IN_LOG_OP_UART 0x1 /* enum */
+#define MC_CMD_MUM_IN_LOG_OP_UART 0x1 /* enum */
/* MC_CMD_MUM_IN_LOG_OP_UART msgrequest */
#define MC_CMD_MUM_IN_LOG_OP_UART_LEN 12
@@ -6464,17 +6637,17 @@
#define EVB_PORT_ID_PORT_ID_OFST 0
#define EVB_PORT_ID_PORT_ID_LEN 4
/* enum: An invalid port handle. */
-#define EVB_PORT_ID_NULL 0x0
+#define EVB_PORT_ID_NULL 0x0
/* enum: The port assigned to this function. */
-#define EVB_PORT_ID_ASSIGNED 0x1000000
+#define EVB_PORT_ID_ASSIGNED 0x1000000
/* enum: External network port 0 */
-#define EVB_PORT_ID_MAC0 0x2000000
+#define EVB_PORT_ID_MAC0 0x2000000
/* enum: External network port 1 */
-#define EVB_PORT_ID_MAC1 0x2000001
+#define EVB_PORT_ID_MAC1 0x2000001
/* enum: External network port 2 */
-#define EVB_PORT_ID_MAC2 0x2000002
+#define EVB_PORT_ID_MAC2 0x2000002
/* enum: External network port 3 */
-#define EVB_PORT_ID_MAC3 0x2000003
+#define EVB_PORT_ID_MAC3 0x2000003
#define EVB_PORT_ID_PORT_ID_LBN 0
#define EVB_PORT_ID_PORT_ID_WIDTH 32
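
Throughout this header the _LBN/_WIDTH pairs give a field's least significant
bit number and its width in bits. A generic extraction sketch with an
illustrative helper name:

static inline u32 mcdi_field_get(u32 dword, unsigned int lbn,
                                 unsigned int width)
{
        /* 64-bit mask arithmetic so that width == 32 does not overflow */
        return (u32)((dword >> lbn) & (((u64)1 << width) - 1));
}

/* e.g. mcdi_field_get(tag, EVB_VLAN_TAG_MODE_LBN, EVB_VLAN_TAG_MODE_WIDTH)
 * extracts the 4-bit tag mode defined just below.
 */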
@@ -6486,7 +6659,7 @@
#define EVB_VLAN_TAG_MODE_LBN 12
#define EVB_VLAN_TAG_MODE_WIDTH 4
/* enum: Insert the VLAN. */
-#define EVB_VLAN_TAG_INSERT 0x0
+#define EVB_VLAN_TAG_INSERT 0x0
/* enum: Replace the VLAN if already present. */
#define EVB_VLAN_TAG_REPLACE 0x1
@@ -6515,110 +6688,110 @@
#define NVRAM_PARTITION_TYPE_ID_OFST 0
#define NVRAM_PARTITION_TYPE_ID_LEN 2
/* enum: Primary MC firmware partition */
-#define NVRAM_PARTITION_TYPE_MC_FIRMWARE 0x100
+#define NVRAM_PARTITION_TYPE_MC_FIRMWARE 0x100
/* enum: Secondary MC firmware partition */
-#define NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP 0x200
+#define NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP 0x200
/* enum: Expansion ROM partition */
-#define NVRAM_PARTITION_TYPE_EXPANSION_ROM 0x300
+#define NVRAM_PARTITION_TYPE_EXPANSION_ROM 0x300
/* enum: Static configuration TLV partition */
-#define NVRAM_PARTITION_TYPE_STATIC_CONFIG 0x400
+#define NVRAM_PARTITION_TYPE_STATIC_CONFIG 0x400
/* enum: Dynamic configuration TLV partition */
-#define NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG 0x500
+#define NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG 0x500
/* enum: Expansion ROM configuration data for port 0 */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0 0x600
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0 0x600
/* enum: Synonym for EXPROM_CONFIG_PORT0 as used in pmap files */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG 0x600
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG 0x600
/* enum: Expansion ROM configuration data for port 1 */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1 0x601
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1 0x601
/* enum: Expansion ROM configuration data for port 2 */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2 0x602
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2 0x602
/* enum: Expansion ROM configuration data for port 3 */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3 0x603
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3 0x603
/* enum: Non-volatile log output partition */
-#define NVRAM_PARTITION_TYPE_LOG 0x700
+#define NVRAM_PARTITION_TYPE_LOG 0x700
/* enum: Non-volatile log output of second core on dual-core device */
-#define NVRAM_PARTITION_TYPE_LOG_SLAVE 0x701
+#define NVRAM_PARTITION_TYPE_LOG_SLAVE 0x701
/* enum: Device state dump output partition */
-#define NVRAM_PARTITION_TYPE_DUMP 0x800
+#define NVRAM_PARTITION_TYPE_DUMP 0x800
/* enum: Application license key storage partition */
-#define NVRAM_PARTITION_TYPE_LICENSE 0x900
+#define NVRAM_PARTITION_TYPE_LICENSE 0x900
/* enum: Start of range used for PHY partitions (low 8 bits are the PHY ID) */
-#define NVRAM_PARTITION_TYPE_PHY_MIN 0xa00
+#define NVRAM_PARTITION_TYPE_PHY_MIN 0xa00
/* enum: End of range used for PHY partitions (low 8 bits are the PHY ID) */
-#define NVRAM_PARTITION_TYPE_PHY_MAX 0xaff
+#define NVRAM_PARTITION_TYPE_PHY_MAX 0xaff
/* enum: Primary FPGA partition */
-#define NVRAM_PARTITION_TYPE_FPGA 0xb00
+#define NVRAM_PARTITION_TYPE_FPGA 0xb00
/* enum: Secondary FPGA partition */
-#define NVRAM_PARTITION_TYPE_FPGA_BACKUP 0xb01
+#define NVRAM_PARTITION_TYPE_FPGA_BACKUP 0xb01
/* enum: FC firmware partition */
-#define NVRAM_PARTITION_TYPE_FC_FIRMWARE 0xb02
+#define NVRAM_PARTITION_TYPE_FC_FIRMWARE 0xb02
/* enum: FC License partition */
-#define NVRAM_PARTITION_TYPE_FC_LICENSE 0xb03
+#define NVRAM_PARTITION_TYPE_FC_LICENSE 0xb03
/* enum: Non-volatile log output partition for FC */
-#define NVRAM_PARTITION_TYPE_FC_LOG 0xb04
+#define NVRAM_PARTITION_TYPE_FC_LOG 0xb04
/* enum: MUM firmware partition */
-#define NVRAM_PARTITION_TYPE_MUM_FIRMWARE 0xc00
+#define NVRAM_PARTITION_TYPE_MUM_FIRMWARE 0xc00
/* enum: SUC firmware partition (this is intentionally an alias of
* MUM_FIRMWARE)
*/
-#define NVRAM_PARTITION_TYPE_SUC_FIRMWARE 0xc00
+#define NVRAM_PARTITION_TYPE_SUC_FIRMWARE 0xc00
/* enum: MUM Non-volatile log output partition. */
-#define NVRAM_PARTITION_TYPE_MUM_LOG 0xc01
+#define NVRAM_PARTITION_TYPE_MUM_LOG 0xc01
/* enum: MUM Application table partition. */
-#define NVRAM_PARTITION_TYPE_MUM_APPTABLE 0xc02
+#define NVRAM_PARTITION_TYPE_MUM_APPTABLE 0xc02
/* enum: MUM boot rom partition. */
-#define NVRAM_PARTITION_TYPE_MUM_BOOT_ROM 0xc03
+#define NVRAM_PARTITION_TYPE_MUM_BOOT_ROM 0xc03
/* enum: MUM production signatures & calibration rom partition. */
-#define NVRAM_PARTITION_TYPE_MUM_PROD_ROM 0xc04
+#define NVRAM_PARTITION_TYPE_MUM_PROD_ROM 0xc04
/* enum: MUM user signatures & calibration rom partition. */
-#define NVRAM_PARTITION_TYPE_MUM_USER_ROM 0xc05
+#define NVRAM_PARTITION_TYPE_MUM_USER_ROM 0xc05
/* enum: MUM fuses and lockbits partition. */
-#define NVRAM_PARTITION_TYPE_MUM_FUSELOCK 0xc06
+#define NVRAM_PARTITION_TYPE_MUM_FUSELOCK 0xc06
/* enum: UEFI expansion ROM if separate from PXE */
-#define NVRAM_PARTITION_TYPE_EXPANSION_UEFI 0xd00
+#define NVRAM_PARTITION_TYPE_EXPANSION_UEFI 0xd00
/* enum: Used by the expansion ROM for logging */
-#define NVRAM_PARTITION_TYPE_PXE_LOG 0x1000
+#define NVRAM_PARTITION_TYPE_PXE_LOG 0x1000
/* enum: Used for XIP code of shmbooted images */
-#define NVRAM_PARTITION_TYPE_XIP_SCRATCH 0x1100
+#define NVRAM_PARTITION_TYPE_XIP_SCRATCH 0x1100
/* enum: Spare partition 2 */
-#define NVRAM_PARTITION_TYPE_SPARE_2 0x1200
+#define NVRAM_PARTITION_TYPE_SPARE_2 0x1200
/* enum: Manufacturing partition. Used during manufacture to pass information
* between XJTAG and Manftest.
*/
-#define NVRAM_PARTITION_TYPE_MANUFACTURING 0x1300
+#define NVRAM_PARTITION_TYPE_MANUFACTURING 0x1300
/* enum: Spare partition 4 */
-#define NVRAM_PARTITION_TYPE_SPARE_4 0x1400
+#define NVRAM_PARTITION_TYPE_SPARE_4 0x1400
/* enum: Spare partition 5 */
-#define NVRAM_PARTITION_TYPE_SPARE_5 0x1500
+#define NVRAM_PARTITION_TYPE_SPARE_5 0x1500
/* enum: Partition for reporting MC status. See mc_flash_layout.h
* medford_mc_status_hdr_t for layout on Medford.
*/
-#define NVRAM_PARTITION_TYPE_STATUS 0x1600
+#define NVRAM_PARTITION_TYPE_STATUS 0x1600
/* enum: Spare partition 13 */
-#define NVRAM_PARTITION_TYPE_SPARE_13 0x1700
+#define NVRAM_PARTITION_TYPE_SPARE_13 0x1700
/* enum: Spare partition 14 */
-#define NVRAM_PARTITION_TYPE_SPARE_14 0x1800
+#define NVRAM_PARTITION_TYPE_SPARE_14 0x1800
/* enum: Spare partition 15 */
-#define NVRAM_PARTITION_TYPE_SPARE_15 0x1900
+#define NVRAM_PARTITION_TYPE_SPARE_15 0x1900
/* enum: Spare partition 16 */
-#define NVRAM_PARTITION_TYPE_SPARE_16 0x1a00
+#define NVRAM_PARTITION_TYPE_SPARE_16 0x1a00
/* enum: Factory defaults for dynamic configuration */
-#define NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS 0x1b00
+#define NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS 0x1b00
/* enum: Factory defaults for expansion ROM configuration */
-#define NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS 0x1c00
+#define NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS 0x1c00
/* enum: Field Replaceable Unit inventory information for use on IPMI
* platforms. See SF-119124-PS. The STATIC_CONFIG partition may contain a
* subset of the information stored in this partition.
*/
-#define NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00
+#define NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00
/* enum: Start of reserved value range (firmware may use for any purpose) */
-#define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00
+#define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00
/* enum: End of reserved value range (firmware may use for any purpose) */
-#define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MAX 0xfffd
+#define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MAX 0xfffd
/* enum: Recovery partition map (provided if real map is missing or corrupt) */
-#define NVRAM_PARTITION_TYPE_RECOVERY_MAP 0xfffe
+#define NVRAM_PARTITION_TYPE_RECOVERY_MAP 0xfffe
/* enum: Partition map (real map as stored in flash) */
-#define NVRAM_PARTITION_TYPE_PARTITION_MAP 0xffff
+#define NVRAM_PARTITION_TYPE_PARTITION_MAP 0xffff
#define NVRAM_PARTITION_TYPE_ID_LBN 0
#define NVRAM_PARTITION_TYPE_ID_WIDTH 16
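
Since the PHY partition range above encodes the PHY ID in its low 8 bits, a
partition type value can be decoded as in this brief sketch (the helper is
illustrative only):

static bool nvram_type_is_phy(unsigned int type, unsigned int *phy_id)
{
        if (type < NVRAM_PARTITION_TYPE_PHY_MIN ||
            type > NVRAM_PARTITION_TYPE_PHY_MAX)
                return false;
        *phy_id = type & 0xff;  /* low 8 bits are the PHY ID */
        return true;
}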
@@ -6627,37 +6800,37 @@
#define LICENSED_APP_ID_ID_OFST 0
#define LICENSED_APP_ID_ID_LEN 4
/* enum: OpenOnload */
-#define LICENSED_APP_ID_ONLOAD 0x1
+#define LICENSED_APP_ID_ONLOAD 0x1
/* enum: PTP timestamping */
-#define LICENSED_APP_ID_PTP 0x2
+#define LICENSED_APP_ID_PTP 0x2
/* enum: SolarCapture Pro */
-#define LICENSED_APP_ID_SOLARCAPTURE_PRO 0x4
+#define LICENSED_APP_ID_SOLARCAPTURE_PRO 0x4
/* enum: SolarSecure filter engine */
-#define LICENSED_APP_ID_SOLARSECURE 0x8
+#define LICENSED_APP_ID_SOLARSECURE 0x8
/* enum: Performance monitor */
-#define LICENSED_APP_ID_PERF_MONITOR 0x10
+#define LICENSED_APP_ID_PERF_MONITOR 0x10
/* enum: SolarCapture Live */
-#define LICENSED_APP_ID_SOLARCAPTURE_LIVE 0x20
+#define LICENSED_APP_ID_SOLARCAPTURE_LIVE 0x20
/* enum: Capture SolarSystem */
-#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM 0x40
+#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM 0x40
/* enum: Network Access Control */
-#define LICENSED_APP_ID_NETWORK_ACCESS_CONTROL 0x80
+#define LICENSED_APP_ID_NETWORK_ACCESS_CONTROL 0x80
/* enum: TCP Direct */
-#define LICENSED_APP_ID_TCP_DIRECT 0x100
+#define LICENSED_APP_ID_TCP_DIRECT 0x100
/* enum: Low Latency */
-#define LICENSED_APP_ID_LOW_LATENCY 0x200
+#define LICENSED_APP_ID_LOW_LATENCY 0x200
/* enum: SolarCapture Tap */
-#define LICENSED_APP_ID_SOLARCAPTURE_TAP 0x400
+#define LICENSED_APP_ID_SOLARCAPTURE_TAP 0x400
/* enum: Capture SolarSystem 40G */
#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_40G 0x800
/* enum: Capture SolarSystem 1G */
-#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_1G 0x1000
+#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_1G 0x1000
/* enum: ScaleOut Onload */
-#define LICENSED_APP_ID_SCALEOUT_ONLOAD 0x2000
+#define LICENSED_APP_ID_SCALEOUT_ONLOAD 0x2000
/* enum: SCS Network Analytics Dashboard */
-#define LICENSED_APP_ID_DSHBRD 0x4000
+#define LICENSED_APP_ID_DSHBRD 0x4000
/* enum: SolarCapture Trading Analytics */
-#define LICENSED_APP_ID_SCATRD 0x8000
+#define LICENSED_APP_ID_SCATRD 0x8000
#define LICENSED_APP_ID_ID_LBN 0
#define LICENSED_APP_ID_ID_WIDTH 32
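
The licensed-application IDs above are all distinct powers of two, which
suggests they are used as bits in a licence mask; assuming that, a check is a
simple AND (illustrative helper):

static bool app_licensed(u32 licensed_apps, u32 app_id)
{
        /* e.g. app_licensed(mask, LICENSED_APP_ID_PTP) */
        return (licensed_apps & app_id) != 0;
}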
@@ -6775,23 +6948,23 @@
#define TX_TIMESTAMP_EVENT_TX_EV_TYPE_OFST 3
#define TX_TIMESTAMP_EVENT_TX_EV_TYPE_LEN 1
/* enum: This is a TX completion event, not a timestamp */
-#define TX_TIMESTAMP_EVENT_TX_EV_COMPLETION 0x0
+#define TX_TIMESTAMP_EVENT_TX_EV_COMPLETION 0x0
/* enum: This is a TX completion event for a CTPIO transmit. The event format
* is the same as for TX_EV_COMPLETION.
*/
-#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_COMPLETION 0x11
+#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_COMPLETION 0x11
/* enum: This is the low part of a TX timestamp for a CTPIO transmission. The
* event format is the same as for TX_EV_TSTAMP_LO
*/
-#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_LO 0x12
+#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_LO 0x12
/* enum: This is the high part of a TX timestamp for a CTPIO transmission. The
* event format is the same as for TX_EV_TSTAMP_HI
*/
-#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_HI 0x13
+#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_HI 0x13
/* enum: This is the low part of a TX timestamp event */
-#define TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO 0x51
+#define TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO 0x51
/* enum: This is the high part of a TX timestamp event */
-#define TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI 0x52
+#define TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI 0x52
#define TX_TIMESTAMP_EVENT_TX_EV_TYPE_LBN 24
#define TX_TIMESTAMP_EVENT_TX_EV_TYPE_WIDTH 8
/* upper 16 bits of timestamp data */
@@ -7071,17 +7244,17 @@
#define QUEUE_CRC_MODE_MODE_LBN 0
#define QUEUE_CRC_MODE_MODE_WIDTH 4
/* enum: No CRC. */
-#define QUEUE_CRC_MODE_NONE 0x0
+#define QUEUE_CRC_MODE_NONE 0x0
/* enum: CRC Fibre Channel over Ethernet. */
-#define QUEUE_CRC_MODE_FCOE 0x1
+#define QUEUE_CRC_MODE_FCOE 0x1
/* enum: CRC (digest) iSCSI header only. */
-#define QUEUE_CRC_MODE_ISCSI_HDR 0x2
+#define QUEUE_CRC_MODE_ISCSI_HDR 0x2
/* enum: CRC (digest) iSCSI header and payload. */
-#define QUEUE_CRC_MODE_ISCSI 0x3
+#define QUEUE_CRC_MODE_ISCSI 0x3
/* enum: CRC Fibre Channel over IP over Ethernet. */
-#define QUEUE_CRC_MODE_FCOIPOE 0x4
+#define QUEUE_CRC_MODE_FCOIPOE 0x4
/* enum: CRC MPA. */
-#define QUEUE_CRC_MODE_MPA 0x5
+#define QUEUE_CRC_MODE_MPA 0x5
#define QUEUE_CRC_MODE_SPARE_LBN 4
#define QUEUE_CRC_MODE_SPARE_WIDTH 4
@@ -7157,11 +7330,15 @@
/* Size, in entries */
#define MC_CMD_INIT_RXQ_EXT_IN_SIZE_OFST 0
#define MC_CMD_INIT_RXQ_EXT_IN_SIZE_LEN 4
-/* The EVQ to send events to. This is an index originally specified to INIT_EVQ
+/* The EVQ to send events to. This is an index originally specified to
+ * INIT_EVQ. If DMA_MODE == PACKED_STREAM this must be equal to INSTANCE.
*/
#define MC_CMD_INIT_RXQ_EXT_IN_TARGET_EVQ_OFST 4
#define MC_CMD_INIT_RXQ_EXT_IN_TARGET_EVQ_LEN 4
-/* The value to put in the event data. Check hardware spec. for valid range. */
+/* The value to put in the event data. Check hardware spec. for valid range.
+ * This field is ignored if DMA_MODE == EQUAL_STRIDE_PACKED_STREAM or DMA_MODE
+ * == PACKED_STREAM.
+ */
#define MC_CMD_INIT_RXQ_EXT_IN_LABEL_OFST 8
#define MC_CMD_INIT_RXQ_EXT_IN_LABEL_LEN 4
/* Desired instance. Must be set to a specific instance, which is a function
@@ -7189,18 +7366,25 @@
#define MC_CMD_INIT_RXQ_EXT_IN_DMA_MODE_LBN 10
#define MC_CMD_INIT_RXQ_EXT_IN_DMA_MODE_WIDTH 4
/* enum: One packet per descriptor (for normal networking) */
-#define MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET 0x0
+#define MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET 0x0
/* enum: Pack multiple packets into large descriptors (for SolarCapture) */
-#define MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM 0x1
+#define MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM 0x1
+/* enum: Pack multiple packets into large descriptors using the format designed
+ * to maximise packet rate. This mode uses 1 "bucket" per descriptor with
+ * multiple fixed-size packet buffers within each bucket. For a full
+ * description see SF-119419-TC. This mode is only supported by "dpdk" datapath
+ * firmware.
+ */
+#define MC_CMD_INIT_RXQ_EXT_IN_EQUAL_STRIDE_PACKED_STREAM 0x2
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_SNAPSHOT_MODE_LBN 14
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_SNAPSHOT_MODE_WIDTH 1
#define MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM_BUFF_SIZE_LBN 15
#define MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM_BUFF_SIZE_WIDTH 3
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M 0x0 /* enum */
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_512K 0x1 /* enum */
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_256K 0x2 /* enum */
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_128K 0x3 /* enum */
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_64K 0x4 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M 0x0 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_512K 0x1 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_256K 0x2 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_128K 0x3 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_64K 0x4 /* enum */
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_WANT_OUTER_CLASSES_LBN 18
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_WANT_OUTER_CLASSES_WIDTH 1
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_FORCE_EV_MERGING_LBN 19
@@ -7221,12 +7405,122 @@
#define MC_CMD_INIT_RXQ_EXT_IN_SNAPSHOT_LENGTH_OFST 540
#define MC_CMD_INIT_RXQ_EXT_IN_SNAPSHOT_LENGTH_LEN 4
+/* MC_CMD_INIT_RXQ_V3_IN msgrequest */
+#define MC_CMD_INIT_RXQ_V3_IN_LEN 560
+/* Size, in entries */
+#define MC_CMD_INIT_RXQ_V3_IN_SIZE_OFST 0
+#define MC_CMD_INIT_RXQ_V3_IN_SIZE_LEN 4
+/* The EVQ to send events to. This is an index originally specified to
+ * INIT_EVQ. If DMA_MODE == PACKED_STREAM this must be equal to INSTANCE.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_TARGET_EVQ_OFST 4
+#define MC_CMD_INIT_RXQ_V3_IN_TARGET_EVQ_LEN 4
+/* The value to put in the event data. Check hardware spec. for valid range.
+ * This field is ignored if DMA_MODE == EQUAL_STRIDE_PACKED_STREAM or DMA_MODE
+ * == PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_LABEL_OFST 8
+#define MC_CMD_INIT_RXQ_V3_IN_LABEL_LEN 4
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_INSTANCE_OFST 12
+#define MC_CMD_INIT_RXQ_V3_IN_INSTANCE_LEN 4
+/* There will be more flags here. */
+#define MC_CMD_INIT_RXQ_V3_IN_FLAGS_OFST 16
+#define MC_CMD_INIT_RXQ_V3_IN_FLAGS_LEN 4
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_BUFF_MODE_LBN 0
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_BUFF_MODE_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_HDR_SPLIT_LBN 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_HDR_SPLIT_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_TIMESTAMP_LBN 2
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_TIMESTAMP_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_CRC_MODE_LBN 3
+#define MC_CMD_INIT_RXQ_V3_IN_CRC_MODE_WIDTH 4
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_CHAIN_LBN 7
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_CHAIN_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_PREFIX_LBN 8
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_PREFIX_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_DISABLE_SCATTER_LBN 9
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_DISABLE_SCATTER_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_MODE_LBN 10
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_MODE_WIDTH 4
+/* enum: One packet per descriptor (for normal networking) */
+#define MC_CMD_INIT_RXQ_V3_IN_SINGLE_PACKET 0x0
+/* enum: Pack multiple packets into large descriptors (for SolarCapture) */
+#define MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM 0x1
+/* enum: Pack multiple packets into large descriptors using the format designed
+ * to maximise packet rate. This mode uses 1 "bucket" per descriptor with
+ * multiple fixed-size packet buffers within each bucket. For a full
+ * description see SF-119419-TC. This mode is only supported by "dpdk" datapath
+ * firmware.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_EQUAL_STRIDE_PACKED_STREAM 0x2
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_SNAPSHOT_MODE_LBN 14
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_SNAPSHOT_MODE_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM_BUFF_SIZE_LBN 15
+#define MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM_BUFF_SIZE_WIDTH 3
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_1M 0x0 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_512K 0x1 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_256K 0x2 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_128K 0x3 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_64K 0x4 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_WANT_OUTER_CLASSES_LBN 18
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_WANT_OUTER_CLASSES_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_FORCE_EV_MERGING_LBN 19
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_FORCE_EV_MERGING_WIDTH 1
+/* Owner ID to use if in buffer mode (zero if physical) */
+#define MC_CMD_INIT_RXQ_V3_IN_OWNER_ID_OFST 20
+#define MC_CMD_INIT_RXQ_V3_IN_OWNER_ID_LEN 4
+/* The port ID associated with the v-adaptor which should contain this DMAQ. */
+#define MC_CMD_INIT_RXQ_V3_IN_PORT_ID_OFST 24
+#define MC_CMD_INIT_RXQ_V3_IN_PORT_ID_LEN 4
+/* 64-bit address of 4k of 4k-aligned host memory buffer */
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_OFST 28
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_LEN 8
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_LO_OFST 28
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_HI_OFST 32
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_NUM 64
+/* Maximum length of packet to receive, if SNAPSHOT_MODE flag is set */
+#define MC_CMD_INIT_RXQ_V3_IN_SNAPSHOT_LENGTH_OFST 540
+#define MC_CMD_INIT_RXQ_V3_IN_SNAPSHOT_LENGTH_LEN 4
+/* The number of packet buffers that will be contained within each
+ * EQUAL_STRIDE_PACKED_STREAM format bucket supplied by the driver. This field
+ * is ignored unless DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET_OFST 544
+#define MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET_LEN 4
+/* The length in bytes of the area in each packet buffer that can be written to
+ * by the adapter. This is used to store the packet prefix and the packet
+ * payload. This length does not include any end padding added by the driver.
+ * This field is ignored unless DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_ES_MAX_DMA_LEN_OFST 548
+#define MC_CMD_INIT_RXQ_V3_IN_ES_MAX_DMA_LEN_LEN 4
+/* The length in bytes of a single packet buffer within a
+ * EQUAL_STRIDE_PACKED_STREAM format bucket. This field is ignored unless
+ * DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_STRIDE_OFST 552
+#define MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_STRIDE_LEN 4
+/* The maximum time in nanoseconds that the datapath will be backpressured if
+ * there are no RX descriptors available. If the timeout is reached and there
+ * are still no descriptors then the packet will be dropped. A timeout of 0
+ * means the datapath will never be blocked. This field is ignored unless
+ * DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT_OFST 556
+#define MC_CMD_INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT_LEN 4
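
Putting the four ES_* fields together: in EQUAL_STRIDE_PACKED_STREAM mode
each descriptor supplies one bucket holding ES_PACKET_BUFFERS_PER_BUCKET
buffers spaced ES_PACKET_STRIDE bytes apart, of which the adapter may write
at most ES_MAX_DMA_LEN bytes each. A sketch of the implied address
arithmetic, with illustrative names:

/* Address of packet buffer 'i' within a bucket, 0 <= i < buffers-per-bucket;
 * the adapter writes the prefix and payload into at most max_dma_len bytes.
 */
static dma_addr_t es_packet_buffer(dma_addr_t bucket_base, u32 packet_stride,
                                   unsigned int i)
{
        return bucket_base + (dma_addr_t)i * packet_stride;
}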
+
/* MC_CMD_INIT_RXQ_OUT msgresponse */
#define MC_CMD_INIT_RXQ_OUT_LEN 0
/* MC_CMD_INIT_RXQ_EXT_OUT msgresponse */
#define MC_CMD_INIT_RXQ_EXT_OUT_LEN 0
+/* MC_CMD_INIT_RXQ_V3_OUT msgresponse */
+#define MC_CMD_INIT_RXQ_V3_OUT_LEN 0
+
/***********************************/
/* MC_CMD_INIT_TXQ
@@ -7466,7 +7760,7 @@
#define MC_CMD_PROXY_CMD_IN_TARGET_PF_WIDTH 16
#define MC_CMD_PROXY_CMD_IN_TARGET_VF_LBN 16
#define MC_CMD_PROXY_CMD_IN_TARGET_VF_WIDTH 16
-#define MC_CMD_PROXY_CMD_IN_VF_NULL 0xffff /* enum */
+#define MC_CMD_PROXY_CMD_IN_VF_NULL 0xffff /* enum */
/* MC_CMD_PROXY_CMD_OUT msgresponse */
#define MC_CMD_PROXY_CMD_OUT_LEN 0
@@ -7479,7 +7773,7 @@
#define MC_PROXY_STATUS_BUFFER_HANDLE_OFST 0
#define MC_PROXY_STATUS_BUFFER_HANDLE_LEN 4
/* enum: An invalid handle. */
-#define MC_PROXY_STATUS_BUFFER_HANDLE_INVALID 0x0
+#define MC_PROXY_STATUS_BUFFER_HANDLE_INVALID 0x0
#define MC_PROXY_STATUS_BUFFER_HANDLE_LBN 0
#define MC_PROXY_STATUS_BUFFER_HANDLE_WIDTH 32
/* The requesting physical function number */
@@ -7748,17 +8042,17 @@
#define MC_CMD_FILTER_OP_IN_OP_OFST 0
#define MC_CMD_FILTER_OP_IN_OP_LEN 4
/* enum: single-recipient filter insert */
-#define MC_CMD_FILTER_OP_IN_OP_INSERT 0x0
+#define MC_CMD_FILTER_OP_IN_OP_INSERT 0x0
/* enum: single-recipient filter remove */
-#define MC_CMD_FILTER_OP_IN_OP_REMOVE 0x1
+#define MC_CMD_FILTER_OP_IN_OP_REMOVE 0x1
/* enum: multi-recipient filter subscribe */
-#define MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE 0x2
+#define MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE 0x2
/* enum: multi-recipient filter unsubscribe */
-#define MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE 0x3
+#define MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE 0x3
/* enum: replace one recipient with another (warning - the filter handle may
* change)
*/
-#define MC_CMD_FILTER_OP_IN_OP_REPLACE 0x4
+#define MC_CMD_FILTER_OP_IN_OP_REPLACE 0x4
/* filter handle (for remove / unsubscribe operations) */
#define MC_CMD_FILTER_OP_IN_HANDLE_OFST 4
#define MC_CMD_FILTER_OP_IN_HANDLE_LEN 8
@@ -7803,15 +8097,15 @@
#define MC_CMD_FILTER_OP_IN_RX_DEST_OFST 20
#define MC_CMD_FILTER_OP_IN_RX_DEST_LEN 4
/* enum: drop packets */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_DROP 0x0
+#define MC_CMD_FILTER_OP_IN_RX_DEST_DROP 0x0
/* enum: receive to host */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_HOST 0x1
+#define MC_CMD_FILTER_OP_IN_RX_DEST_HOST 0x1
/* enum: receive to MC */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_MC 0x2
+#define MC_CMD_FILTER_OP_IN_RX_DEST_MC 0x2
/* enum: loop back to TXDP 0 */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_TX0 0x3
+#define MC_CMD_FILTER_OP_IN_RX_DEST_TX0 0x3
/* enum: loop back to TXDP 1 */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_TX1 0x4
+#define MC_CMD_FILTER_OP_IN_RX_DEST_TX1 0x4
/* receive queue handle (for multiple queue modes, this is the base queue) */
#define MC_CMD_FILTER_OP_IN_RX_QUEUE_OFST 24
#define MC_CMD_FILTER_OP_IN_RX_QUEUE_LEN 4
@@ -7819,14 +8113,14 @@
#define MC_CMD_FILTER_OP_IN_RX_MODE_OFST 28
#define MC_CMD_FILTER_OP_IN_RX_MODE_LEN 4
/* enum: receive to just the specified queue */
-#define MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE 0x0
+#define MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE 0x0
/* enum: receive to multiple queues using RSS context */
-#define MC_CMD_FILTER_OP_IN_RX_MODE_RSS 0x1
+#define MC_CMD_FILTER_OP_IN_RX_MODE_RSS 0x1
/* enum: receive to multiple queues using .1p mapping */
-#define MC_CMD_FILTER_OP_IN_RX_MODE_DOT1P_MAPPING 0x2
+#define MC_CMD_FILTER_OP_IN_RX_MODE_DOT1P_MAPPING 0x2
/* enum: install a filter entry that will never match; for test purposes only
*/
-#define MC_CMD_FILTER_OP_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
+#define MC_CMD_FILTER_OP_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
/* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
* RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
* MC_CMD_DOT1P_MAPPING_ALLOC.
@@ -7843,7 +8137,7 @@
#define MC_CMD_FILTER_OP_IN_TX_DEST_OFST 40
#define MC_CMD_FILTER_OP_IN_TX_DEST_LEN 4
/* enum: request default behaviour (based on filter type) */
-#define MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT 0xffffffff
+#define MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT 0xffffffff
#define MC_CMD_FILTER_OP_IN_TX_DEST_MAC_LBN 0
#define MC_CMD_FILTER_OP_IN_TX_DEST_MAC_WIDTH 1
#define MC_CMD_FILTER_OP_IN_TX_DEST_PM_LBN 1
@@ -7971,15 +8265,15 @@
#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_OFST 20
#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_LEN 4
/* enum: drop packets */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_DROP 0x0
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_DROP 0x0
/* enum: receive to host */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST 0x1
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST 0x1
/* enum: receive to MC */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_MC 0x2
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_MC 0x2
/* enum: loop back to TXDP 0 */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX0 0x3
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX0 0x3
/* enum: loop back to TXDP 1 */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX1 0x4
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX1 0x4
/* receive queue handle (for multiple queue modes, this is the base queue) */
#define MC_CMD_FILTER_OP_EXT_IN_RX_QUEUE_OFST 24
#define MC_CMD_FILTER_OP_EXT_IN_RX_QUEUE_LEN 4
@@ -7987,14 +8281,14 @@
#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_OFST 28
#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_LEN 4
/* enum: receive to just the specified queue */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE 0x0
+#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE 0x0
/* enum: receive to multiple queues using RSS context */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS 0x1
+#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS 0x1
/* enum: receive to multiple queues using .1p mapping */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_DOT1P_MAPPING 0x2
+#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_DOT1P_MAPPING 0x2
/* enum: install a filter entry that will never match; for test purposes only
*/
-#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
+#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
/* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
* RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
* MC_CMD_DOT1P_MAPPING_ALLOC.
@@ -8011,7 +8305,7 @@
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_OFST 40
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_LEN 4
/* enum: request default behaviour (based on filter type) */
-#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT 0xffffffff
+#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT 0xffffffff
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_MAC_LBN 0
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_MAC_WIDTH 1
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_PM_LBN 1
@@ -8054,17 +8348,17 @@
#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_LBN 24
#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_WIDTH 8
/* enum: Match VXLAN traffic with this VNI */
-#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN 0x0
+#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN 0x0
/* enum: Match Geneve traffic with this VNI */
-#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE 0x1
+#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE 0x1
/* enum: Reserved for experimental development use */
-#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_EXPERIMENTAL 0xfe
+#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_EXPERIMENTAL 0xfe
#define MC_CMD_FILTER_OP_EXT_IN_VSID_VALUE_LBN 0
#define MC_CMD_FILTER_OP_EXT_IN_VSID_VALUE_WIDTH 24
#define MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_LBN 24
#define MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_WIDTH 8
/* enum: Match NVGRE traffic with this VSID */
-#define MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE 0x0
+#define MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE 0x0
/* source IP address to match (as bytes in network order; set last 12 bytes to
* 0 for IPv4 address)
*/
@@ -8131,6 +8425,273 @@
#define MC_CMD_FILTER_OP_EXT_IN_IFRM_DST_IP_OFST 156
#define MC_CMD_FILTER_OP_EXT_IN_IFRM_DST_IP_LEN 16
+/* MC_CMD_FILTER_OP_V3_IN msgrequest: FILTER_OP extension to support additional
+ * filter actions for Intel's DPDK (Data Plane Development Kit, dpdk.org) via
+ * its rte_flow API. This extension is only useful with the sfc_efx driver
+ * included as part of DPDK, used in conjunction with the dpdk datapath
+ * firmware variant.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_LEN 180
+/* identifies the type of operation requested */
+#define MC_CMD_FILTER_OP_V3_IN_OP_OFST 0
+#define MC_CMD_FILTER_OP_V3_IN_OP_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_FILTER_OP_IN/OP */
+/* filter handle (for remove / unsubscribe operations) */
+#define MC_CMD_FILTER_OP_V3_IN_HANDLE_OFST 4
+#define MC_CMD_FILTER_OP_V3_IN_HANDLE_LEN 8
+#define MC_CMD_FILTER_OP_V3_IN_HANDLE_LO_OFST 4
+#define MC_CMD_FILTER_OP_V3_IN_HANDLE_HI_OFST 8
+/* The port ID associated with the v-adaptor which should contain this filter.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_PORT_ID_OFST 12
+#define MC_CMD_FILTER_OP_V3_IN_PORT_ID_LEN 4
+/* fields to include in match criteria */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_OFST 16
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_LEN 4
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_IP_LBN 0
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_IP_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_IP_LBN 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_IP_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_MAC_LBN 2
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_PORT_LBN 3
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_PORT_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_MAC_LBN 4
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_PORT_LBN 5
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_PORT_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ETHER_TYPE_LBN 6
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ETHER_TYPE_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_INNER_VLAN_LBN 7
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_INNER_VLAN_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_OUTER_VLAN_LBN 8
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_OUTER_VLAN_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IP_PROTO_LBN 9
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IP_PROTO_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_FWDEF0_LBN 10
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_FWDEF0_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_VNI_OR_VSID_LBN 11
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_VNI_OR_VSID_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_IP_LBN 12
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_IP_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_IP_LBN 13
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_IP_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_MAC_LBN 14
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_PORT_LBN 15
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_PORT_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_MAC_LBN 16
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_PORT_LBN 17
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_PORT_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_ETHER_TYPE_LBN 18
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_ETHER_TYPE_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_INNER_VLAN_LBN 19
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_INNER_VLAN_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_OUTER_VLAN_LBN 20
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_OUTER_VLAN_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_IP_PROTO_LBN 21
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_IP_PROTO_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF0_LBN 22
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF0_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF1_LBN 23
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF1_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN 24
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN 25
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_MCAST_DST_LBN 30
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_MCAST_DST_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_UCAST_DST_LBN 31
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_UCAST_DST_WIDTH 1
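Each _LBN/_WIDTH pair above names one bit of the 32-bit MATCH_FIELDS dword. A hedged sketch of composing the mask for a filter that matches destination IP and destination port; the V3_MATCH_BIT() macro is illustrative, not from this header:

#include <stdint.h>

#define V3_MATCH_BIT(name) \
        (((1u << MC_CMD_FILTER_OP_V3_IN_MATCH_##name##_WIDTH) - 1) << \
         MC_CMD_FILTER_OP_V3_IN_MATCH_##name##_LBN)

static uint32_t build_match_fields(void)
{
        /* Match on outer destination IP and destination port only. */
        return V3_MATCH_BIT(DST_IP) | V3_MATCH_BIT(DST_PORT);
}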
+/* receive destination */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_OFST 20
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_LEN 4
+/* enum: drop packets */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_DROP 0x0
+/* enum: receive to host */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_HOST 0x1
+/* enum: receive to MC */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_MC 0x2
+/* enum: loop back to TXDP 0 */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_TX0 0x3
+/* enum: loop back to TXDP 1 */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_TX1 0x4
+/* receive queue handle (for multiple queue modes, this is the base queue) */
+#define MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_OFST 24
+#define MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_LEN 4
+/* receive mode */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_OFST 28
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_LEN 4
+/* enum: receive to just the specified queue */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_SIMPLE 0x0
+/* enum: receive to multiple queues using RSS context */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_RSS 0x1
+/* enum: receive to multiple queues using .1p mapping */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_DOT1P_MAPPING 0x2
+/* enum: install a filter entry that will never match; for test purposes only
+ */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
+/* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
+ * RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
+ * MC_CMD_DOT1P_MAPPING_ALLOC.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_RX_CONTEXT_OFST 32
+#define MC_CMD_FILTER_OP_V3_IN_RX_CONTEXT_LEN 4
+/* transmit domain (reserved; set to 0) */
+#define MC_CMD_FILTER_OP_V3_IN_TX_DOMAIN_OFST 36
+#define MC_CMD_FILTER_OP_V3_IN_TX_DOMAIN_LEN 4
+/* transmit destination (either set the MAC and/or PM bits for explicit
+ * control, or set this field to TX_DEST_DEFAULT for sensible default
+ * behaviour)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_OFST 40
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_LEN 4
+/* enum: request default behaviour (based on filter type) */
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_DEFAULT 0xffffffff
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_MAC_LBN 0
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_PM_LBN 1
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_PM_WIDTH 1
+/* source MAC address to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_SRC_MAC_OFST 44
+#define MC_CMD_FILTER_OP_V3_IN_SRC_MAC_LEN 6
+/* source port to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_SRC_PORT_OFST 50
+#define MC_CMD_FILTER_OP_V3_IN_SRC_PORT_LEN 2
+/* destination MAC address to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_DST_MAC_OFST 52
+#define MC_CMD_FILTER_OP_V3_IN_DST_MAC_LEN 6
+/* destination port to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_DST_PORT_OFST 58
+#define MC_CMD_FILTER_OP_V3_IN_DST_PORT_LEN 2
+/* Ethernet type to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_ETHER_TYPE_OFST 60
+#define MC_CMD_FILTER_OP_V3_IN_ETHER_TYPE_LEN 2
+/* Inner VLAN tag to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_INNER_VLAN_OFST 62
+#define MC_CMD_FILTER_OP_V3_IN_INNER_VLAN_LEN 2
+/* Outer VLAN tag to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_OUTER_VLAN_OFST 64
+#define MC_CMD_FILTER_OP_V3_IN_OUTER_VLAN_LEN 2
+/* IP protocol to match (in low byte; set high byte to 0) */
+#define MC_CMD_FILTER_OP_V3_IN_IP_PROTO_OFST 66
+#define MC_CMD_FILTER_OP_V3_IN_IP_PROTO_LEN 2
+/* Firmware defined register 0 to match (reserved; set to 0) */
+#define MC_CMD_FILTER_OP_V3_IN_FWDEF0_OFST 68
+#define MC_CMD_FILTER_OP_V3_IN_FWDEF0_LEN 4
+/* VNI (for VXLAN/Geneve, when IP protocol is UDP) or VSID (for NVGRE, when IP
+ * protocol is GRE) to match (as bytes in network order; set last byte to 0 for
+ * VXLAN/NVGRE, or 1 for Geneve)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_OFST 72
+#define MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_LEN 4
+#define MC_CMD_FILTER_OP_V3_IN_VNI_VALUE_LBN 0
+#define MC_CMD_FILTER_OP_V3_IN_VNI_VALUE_WIDTH 24
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_LBN 24
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_WIDTH 8
+/* enum: Match VXLAN traffic with this VNI */
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_VXLAN 0x0
+/* enum: Match Geneve traffic with this VNI */
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_GENEVE 0x1
+/* enum: Reserved for experimental development use */
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_EXPERIMENTAL 0xfe
+#define MC_CMD_FILTER_OP_V3_IN_VSID_VALUE_LBN 0
+#define MC_CMD_FILTER_OP_V3_IN_VSID_VALUE_WIDTH 24
+#define MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_LBN 24
+#define MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_WIDTH 8
+/* enum: Match NVGRE traffic with this VSID */
+#define MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_NVGRE 0x0
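Per the comment above, VNI_OR_VSID is stored as three ID bytes in network order followed by a type byte. A hedged sketch for a Geneve VNI; the helper is illustrative only:

#include <stdint.h>

/* Store a Geneve VNI: three VNI bytes in network order, last byte = type
 * (1 for Geneve, per the field comment).
 */
static void set_vni_geneve(uint8_t *req, uint32_t vni)
{
        uint8_t *p = req + MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_OFST;

        p[0] = (uint8_t)(vni >> 16);
        p[1] = (uint8_t)(vni >> 8);
        p[2] = (uint8_t)vni;
        p[3] = MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_GENEVE;
}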
+/* source IP address to match (as bytes in network order; set last 12 bytes to
+ * 0 for IPv4 address)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_SRC_IP_OFST 76
+#define MC_CMD_FILTER_OP_V3_IN_SRC_IP_LEN 16
+/* destination IP address to match (as bytes in network order; set last 12
+ * bytes to 0 for IPv4 address)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_DST_IP_OFST 92
+#define MC_CMD_FILTER_OP_V3_IN_DST_IP_LEN 16
+/* VXLAN/NVGRE inner frame source MAC address to match (as bytes in network
+ * order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_MAC_OFST 108
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_MAC_LEN 6
+/* VXLAN/NVGRE inner frame source port to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_PORT_OFST 114
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_PORT_LEN 2
+/* VXLAN/NVGRE inner frame destination MAC address to match (as bytes in
+ * network order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_MAC_OFST 116
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_MAC_LEN 6
+/* VXLAN/NVGRE inner frame destination port to match (as bytes in network
+ * order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_PORT_OFST 122
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_PORT_LEN 2
+/* VXLAN/NVGRE inner frame Ethernet type to match (as bytes in network order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_ETHER_TYPE_OFST 124
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_ETHER_TYPE_LEN 2
+/* VXLAN/NVGRE inner frame Inner VLAN tag to match (as bytes in network order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_INNER_VLAN_OFST 126
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_INNER_VLAN_LEN 2
+/* VXLAN/NVGRE inner frame Outer VLAN tag to match (as bytes in network order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_OUTER_VLAN_OFST 128
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_OUTER_VLAN_LEN 2
+/* VXLAN/NVGRE inner frame IP protocol to match (in low byte; set high byte to
+ * 0)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_IP_PROTO_OFST 130
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_IP_PROTO_LEN 2
+/* VXLAN/NVGRE inner frame Firmware defined register 0 to match (reserved; set
+ * to 0)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF0_OFST 132
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF0_LEN 4
+/* VXLAN/NVGRE inner frame Firmware defined register 1 to match (reserved; set
+ * to 0)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF1_OFST 136
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF1_LEN 4
+/* VXLAN/NVGRE inner frame source IP address to match (as bytes in network
+ * order; set last 12 bytes to 0 for IPv4 address)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_IP_OFST 140
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_IP_LEN 16
+/* VXLAN/NVGRE inner frame destination IP address to match (as bytes in network
+ * order; set last 12 bytes to 0 for IPv4 address)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_IP_OFST 156
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_IP_LEN 16
+/* Set an action for all packets matching this filter. The DPDK driver and dpdk
+ * f/w variant use their own specific delivery structures, which are documented
+ * in the DPDK Firmware Driver Interface (SF-119419-TC). Requesting anything
+ * other than MATCH_ACTION_NONE when the NIC is running another f/w variant
+ * will cause the filter insertion to fail with ENOTSUP.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_OFST 172
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_LEN 4
+/* enum: do nothing extra */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_NONE 0x0
+/* enum: Set the match flag in the packet prefix for packets matching the
+ * filter (only with dpdk firmware, otherwise fails with ENOTSUP). Used to
+ * support the DPDK rte_flow "FLAG" action.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_FLAG 0x1
+/* enum: Insert MATCH_MARK_VALUE into the packet prefix for packets matching
+ * the filter (only with dpdk firmware, otherwise fails with ENOTSUP). Used to
+ * support the DPDK rte_flow "MARK" action.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_MARK 0x2
+/* the mark value for MATCH_ACTION_MARK */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_OFST 176
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_LEN 4
+
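Tying the structure together, a hedged sketch of an insert request using the new MARK action; mcdi_set_dword() is the same illustrative little-endian helper as above, and a real driver would use its own MCDI accessors and transport:

#include <stdint.h>
#include <string.h>

static void mcdi_set_dword(uint8_t *buf, unsigned int ofst, uint32_t val)
{
        buf[ofst + 0] = (uint8_t)val;
        buf[ofst + 1] = (uint8_t)(val >> 8);
        buf[ofst + 2] = (uint8_t)(val >> 16);
        buf[ofst + 3] = (uint8_t)(val >> 24);
}

static void build_filter_op_v3_mark(uint8_t req[MC_CMD_FILTER_OP_V3_IN_LEN],
                                    uint32_t port_id, uint32_t rx_queue,
                                    uint32_t match_fields, uint32_t mark)
{
        memset(req, 0, MC_CMD_FILTER_OP_V3_IN_LEN);
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_OP_OFST,
                       MC_CMD_FILTER_OP_IN_OP_INSERT);
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_PORT_ID_OFST, port_id);
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_OFST,
                       match_fields);
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_RX_DEST_OFST,
                       MC_CMD_FILTER_OP_V3_IN_RX_DEST_HOST);
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_OFST, rx_queue);
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_RX_MODE_OFST,
                       MC_CMD_FILTER_OP_V3_IN_RX_MODE_SIMPLE);
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_TX_DEST_OFST,
                       MC_CMD_FILTER_OP_V3_IN_TX_DEST_DEFAULT);
        /* Anything other than MATCH_ACTION_NONE requires the dpdk f/w
         * variant; on other variants insertion fails with ENOTSUP.
         */
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_OFST,
                       MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_MARK);
        mcdi_set_dword(req, MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_OFST,
                       mark);
}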
/* MC_CMD_FILTER_OP_OUT msgresponse */
#define MC_CMD_FILTER_OP_OUT_LEN 12
/* identifies the type of operation requested */
@@ -8147,9 +8708,9 @@
#define MC_CMD_FILTER_OP_OUT_HANDLE_LO_OFST 4
#define MC_CMD_FILTER_OP_OUT_HANDLE_HI_OFST 8
/* enum: guaranteed invalid filter handle (low 32 bits) */
-#define MC_CMD_FILTER_OP_OUT_HANDLE_LO_INVALID 0xffffffff
+#define MC_CMD_FILTER_OP_OUT_HANDLE_LO_INVALID 0xffffffff
/* enum: guaranteed invalid filter handle (high 32 bits) */
-#define MC_CMD_FILTER_OP_OUT_HANDLE_HI_INVALID 0xffffffff
+#define MC_CMD_FILTER_OP_OUT_HANDLE_HI_INVALID 0xffffffff
/* MC_CMD_FILTER_OP_EXT_OUT msgresponse */
#define MC_CMD_FILTER_OP_EXT_OUT_LEN 12
@@ -8184,20 +8745,20 @@
#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_OFST 0
#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_LEN 4
/* enum: read the list of supported RX filter matches */
-#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES 0x1
+#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES 0x1
/* enum: read flags indicating restrictions on filter insertion for the calling
* client
*/
-#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_RESTRICTIONS 0x2
+#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_RESTRICTIONS 0x2
/* enum: read properties relating to security rules (Medford-only; for use by
* SolarSecure apps, not directly by drivers. See SF-114946-SW.)
*/
-#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SECURITY_RULE_INFO 0x3
+#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SECURITY_RULE_INFO 0x3
/* enum: read the list of supported RX filter matches for VXLAN/NVGRE
* encapsulated frames, which follow a different match sequence to normal
* frames (Medford only)
*/
-#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES 0x4
+#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES 0x4
/* MC_CMD_GET_PARSER_DISP_INFO_OUT msgresponse */
#define MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMIN 8
@@ -8238,7 +8799,9 @@
* Direct read/write of parser-dispatcher state (DICPUs and LUE) for debugging.
* Please note that this interface is only of use to debug tools which have
* knowledge of firmware and hardware data structures; nothing here is intended
- * for use by normal driver code.
+ * for use by normal driver code. Note that although this command is in the
+ * Admin privilege group, on tamperproof adapters only read operations are
+ * permitted.
*/
#define MC_CMD_PARSER_DISP_RW 0xe5
@@ -8250,32 +8813,36 @@
#define MC_CMD_PARSER_DISP_RW_IN_TARGET_OFST 0
#define MC_CMD_PARSER_DISP_RW_IN_TARGET_LEN 4
/* enum: RX dispatcher CPU */
-#define MC_CMD_PARSER_DISP_RW_IN_RX_DICPU 0x0
+#define MC_CMD_PARSER_DISP_RW_IN_RX_DICPU 0x0
/* enum: TX dispatcher CPU */
-#define MC_CMD_PARSER_DISP_RW_IN_TX_DICPU 0x1
+#define MC_CMD_PARSER_DISP_RW_IN_TX_DICPU 0x1
/* enum: Lookup engine (with original metadata format). Deprecated; used only
* by cmdclient as a fallback for very old Huntington firmware, and not
* supported in firmware beyond v6.4.0.1005. Use LUE_VERSIONED_METADATA
* instead.
*/
-#define MC_CMD_PARSER_DISP_RW_IN_LUE 0x2
+#define MC_CMD_PARSER_DISP_RW_IN_LUE 0x2
/* enum: Lookup engine (with requested metadata format) */
-#define MC_CMD_PARSER_DISP_RW_IN_LUE_VERSIONED_METADATA 0x3
+#define MC_CMD_PARSER_DISP_RW_IN_LUE_VERSIONED_METADATA 0x3
/* enum: RX0 dispatcher CPU (alias for RX_DICPU; Medford has 2 RX DICPUs) */
-#define MC_CMD_PARSER_DISP_RW_IN_RX0_DICPU 0x0
+#define MC_CMD_PARSER_DISP_RW_IN_RX0_DICPU 0x0
/* enum: RX1 dispatcher CPU (only valid for Medford) */
-#define MC_CMD_PARSER_DISP_RW_IN_RX1_DICPU 0x4
+#define MC_CMD_PARSER_DISP_RW_IN_RX1_DICPU 0x4
/* enum: Miscellaneous other state (only valid for Medford) */
-#define MC_CMD_PARSER_DISP_RW_IN_MISC_STATE 0x5
+#define MC_CMD_PARSER_DISP_RW_IN_MISC_STATE 0x5
/* identifies the type of operation requested */
#define MC_CMD_PARSER_DISP_RW_IN_OP_OFST 4
#define MC_CMD_PARSER_DISP_RW_IN_OP_LEN 4
/* enum: Read a word of DICPU DMEM or a LUE entry */
-#define MC_CMD_PARSER_DISP_RW_IN_READ 0x0
-/* enum: Write a word of DICPU DMEM or a LUE entry. */
-#define MC_CMD_PARSER_DISP_RW_IN_WRITE 0x1
-/* enum: Read-modify-write a word of DICPU DMEM (not valid for LUE). */
-#define MC_CMD_PARSER_DISP_RW_IN_RMW 0x2
+#define MC_CMD_PARSER_DISP_RW_IN_READ 0x0
+/* enum: Write a word of DICPU DMEM or a LUE entry. Not permitted on
+ * tamperproof adapters.
+ */
+#define MC_CMD_PARSER_DISP_RW_IN_WRITE 0x1
+/* enum: Read-modify-write a word of DICPU DMEM (not valid for LUE). Not
+ * permitted on tamperproof adapters.
+ */
+#define MC_CMD_PARSER_DISP_RW_IN_RMW 0x2
/* data memory address (DICPU targets) or LUE index (LUE targets) */
#define MC_CMD_PARSER_DISP_RW_IN_ADDRESS_OFST 8
#define MC_CMD_PARSER_DISP_RW_IN_ADDRESS_LEN 4
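A hedged sketch of a debug DMEM read request, the only PARSER_DISP_RW operation permitted on tamperproof adapters; mcdi_set_dword() is again an illustrative little-endian helper, not part of this header:

#include <stdint.h>

static void mcdi_set_dword(uint8_t *buf, unsigned int ofst, uint32_t val)
{
        buf[ofst + 0] = (uint8_t)val;
        buf[ofst + 1] = (uint8_t)(val >> 8);
        buf[ofst + 2] = (uint8_t)(val >> 16);
        buf[ofst + 3] = (uint8_t)(val >> 24);
}

/* Read one word of RX dispatcher CPU data memory at dmem_addr. */
static void build_parser_disp_read(uint8_t *req, uint32_t dmem_addr)
{
        mcdi_set_dword(req, MC_CMD_PARSER_DISP_RW_IN_TARGET_OFST,
                       MC_CMD_PARSER_DISP_RW_IN_RX_DICPU);
        mcdi_set_dword(req, MC_CMD_PARSER_DISP_RW_IN_OP_OFST,
                       MC_CMD_PARSER_DISP_RW_IN_READ);
        mcdi_set_dword(req, MC_CMD_PARSER_DISP_RW_IN_ADDRESS_OFST, dmem_addr);
}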
@@ -8283,7 +8850,7 @@
#define MC_CMD_PARSER_DISP_RW_IN_SELECTOR_OFST 8
#define MC_CMD_PARSER_DISP_RW_IN_SELECTOR_LEN 4
/* enum: Port to datapath mapping */
-#define MC_CMD_PARSER_DISP_RW_IN_PORT_DP_MAPPING 0x1
+#define MC_CMD_PARSER_DISP_RW_IN_PORT_DP_MAPPING 0x1
/* value to write (for DMEM writes) */
#define MC_CMD_PARSER_DISP_RW_IN_DMEM_WRITE_VALUE_OFST 12
#define MC_CMD_PARSER_DISP_RW_IN_DMEM_WRITE_VALUE_LEN 4
@@ -8317,8 +8884,8 @@
#define MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_OFST 0
#define MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_LEN 4
#define MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_NUM 4
-#define MC_CMD_PARSER_DISP_RW_OUT_DP0 0x1 /* enum */
-#define MC_CMD_PARSER_DISP_RW_OUT_DP1 0x2 /* enum */
+#define MC_CMD_PARSER_DISP_RW_OUT_DP0 0x1 /* enum */
+#define MC_CMD_PARSER_DISP_RW_OUT_DP1 0x2 /* enum */
/***********************************/
@@ -8783,13 +9350,13 @@
#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_OFST 0
#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_LEN 4
/* enum: MISC. */
-#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_MISC 0x0
+#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_MISC 0x0
/* enum: IDO. */
-#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_IDO 0x1
+#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_IDO 0x1
/* enum: RO. */
-#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_RO 0x2
+#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_RO 0x2
/* enum: TPH Type. */
-#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_TPH_TYPE 0x3
+#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_TPH_TYPE 0x3
/* MC_CMD_GET_TLP_PROCESSING_GLOBALS_OUT msgresponse */
#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_OUT_LEN 8
@@ -8920,57 +9487,57 @@
*/
#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_OFST 0
#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_LEN 4
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IDLE 0x0 /* enum */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_RESET 0x1 /* enum */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS 0x2 /* enum */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_VECTORS 0x3 /* enum */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_READY 0x4 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IDLE 0x0 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_RESET 0x1 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS 0x2 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_VECTORS 0x3 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_READY 0x4 /* enum */
/* Target for download. (These match the blob numbers defined in
* mc_flash_layout.h.)
*/
#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_OFST 4
#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_LEN 4
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_TEXT 0x0
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_TEXT 0x0
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_TEXT 0x1
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_TEXT 0x1
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDP_TEXT 0x2
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDP_TEXT 0x2
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDP_TEXT 0x3
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDP_TEXT 0x3
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT 0x4
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT 0x4
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT_CFG 0x5
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT_CFG 0x5
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT 0x6
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT 0x6
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT_CFG 0x7
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT_CFG 0x7
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_PGM 0x8
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_PGM 0x8
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_SL_PGM 0x9
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_SL_PGM 0x9
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_PGM 0xa
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_PGM 0xa
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_SL_PGM 0xb
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_SL_PGM 0xb
/* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL0 0xc
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL0 0xc
/* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL0 0xd
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL0 0xd
/* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL1 0xe
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL1 0xe
/* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL1 0xf
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL1 0xf
/* enum: Valid in phases 1 (PHASE_RESET) and 4 (PHASE_READY) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_ALL 0xffffffff
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_ALL 0xffffffff
/* Chunk ID, or CHUNK_ID_LAST or CHUNK_ID_ABORT */
#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_OFST 8
#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LEN 4
/* enum: Last chunk, containing checksum rather than data */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LAST 0xffffffff
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LAST 0xffffffff
/* enum: Abort download of this item */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_ABORT 0xfffffffe
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_ABORT 0xfffffffe
/* Length of this chunk in bytes */
#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_OFST 12
#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_LEN 4
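A hedged sketch of building one phase-2 download chunk from these fields. The payload offset (16) is an assumption, since the CHUNK_DATA define falls outside this excerpt; a final chunk would instead pass CHUNK_ID_LAST with the checksum as payload, and CHUNK_ID_ABORT cancels the item:

#include <stdint.h>
#include <string.h>

static void mcdi_set_dword(uint8_t *buf, unsigned int ofst, uint32_t val)
{
        buf[ofst + 0] = (uint8_t)val;
        buf[ofst + 1] = (uint8_t)(val >> 8);
        buf[ofst + 2] = (uint8_t)(val >> 16);
        buf[ofst + 3] = (uint8_t)(val >> 24);
}

static void build_satellite_chunk(uint8_t *req, uint32_t target,
                                  uint32_t chunk_id, const uint8_t *data,
                                  uint32_t len)
{
        mcdi_set_dword(req, MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_OFST,
                       MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS);
        mcdi_set_dword(req, MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_OFST, target);
        mcdi_set_dword(req, MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_OFST,
                       chunk_id);
        mcdi_set_dword(req, MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_OFST, len);
        memcpy(req + 16, data, len);    /* assumed payload offset */
}

Each intermediate chunk should be answered with OK_NEXT_CHUNK; OK_COMPLETE, OK_ABORTED or one of the 0x1xx/0x2xx/0x3xx error codes below ends the sequence.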
@@ -8989,21 +9556,21 @@
#define MC_CMD_SATELLITE_DOWNLOAD_OUT_INFO_OFST 4
#define MC_CMD_SATELLITE_DOWNLOAD_OUT_INFO_LEN 4
/* enum: Code download OK, completed. */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_COMPLETE 0x0
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_COMPLETE 0x0
/* enum: Code download aborted as requested. */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_ABORTED 0x1
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_ABORTED 0x1
/* enum: Code download OK so far, send next chunk. */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_NEXT_CHUNK 0x2
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_NEXT_CHUNK 0x2
/* enum: Download phases out of sequence */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_PHASE 0x100
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_PHASE 0x100
/* enum: Bad target for this phase */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_TARGET 0x101
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_TARGET 0x101
/* enum: Chunk ID out of sequence */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_ID 0x200
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_ID 0x200
/* enum: Chunk length zero or too large */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_LEN 0x201
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_LEN 0x201
/* enum: Checksum was incorrect */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHECKSUM 0x300
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHECKSUM 0x300
/***********************************/
@@ -9087,54 +9654,58 @@
#define MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_OFST 4
#define MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_LEN 2
/* enum: Standard RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP 0x0
/* enum: Low latency RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_LOW_LATENCY 0x1
/* enum: Packed stream RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_PACKED_STREAM 0x2
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_PACKED_STREAM 0x2
/* enum: Rules engine RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_DPDK 0x6
/* enum: BIST RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_BIST 0x10a
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_BIST 0x10a
/* enum: RXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
/* enum: RXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
/* enum: RXDP Test firmware image 3 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
/* enum: RXDP Test firmware image 4 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
/* enum: RXDP Test firmware image 5 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_BACKPRESSURE 0x105
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_BACKPRESSURE 0x105
/* enum: RXDP Test firmware image 6 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
/* enum: RXDP Test firmware image 7 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
/* enum: RXDP Test firmware image 8 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
/* enum: RXDP Test firmware image 9 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
/* enum: RXDP Test firmware image 10 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_SLOW 0x10c
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_SLOW 0x10c
/* TxDPCPU firmware id. */
#define MC_CMD_GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID_OFST 6
#define MC_CMD_GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID_LEN 2
/* enum: Standard TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP 0x0
/* enum: Low latency TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_LOW_LATENCY 0x1
/* enum: High packet rate TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_HIGH_PACKET_RATE 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_HIGH_PACKET_RATE 0x3
/* enum: Rules engine TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_DPDK 0x6
/* enum: BIST TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_BIST 0x12d
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_BIST 0x12d
/* enum: TXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
/* enum: TXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
/* enum: TXDP CSR bus test firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_CSR 0x103
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_CSR 0x103
#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_OFST 8
#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9144,41 +9715,43 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial RX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: RX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant RX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
/* enum: Low latency RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
/* enum: Packed stream RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
/* enum: RX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* enum: RX PD firmware parsing but not filtering network overlay tunnel
* encapsulations (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_OFST 10
#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9188,34 +9761,36 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial TX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: TX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant TX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
/* enum: TX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* Hardware capabilities of NIC */
#define MC_CMD_GET_CAPABILITIES_OUT_HW_CAPABILITIES_OFST 12
#define MC_CMD_GET_CAPABILITIES_OUT_HW_CAPABILITIES_LEN 4
@@ -9293,54 +9868,58 @@
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_DPCPU_FW_ID_OFST 4
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_DPCPU_FW_ID_LEN 2
/* enum: Standard RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP 0x0
/* enum: Low latency RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_LOW_LATENCY 0x1
/* enum: Packed stream RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_PACKED_STREAM 0x2
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_PACKED_STREAM 0x2
/* enum: Rules engine RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_DPDK 0x6
/* enum: BIST RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_BIST 0x10a
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_BIST 0x10a
/* enum: RXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
/* enum: RXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
/* enum: RXDP Test firmware image 3 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
/* enum: RXDP Test firmware image 4 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
/* enum: RXDP Test firmware image 5 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_BACKPRESSURE 0x105
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_BACKPRESSURE 0x105
/* enum: RXDP Test firmware image 6 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
/* enum: RXDP Test firmware image 7 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
/* enum: RXDP Test firmware image 8 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
/* enum: RXDP Test firmware image 9 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
/* enum: RXDP Test firmware image 10 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_SLOW 0x10c
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_SLOW 0x10c
/* TxDPCPU firmware id. */
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_DPCPU_FW_ID_OFST 6
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_DPCPU_FW_ID_LEN 2
/* enum: Standard TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP 0x0
/* enum: Low latency TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_LOW_LATENCY 0x1
/* enum: High packet rate TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_HIGH_PACKET_RATE 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_HIGH_PACKET_RATE 0x3
/* enum: Rules engine TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_DPDK 0x6
/* enum: BIST TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_BIST 0x12d
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_BIST 0x12d
/* enum: TXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
/* enum: TXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
/* enum: TXDP CSR bus test firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_CSR 0x103
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_CSR 0x103
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_OFST 8
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9350,41 +9929,43 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial RX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: RX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant RX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
/* enum: Low latency RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
/* enum: Packed stream RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
/* enum: RX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* enum: RX PD firmware parsing but not filtering network overlay tunnel
* encapsulations (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_OFST 10
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9394,34 +9975,36 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial TX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: TX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant TX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
/* enum: TX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* Hardware capabilities of NIC */
#define MC_CMD_GET_CAPABILITIES_V2_OUT_HW_CAPABILITIES_OFST 12
#define MC_CMD_GET_CAPABILITIES_V2_OUT_HW_CAPABILITIES_LEN 4
@@ -9469,6 +10052,18 @@
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TSA_BOUND_WIDTH 1
#define MC_CMD_GET_CAPABILITIES_V2_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
#define MC_CMD_GET_CAPABILITIES_V2_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_FLAG_LBN 19
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_LBN 20
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_L3XUDP_SUPPORT_LBN 22
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_VI_SPREADING_LBN 24
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_VI_SPREADING_WIDTH 1
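These new capability bits sit in a second flags dword of the V2 response (its offset define is outside this excerpt). A hedged test for the rte_flow MARK capability, given that dword already decoded from little-endian:

#include <stdbool.h>
#include <stdint.h>

static bool supports_filter_action_mark(uint32_t flags2)
{
        return flags2 &
               (1u << MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_LBN);
}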
/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
* on older firmware (check the length).
*/
@@ -9482,18 +10077,18 @@
#define MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
#define MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff
/* enum: PF does not exist. */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe
/* enum: PF does exist but is not assigned to any external port. */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED 0xfd
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED 0xfd
/* enum: This value indicates that PF is assigned, but it cannot be expressed
* in this field. It is intended for a possible future situation where a more
* complex scheme of PFs to ports mapping is being used. The future driver
* should look for a new field supporting the new scheme. The current/old
* driver should treat this value as PF_NOT_ASSIGNED.
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
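A hedged decode of the per-PF port-assignment bytes, handling the sentinels as directed above (current drivers treat INCOMPATIBLE_ASSIGNMENT like PF_NOT_ASSIGNED); resp points at the start of the V2 response, and the _OFST define used here sits just outside this excerpt:

#include <stdint.h>
#include <stdio.h>

static void decode_pfs_to_ports(const uint8_t *resp)
{
        const uint8_t *map = resp +
                MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_OFST;
        unsigned int pf;

        for (pf = 0;
             pf < MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM;
             pf++) {
                switch (map[pf]) {
                case MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED:
                        printf("PF%u: access not permitted\n", pf);
                        break;
                case MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT:
                        printf("PF%u: not present\n", pf);
                        break;
                case MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED:
                case MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT:
                        printf("PF%u: not assigned\n", pf);
                        break;
                default:
                        printf("PF%u: external port %u\n", pf, map[pf]);
                }
        }
}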
/* One byte per PF containing the number of its VFs, indexed by PF number. A
* special value indicates that a PF is not present.
*/
@@ -9501,9 +10096,9 @@
#define MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VFS_PER_PF_LEN 1
#define MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VFS_PER_PF_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-/* MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff */
+/* MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff */
/* enum: PF does not exist. */
-/* MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe */
+/* MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe */
/* Number of VIs available for each external port */
#define MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VIS_PER_PORT_OFST 58
#define MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -9592,54 +10187,58 @@
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2
/* enum: Standard RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP 0x0
/* enum: Low latency RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY 0x1
/* enum: Packed stream RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM 0x2
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM 0x2
/* enum: Rules engine RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_DPDK 0x6
/* enum: BIST RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST 0x10a
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST 0x10a
/* enum: RXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
/* enum: RXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
/* enum: RXDP Test firmware image 3 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
/* enum: RXDP Test firmware image 4 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
/* enum: RXDP Test firmware image 5 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE 0x105
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE 0x105
/* enum: RXDP Test firmware image 6 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
/* enum: RXDP Test firmware image 7 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
/* enum: RXDP Test firmware image 8 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
/* enum: RXDP Test firmware image 9 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
/* enum: RXDP Test firmware image 10 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_SLOW 0x10c
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_SLOW 0x10c
/* TxDPCPU firmware id. */
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2
/* enum: Standard TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP 0x0
/* enum: Low latency TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY 0x1
/* enum: High packet rate TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE 0x3
/* enum: Rules engine TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_DPDK 0x6
/* enum: BIST TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST 0x12d
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST 0x12d
/* enum: TXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
/* enum: TXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
/* enum: TXDP CSR bus test firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR 0x103
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR 0x103
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9649,41 +10248,43 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial RX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: RX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant RX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
/* enum: Low latency RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
/* enum: Packed stream RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
/* enum: RX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* enum: RX PD firmware parsing but not filtering network overlay tunnel
* encapsulations (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9693,34 +10294,36 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial TX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: TX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant TX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
/* enum: TX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* Hardware capabilities of NIC */
#define MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12
#define MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_LEN 4
@@ -9768,6 +10371,18 @@
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TSA_BOUND_WIDTH 1
#define MC_CMD_GET_CAPABILITIES_V3_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
#define MC_CMD_GET_CAPABILITIES_V3_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_FLAG_LBN 19
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_MARK_LBN 20
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_L3XUDP_SUPPORT_LBN 22
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_SPREADING_LBN 24
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_SPREADING_WIDTH 1
/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
* on older firmware (check the length).
*/
@@ -9781,18 +10396,18 @@
#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff
/* enum: PF does not exist. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe
/* enum: PF does exist but is not assigned to any external port. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED 0xfd
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED 0xfd
/* enum: This value indicates that PF is assigned, but it cannot be expressed
* in this field. It is intended for a possible future situation where a more
* complex scheme of PFs to ports mapping is being used. The future driver
* should look for a new field supporting the new scheme. The current/old
* driver should treat this value as PF_NOT_ASSIGNED.
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
/* One byte per PF containing the number of its VFs, indexed by PF number. A
* special value indicates that a PF is not present.
*/
@@ -9800,9 +10415,9 @@
#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1
#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-/* MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff */
+/* MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff */
/* enum: PF does not exist. */
-/* MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe */
+/* MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe */
/* Number of VIs available for each external port */
#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58
#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -9833,11 +10448,11 @@
/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
* CTPIO is not mapped.
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K 0x0
/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K 0x1
/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K 0x2
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K 0x2
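[Editorial note] The three window modes fix the per-VI stride and where the PIO and CTPIO regions sit inside each window. A sketch of turning the reported mode into usable geometry; the -1 convention for "CTPIO not mapped" is ours, not the firmware's:

#include <stddef.h>
#include <stdint.h>

struct vi_window {
	size_t stride;		/* bytes per VI */
	ptrdiff_t pio;		/* PIO offset within the window */
	ptrdiff_t ctpio;	/* CTPIO offset, or -1 if not mapped */
};

static int vi_window_geometry(uint32_t mode, struct vi_window *w)
{
	switch (mode) {
	case 0x0: /* ..._VI_WINDOW_MODE_8K */
		*w = (struct vi_window){ 8192, 4096, -1 };
		return 0;
	case 0x1: /* ..._VI_WINDOW_MODE_16K */
		*w = (struct vi_window){ 16384, 4096, 12288 };
		return 0;
	case 0x2: /* ..._VI_WINDOW_MODE_64K */
		*w = (struct vi_window){ 65536, 4096, 12288 };
		return 0;
	}
	return -1;
}

/* A VI's window then starts at vi_index * w->stride within the function's BAR. */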
/* Number of vFIFOs per adapter that can be used for VFIFO Stuffing
* (SF-115995-SW) in the present configuration of firmware and port mode.
*/
@@ -9916,54 +10531,58 @@
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RX_DPCPU_FW_ID_OFST 4
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RX_DPCPU_FW_ID_LEN 2
/* enum: Standard RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP 0x0
/* enum: Low latency RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_LOW_LATENCY 0x1
/* enum: Packed stream RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_PACKED_STREAM 0x2
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_PACKED_STREAM 0x2
/* enum: Rules engine RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_DPDK 0x6
/* enum: BIST RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_BIST 0x10a
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_BIST 0x10a
/* enum: RXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
/* enum: RXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
/* enum: RXDP Test firmware image 3 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
/* enum: RXDP Test firmware image 4 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
/* enum: RXDP Test firmware image 5 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_BACKPRESSURE 0x105
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_BACKPRESSURE 0x105
/* enum: RXDP Test firmware image 6 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
/* enum: RXDP Test firmware image 7 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
/* enum: RXDP Test firmware image 8 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
/* enum: RXDP Test firmware image 9 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
/* enum: RXDP Test firmware image 10 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_SLOW 0x10c
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_SLOW 0x10c
/* TxDPCPU firmware id. */
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TX_DPCPU_FW_ID_OFST 6
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TX_DPCPU_FW_ID_LEN 2
/* enum: Standard TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP 0x0
/* enum: Low latency TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_LOW_LATENCY 0x1
/* enum: High packet rate TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_HIGH_PACKET_RATE 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_HIGH_PACKET_RATE 0x3
/* enum: Rules engine TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_DPDK 0x6
/* enum: BIST TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_BIST 0x12d
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_BIST 0x12d
/* enum: TXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
/* enum: TXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
/* enum: TXDP CSR bus test firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_CSR 0x103
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_CSR 0x103
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_OFST 8
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9973,41 +10592,43 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial RX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: RX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant RX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
/* enum: Low latency RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
/* enum: Packed stream RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
/* enum: RX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* enum: RX PD firmware parsing but not filtering network overlay tunnel
* encapsulations (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_OFST 10
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -10017,34 +10638,36 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial TX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: TX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant TX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
/* enum: TX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* Hardware capabilities of NIC */
#define MC_CMD_GET_CAPABILITIES_V4_OUT_HW_CAPABILITIES_OFST 12
#define MC_CMD_GET_CAPABILITIES_V4_OUT_HW_CAPABILITIES_LEN 4
@@ -10092,6 +10715,18 @@
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TSA_BOUND_WIDTH 1
#define MC_CMD_GET_CAPABILITIES_V4_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
#define MC_CMD_GET_CAPABILITIES_V4_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_FLAG_LBN 19
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_MARK_LBN 20
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_L3XUDP_SUPPORT_LBN 22
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_SPREADING_LBN 24
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_SPREADING_WIDTH 1
/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
* on older firmware (check the length).
*/
@@ -10105,18 +10740,18 @@
#define MC_CMD_GET_CAPABILITIES_V4_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
#define MC_CMD_GET_CAPABILITIES_V4_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff
/* enum: PF does not exist. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe
/* enum: PF does exist but is not assigned to any external port. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED 0xfd
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED 0xfd
/* enum: This value indicates that PF is assigned, but it cannot be expressed
* in this field. It is intended for a possible future situation where a more
* complex scheme of PFs to ports mapping is being used. The future driver
* should look for a new field supporting the new scheme. The current/old
* driver should treat this value as PF_NOT_ASSIGNED.
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
/* One byte per PF containing the number of its VFs, indexed by PF number. A
* special value indicates that a PF is not present.
*/
@@ -10124,9 +10759,9 @@
#define MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VFS_PER_PF_LEN 1
#define MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VFS_PER_PF_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-/* MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff */
+/* MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff */
/* enum: PF does not exist. */
-/* MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe */
+/* MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe */
/* Number of VIs available for each external port */
#define MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VIS_PER_PORT_OFST 58
#define MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -10157,11 +10792,11 @@
/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
* CTPIO is not mapped.
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K 0x0
/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K 0x1
/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K 0x2
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K 0x2
/* Number of vFIFOs per adapter that can be used for VFIFO Stuffing
* (SF-115995-SW) in the present configuration of firmware and port mode.
*/
@@ -10201,7 +10836,16 @@
#define MC_CMD_V2_EXTN_IN_ACTUAL_LEN_LBN 16
#define MC_CMD_V2_EXTN_IN_ACTUAL_LEN_WIDTH 10
#define MC_CMD_V2_EXTN_IN_UNUSED2_LBN 26
-#define MC_CMD_V2_EXTN_IN_UNUSED2_WIDTH 6
+#define MC_CMD_V2_EXTN_IN_UNUSED2_WIDTH 2
+/* Type of command/response */
+#define MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_LBN 28
+#define MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_WIDTH 4
+/* enum: MCDI command directed to or response originating from the MC. */
+#define MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_MC 0x0
+/* enum: MCDI command directed to a TSA controller. MCDI responses of this type
+ * are not defined.
+ */
+#define MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_TSA 0x1
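[Editorial note] The change above narrows UNUSED2 from six bits to two and carves a 4-bit MESSAGE_TYPE field out of bits 31:28 of the V2 extension dword. A hedged sketch of directing a request at a TSA controller, with set_dword_field() standing in for whatever bitfield helpers the caller already has:

#include <stdint.h>

/* Illustrative only: write an _LBN/_WIDTH field into a host-order dword. */
static uint32_t set_dword_field(uint32_t dword, unsigned int lbn,
				unsigned int width, uint32_t value)
{
	uint32_t mask = ((width < 32) ? (1u << width) - 1 : ~0u) << lbn;

	return (dword & ~mask) | ((value << lbn) & mask);
}

/* hdr = set_dword_field(hdr, MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_LBN,
 *			 MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_WIDTH,
 *			 MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_TSA);
 */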
/***********************************/
@@ -10412,15 +11056,15 @@
#define MC_CMD_VSWITCH_ALLOC_IN_TYPE_OFST 4
#define MC_CMD_VSWITCH_ALLOC_IN_TYPE_LEN 4
/* enum: VLAN */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VLAN 0x1
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VLAN 0x1
/* enum: VEB */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB 0x2
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB 0x2
/* enum: VEPA (obsolete) */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEPA 0x3
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEPA 0x3
/* enum: MUX */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_MUX 0x4
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_MUX 0x4
/* enum: Snapper specific; semantics TBD */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_TEST 0x5
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_TEST 0x5
/* Flags controlling v-port creation */
#define MC_CMD_VSWITCH_ALLOC_IN_FLAGS_OFST 8
#define MC_CMD_VSWITCH_ALLOC_IN_FLAGS_LEN 4
@@ -10495,23 +11139,23 @@
#define MC_CMD_VPORT_ALLOC_IN_TYPE_OFST 4
#define MC_CMD_VPORT_ALLOC_IN_TYPE_LEN 4
/* enum: VLAN (obsolete) */
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VLAN 0x1
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VLAN 0x1
/* enum: VEB (obsolete) */
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEB 0x2
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEB 0x2
/* enum: VEPA (obsolete) */
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEPA 0x3
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEPA 0x3
/* enum: A normal v-port receives packets which match a specified MAC and/or
* VLAN.
*/
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL 0x4
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL 0x4
/* enum: An expansion v-port receives packets which don't match any other
* v-port.
*/
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_EXPANSION 0x5
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_EXPANSION 0x5
/* enum: A test v-port receives packets which match any filters installed by
* its downstream components.
*/
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_TEST 0x6
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_TEST 0x6
/* Flags controlling v-port creation */
#define MC_CMD_VPORT_ALLOC_IN_FLAGS_OFST 8
#define MC_CMD_VPORT_ALLOC_IN_FLAGS_LEN 4
@@ -10595,7 +11239,7 @@
#define MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_OFST 24
#define MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_LEN 6
/* enum: Derive the MAC address from the upstream port */
-#define MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC 0x0
+#define MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC 0x0
/* MC_CMD_VADAPTOR_ALLOC_OUT msgresponse */
#define MC_CMD_VADAPTOR_ALLOC_OUT_LEN 0
@@ -10809,12 +11453,12 @@
/* enum: Allocate a context for exclusive use. The key and indirection table
* must be explicitly configured.
*/
-#define MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE 0x0
+#define MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE 0x0
/* enum: Allocate a context for shared use; this will spread across a range of
* queues, but the key and indirection table are pre-configured and may not be
 * changed. For this mode, NUM_QUEUES must be 2, 4, 8, 16, 32 or 64.
*/
-#define MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED 0x1
+#define MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED 0x1
/* Number of queues spanned by this context, in the range 1-64; valid offsets
* in the indirection table will be in the range 0 to NUM_QUEUES-1.
*/
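[Editorial note] A sketch of the NUM_QUEUES constraints stated above: exclusive contexts accept any count in 1-64, shared contexts only the powers of two from 2 to 64 (the TYPE literal mirrors the enum above):

#include <stdbool.h>
#include <stdint.h>

static bool rss_num_queues_valid(uint32_t type, uint32_t num_queues)
{
	if (num_queues < 1 || num_queues > 64)
		return false;
	if (type == 0x1 /* ..._TYPE_SHARED */)
		return num_queues >= 2 &&
		       (num_queues & (num_queues - 1)) == 0;
	return true; /* ..._TYPE_EXCLUSIVE: any value in 1-64 */
}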
@@ -10830,7 +11474,7 @@
#define MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_OFST 0
#define MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_LEN 4
/* enum: guaranteed invalid RSS context handle value */
-#define MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_INVALID 0xffffffff
+#define MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_INVALID 0xffffffff
/***********************************/
@@ -11073,7 +11717,7 @@
#define MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_OFST 0
#define MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_LEN 4
/* enum: guaranteed invalid .1p mapping handle value */
-#define MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_INVALID 0xffffffff
+#define MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_INVALID 0xffffffff
/***********************************/
@@ -11385,11 +12029,11 @@
#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_LEN_LBN 1
#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_LEN_WIDTH 2
/* enum: pad to 64 bytes */
-#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_64 0x0
+#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_64 0x0
/* enum: pad to 128 bytes (Medford only) */
-#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_128 0x1
+#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_128 0x1
/* enum: pad to 256 bytes (Medford only) */
-#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_256 0x2
+#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_256 0x2
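[Editorial note] For reference, an illustrative mapping of the two-bit PAD_HOST_LEN encodings above to byte counts:

static unsigned int rxdp_pad_bytes(unsigned int pad_host_len)
{
	switch (pad_host_len) {
	case 0x0: return 64;
	case 0x1: return 128;	/* Medford only */
	case 0x2: return 256;	/* Medford only */
	}
	return 0;		/* reserved encoding */
}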
/* MC_CMD_SET_RXDP_CONFIG_OUT msgresponse */
#define MC_CMD_SET_RXDP_CONFIG_OUT_LEN 0
@@ -11453,37 +12097,37 @@
#define MC_CMD_SET_CLOCK_IN_SYS_FREQ_OFST 0
#define MC_CMD_SET_CLOCK_IN_SYS_FREQ_LEN 4
/* enum: Leave the system clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_SYS_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_SYS_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for inter-core clock domain */
#define MC_CMD_SET_CLOCK_IN_ICORE_FREQ_OFST 4
#define MC_CMD_SET_CLOCK_IN_ICORE_FREQ_LEN 4
/* enum: Leave the inter-core clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_ICORE_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_ICORE_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for DPCPU clock domain */
#define MC_CMD_SET_CLOCK_IN_DPCPU_FREQ_OFST 8
#define MC_CMD_SET_CLOCK_IN_DPCPU_FREQ_LEN 4
/* enum: Leave the DPCPU clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_DPCPU_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_DPCPU_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for PCS clock domain */
#define MC_CMD_SET_CLOCK_IN_PCS_FREQ_OFST 12
#define MC_CMD_SET_CLOCK_IN_PCS_FREQ_LEN 4
/* enum: Leave the PCS clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_PCS_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_PCS_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for MC clock domain */
#define MC_CMD_SET_CLOCK_IN_MC_FREQ_OFST 16
#define MC_CMD_SET_CLOCK_IN_MC_FREQ_LEN 4
/* enum: Leave the MC clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_MC_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_MC_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for rmon clock domain */
#define MC_CMD_SET_CLOCK_IN_RMON_FREQ_OFST 20
#define MC_CMD_SET_CLOCK_IN_RMON_FREQ_LEN 4
/* enum: Leave the rmon clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_RMON_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_RMON_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for vswitch clock domain */
#define MC_CMD_SET_CLOCK_IN_VSWITCH_FREQ_OFST 24
#define MC_CMD_SET_CLOCK_IN_VSWITCH_FREQ_LEN 4
/* enum: Leave the vswitch clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_VSWITCH_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_VSWITCH_DOMAIN_DONT_CHANGE 0x0
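[Editorial note] Because 0x0 is the "don't change" sentinel for every domain, a request that retunes a single clock can zero the whole 28-byte payload and fill in just one frequency. A sketch, assuming a little-endian host so that copying a host-order value matches MCDI's little-endian wire format:

#include <stdint.h>
#include <string.h>

static void build_set_clock_dpcpu_only(uint8_t buf[28], uint32_t dpcpu_mhz)
{
	memset(buf, 0, 28);	/* 0x0 == DONT_CHANGE for every domain */
	memcpy(buf + 8 /* ..._DPCPU_FREQ_OFST */, &dpcpu_mhz, 4);
}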
/* MC_CMD_SET_CLOCK_OUT msgresponse */
#define MC_CMD_SET_CLOCK_OUT_LEN 28
@@ -11491,37 +12135,37 @@
#define MC_CMD_SET_CLOCK_OUT_SYS_FREQ_OFST 0
#define MC_CMD_SET_CLOCK_OUT_SYS_FREQ_LEN 4
/* enum: The system clock domain doesn't exist */
-#define MC_CMD_SET_CLOCK_OUT_SYS_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_SYS_DOMAIN_UNSUPPORTED 0x0
/* Resulting inter-core frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_ICORE_FREQ_OFST 4
#define MC_CMD_SET_CLOCK_OUT_ICORE_FREQ_LEN 4
/* enum: The inter-core clock domain doesn't exist / isn't used */
-#define MC_CMD_SET_CLOCK_OUT_ICORE_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_ICORE_DOMAIN_UNSUPPORTED 0x0
/* Resulting DPCPU frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_DPCPU_FREQ_OFST 8
#define MC_CMD_SET_CLOCK_OUT_DPCPU_FREQ_LEN 4
/* enum: The dpcpu clock domain doesn't exist */
-#define MC_CMD_SET_CLOCK_OUT_DPCPU_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_DPCPU_DOMAIN_UNSUPPORTED 0x0
/* Resulting PCS frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_PCS_FREQ_OFST 12
#define MC_CMD_SET_CLOCK_OUT_PCS_FREQ_LEN 4
/* enum: The PCS clock domain doesn't exist / isn't controlled */
-#define MC_CMD_SET_CLOCK_OUT_PCS_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_PCS_DOMAIN_UNSUPPORTED 0x0
/* Resulting MC frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_MC_FREQ_OFST 16
#define MC_CMD_SET_CLOCK_OUT_MC_FREQ_LEN 4
/* enum: The MC clock domain doesn't exist / isn't controlled */
-#define MC_CMD_SET_CLOCK_OUT_MC_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_MC_DOMAIN_UNSUPPORTED 0x0
/* Resulting rmon frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_RMON_FREQ_OFST 20
#define MC_CMD_SET_CLOCK_OUT_RMON_FREQ_LEN 4
/* enum: The rmon clock domain doesn't exist / isn't controlled */
-#define MC_CMD_SET_CLOCK_OUT_RMON_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_RMON_DOMAIN_UNSUPPORTED 0x0
/* Resulting vswitch frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_VSWITCH_FREQ_OFST 24
#define MC_CMD_SET_CLOCK_OUT_VSWITCH_FREQ_LEN 4
/* enum: The vswitch clock domain doesn't exist / isn't controlled */
-#define MC_CMD_SET_CLOCK_OUT_VSWITCH_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_VSWITCH_DOMAIN_UNSUPPORTED 0x0
/***********************************/
@@ -11537,21 +12181,21 @@
#define MC_CMD_DPCPU_RPC_IN_CPU_OFST 0
#define MC_CMD_DPCPU_RPC_IN_CPU_LEN 4
/* enum: RxDPCPU0 */
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX0 0x0
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX0 0x0
/* enum: TxDPCPU0 */
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX0 0x1
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX0 0x1
/* enum: TxDPCPU1 */
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX1 0x2
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX1 0x2
/* enum: RxDPCPU1 (Medford only) */
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX1 0x3
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX1 0x3
/* enum: RxDPCPU (will be for the calling function; for now, just an alias of
* DPCPU_RX0)
*/
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX 0x80
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX 0x80
/* enum: TxDPCPU (will be for the calling function; for now, just an alias of
* DPCPU_TX0)
*/
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX 0x81
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX 0x81
/* First 8 bits [39:32] of DATA are consumed by MC-DPCPU protocol and must be
* initialised to zero
*/
@@ -11559,15 +12203,15 @@
#define MC_CMD_DPCPU_RPC_IN_DATA_LEN 32
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_CMDNUM_LBN 8
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_CMDNUM_WIDTH 8
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_READ 0x6 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_WRITE 0x7 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_SELF_TEST 0xc /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_CSR_ACCESS 0xe /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_READ 0x46 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_WRITE 0x47 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SELF_TEST 0x4a /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_CSR_ACCESS 0x4c /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SET_MC_REPLAY_CNTXT 0x4d /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_READ 0x6 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_WRITE 0x7 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_SELF_TEST 0xc /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_CSR_ACCESS 0xe /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_READ 0x46 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_WRITE 0x47 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SELF_TEST 0x4a /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_CSR_ACCESS 0x4c /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SET_MC_REPLAY_CNTXT 0x4d /* enum */
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_OBJID_LBN 16
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_OBJID_WIDTH 16
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_ADDR_LBN 16
@@ -11578,11 +12222,11 @@
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_INFO_WIDTH 240
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_LBN 16
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_WIDTH 16
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_STOP_RETURN_RESULT 0x0 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_READ 0x1 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE 0x2 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE_READ 0x3 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_PIPELINED_READ 0x4 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_STOP_RETURN_RESULT 0x0 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_READ 0x1 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE 0x2 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE_READ 0x3 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_PIPELINED_READ 0x4 /* enum */
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_START_DELAY_LBN 48
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_START_DELAY_WIDTH 16
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_RPT_COUNT_LBN 64
@@ -11591,9 +12235,9 @@
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_GAP_DELAY_WIDTH 16
#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_LBN 16
#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_WIDTH 16
-#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_CUT_THROUGH 0x1 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD 0x2 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD_FIRST 0x3 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_CUT_THROUGH 0x1 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD 0x2 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD_FIRST 0x3 /* enum */
#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_CNTXT_LBN 64
#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_CNTXT_WIDTH 16
#define MC_CMD_DPCPU_RPC_IN_WDATA_OFST 12
@@ -11660,7 +12304,7 @@
#define MC_CMD_SHMBOOT_OP_IN_SHMBOOT_OP_OFST 0
#define MC_CMD_SHMBOOT_OP_IN_SHMBOOT_OP_LEN 4
/* enum: Copy slave_data section to the slave core. (Greenport only) */
-#define MC_CMD_SHMBOOT_OP_IN_PUSH_SLAVE_DATA 0x0
+#define MC_CMD_SHMBOOT_OP_IN_PUSH_SLAVE_DATA 0x0
/* MC_CMD_SHMBOOT_OP_OUT msgresponse */
#define MC_CMD_SHMBOOT_OP_OUT_LEN 0
@@ -11709,14 +12353,14 @@
#define MC_CMD_DUMP_DO_IN_PADDING_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_OFST 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_LEN 4
-#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM 0x0 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_DEFAULT 0x1 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM 0x0 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_DEFAULT 0x1 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_TYPE_OFST 8
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_TYPE_LEN 4
-#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_NVRAM 0x1 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY 0x2 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY_MLI 0x3 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_UART 0x4 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_NVRAM 0x1 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY 0x2 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY_MLI 0x3 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_UART 0x4 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_PARTITION_TYPE_ID_OFST 12
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_PARTITION_TYPE_ID_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_OFFSET_OFST 16
@@ -11727,24 +12371,24 @@
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_ADDR_HI_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_LO_OFST 12
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_LO_LEN 4
-#define MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_PAGE_SIZE 0x1000 /* enum */
+#define MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_PAGE_SIZE 0x1000 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_HI_OFST 16
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_HI_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_DEPTH_OFST 20
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_DEPTH_LEN 4
-#define MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_MAX_DEPTH 0x2 /* enum */
+#define MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_MAX_DEPTH 0x2 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_UART_PORT_OFST 12
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_UART_PORT_LEN 4
/* enum: The uart port this command was received over (if using a uart
* transport)
*/
-#define MC_CMD_DUMP_DO_IN_UART_PORT_SRC 0xff
+#define MC_CMD_DUMP_DO_IN_UART_PORT_SRC 0xff
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_SIZE_OFST 24
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_SIZE_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_OFST 28
#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_LEN 4
-#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM 0x0 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_NVRAM_DUMP_PARTITION 0x1 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM 0x0 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_NVRAM_DUMP_PARTITION 0x1 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM_TYPE_OFST 32
#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM_TYPE_LEN 4
/* Enum values, see field(s): */
@@ -11854,11 +12498,11 @@
#define MC_CMD_SET_PSU_IN_LEN 12
#define MC_CMD_SET_PSU_IN_PARAM_OFST 0
#define MC_CMD_SET_PSU_IN_PARAM_LEN 4
-#define MC_CMD_SET_PSU_IN_PARAM_SUPPLY_VOLTAGE 0x0 /* enum */
+#define MC_CMD_SET_PSU_IN_PARAM_SUPPLY_VOLTAGE 0x0 /* enum */
#define MC_CMD_SET_PSU_IN_RAIL_OFST 4
#define MC_CMD_SET_PSU_IN_RAIL_LEN 4
-#define MC_CMD_SET_PSU_IN_RAIL_0V9 0x0 /* enum */
-#define MC_CMD_SET_PSU_IN_RAIL_1V2 0x1 /* enum */
+#define MC_CMD_SET_PSU_IN_RAIL_0V9 0x0 /* enum */
+#define MC_CMD_SET_PSU_IN_RAIL_1V2 0x1 /* enum */
/* desired value, e.g. voltage in mV */
#define MC_CMD_SET_PSU_IN_VALUE_OFST 8
#define MC_CMD_SET_PSU_IN_VALUE_LEN 4
@@ -12031,26 +12675,30 @@
#define MC_CMD_KR_TUNE_IN_KR_TUNE_OP_OFST 0
#define MC_CMD_KR_TUNE_IN_KR_TUNE_OP_LEN 1
/* enum: Get current RXEQ settings */
-#define MC_CMD_KR_TUNE_IN_RXEQ_GET 0x0
+#define MC_CMD_KR_TUNE_IN_RXEQ_GET 0x0
/* enum: Override RXEQ settings */
-#define MC_CMD_KR_TUNE_IN_RXEQ_SET 0x1
+#define MC_CMD_KR_TUNE_IN_RXEQ_SET 0x1
/* enum: Get current TX Driver settings */
-#define MC_CMD_KR_TUNE_IN_TXEQ_GET 0x2
+#define MC_CMD_KR_TUNE_IN_TXEQ_GET 0x2
/* enum: Override TX Driver settings */
-#define MC_CMD_KR_TUNE_IN_TXEQ_SET 0x3
+#define MC_CMD_KR_TUNE_IN_TXEQ_SET 0x3
/* enum: Force KR Serdes reset / recalibration */
-#define MC_CMD_KR_TUNE_IN_RECAL 0x4
+#define MC_CMD_KR_TUNE_IN_RECAL 0x4
/* enum: Start KR Serdes Eye diagram plot on a given lane. Lane must have valid
* signal.
*/
-#define MC_CMD_KR_TUNE_IN_START_EYE_PLOT 0x5
+#define MC_CMD_KR_TUNE_IN_START_EYE_PLOT 0x5
/* enum: Poll KR Serdes Eye diagram plot. Returns one row of BER data. The
* caller should call this command repeatedly after starting eye plot, until no
* more data is returned.
*/
-#define MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT 0x6
+#define MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT 0x6
/* enum: Read Figure Of Merit (eye quality, higher is better). */
-#define MC_CMD_KR_TUNE_IN_READ_FOM 0x7
+#define MC_CMD_KR_TUNE_IN_READ_FOM 0x7
+/* enum: Start/stop link training frames */
+#define MC_CMD_KR_TUNE_IN_LINK_TRAIN_RUN 0x8
+/* enum: Issue KR link training command (control training coefficients) */
+#define MC_CMD_KR_TUNE_IN_LINK_TRAIN_CMD 0x9
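[Editorial note] POLL_EYE_PLOT above is explicitly a repeat-until-empty operation. A sketch of the loop, with mcdi_rpc() a hypothetical stand-in for the caller's MCDI transport and the row buffer size an arbitrary illustrative choice:

#include <stddef.h>
#include <stdint.h>

int mcdi_rpc(unsigned int cmd, const void *req, size_t req_len,
	     void *resp, size_t resp_max, size_t *resp_len);

static int drain_eye_plot(unsigned int cmd, const void *poll_req,
			  size_t req_len)
{
	uint8_t row[252];
	size_t got;
	int rc;

	do {
		rc = mcdi_rpc(cmd, poll_req, req_len, row, sizeof(row), &got);
		if (rc)
			return rc;
		/* ... hand one row of BER data (got bytes) upwards ... */
	} while (got != 0);

	return 0;
}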
/* Align the arguments to 32 bits */
#define MC_CMD_KR_TUNE_IN_KR_TUNE_RSVD_OFST 1
#define MC_CMD_KR_TUNE_IN_KR_TUNE_RSVD_LEN 3
@@ -12084,98 +12732,98 @@
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_ID_LBN 0
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_ID_WIDTH 8
/* enum: Attenuation (0-15, Huntington) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_ATT 0x0
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_ATT 0x0
/* enum: CTLE Boost (0-15, Huntington) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_BOOST 0x1
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_BOOST 0x1
/* enum: Edge DFE Tap1 (Huntington - 0 - max negative, 64 - zero, 127 - max
* positive, Medford - 0-31)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP1 0x2
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP1 0x2
/* enum: Edge DFE Tap2 (Huntington - 0 - max negative, 32 - zero, 63 - max
* positive, Medford - 0-31)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP2 0x3
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP2 0x3
/* enum: Edge DFE Tap3 (Huntington - 0 - max negative, 32 - zero, 63 - max
* positive, Medford - 0-16)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP3 0x4
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP3 0x4
/* enum: Edge DFE Tap4 (Huntington - 0 - max negative, 32 - zero, 63 - max
* positive, Medford - 0-16)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP4 0x5
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP4 0x5
/* enum: Edge DFE Tap5 (Huntington - 0 - max negative, 32 - zero, 63 - max
* positive, Medford - 0-16)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP5 0x6
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP5 0x6
/* enum: Edge DFE DLEV (0-128 for Medford) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_DLEV 0x7
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_DLEV 0x7
/* enum: Variable Gain Amplifier (0-15, Medford) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_VGA 0x8
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_VGA 0x8
/* enum: CTLE EQ Capacitor (0-15, Medford) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
/* enum: CTLE EQ Resistor (0-7, Medford) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
/* enum: CTLE gain (0-31, Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_GAIN 0xb
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_GAIN 0xb
/* enum: CTLE pole (0-31, Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_POLE 0xc
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_POLE 0xc
/* enum: CTLE peaking (0-31, Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_PEAK 0xd
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_PEAK 0xd
/* enum: DFE Tap1 - even path (Medford2 - 6 bit signed (-29 - +29)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_EVEN 0xe
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_EVEN 0xe
/* enum: DFE Tap1 - odd path (Medford2 - 6 bit signed (-29 - +29)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_ODD 0xf
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_ODD 0xf
/* enum: DFE Tap2 (Medford2 - 6 bit signed (-20 - +20)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x10
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x10
/* enum: DFE Tap3 (Medford2 - 6 bit signed (-20 - +20)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x11
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x11
/* enum: DFE Tap4 (Medford2 - 6 bit signed (-20 - +20)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x12
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x12
/* enum: DFE Tap5 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x13
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x13
/* enum: DFE Tap6 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP6 0x14
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP6 0x14
/* enum: DFE Tap7 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP7 0x15
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP7 0x15
/* enum: DFE Tap8 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP8 0x16
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP8 0x16
/* enum: DFE Tap9 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP9 0x17
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP9 0x17
/* enum: DFE Tap10 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP10 0x18
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP10 0x18
/* enum: DFE Tap11 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP11 0x19
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP11 0x19
/* enum: DFE Tap12 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP12 0x1a
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP12 0x1a
/* enum: I/Q clk offset (Medford2 - 4 bit signed (-5 - +5)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_IQ_OFF 0x1b
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_IQ_OFF 0x1b
/* enum: Negative h1 polarity data sampler offset calibration code, even path
 * (Medford2 - 6 bit signed (-29 - +29))
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_EVEN 0x1c
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_EVEN 0x1c
/* enum: Negative h1 polarity data sampler offset calibration code, odd path
 * (Medford2 - 6 bit signed (-29 - +29))
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_ODD 0x1d
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_ODD 0x1d
/* enum: Positive h1 polarity data sampler offset calibration code, even path
 * (Medford2 - 6 bit signed (-29 - +29))
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_EVEN 0x1e
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_EVEN 0x1e
/* enum: Positive h1 polarity data sampler offset calibration code, odd path
 * (Medford2 - 6 bit signed (-29 - +29))
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_ODD 0x1f
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_ODD 0x1f
/* enum: CDR calibration loop code (Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_PVT 0x20
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_PVT 0x20
/* enum: CDR integral loop code (Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_INTEG 0x21
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_INTEG 0x21
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_LANE_LBN 8
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_LANE_WIDTH 3
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_LBN 11
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_WIDTH 1
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_RESERVED_LBN 12
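[Editorial note] Each parameter word returned by RXEQ_GET packs an ID, a lane and an autocal flag as laid out above. An illustrative decode, with field positions taken from the _LBN/_WIDTH pairs:

#include <stdint.h>
#include <stdio.h>

static void decode_rxeq_param(uint32_t param)
{
	unsigned int id = param & 0xff;			/* PARAM_ID */
	unsigned int lane = (param >> 8) & 0x7;		/* PARAM_LANE */
	unsigned int autocal = (param >> 11) & 0x1;	/* PARAM_AUTOCAL */

	printf("param 0x%02x lane %u autocal %u\n", id, lane, autocal);
}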
@@ -12241,38 +12889,38 @@
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_ID_LBN 0
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_ID_WIDTH 8
/* enum: TX Amplitude (Huntington, Medford, Medford2) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV 0x0
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV 0x0
/* enum: De-Emphasis Tap1 Magnitude (0-7) (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_MODE 0x1
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_MODE 0x1
/* enum: De-Emphasis Tap1 Fine */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_DTLEV 0x2
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_DTLEV 0x2
/* enum: De-Emphasis Tap2 Magnitude (0-6) (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2 0x3
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2 0x3
/* enum: De-Emphasis Tap2 Fine (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2TLEV 0x4
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2TLEV 0x4
/* enum: Pre-Emphasis Magnitude (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_E 0x5
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_E 0x5
/* enum: Pre-Emphasis Fine (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_ETLEV 0x6
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_ETLEV 0x6
/* enum: TX Slew Rate Coarse control (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_PREDRV_DLY 0x7
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_PREDRV_DLY 0x7
/* enum: TX Slew Rate Fine control (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_SR_SET 0x8
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_SR_SET 0x8
/* enum: TX Termination Impedance control (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_RT_SET 0x9
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_RT_SET 0x9
/* enum: TX Amplitude Fine control (Medford) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV_FINE 0xa
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV_FINE 0xa
/* enum: Pre-shoot Tap (Medford, Medford2) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_ADV 0xb
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_ADV 0xb
/* enum: De-emphasis Tap (Medford, Medford2) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_DLY 0xc
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_DLY 0xc
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_LANE_LBN 8
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_LANE_WIDTH 3
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_0 0x0 /* enum */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_1 0x1 /* enum */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_2 0x2 /* enum */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_3 0x3 /* enum */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_RESERVED_LBN 11
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_RESERVED_WIDTH 5
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_INITIAL_LBN 16
@@ -12345,9 +12993,12 @@
/* Align the arguments to 32 bits */
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_KR_TUNE_RSVD_OFST 1
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_KR_TUNE_RSVD_LEN 3
-/* Port-relative lane to scan eye on */
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_OFST 4
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_LEN 4
+#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_NUM_LBN 0
+#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_NUM_WIDTH 8
+#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_ABS_REL_LBN 31
+#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_ABS_REL_WIDTH 1
/* Scan duration / cycle count */
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_BER_OFST 8
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_BER_LEN 4
@@ -12383,12 +13034,91 @@
#define MC_CMD_KR_TUNE_READ_FOM_IN_KR_TUNE_RSVD_LEN 3
#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_OFST 4
#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_LEN 4
+#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_LBN 0
+#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_WIDTH 8
+#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_LBN 31
+#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_WIDTH 1
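The two bitfields just added turn the 32-bit LANE argument into a packed selector: LANE_NUM occupies bits 0-7 and ABS_REL in bit 31 picks absolute versus port-relative numbering. A minimal sketch of packing that word from these defines (the helper is hypothetical, not part of the patch):

static inline u32 kr_tune_pack_lane(u8 lane, bool absolute)
{
	/* lane number in bits 0-7, absolute/relative flag in bit 31 */
	u32 word = (u32)lane << MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_LBN;

	if (absolute)
		word |= 1U << MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_LBN;
	return word;
}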
/* MC_CMD_KR_TUNE_READ_FOM_OUT msgresponse */
#define MC_CMD_KR_TUNE_READ_FOM_OUT_LEN 4
#define MC_CMD_KR_TUNE_READ_FOM_OUT_FOM_OFST 0
#define MC_CMD_KR_TUNE_READ_FOM_OUT_FOM_LEN 4
+/* MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN msgrequest */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_LEN 8
+/* Requested operation */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_OP_OFST 0
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_OP_LEN 1
+/* Align the arguments to 32 bits */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_RSVD_OFST 1
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_RSVD_LEN 3
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_RUN_OFST 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_RUN_LEN 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_STOP 0x0 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_START 0x1 /* enum */
+
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN msgrequest */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LEN 28
+/* Requested operation */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_OFST 0
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_LEN 1
+/* Align the arguments to 32 bits */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_RSVD_OFST 1
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_RSVD_LEN 3
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_OFST 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_LEN 4
+/* Set INITIALIZE state */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_INITIALIZE_OFST 8
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_INITIALIZE_LEN 4
+/* Set PRESET state */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_PRESET_OFST 12
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_PRESET_LEN 4
+/* C(-1) request */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_OFST 16
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_LEN 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_HOLD 0x0 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_INCREMENT 0x1 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_DECREMENT 0x2 /* enum */
+/* C(0) request */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_OFST 20
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(+1) request */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_OFST 24
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT msgresponse */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_LEN 24
+/* C(-1) status */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_STATUS_OFST 0
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_STATUS_LEN 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_NOT_UPDATED 0x0 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_UPDATED 0x1 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_MINIMUM 0x2 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_MAXIMUM 0x3 /* enum */
+/* C(0) status */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_STATUS_OFST 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_STATUS_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(+1) status */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_STATUS_OFST 8
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_STATUS_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(-1) value */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_VALUE_OFST 12
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_VALUE_LEN 4
+/* C(0) value */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_VALUE_OFST 16
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_VALUE_LEN 4
+/* C(+1) value */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_VALUE_OFST 20
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_VALUE_LEN 4
+
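The CMD request above carries one hold/increment/decrement request per coefficient, and the response reports per-coefficient status plus the resulting tap values. A hedged sketch of issuing a single C(+1) increment, using the driver's MCDI buffer helpers but leaving the KR_TUNE_OP sub-opcode value as a parameter, since its enum is defined elsewhere; the INITIALIZE and PRESET fields stay zero because MCDI_DECLARE_BUF zero-initialises:

static int kr_tune_bump_cp1(struct efx_nic *efx, u8 link_train_cmd_op)
{
	MCDI_DECLARE_BUF(inbuf, MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LEN);

	/* sub-opcode byte at offset 0; value comes from the KR_TUNE_OP enum */
	*MCDI_PTR(inbuf, KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP) = link_train_cmd_op;
	MCDI_SET_DWORD(inbuf, KR_TUNE_LINK_TRAIN_CMD_IN_LANE, 0);
	MCDI_SET_DWORD(inbuf, KR_TUNE_LINK_TRAIN_CMD_IN_CM1,
		       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_HOLD);
	MCDI_SET_DWORD(inbuf, KR_TUNE_LINK_TRAIN_CMD_IN_C0,
		       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_HOLD);
	MCDI_SET_DWORD(inbuf, KR_TUNE_LINK_TRAIN_CMD_IN_CP1,
		       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_INCREMENT);
	/* response ignored in this sketch */
	return efx_mcdi_rpc(efx, MC_CMD_KR_TUNE, inbuf, sizeof(inbuf),
			    NULL, 0, NULL);
}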
/***********************************/
/* MC_CMD_PCIE_TUNE
@@ -12406,22 +13136,22 @@
#define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_OP_OFST 0
#define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_OP_LEN 1
/* enum: Get current RXEQ settings */
-#define MC_CMD_PCIE_TUNE_IN_RXEQ_GET 0x0
+#define MC_CMD_PCIE_TUNE_IN_RXEQ_GET 0x0
/* enum: Override RXEQ settings */
-#define MC_CMD_PCIE_TUNE_IN_RXEQ_SET 0x1
+#define MC_CMD_PCIE_TUNE_IN_RXEQ_SET 0x1
/* enum: Get current TX Driver settings */
-#define MC_CMD_PCIE_TUNE_IN_TXEQ_GET 0x2
+#define MC_CMD_PCIE_TUNE_IN_TXEQ_GET 0x2
/* enum: Override TX Driver settings */
-#define MC_CMD_PCIE_TUNE_IN_TXEQ_SET 0x3
+#define MC_CMD_PCIE_TUNE_IN_TXEQ_SET 0x3
/* enum: Start PCIe Serdes Eye diagram plot on a given lane. */
-#define MC_CMD_PCIE_TUNE_IN_START_EYE_PLOT 0x5
+#define MC_CMD_PCIE_TUNE_IN_START_EYE_PLOT 0x5
/* enum: Poll PCIe Serdes Eye diagram plot. Returns one row of BER data. The
* caller should call this command repeatedly after starting eye plot, until no
* more data is returned.
*/
-#define MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT 0x6
+#define MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT 0x6
/* enum: Enable the SERDES BIST and set it to generate a 200MHz square wave */
-#define MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE 0x7
+#define MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE 0x7
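Per the POLL_EYE_PLOT note above, the host drives the plot to completion by repeating the poll until an empty response comes back. A sketch of that loop shape only; the request contents and buffer sizing are assumptions here:

static int pcie_tune_drain_eye_plot(struct efx_nic *efx,
				    const efx_dword_t *inbuf, size_t inlen,
				    efx_dword_t *outbuf, size_t outsize)
{
	size_t outlen;
	int rc;

	do {
		rc = efx_mcdi_rpc(efx, MC_CMD_PCIE_TUNE, inbuf, inlen,
				  outbuf, outsize, &outlen);
		if (rc)
			return rc;
		/* each non-empty response is one row of BER data */
	} while (outlen > 0);
	return 0;
}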
/* Align the arguments to 32 bits */
#define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_OFST 1
#define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_LEN 3
@@ -12455,46 +13185,46 @@
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_ID_LBN 0
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_ID_WIDTH 8
/* enum: Attenuation (0-15) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_ATT 0x0
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_ATT 0x0
/* enum: CTLE Boost (0-15) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_BOOST 0x1
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_BOOST 0x1
/* enum: DFE Tap1 (0 - max negative, 64 - zero, 127 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP1 0x2
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP1 0x2
/* enum: DFE Tap2 (0 - max negative, 32 - zero, 63 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x3
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x3
/* enum: DFE Tap3 (0 - max negative, 32 - zero, 63 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x4
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x4
/* enum: DFE Tap4 (0 - max negative, 32 - zero, 63 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x5
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x5
/* enum: DFE Tap5 (0 - max negative, 32 - zero, 63 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x6
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x6
/* enum: DFE DLev */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_DLEV 0x7
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_DLEV 0x7
/* enum: Figure of Merit */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_FOM 0x8
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_FOM 0x8
/* enum: CTLE EQ Capacitor (HF Gain) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
/* enum: CTLE EQ Resistor (DC Gain) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_LANE_LBN 8
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_LANE_WIDTH 5
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_4 0x4 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_5 0x5 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_6 0x6 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_7 0x7 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_8 0x8 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_9 0x9 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_10 0xa /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_11 0xb /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_12 0xc /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_13 0xd /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_14 0xe /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_15 0xf /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_ALL 0x10 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_4 0x4 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_5 0x5 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_6 0x6 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_7 0x7 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_8 0x8 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_9 0x9 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_10 0xa /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_11 0xb /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_12 0xc /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_13 0xd /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_14 0xe /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_15 0xf /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_ALL 0x10 /* enum */
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_LBN 13
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_WIDTH 1
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_RESERVED_LBN 14
@@ -12558,15 +13288,15 @@
#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_ID_LBN 0
#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_ID_WIDTH 8
/* enum: TxMargin (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXMARGIN 0x0
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXMARGIN 0x0
/* enum: TxSwing (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXSWING 0x1
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXSWING 0x1
/* enum: De-emphasis coefficient C(-1) (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CM1 0x2
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CM1 0x2
/* enum: De-emphasis coefficient C(0) (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_C0 0x3
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_C0 0x3
/* enum: De-emphasis coefficient C(+1) (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CP1 0x4
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CP1 0x4
#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_LANE_LBN 8
#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_LANE_WIDTH 4
/* Enum values, see field(s): */
@@ -12632,9 +13362,9 @@
/* enum: re-read and apply licenses after a license key partition update; note
* that this operation returns a zero-length response
*/
-#define MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE 0x0
+#define MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE 0x0
/* enum: report counts of installed licenses */
-#define MC_CMD_LICENSING_IN_OP_GET_KEY_STATS 0x1
+#define MC_CMD_LICENSING_IN_OP_GET_KEY_STATS 0x1
/* MC_CMD_LICENSING_OUT msgresponse */
#define MC_CMD_LICENSING_OUT_LEN 28
@@ -12665,9 +13395,9 @@
#define MC_CMD_LICENSING_OUT_LICENSING_SELF_TEST_OFST 24
#define MC_CMD_LICENSING_OUT_LICENSING_SELF_TEST_LEN 4
/* enum: licensing subsystem self-test failed */
-#define MC_CMD_LICENSING_OUT_SELF_TEST_FAIL 0x0
+#define MC_CMD_LICENSING_OUT_SELF_TEST_FAIL 0x0
/* enum: licensing subsystem self-test passed */
-#define MC_CMD_LICENSING_OUT_SELF_TEST_PASS 0x1
+#define MC_CMD_LICENSING_OUT_SELF_TEST_PASS 0x1
/***********************************/
@@ -12687,11 +13417,11 @@
/* enum: re-read and apply licenses after a license key partition update; note
* that this operation returns a zero-length response
*/
-#define MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE 0x0
+#define MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE 0x0
/* enum: report counts of installed licenses Returns EAGAIN if license
* processing (updating) has been started but not yet completed.
*/
-#define MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE 0x1
+#define MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE 0x1
/* MC_CMD_LICENSING_V3_OUT msgresponse */
#define MC_CMD_LICENSING_V3_OUT_LEN 88
@@ -12718,9 +13448,9 @@
#define MC_CMD_LICENSING_V3_OUT_LICENSING_SELF_TEST_OFST 20
#define MC_CMD_LICENSING_V3_OUT_LICENSING_SELF_TEST_LEN 4
/* enum: licensing subsystem self-test failed */
-#define MC_CMD_LICENSING_V3_OUT_SELF_TEST_FAIL 0x0
+#define MC_CMD_LICENSING_V3_OUT_SELF_TEST_FAIL 0x0
/* enum: licensing subsystem self-test passed */
-#define MC_CMD_LICENSING_V3_OUT_SELF_TEST_PASS 0x1
+#define MC_CMD_LICENSING_V3_OUT_SELF_TEST_PASS 0x1
/* bitmask of licensed applications */
#define MC_CMD_LICENSING_V3_OUT_LICENSED_APPS_OFST 24
#define MC_CMD_LICENSING_V3_OUT_LICENSED_APPS_LEN 8
@@ -12806,9 +13536,9 @@
#define MC_CMD_GET_LICENSED_APP_STATE_OUT_STATE_OFST 0
#define MC_CMD_GET_LICENSED_APP_STATE_OUT_STATE_LEN 4
/* enum: no (or invalid) license is present for the application */
-#define MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED 0x0
+#define MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED 0x0
/* enum: a valid license is present for the application */
-#define MC_CMD_GET_LICENSED_APP_STATE_OUT_LICENSED 0x1
+#define MC_CMD_GET_LICENSED_APP_STATE_OUT_LICENSED 0x1
/***********************************/
@@ -12837,9 +13567,9 @@
#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_STATE_OFST 0
#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_STATE_LEN 4
/* enum: no (or invalid) license is present for the application */
-#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED 0x0
+#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED 0x0
/* enum: a valid license is present for the application */
-#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LICENSED 0x1
+#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LICENSED 0x1
/***********************************/
@@ -12891,9 +13621,9 @@
#define MC_CMD_LICENSED_APP_OP_IN_OP_OFST 4
#define MC_CMD_LICENSED_APP_OP_IN_OP_LEN 4
/* enum: validate application */
-#define MC_CMD_LICENSED_APP_OP_IN_OP_VALIDATE 0x0
+#define MC_CMD_LICENSED_APP_OP_IN_OP_VALIDATE 0x0
/* enum: mask application */
-#define MC_CMD_LICENSED_APP_OP_IN_OP_MASK 0x1
+#define MC_CMD_LICENSED_APP_OP_IN_OP_MASK 0x1
/* arguments specific to this particular operation */
#define MC_CMD_LICENSED_APP_OP_IN_ARGS_OFST 8
#define MC_CMD_LICENSED_APP_OP_IN_ARGS_LEN 4
@@ -12984,9 +13714,9 @@
#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 100
#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_LEN 4
/* enum: expiry units are accounting units */
-#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC 0x0
+#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC 0x0
/* enum: expiry units are calendar days */
-#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS 0x1
+#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS 0x1
/* base MAC address of the NIC stored in NVRAM (note that this is a constant
* value for a given NIC regardless which function is calling, effectively this
* is PF0 base MAC address)
@@ -13019,9 +13749,9 @@
#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_FLAG_OFST 8
#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_FLAG_LEN 4
/* enum: turn the features off */
-#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_OFF 0x0
+#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_OFF 0x0
/* enum: turn the features back on */
-#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_ON 0x1
+#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_ON 0x1
/* MC_CMD_LICENSED_V3_MASK_FEATURES_OUT msgresponse */
#define MC_CMD_LICENSED_V3_MASK_FEATURES_OUT_LEN 0
@@ -13048,15 +13778,15 @@
* This is an asynchronous operation owing to the time taken to validate an
* ECDSA license
*/
-#define MC_CMD_LICENSING_V3_TEMPORARY_SET 0x0
+#define MC_CMD_LICENSING_V3_TEMPORARY_SET 0x0
/* enum: clear the license immediately rather than waiting for the next power
* cycle
*/
-#define MC_CMD_LICENSING_V3_TEMPORARY_CLEAR 0x1
+#define MC_CMD_LICENSING_V3_TEMPORARY_CLEAR 0x1
/* enum: get the status of the asynchronous MC_CMD_LICENSING_V3_TEMPORARY_SET
* operation
*/
-#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS 0x2
+#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS 0x2
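Since SET validates an ECDSA license asynchronously, the intended flow is SET followed by STATUS polls until the state leaves IN_PROGRESS. A sketch under that assumption; licensing_v3_temp_status() is a hypothetical wrapper that issues the STATUS sub-command and returns the STATUS_STATUS dword:

static int licensing_v3_wait_install(struct efx_nic *efx)
{
	u32 status;
	int rc;

	do {
		msleep(100);	/* polling interval is a guess */
		rc = licensing_v3_temp_status(efx, &status);	/* hypothetical */
		if (rc)
			return rc;
	} while (status == MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS);

	return status == MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK ? 0 : -EIO;
}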
/* MC_CMD_LICENSING_V3_TEMPORARY_IN_SET msgrequest */
#define MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LEN 164
@@ -13082,13 +13812,13 @@
#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_OFST 0
#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_LEN 4
/* enum: finished validating and installing license */
-#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK 0x0
+#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK 0x0
/* enum: license validation and installation in progress */
-#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS 0x1
+#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS 0x1
/* enum: licensing error. More specific error messages are not provided to
* avoid exposing details of the licensing system to the client
*/
-#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR 0x2
+#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR 0x2
/* bitmask of licensed features */
#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_OFST 4
#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LEN 8
@@ -13124,9 +13854,9 @@
#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_OFST 8
#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_LEN 4
/* enum: receive to just the specified queue */
-#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
+#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
/* enum: receive to multiple queues using RSS context */
-#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
+#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
/* RSS context (for RX_MODE_RSS) as returned by MC_CMD_RSS_CONTEXT_ALLOC. Note
* that these handles should be considered opaque to the host, although a value
* of 0xFFFFFFFF is guaranteed never to be a valid handle.
@@ -13146,7 +13876,7 @@
*/
#define MC_CMD_GET_PORT_SNIFF_CONFIG 0xf8
-#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_GET_PORT_SNIFF_CONFIG_IN msgrequest */
#define MC_CMD_GET_PORT_SNIFF_CONFIG_IN_LEN 0
@@ -13167,9 +13897,9 @@
#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_OFST 8
#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_LEN 4
/* enum: receiving to just the specified queue */
-#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
+#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
/* enum: receiving to multiple queues using RSS context */
-#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
+#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
/* RSS context (for RX_MODE_RSS) */
#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_OFST 12
#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_LEN 4
@@ -13193,12 +13923,12 @@
/* enum: Per-TXQ enable for multicast UDP destination lookup for possible
* internal loopback. (ENTITY is a queue handle, VALUE is a single boolean.)
*/
-#define MC_CMD_SET_PARSER_DISP_CONFIG_IN_TXQ_MCAST_UDP_DST_LOOKUP_EN 0x0
+#define MC_CMD_SET_PARSER_DISP_CONFIG_IN_TXQ_MCAST_UDP_DST_LOOKUP_EN 0x0
/* enum: Per-v-adaptor enable for suppression of self-transmissions on the
* internal loopback path. (ENTITY is an EVB_PORT_ID, VALUE is a single
* boolean.)
*/
-#define MC_CMD_SET_PARSER_DISP_CONFIG_IN_VADAPTOR_SUPPRESS_SELF_TX 0x1
+#define MC_CMD_SET_PARSER_DISP_CONFIG_IN_VADAPTOR_SUPPRESS_SELF_TX 0x1
/* handle for the entity to update: queue handle, EVB port ID, etc. depending
* on the type of configuration setting being changed
*/
@@ -13278,9 +14008,9 @@
#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_OFST 8
#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_LEN 4
/* enum: receive to just the specified queue */
-#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
+#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
/* enum: receive to multiple queues using RSS context */
-#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
+#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
/* RSS context (for RX_MODE_RSS) as returned by MC_CMD_RSS_CONTEXT_ALLOC. Note
* that these handles should be considered opaque to the host, although a value
* of 0xFFFFFFFF is guaranteed never to be a valid handle.
@@ -13300,7 +14030,7 @@
*/
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG 0xfc
-#define MC_CMD_0xfc_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xfc_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_GET_TX_PORT_SNIFF_CONFIG_IN msgrequest */
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_IN_LEN 0
@@ -13319,9 +14049,9 @@
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_OFST 8
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_LEN 4
/* enum: receiving to just the specified queue */
-#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
+#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
/* enum: receiving to multiple queues using RSS context */
-#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
+#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
/* RSS context (for RX_MODE_RSS) */
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_OFST 12
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_LEN 4
@@ -13431,9 +14161,9 @@
#define MC_CMD_READ_ATB_IN_LEN 16
#define MC_CMD_READ_ATB_IN_SIGNAL_BUS_OFST 0
#define MC_CMD_READ_ATB_IN_SIGNAL_BUS_LEN 4
-#define MC_CMD_READ_ATB_IN_BUS_CCOM 0x0 /* enum */
-#define MC_CMD_READ_ATB_IN_BUS_CKR 0x1 /* enum */
-#define MC_CMD_READ_ATB_IN_BUS_CPCIE 0x8 /* enum */
+#define MC_CMD_READ_ATB_IN_BUS_CCOM 0x0 /* enum */
+#define MC_CMD_READ_ATB_IN_BUS_CKR 0x1 /* enum */
+#define MC_CMD_READ_ATB_IN_BUS_CPCIE 0x8 /* enum */
#define MC_CMD_READ_ATB_IN_SIGNAL_EN_BITNO_OFST 4
#define MC_CMD_READ_ATB_IN_SIGNAL_EN_BITNO_LEN 4
#define MC_CMD_READ_ATB_IN_SIGNAL_SEL_OFST 8
@@ -13503,46 +14233,46 @@
#define MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_PF_WIDTH 16
#define MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_LBN 16
#define MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_WIDTH 16
-#define MC_CMD_PRIVILEGE_MASK_IN_VF_NULL 0xffff /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_VF_NULL 0xffff /* enum */
/* New privilege mask to be set. The mask will only be changed if the MSB is
* set to 1.
*/
#define MC_CMD_PRIVILEGE_MASK_IN_NEW_MASK_OFST 4
#define MC_CMD_PRIVILEGE_MASK_IN_NEW_MASK_LEN 4
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN 0x1 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK 0x2 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD 0x4 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP 0x8 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE_FILTERS 0x10 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN 0x1 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK 0x2 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD 0x4 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP 0x8 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE_FILTERS 0x10 /* enum */
/* enum: Deprecated. Equivalent to MAC_SPOOFING_TX combined with CHANGE_MAC. */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING 0x20
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_UNICAST 0x40 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MULTICAST 0x80 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_BROADCAST 0x100 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST 0x200 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS 0x400 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING 0x20
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_UNICAST 0x40 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MULTICAST 0x80 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_BROADCAST 0x100 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST 0x200 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS 0x400 /* enum */
/* enum: Allows setting the TX packets' source MAC address to any arbitrary
 * MAC address.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX 0x800
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX 0x800
/* enum: Privilege that allows a Function to change the MAC address configured
* in its associated vAdapter/vPort.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_CHANGE_MAC 0x1000
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_CHANGE_MAC 0x1000
/* enum: Privilege that allows a Function to install filters that specify VLANs
* that are not in the permit list for the associated vPort. This privilege is
* primarily to support ESX where vPorts are created that restrict traffic to
* only a set of permitted VLANs. See the vPort flag FLAG_VLAN_RESTRICT.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_UNRESTRICTED_VLAN 0x2000
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_UNRESTRICTED_VLAN 0x2000
/* enum: Privilege for insecure commands. Commands that belong to this group
* are not permitted on secure adapters regardless of the privilege mask.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE 0x4000
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE 0x4000
/* enum: Set this bit to indicate that a new privilege mask is to be set,
* otherwise the command will only read the existing mask.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE 0x80000000
+#define MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE 0x80000000
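To change rather than just read a mask, the MSB (DO_CHANGE) must be set in NEW_MASK, and the target function is packed with the PF in bits 0-15 and the VF in bits 16-31 per the LBN/WIDTH pairs above. A small illustrative packing (values are arbitrary; the RPC plumbing is elided):

/* grant LINK and PTP to VF 1 of PF 0; without DO_CHANGE this request
 * would merely read the existing mask back
 */
u32 function = 0 | (1 << MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_LBN);
u32 new_mask = MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE |
	       MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK |
	       MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP;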
/* MC_CMD_PRIVILEGE_MASK_OUT msgresponse */
#define MC_CMD_PRIVILEGE_MASK_OUT_LEN 4
@@ -13573,12 +14303,12 @@
/* New link state mode to be set */
#define MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_OFST 4
#define MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_LEN 4
-#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO 0x0 /* enum */
-#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP 0x1 /* enum */
-#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN 0x2 /* enum */
+#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO 0x0 /* enum */
+#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP 0x1 /* enum */
+#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN 0x2 /* enum */
/* enum: Use this value to just read the existing setting without modifying it.
*/
-#define MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE 0xffffffff
+#define MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE 0xffffffff
/* MC_CMD_LINK_STATE_MODE_OUT msgresponse */
#define MC_CMD_LINK_STATE_MODE_OUT_LEN 4
@@ -13674,12 +14404,12 @@
/* The groups of functions to have their privilege masks modified. */
#define MC_CMD_PRIVILEGE_MODIFY_IN_FN_GROUP_OFST 0
#define MC_CMD_PRIVILEGE_MODIFY_IN_FN_GROUP_LEN 4
-#define MC_CMD_PRIVILEGE_MODIFY_IN_NONE 0x0 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_ALL 0x1 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_PFS_ONLY 0x2 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_VFS_ONLY 0x3 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_VFS_OF_PF 0x4 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_ONE 0x5 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_NONE 0x0 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_ALL 0x1 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_PFS_ONLY 0x2 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_VFS_ONLY 0x3 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_VFS_OF_PF 0x4 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_ONE 0x5 /* enum */
/* For VFS_OF_PF specify the PF, for ONE specify the target function */
#define MC_CMD_PRIVILEGE_MODIFY_IN_FUNCTION_OFST 4
#define MC_CMD_PRIVILEGE_MODIFY_IN_FUNCTION_LEN 4
@@ -13782,11 +14512,11 @@
/* Sector type */
#define MC_CMD_XPM_READ_SECTOR_OUT_TYPE_OFST 0
#define MC_CMD_XPM_READ_SECTOR_OUT_TYPE_LEN 4
-#define MC_CMD_XPM_READ_SECTOR_OUT_BLANK 0x0 /* enum */
-#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_128 0x1 /* enum */
-#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_256 0x2 /* enum */
-#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_DATA 0x3 /* enum */
-#define MC_CMD_XPM_READ_SECTOR_OUT_INVALID 0xff /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_BLANK 0x0 /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_128 0x1 /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_256 0x2 /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_DATA 0x3 /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_INVALID 0xff /* enum */
/* Sector data */
#define MC_CMD_XPM_READ_SECTOR_OUT_DATA_OFST 4
#define MC_CMD_XPM_READ_SECTOR_OUT_DATA_LEN 1
@@ -14001,18 +14731,18 @@
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_OFST 0
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LEN 2
/* enum: the IANA allocated UDP port for VXLAN */
-#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT 0x12b5
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT 0x12b5
/* enum: the IANA allocated UDP port for Geneve */
-#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT 0x17c1
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT 0x17c1
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LBN 0
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_WIDTH 16
/* tunnel encapsulation protocol (only those named below are supported) */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_OFST 2
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LEN 2
/* enum: This port will be used for VXLAN on both IPv4 and IPv6 */
-#define TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN 0x0
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN 0x0
/* enum: This port will be used for Geneve on both IPv4 and IPv6 */
-#define TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE 0x1
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE 0x1
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LBN 16
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_WIDTH 16
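Each TUNNEL_ENCAP_UDP_PORT_ENTRY is one 32-bit word: the UDP port in the low 16 bits and the protocol in the high 16, per the LBN/WIDTH pairs above. For example, an entry mapping the IANA Geneve port would be built as (illustrative only):

u32 entry = TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT
		<< TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LBN |
	    TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE
		<< TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LBN;
/* entry == 0x000117c1: port 6081 (0x17c1) low, protocol GENEVE (1) high */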
@@ -14180,10 +14910,10 @@
/* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */
#define MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12
#define MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_LEN 4
-#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS 0x0 /* enum */
-#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START 0x1 /* enum */
-#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START 0x2 /* enum */
-#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF 0x3 /* enum */
+#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS 0x0 /* enum */
+#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START 0x1 /* enum */
+#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START 0x2 /* enum */
+#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF 0x3 /* enum */
/* MC_CMD_SET_EVQ_TMR_OUT msgresponse */
#define MC_CMD_SET_EVQ_TMR_OUT_LEN 8
@@ -14269,7 +14999,7 @@
*/
#define MC_CMD_ALLOCATE_TX_VFIFO_CP 0x11d
-#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN msgrequest */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_LEN 20
@@ -14281,9 +15011,9 @@
/* Will the common pool be used as TX_vFIFO_ULL (1) */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_OFST 4
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_LEN 4
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED 0x1 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED 0x1 /* enum */
/* enum: Using this interface without TX_vFIFO_ULL is not supported for now */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED 0x0
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED 0x0
/* Number of buffers to reserve for the common pool */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_OFST 8
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_LEN 4
@@ -14291,20 +15021,20 @@
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_OFST 12
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_LEN 4
/* enum: Extracts information from function */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1
/* Network port or RX Engine to which the common pool connects. */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_OFST 16
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_LEN 4
/* enum: Extracts information from function */
-/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0 0x0 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1 0x1 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2 0x2 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3 0x3 /* enum */
+/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1 */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0 0x0 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1 0x1 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2 0x2 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3 0x3 /* enum */
/* enum: To enable Switch loopback with Rx engine 0 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0 0x4
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0 0x4
/* enum: To enable Switch loopback with Rx engine 1 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1 0x5
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1 0x5
/* MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT msgresponse */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_LEN 4
@@ -14320,7 +15050,7 @@
*/
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO 0x11e
-#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN msgrequest */
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LEN 20
@@ -14332,20 +15062,20 @@
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_OFST 4
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_LEN 4
/* enum: Extracts information from common pool */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE -0x1
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0 0x0 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1 0x1 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2 0x2 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3 0x3 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE -0x1
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0 0x0 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1 0x1 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2 0x2 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3 0x3 /* enum */
/* enum: To enable Switch loopback with Rx engine 0 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0 0x4
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0 0x4
/* enum: To enable Switch loopback with Rx engine 1 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1 0x5
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1 0x5
/* Minimum number of buffers that the pool must have */
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_OFST 8
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_LEN 4
/* enum: Do not check the space available */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM 0x0
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM 0x0
/* Will the vFIFO be used as TX_vFIFO_ULL */
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_OFST 12
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_LEN 4
@@ -14353,7 +15083,7 @@
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_OFST 16
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_LEN 4
/* enum: Search for the lowest unused priority */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE -0x1
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE -0x1
/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT msgresponse */
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_LEN 8
@@ -14372,7 +15102,7 @@
*/
#define MC_CMD_TEARDOWN_TX_VFIFO_VF 0x11f
-#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_TEARDOWN_TX_VFIFO_VF_IN msgrequest */
#define MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_LEN 4
@@ -14391,7 +15121,7 @@
*/
#define MC_CMD_DEALLOCATE_TX_VFIFO_CP 0x121
-#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN msgrequest */
#define MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_LEN 4
@@ -14410,7 +15140,7 @@
*/
#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS 0x124
-#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN msgrequest */
#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN_LEN 0
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index ce8aabf9091e..9382bb0b4d5a 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -352,6 +352,64 @@ static void efx_mcdi_phy_decode_link(struct efx_nic *efx,
link_state->speed = speed;
}
+/* The semantics of the ethtool FEC mode bitmask are not well defined,
+ * particularly the meaning of combinations of bits, so we define our own
+ * semantics, as follows:
+ * OFF overrides any other bits, and means "disable all FEC" (with the
+ * exception of 25G KR4/CR4, where it is not possible to reject it if the
+ * AN partner requests it).
+ * AUTO on its own means use cable requirements and link partner autoneg with
+ * fw-default preferences for the cable type.
+ * AUTO and either RS or BASER means use the specified FEC type if cable and
+ * link partner support it, otherwise autoneg/fw-default.
+ * RS or BASER alone means use the specified FEC type if cable and link partner
+ * support it and either requests it, otherwise no FEC.
+ * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
+ * partner support it, preferring RS to BASER.
+ */
+static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap)
+{
+ u32 ret = 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_OFF)
+ return 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_AUTO)
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_RS)
+ ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_BASER)
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
+ return ret;
+}
+
+/* Invert ethtool_fec_caps_to_mcdi. There are two combinations the function
+ * above can never produce: (baser xor rs) with neither req bit set. The
+ * implementation below maps both of those to AUTO. This should never matter,
+ * and it's not clear what a better mapping would be anyway.
+ */
+static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
+{
+ bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
+ rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
+ baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
+ baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
+
+ if (!baser && !rs)
+ return ETHTOOL_FEC_OFF;
+ return (rs_req ? ETHTOOL_FEC_RS : 0) |
+ (baser_req ? ETHTOOL_FEC_BASER : 0) |
+ (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
+}
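A worked example of the two mappings, following the rules in the block comment above: ETHTOOL_FEC_RS alone becomes the RS ability plus RS_REQUESTED, and on a non-25G link that converts straight back with no spurious AUTO bit:

u32 caps = ethtool_fec_caps_to_mcdi(ETHTOOL_FEC_RS);
/* caps == (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
 *	   (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN)
 */
u32 back = mcdi_fec_caps_to_ethtool(caps, false);
/* back == ETHTOOL_FEC_RS: rs == rs_req and baser == baser_req, so the
 * AUTO term contributes nothing
 */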
+
static int efx_mcdi_phy_probe(struct efx_nic *efx)
{
struct efx_mcdi_phy_data *phy_data;
@@ -438,6 +496,13 @@ static int efx_mcdi_phy_probe(struct efx_nic *efx)
MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
+ /* Record the initial FEC configuration (or nearest approximation
+ * representable in the ethtool configuration space)
+ */
+ efx->fec_config = mcdi_fec_caps_to_ethtool(caps,
+ efx->link_state.speed == 25000 ||
+ efx->link_state.speed == 50000);
+
/* Default to Autonegotiated flow control if the PHY supports it */
efx->wanted_fc = EFX_FC_RX | EFX_FC_TX;
if (phy_data->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
@@ -458,6 +523,8 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx)
ethtool_linkset_to_mcdi_cap(efx->link_advertising) :
phy_cfg->forced_cap);
+ caps |= ethtool_fec_caps_to_mcdi(efx->fec_config);
+
return efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
efx->loopback_mode, 0);
}
@@ -584,6 +651,8 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
}
}
+ caps |= ethtool_fec_caps_to_mcdi(efx->fec_config);
+
rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
efx->loopback_mode, 0);
if (rc)
@@ -599,6 +668,85 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
return 0;
}
+static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx,
+ struct ethtool_fecparam *fec)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
+ u32 caps, active, speed; /* MCDI format */
+ bool is_25g = false;
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
+ return -EOPNOTSUPP;
+
+ /* behaviour for 25G/50G links depends on 25G BASER bit */
+ speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
+ is_25g = speed == 25000 || speed == 50000;
+
+ caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
+ fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
+ /* BASER is never supported on 100G */
+ if (speed == 100000)
+ fec->fec &= ~ETHTOOL_FEC_BASER;
+
+ active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
+ switch (active) {
+ case MC_CMD_FEC_NONE:
+ fec->active_fec = ETHTOOL_FEC_OFF;
+ break;
+ case MC_CMD_FEC_BASER:
+ fec->active_fec = ETHTOOL_FEC_BASER;
+ break;
+ case MC_CMD_FEC_RS:
+ fec->active_fec = ETHTOOL_FEC_RS;
+ break;
+ default:
+ netif_warn(efx, hw, efx->net_dev,
+ "Firmware reports unrecognised FEC_TYPE %u\n",
+ active);
+ /* We don't know what firmware has picked. AUTO is as good a
+ * "can't happen" value as any other.
+ */
+ fec->active_fec = ETHTOOL_FEC_AUTO;
+ break;
+ }
+
+ return 0;
+}
+
+static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx,
+ const struct ethtool_fecparam *fec)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 caps;
+ int rc;
+
+ /* Work out what efx_mcdi_phy_set_link_ksettings() would produce from
+ * saved advertising bits
+ */
+ if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, efx->link_advertising))
+ caps = (ethtool_linkset_to_mcdi_cap(efx->link_advertising) |
+ 1 << MC_CMD_PHY_CAP_AN_LBN);
+ else
+ caps = phy_cfg->forced_cap;
+
+ caps |= ethtool_fec_caps_to_mcdi(fec->fec);
+ rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
+ efx->loopback_mode, 0);
+ if (rc)
+ return rc;
+
+ /* Record the new FEC setting for subsequent set_link calls */
+ efx->fec_config = fec->fec;
+ return 0;
+}
+
static int efx_mcdi_phy_test_alive(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
@@ -977,6 +1125,8 @@ static const struct efx_phy_operations efx_mcdi_phy_ops = {
.remove = efx_mcdi_phy_remove,
.get_link_ksettings = efx_mcdi_phy_get_link_ksettings,
.set_link_ksettings = efx_mcdi_phy_set_link_ksettings,
+ .get_fecparam = efx_mcdi_phy_get_fecparam,
+ .set_fecparam = efx_mcdi_phy_set_fecparam,
.test_alive = efx_mcdi_phy_test_alive,
.run_tests = efx_mcdi_phy_run_tests,
.test_name = efx_mcdi_phy_test_name,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index d20a8660ee48..5e379a83c729 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -430,6 +430,7 @@ enum efx_sync_events_state {
* @event_test_cpu: Last CPU to handle interrupt or test event for this channel
* @irq_count: Number of IRQs since last adaptive moderation decision
* @irq_mod_score: IRQ moderation score
+ * @filter_work: Work item for efx_filter_rfs_expire()
* @rps_flow_id: Flow IDs of filters allocated for accelerated RFS,
* indexed by filter ID
* @n_rx_tobe_disc: Count of RX_TOBE_DISC errors
@@ -475,6 +476,7 @@ struct efx_channel {
unsigned int irq_mod_score;
#ifdef CONFIG_RFS_ACCEL
unsigned int rfs_filters_added;
+ struct work_struct filter_work;
#define RPS_FLOW_ID_INVALID 0xFFFFFFFF
u32 *rps_flow_id;
#endif
@@ -627,6 +629,8 @@ static inline bool efx_link_state_equal(const struct efx_link_state *left,
* Serialised by the mac_lock.
* @get_link_ksettings: Get ethtool settings. Serialised by the mac_lock.
* @set_link_ksettings: Set ethtool settings. Serialised by the mac_lock.
+ * @get_fecparam: Get Forward Error Correction settings. Serialised by mac_lock.
+ * @set_fecparam: Set Forward Error Correction settings. Serialised by mac_lock.
* @set_npage_adv: Set abilities advertised in (Extended) Next Page
* (only needed where AN bit is set in mmds)
* @test_alive: Test that PHY is 'alive' (online)
@@ -645,6 +649,9 @@ struct efx_phy_operations {
struct ethtool_link_ksettings *cmd);
int (*set_link_ksettings)(struct efx_nic *efx,
const struct ethtool_link_ksettings *cmd);
+ int (*get_fecparam)(struct efx_nic *efx, struct ethtool_fecparam *fec);
+ int (*set_fecparam)(struct efx_nic *efx,
+ const struct ethtool_fecparam *fec);
void (*set_npage_adv) (struct efx_nic *efx, u32);
int (*test_alive) (struct efx_nic *efx);
const char *(*test_name) (struct efx_nic *efx, unsigned int index);
@@ -704,6 +711,28 @@ union efx_multicast_hash {
struct vfdi_status;
+/* The reserved RSS context value */
+#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff
+/**
+ * struct efx_rss_context - A user-defined RSS context for filtering
+ * @list: node of linked list on which this struct is stored
+ * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or
+ * %EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC.
+ * For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID.
+ * @user_id: the rss_context ID exposed to userspace over ethtool.
+ * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rx_hash_key: Toeplitz hash key for this RSS context
+ * @indir_table: Indirection table for this RSS context
+ */
+struct efx_rss_context {
+ struct list_head list;
+ u32 context_id;
+ u32 user_id;
+ bool rx_hash_udp_4tuple;
+ u8 rx_hash_key[40];
+ u32 rx_indir_table[128];
+};
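A hedged sketch of how a user context might be initialised before the MC has allocated a handle for it; the helper is hypothetical, and per the field documentation that follows, callers would hold rss_lock while touching the list:

static void efx_init_rss_context(struct efx_nic *efx,
				 struct efx_rss_context *ctx)
{
	ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;	/* not on the NIC yet */
	ctx->rx_hash_udp_4tuple = false;
	/* ctx->user_id would be assigned by the ethtool layer (assumption) */
	list_add_tail(&ctx->list, &efx->rss_context.list);
}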
+
/**
* struct efx_nic - an Efx NIC
* @name: Device name (net device name or bus id before net device registered)
@@ -764,11 +793,10 @@ struct vfdi_status;
* (valid only for NICs that set %EFX_RX_PKT_PREFIX_LEN; always negative)
* @rx_packet_ts_offset: Offset of timestamp from start of packet data
* (valid only if channel->sync_timestamps_enabled; always negative)
- * @rx_hash_key: Toeplitz hash key for RSS
- * @rx_indir_table: Indirection table for RSS
* @rx_scatter: Scatter mode enabled for receives
- * @rss_active: RSS enabled on hardware
- * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rss_context: Main RSS context. Its @list member is the head of the list of
+ * RSS contexts created by user requests
+ * @rss_lock: Protects custom RSS context software state in @rss_context.list
* @int_error_count: Number of internal errors seen recently
* @int_error_expire: Time at which error count will be expired
* @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
@@ -800,6 +828,8 @@ struct vfdi_status;
* @mdio_bus: PHY MDIO bus ID (only used by Siena)
* @phy_mode: PHY operating mode. Serialised by @mac_lock.
* @link_advertising: Autonegotiation advertising flags
+ * @fec_config: Forward Error Correction configuration flags. For bit positions
+ * see &enum ethtool_fec_config_bits.
* @link_state: Current state of the link
* @n_link_state_changes: Number of times the link has changed state
* @unicast_filter: Flag for Falcon-arch simple unicast filter.
@@ -814,9 +844,9 @@ struct vfdi_status;
* @loopback_mode: Loopback status
* @loopback_modes: Supported loopback mode bitmask
* @loopback_selftest: Offline self-test private state
- * @filter_sem: Filter table rw_semaphore, for freeing the table
- * @filter_lock: Filter table lock, for mere content changes
+ * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state
* @filter_state: Architecture-dependent filter table state
+ * @rps_mutex: Protects RPS state of all channels
* @rps_expire_channel: Next channel to check for expiry
* @rps_expire_index: Next index to check for expiry in
* @rps_expire_channel's @rps_flow_id
@@ -909,11 +939,9 @@ struct efx_nic {
int rx_packet_hash_offset;
int rx_packet_len_offset;
int rx_packet_ts_offset;
- u8 rx_hash_key[40];
- u32 rx_indir_table[128];
bool rx_scatter;
- bool rss_active;
- bool rx_hash_udp_4tuple;
+ struct efx_rss_context rss_context;
+ struct mutex rss_lock;
unsigned int_error_count;
unsigned long int_error_expire;
@@ -955,6 +983,7 @@ struct efx_nic {
enum efx_phy_mode phy_mode;
__ETHTOOL_DECLARE_LINK_MODE_MASK(link_advertising);
+ u32 fec_config;
struct efx_link_state link_state;
unsigned int n_link_state_changes;
@@ -970,9 +999,9 @@ struct efx_nic {
void *loopback_selftest;
struct rw_semaphore filter_sem;
- spinlock_t filter_lock;
void *filter_state;
#ifdef CONFIG_RFS_ACCEL
+ struct mutex rps_mutex;
unsigned int rps_expire_channel;
unsigned int rps_expire_index;
#endif
@@ -1099,6 +1128,10 @@ struct efx_udp_tunnel {
* @tx_write: Write TX descriptors and doorbell
* @rx_push_rss_config: Write RSS hash key and indirection table to the NIC
* @rx_pull_rss_config: Read RSS hash key and indirection table back from the NIC
+ * @rx_push_rss_context_config: Write RSS hash key and indirection table for
+ * user RSS context to the NIC
+ * @rx_pull_rss_context_config: Read RSS hash key and indirection table for user
+ * RSS context back from the NIC
* @rx_probe: Allocate resources for RX queue
* @rx_init: Initialise RX queue on the NIC
* @rx_remove: Free resources for RX queue
@@ -1123,10 +1156,6 @@ struct efx_udp_tunnel {
* @filter_count_rx_used: Get the number of filters in use at a given priority
* @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1
* @filter_get_rx_ids: Get list of RX filters at a given priority
- * @filter_rfs_insert: Add or replace a filter for RFS. This must be
- * atomic. The hardware change may be asynchronous but should
- * not be delayed for long. It may fail if this can't be done
- * atomically.
* @filter_rfs_expire_one: Consider expiring a filter inserted for RFS.
* This must check whether the specified table entry is used by RFS
* and that rps_may_expire_flow() returns true for it.
@@ -1237,6 +1266,13 @@ struct efx_nic_type {
int (*rx_push_rss_config)(struct efx_nic *efx, bool user,
const u32 *rx_indir_table, const u8 *key);
int (*rx_pull_rss_config)(struct efx_nic *efx);
+ int (*rx_push_rss_context_config)(struct efx_nic *efx,
+ struct efx_rss_context *ctx,
+ const u32 *rx_indir_table,
+ const u8 *key);
+ int (*rx_pull_rss_context_config)(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
+ void (*rx_restore_rss_contexts)(struct efx_nic *efx);
int (*rx_probe)(struct efx_rx_queue *rx_queue);
void (*rx_init)(struct efx_rx_queue *rx_queue);
void (*rx_remove)(struct efx_rx_queue *rx_queue);
@@ -1270,8 +1306,6 @@ struct efx_nic_type {
enum efx_filter_priority priority,
u32 *buf, u32 size);
#ifdef CONFIG_RFS_ACCEL
- s32 (*filter_rfs_insert)(struct efx_nic *efx,
- struct efx_filter_spec *spec);
bool (*filter_rfs_expire_one)(struct efx_nic *efx, u32 flow_id,
unsigned int index);
#endif
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 6549fc685a48..5640034bda10 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -365,6 +365,8 @@ enum {
* @vi_base: Absolute index of first VI in this function
* @n_allocated_vis: Number of VIs allocated to this function
* @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot
+ * @must_restore_rss_contexts: Flag: RSS contexts have yet to be restored after
+ * MC reboot
* @must_restore_filters: Flag: filters have yet to be restored after MC reboot
* @n_piobufs: Number of PIO buffers allocated to this function
* @wc_membase: Base address of write-combining mapping of the memory BAR
@@ -374,7 +376,6 @@ enum {
* @piobuf_size: size of a single PIO buffer
* @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC
* reboot
- * @rx_rss_context: Firmware handle for our RSS context
* @rx_rss_context_exclusive: Whether our RSS context is exclusive or shared
* @stats: Hardware statistics
* @workaround_35388: Flag: firmware supports workaround for bug 35388
@@ -408,6 +409,7 @@ struct efx_ef10_nic_data {
unsigned int vi_base;
unsigned int n_allocated_vis;
bool must_realloc_vis;
+ bool must_restore_rss_contexts;
bool must_restore_filters;
unsigned int n_piobufs;
void __iomem *wc_membase, *pio_write_base;
@@ -415,7 +417,6 @@ struct efx_ef10_nic_data {
unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT];
u16 piobuf_size;
bool must_restore_piobufs;
- u32 rx_rss_context;
bool rx_rss_context_exclusive;
u64 stats[EF10_STAT_COUNT];
bool workaround_35388;
@@ -603,8 +604,6 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
enum efx_filter_priority priority, u32 *buf,
u32 size);
#ifdef CONFIG_RFS_ACCEL
-s32 efx_farch_filter_rfs_insert(struct efx_nic *efx,
- struct efx_filter_spec *spec);
bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
unsigned int index);
#endif
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index cfe76aad79ee..95682831484e 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -827,14 +827,67 @@ MODULE_PARM_DESC(rx_refill_threshold,
#ifdef CONFIG_RFS_ACCEL
+/**
+ * struct efx_async_filter_insertion - Request to asynchronously insert a filter
+ * @net_dev: Reference to the netdevice
+ * @spec: The filter to insert
+ * @work: Work item for this request
+ * @rxq_index: Identifies the channel for which this request was made
+ * @flow_id: Identifies the kernel-side flow for which this request was made
+ */
+struct efx_async_filter_insertion {
+ struct net_device *net_dev;
+ struct efx_filter_spec spec;
+ struct work_struct work;
+ u16 rxq_index;
+ u32 flow_id;
+};
+
+static void efx_filter_rfs_work(struct work_struct *data)
+{
+ struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
+ work);
+ struct efx_nic *efx = netdev_priv(req->net_dev);
+ struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
+ int rc;
+
+ rc = efx->type->filter_insert(efx, &req->spec, false);
+ if (rc >= 0) {
+ /* Remember this so we can check whether to expire the filter
+ * later.
+ */
+ mutex_lock(&efx->rps_mutex);
+ channel->rps_flow_id[rc] = req->flow_id;
+ ++channel->rfs_filters_added;
+ mutex_unlock(&efx->rps_mutex);
+
+ if (req->spec.ether_type == htons(ETH_P_IP))
+ netif_info(efx, rx_status, efx->net_dev,
+ "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
+ (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+ req->spec.rem_host, ntohs(req->spec.rem_port),
+ req->spec.loc_host, ntohs(req->spec.loc_port),
+ req->rxq_index, req->flow_id, rc);
+ else
+ netif_info(efx, rx_status, efx->net_dev,
+ "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
+ (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+ req->spec.rem_host, ntohs(req->spec.rem_port),
+ req->spec.loc_host, ntohs(req->spec.loc_port),
+ req->rxq_index, req->flow_id, rc);
+ }
+
+ /* Release references */
+ dev_put(req->net_dev);
+ kfree(req);
+}
+
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct efx_nic *efx = netdev_priv(net_dev);
- struct efx_channel *channel;
- struct efx_filter_spec spec;
+ struct efx_async_filter_insertion *req;
struct flow_keys fk;
- int rc;
if (flow_id == RPS_FLOW_ID_INVALID)
return -EINVAL;
@@ -847,50 +900,39 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
if (fk.control.flags & FLOW_DIS_IS_FRAGMENT)
return -EPROTONOSUPPORT;
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT,
+ req = kmalloc(sizeof(*req), GFP_ATOMIC);
+ if (!req)
+ return -ENOMEM;
+
+ efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
rxq_index);
- spec.match_flags =
+ req->spec.match_flags =
EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
- spec.ether_type = fk.basic.n_proto;
- spec.ip_proto = fk.basic.ip_proto;
+ req->spec.ether_type = fk.basic.n_proto;
+ req->spec.ip_proto = fk.basic.ip_proto;
if (fk.basic.n_proto == htons(ETH_P_IP)) {
- spec.rem_host[0] = fk.addrs.v4addrs.src;
- spec.loc_host[0] = fk.addrs.v4addrs.dst;
+ req->spec.rem_host[0] = fk.addrs.v4addrs.src;
+ req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
} else {
- memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr));
- memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr));
+ memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
+ sizeof(struct in6_addr));
+ memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
}
- spec.rem_port = fk.ports.src;
- spec.loc_port = fk.ports.dst;
-
- rc = efx->type->filter_rfs_insert(efx, &spec);
- if (rc < 0)
- return rc;
+ req->spec.rem_port = fk.ports.src;
+ req->spec.loc_port = fk.ports.dst;
- /* Remember this so we can check whether to expire the filter later */
- channel = efx_get_channel(efx, rxq_index);
- channel->rps_flow_id[rc] = flow_id;
- ++channel->rfs_filters_added;
-
- if (spec.ether_type == htons(ETH_P_IP))
- netif_info(efx, rx_status, efx->net_dev,
- "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
- (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
- spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
- ntohs(spec.loc_port), rxq_index, flow_id, rc);
- else
- netif_info(efx, rx_status, efx->net_dev,
- "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
- (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
- spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
- ntohs(spec.loc_port), rxq_index, flow_id, rc);
-
- return rc;
+ dev_hold(req->net_dev = net_dev);
+ INIT_WORK(&req->work, efx_filter_rfs_work);
+ req->rxq_index = rxq_index;
+ req->flow_id = flow_id;
+ schedule_work(&req->work);
+ return 0;
}
bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
@@ -899,9 +941,8 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
unsigned int channel_idx, index, size;
u32 flow_id;
- if (!spin_trylock_bh(&efx->filter_lock))
+ if (!mutex_trylock(&efx->rps_mutex))
return false;
-
expire_one = efx->type->filter_rfs_expire_one;
channel_idx = efx->rps_expire_channel;
index = efx->rps_expire_index;
@@ -926,7 +967,7 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
efx->rps_expire_channel = channel_idx;
efx->rps_expire_index = index;
- spin_unlock_bh(&efx->filter_lock);
+ mutex_unlock(&efx->rps_mutex);
return true;
}
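
The hunk above moves ARFS filter programming off the atomic ndo_rx_flow_steer path and into process context. The load-bearing pieces are the GFP_ATOMIC allocation, the dev_hold()/dev_put() pair that keeps the netdevice alive across the asynchronous hop, and container_of() to recover the request inside the worker. A minimal sketch of the same pattern outside sfc (all names here are hypothetical, not driver code):

#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical request type mirroring efx_async_filter_insertion */
struct my_async_req {
	struct net_device *net_dev;	/* held until the worker finishes */
	struct work_struct work;
	u32 cookie;
};

static void my_async_worker(struct work_struct *work)
{
	struct my_async_req *req =
		container_of(work, struct my_async_req, work);

	/* Process context: sleeping locks and firmware calls are fine. */
	netdev_info(req->net_dev, "async work, cookie %u\n", req->cookie);

	dev_put(req->net_dev);	/* drop the reference taken at submit */
	kfree(req);
}

/* Callable from atomic context, e.g. an ndo_rx_flow_steer hook */
static int my_submit(struct net_device *net_dev, u32 cookie)
{
	struct my_async_req *req = kmalloc(sizeof(*req), GFP_ATOMIC);

	if (!req)
		return -ENOMEM;
	dev_hold(net_dev);	/* keep the netdev alive across the hop */
	req->net_dev = net_dev;
	req->cookie = cookie;
	INIT_WORK(&req->work, my_async_worker);
	schedule_work(&req->work);
	return 0;
}

Because the worker runs from a kernel thread, it can take mutexes such as the new rps_mutex, which is exactly what lets __efx_filter_rfs_expire() drop the old filter_lock spinlock above.
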
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index ae8645ae4492..65161f68265a 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -350,11 +350,11 @@ static int siena_rx_pull_rss_config(struct efx_nic *efx)
* siena_rx_push_rss_config, below)
*/
efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
- memcpy(efx->rx_hash_key, &temp, sizeof(temp));
+ memcpy(efx->rss_context.rx_hash_key, &temp, sizeof(temp));
efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
- memcpy(efx->rx_hash_key + sizeof(temp), &temp, sizeof(temp));
+ memcpy(efx->rss_context.rx_hash_key + sizeof(temp), &temp, sizeof(temp));
efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
- memcpy(efx->rx_hash_key + 2 * sizeof(temp), &temp,
+ memcpy(efx->rss_context.rx_hash_key + 2 * sizeof(temp), &temp,
FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
efx_farch_rx_pull_indir_table(efx);
return 0;
@@ -367,26 +367,26 @@ static int siena_rx_push_rss_config(struct efx_nic *efx, bool user,
/* Set hash key for IPv4 */
if (key)
- memcpy(efx->rx_hash_key, key, sizeof(temp));
- memcpy(&temp, efx->rx_hash_key, sizeof(temp));
+ memcpy(efx->rss_context.rx_hash_key, key, sizeof(temp));
+ memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
/* Enable IPv6 RSS */
- BUILD_BUG_ON(sizeof(efx->rx_hash_key) <
+ BUILD_BUG_ON(sizeof(efx->rss_context.rx_hash_key) <
2 * sizeof(temp) + FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8 ||
FRF_CZ_RX_RSS_IPV6_TKEY_HI_LBN != 0);
- memcpy(&temp, efx->rx_hash_key, sizeof(temp));
+ memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
- memcpy(&temp, efx->rx_hash_key + sizeof(temp), sizeof(temp));
+ memcpy(&temp, efx->rss_context.rx_hash_key + sizeof(temp), sizeof(temp));
efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
EFX_POPULATE_OWORD_2(temp, FRF_CZ_RX_RSS_IPV6_THASH_ENABLE, 1,
FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE, 1);
- memcpy(&temp, efx->rx_hash_key + 2 * sizeof(temp),
+ memcpy(&temp, efx->rss_context.rx_hash_key + 2 * sizeof(temp),
FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
- memcpy(efx->rx_indir_table, rx_indir_table,
- sizeof(efx->rx_indir_table));
+ memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
efx_farch_rx_push_indir_table(efx);
return 0;
@@ -432,8 +432,8 @@ static int siena_init_nic(struct efx_nic *efx)
EFX_RX_USR_BUF_SIZE >> 5);
efx_writeo(efx, &temp, FR_AZ_RX_CFG);
- siena_rx_push_rss_config(efx, false, efx->rx_indir_table, NULL);
- efx->rss_active = true;
+ siena_rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL);
+ efx->rss_context.context_id = 0; /* indicates RSS is active */
/* Enable event logging */
rc = efx_mcdi_log_ctrl(efx, true, false, 0);
@@ -1035,7 +1035,6 @@ const struct efx_nic_type siena_a0_nic_type = {
.filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
.filter_get_rx_ids = efx_farch_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
- .filter_rfs_insert = efx_farch_filter_rfs_insert,
.filter_rfs_expire_one = efx_farch_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD
diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig
index 63aca9f847e1..4c2f612e4414 100644
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig
@@ -20,7 +20,7 @@ if NET_VENDOR_SMSC
config SMC9194
tristate "SMC 9194 support"
- depends on (ISA || MAC && BROKEN)
+ depends on ISA
select CRC32
---help---
This is support for the SMC9xxx based Ethernet cards. Choose this
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 012fb66eed8d..f0afb88d7bc2 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2335,14 +2335,14 @@ static int smsc911x_drv_remove(struct platform_device *pdev)
pdata = netdev_priv(dev);
BUG_ON(!pdata);
BUG_ON(!pdata->ioaddr);
- WARN_ON(dev->phydev);
SMSC_TRACE(pdata, ifdown, "Stopping driver");
+ unregister_netdev(dev);
+
mdiobus_unregister(pdata->mii_bus);
mdiobus_free(pdata->mii_bus);
- unregister_netdev(dev);
res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
"smsc911x-memory");
if (!res)
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 111e7ca9df56..0b3b7a460641 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1295,7 +1295,7 @@ static int ave_open(struct net_device *ndev)
val |= AVE_IIRQC_EN0 | (AVE_INTM_COUNT << 16);
writel(val, priv->base + AVE_IIRQC);
- val = AVE_GI_RXIINT | AVE_GI_RXOVF | AVE_GI_TX;
+ val = AVE_GI_RXIINT | AVE_GI_RXOVF | AVE_GI_TX | AVE_GI_RXDROP;
ave_irq_restore(ndev, val);
napi_enable(&priv->napi_rx);
@@ -1701,6 +1701,10 @@ static const struct ave_soc_data ave_ld20_data = {
.is_desc_64bit = true,
};
+static const struct ave_soc_data ave_pxs3_data = {
+ .is_desc_64bit = false,
+};
+
static const struct of_device_id of_ave_match[] = {
{
.compatible = "socionext,uniphier-pro4-ave4",
@@ -1718,6 +1722,10 @@ static const struct of_device_id of_ave_match[] = {
.compatible = "socionext,uniphier-ld20-ave4",
.data = &ave_ld20_data,
},
+ {
+ .compatible = "socionext,uniphier-pxs3-ave4",
+ .data = &ave_pxs3_data,
+ },
{ /* Sentinel */ }
};
MODULE_DEVICE_TABLE(of, of_ave_match);
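
Adding PXs3 support needs no new code path: the per-SoC difference travels through the .data pointer of the of_device_id table and is looked up at probe time. A generic sketch of that idiom (the compatible string and struct here are made up for illustration):

#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>

struct my_soc_data {			/* hypothetical per-SoC parameters */
	bool is_desc_64bit;
};

static const struct my_soc_data my_newsoc_data = {
	.is_desc_64bit = false,
};

static const struct of_device_id my_match[] = {
	{ .compatible = "vendor,new-soc", .data = &my_newsoc_data },
	{ /* Sentinel */ }
};
MODULE_DEVICE_TABLE(of, my_match);

static int my_probe(struct platform_device *pdev)
{
	const struct my_soc_data *data = of_device_get_match_data(&pdev->dev);

	if (!data)
		return -EINVAL;
	dev_info(&pdev->dev, "64-bit descriptors: %d\n", data->is_desc_64bit);
	return 0;
}
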
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 5270d26f0bc6..2d5d4aea3bcb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -48,26 +48,18 @@
#define MUX_CLK_NUM_PARENTS 2
struct meson8b_dwmac {
- struct platform_device *pdev;
-
+ struct device *dev;
void __iomem *regs;
-
phy_interface_t phy_mode;
+ struct clk *rgmii_tx_clk;
+ u32 tx_delay_ns;
+};
+struct meson8b_dwmac_clk_configs {
struct clk_mux m250_mux;
- struct clk *m250_mux_clk;
- struct clk *m250_mux_parent[MUX_CLK_NUM_PARENTS];
-
struct clk_divider m250_div;
- struct clk *m250_div_clk;
-
struct clk_fixed_factor fixed_div2;
- struct clk *fixed_div2_clk;
-
struct clk_gate rgmii_tx_en;
- struct clk *rgmii_tx_en_clk;
-
- u32 tx_delay_ns;
};
static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
@@ -82,106 +74,99 @@ static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
writel(data, dwmac->regs + reg);
}
-static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
+static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac,
+ const char *name_suffix,
+ const char **parent_names,
+ int num_parents,
+ const struct clk_ops *ops,
+ struct clk_hw *hw)
{
struct clk_init_data init;
- int i, ret;
- struct device *dev = &dwmac->pdev->dev;
char clk_name[32];
- const char *clk_div_parents[1];
- const char *mux_parent_names[MUX_CLK_NUM_PARENTS];
+
+ snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dwmac->dev),
+ name_suffix);
+
+ init.name = clk_name;
+ init.ops = ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+ init.num_parents = num_parents;
+
+ hw->init = &init;
+
+ return devm_clk_register(dwmac->dev, hw);
+}
+
+static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
+{
+ int i, ret;
+ struct clk *clk;
+ struct device *dev = dwmac->dev;
+ const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+ struct meson8b_dwmac_clk_configs *clk_configs;
+
+ clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+ if (!clk_configs)
+ return -ENOMEM;
/* get the mux parents from DT */
for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) {
char name[16];
snprintf(name, sizeof(name), "clkin%d", i);
- dwmac->m250_mux_parent[i] = devm_clk_get(dev, name);
- if (IS_ERR(dwmac->m250_mux_parent[i])) {
- ret = PTR_ERR(dwmac->m250_mux_parent[i]);
+ clk = devm_clk_get(dev, name);
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
if (ret != -EPROBE_DEFER)
dev_err(dev, "Missing clock %s\n", name);
return ret;
}
- mux_parent_names[i] =
- __clk_get_name(dwmac->m250_mux_parent[i]);
+ mux_parent_names[i] = __clk_get_name(clk);
}
- /* create the m250_mux */
- snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev));
- init.name = clk_name;
- init.ops = &clk_mux_ops;
- init.flags = CLK_SET_RATE_PARENT;
- init.parent_names = mux_parent_names;
- init.num_parents = MUX_CLK_NUM_PARENTS;
-
- dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0;
- dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
- dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
- dwmac->m250_mux.flags = 0;
- dwmac->m250_mux.table = NULL;
- dwmac->m250_mux.hw.init = &init;
-
- dwmac->m250_mux_clk = devm_clk_register(dev, &dwmac->m250_mux.hw);
- if (WARN_ON(IS_ERR(dwmac->m250_mux_clk)))
- return PTR_ERR(dwmac->m250_mux_clk);
-
- /* create the m250_div */
- snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
- init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
- init.ops = &clk_divider_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->m250_div.reg = dwmac->regs + PRG_ETH0;
- dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
- dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
- dwmac->m250_div.hw.init = &init;
- dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+ clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
+ clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names,
+ MUX_CLK_NUM_PARENTS, &clk_mux_ops,
+ &clk_configs->m250_mux.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+ clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+ clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
CLK_DIVIDER_ALLOW_ZERO |
CLK_DIVIDER_ROUND_CLOSEST;
-
- dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw);
- if (WARN_ON(IS_ERR(dwmac->m250_div_clk)))
- return PTR_ERR(dwmac->m250_div_clk);
-
- /* create the fixed_div2 */
- snprintf(clk_name, sizeof(clk_name), "%s#fixed_div2", dev_name(dev));
- init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
- init.ops = &clk_fixed_factor_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->m250_div_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->fixed_div2.mult = 1;
- dwmac->fixed_div2.div = 2;
- dwmac->fixed_div2.hw.init = &init;
-
- dwmac->fixed_div2_clk = devm_clk_register(dev, &dwmac->fixed_div2.hw);
- if (WARN_ON(IS_ERR(dwmac->fixed_div2_clk)))
- return PTR_ERR(dwmac->fixed_div2_clk);
-
- /* create the rgmii_tx_en */
- init.name = devm_kasprintf(dev, GFP_KERNEL, "%s#rgmii_tx_en",
- dev_name(dev));
- init.ops = &clk_gate_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->fixed_div2_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
- dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
- dwmac->rgmii_tx_en.hw.init = &init;
-
- dwmac->rgmii_tx_en_clk = devm_clk_register(dev,
- &dwmac->rgmii_tx_en.hw);
- if (WARN_ON(IS_ERR(dwmac->rgmii_tx_en_clk)))
- return PTR_ERR(dwmac->rgmii_tx_en_clk);
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+ &clk_divider_ops,
+ &clk_configs->m250_div.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->fixed_div2.mult = 1;
+ clk_configs->fixed_div2.div = 2;
+ clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1,
+ &clk_fixed_factor_ops,
+ &clk_configs->fixed_div2.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
+ clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1,
+ &clk_gate_ops,
+ &clk_configs->rgmii_tx_en.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ dwmac->rgmii_tx_clk = clk;
return 0;
}
@@ -219,19 +204,23 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
* a register) based on the line-speed (125MHz for Gbit speeds,
* 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s).
*/
- ret = clk_set_rate(dwmac->rgmii_tx_en_clk, 125 * 1000 * 1000);
+ ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000);
if (ret) {
- dev_err(&dwmac->pdev->dev,
+ dev_err(dwmac->dev,
"failed to set RGMII TX clock\n");
return ret;
}
- ret = clk_prepare_enable(dwmac->rgmii_tx_en_clk);
+ ret = clk_prepare_enable(dwmac->rgmii_tx_clk);
if (ret) {
- dev_err(&dwmac->pdev->dev,
+ dev_err(dwmac->dev,
"failed to enable the RGMII TX clock\n");
return ret;
}
+
+ devm_add_action_or_reset(dwmac->dev,
+ (void(*)(void *))clk_disable_unprepare,
+ dwmac->rgmii_tx_clk);
break;
case PHY_INTERFACE_MODE_RMII:
@@ -251,7 +240,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
break;
default:
- dev_err(&dwmac->pdev->dev, "unsupported phy-mode %s\n",
+ dev_err(dwmac->dev, "unsupported phy-mode %s\n",
phy_modes(dwmac->phy_mode));
return -EINVAL;
}
@@ -292,7 +281,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
goto err_remove_config_dt;
}
- dwmac->pdev = pdev;
+ dwmac->dev = &pdev->dev;
dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
if (dwmac->phy_mode < 0) {
dev_err(&pdev->dev, "missing phy-mode property\n");
@@ -317,29 +306,16 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
- goto err_clk_disable;
+ goto err_remove_config_dt;
return 0;
-err_clk_disable:
- if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
- clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
err_remove_config_dt:
stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
-static int meson8b_dwmac_remove(struct platform_device *pdev)
-{
- struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
-
- if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
- clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
-
- return stmmac_pltfr_remove(pdev);
-}
-
static const struct of_device_id meson8b_dwmac_match[] = {
{ .compatible = "amlogic,meson8b-dwmac" },
{ .compatible = "amlogic,meson-gxbb-dwmac" },
@@ -349,7 +325,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match);
static struct platform_driver meson8b_dwmac_driver = {
.probe = meson8b_dwmac_probe,
- .remove = meson8b_dwmac_remove,
+ .remove = stmmac_pltfr_remove,
.driver = {
.name = "meson8b-dwmac",
.pm = &stmmac_pltfr_pm_ops,
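
Two things let the remove() callback collapse to plain stmmac_pltfr_remove here: the clk_init_data boilerplate is factored into meson8b_dwmac_register_clk(), and the enabled RGMII TX clock is handed to devm_add_action_or_reset() so the devres core disables it on detach. The function-pointer cast in the hunk works but defeats type checking; a sketch of the more conventional wrapper form (function names hypothetical):

#include <linux/clk.h>
#include <linux/device.h>

static void my_clk_disable_unprepare(void *data)
{
	clk_disable_unprepare(data);
}

/* Enable a clock and register automatic disable on driver detach. */
static int my_enable_clk(struct device *dev, struct clk *clk)
{
	int ret = clk_prepare_enable(clk);

	if (ret)
		return ret;
	/* Runs on detach, or immediately if registration itself fails */
	return devm_add_action_or_reset(dev, my_clk_disable_unprepare, clk);
}
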
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 63795ecafc8d..46b9ae20ff6c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -120,7 +120,7 @@ static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
writel(value, ioaddr + base_register);
}
-static void dwmac4_tx_queue_routing(struct mac_device_info *hw,
+static void dwmac4_rx_queue_routing(struct mac_device_info *hw,
u8 packet, u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
@@ -713,7 +713,7 @@ static const struct stmmac_ops dwmac4_ops = {
.rx_queue_enable = dwmac4_rx_queue_enable,
.rx_queue_prio = dwmac4_rx_queue_priority,
.tx_queue_prio = dwmac4_tx_queue_priority,
- .rx_queue_routing = dwmac4_tx_queue_routing,
+ .rx_queue_routing = dwmac4_rx_queue_routing,
.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
@@ -744,7 +744,7 @@ static const struct stmmac_ops dwmac410_ops = {
.rx_queue_enable = dwmac4_rx_queue_enable,
.rx_queue_prio = dwmac4_rx_queue_priority,
.tx_queue_prio = dwmac4_tx_queue_priority,
- .rx_queue_routing = dwmac4_tx_queue_routing,
+ .rx_queue_routing = dwmac4_rx_queue_routing,
.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index c728ffa095de..2a6521d33e43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
{
+ p->des0 = 0;
+ p->des1 = 0;
p->des2 = 0;
p->des3 = 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index a916e13624eb..75161e1b7e55 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -58,6 +58,7 @@ struct stmmac_tx_queue {
unsigned int dirty_tx;
dma_addr_t dma_tx_phy;
u32 tx_tail_addr;
+ u32 mss;
};
struct stmmac_rx_queue {
@@ -138,7 +139,6 @@ struct stmmac_priv {
spinlock_t ptp_lock;
void __iomem *mmcaddr;
void __iomem *ptpaddr;
- u32 mss;
#ifdef CONFIG_DEBUG_FS
struct dentry *dbgfs_dir;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 7ad841434ec8..9f983dd069d5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -57,36 +57,36 @@
/* Module parameters */
#define TX_TIMEO 5000
static int watchdog = TX_TIMEO;
-module_param(watchdog, int, S_IRUGO | S_IWUSR);
+module_param(watchdog, int, 0644);
MODULE_PARM_DESC(watchdog, "Transmit timeout in milliseconds (default 5s)");
static int debug = -1;
-module_param(debug, int, S_IRUGO | S_IWUSR);
+module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Message Level (-1: default, 0: no output, 16: all)");
static int phyaddr = -1;
-module_param(phyaddr, int, S_IRUGO);
+module_param(phyaddr, int, 0444);
MODULE_PARM_DESC(phyaddr, "Physical device address");
#define STMMAC_TX_THRESH (DMA_TX_SIZE / 4)
#define STMMAC_RX_THRESH (DMA_RX_SIZE / 4)
static int flow_ctrl = FLOW_OFF;
-module_param(flow_ctrl, int, S_IRUGO | S_IWUSR);
+module_param(flow_ctrl, int, 0644);
MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]");
static int pause = PAUSE_TIME;
-module_param(pause, int, S_IRUGO | S_IWUSR);
+module_param(pause, int, 0644);
MODULE_PARM_DESC(pause, "Flow Control Pause Time");
#define TC_DEFAULT 64
static int tc = TC_DEFAULT;
-module_param(tc, int, S_IRUGO | S_IWUSR);
+module_param(tc, int, 0644);
MODULE_PARM_DESC(tc, "DMA threshold control value");
#define DEFAULT_BUFSIZE 1536
static int buf_sz = DEFAULT_BUFSIZE;
-module_param(buf_sz, int, S_IRUGO | S_IWUSR);
+module_param(buf_sz, int, 0644);
MODULE_PARM_DESC(buf_sz, "DMA buffer size");
#define STMMAC_RX_COPYBREAK 256
@@ -97,7 +97,7 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
#define STMMAC_DEFAULT_LPI_TIMER 1000
static int eee_timer = STMMAC_DEFAULT_LPI_TIMER;
-module_param(eee_timer, int, S_IRUGO | S_IWUSR);
+module_param(eee_timer, int, 0644);
MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
#define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x))
@@ -105,7 +105,7 @@ MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
* but allow the user to force use of the chain instead of the ring
*/
static unsigned int chain_mode;
-module_param(chain_mode, int, S_IRUGO);
+module_param(chain_mode, int, 0444);
MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode");
static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
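
The S_IRUGO/S_IWUSR churn throughout this file is purely cosmetic: checkpatch now prefers literal octal permissions because they are easier to audit, and the values are bit-for-bit identical. A compile-time check of that equivalence, as a sketch:

#include <linux/bug.h>
#include <linux/stat.h>

static inline void perm_macro_check(void)
{
	/* S_IRUGO = S_IRUSR | S_IRGRP | S_IROTH = 0400 | 0040 | 0004 */
	BUILD_BUG_ON(S_IRUGO != 0444);
	BUILD_BUG_ON((S_IRUGO | S_IWUSR) != 0644);
}
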
@@ -1355,6 +1355,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
tx_q->dirty_tx = 0;
tx_q->cur_tx = 0;
+ tx_q->mss = 0;
netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
}
@@ -1843,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
if (unlikely(status & tx_dma_own))
break;
+ /* Make sure descriptor fields are read after reading
+ * the own bit.
+ */
+ dma_rmb();
+
/* Just consider the last segment and ... */
if (likely(!(status & tx_not_ls))) {
/* ... verify the status error condition */
@@ -1946,6 +1952,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
(i == DMA_TX_SIZE - 1));
tx_q->dirty_tx = 0;
tx_q->cur_tx = 0;
+ tx_q->mss = 0;
netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan));
stmmac_start_tx_dma(priv, chan);
@@ -2430,7 +2437,7 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
continue;
packet = priv->plat->rx_queues_cfg[queue].pkt_route;
- priv->hw->mac->rx_queue_prio(priv->hw, packet, queue);
+ priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
}
}
@@ -2632,7 +2639,6 @@ static int stmmac_open(struct net_device *dev)
priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
- priv->mss = 0;
ret = alloc_dma_desc_resources(priv);
if (ret < 0) {
@@ -2793,6 +2799,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
while (tmp_len > 0) {
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
desc = tx_q->dma_tx + tx_q->cur_tx;
desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
@@ -2872,11 +2879,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
mss = skb_shinfo(skb)->gso_size;
/* set new MSS value if needed */
- if (mss != priv->mss) {
+ if (mss != tx_q->mss) {
mss_desc = tx_q->dma_tx + tx_q->cur_tx;
priv->hw->desc->set_mss(mss_desc, mss);
- priv->mss = mss;
+ tx_q->mss = mss;
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
}
if (netif_msg_tx_queued(priv)) {
@@ -2887,6 +2895,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
}
first_entry = tx_q->cur_tx;
+ WARN_ON(tx_q->tx_skbuff[first_entry]);
desc = tx_q->dma_tx + first_entry;
first = desc;
@@ -2926,7 +2935,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
- tx_q->tx_skbuff[tx_q->cur_tx] = NULL;
tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
}
@@ -2980,14 +2988,21 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
/* If context desc is used to change MSS */
- if (mss_desc)
+ if (mss_desc) {
+ /* Make sure the first descriptor has been completely
+ * written, including its own bit, because the MSS context
+ * descriptor actually precedes the first descriptor; its
+ * own bit must be the last thing written.
+ */
+ dma_wmb();
priv->hw->desc->set_tx_owner(mss_desc);
+ }
/* The own bit must be the last thing set when preparing the
* descriptor, and then a barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
if (netif_msg_pktdata(priv)) {
pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
@@ -3062,6 +3077,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
entry = tx_q->cur_tx;
first_entry = entry;
+ WARN_ON(tx_q->tx_skbuff[first_entry]);
csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
@@ -3090,6 +3106,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
bool last_segment = (i == (nfrags - 1));
entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[entry]);
if (likely(priv->extend_desc))
desc = (struct dma_desc *)(tx_q->dma_etx + entry);
@@ -3101,8 +3118,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
if (dma_mapping_error(priv->device, des))
goto dma_map_err; /* should reuse desc w/o issues */
- tx_q->tx_skbuff[entry] = NULL;
-
tx_q->tx_skbuff_dma[entry].buf = des;
if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
desc->des0 = cpu_to_le32(des);
@@ -3211,7 +3226,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
* descriptor, and then a barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
}
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
@@ -3986,7 +4001,7 @@ static int stmmac_init_fs(struct net_device *dev)
/* Entry to report DMA RX/TX rings */
priv->dbgfs_rings_status =
- debugfs_create_file("descriptors_status", S_IRUGO,
+ debugfs_create_file("descriptors_status", 0444,
priv->dbgfs_dir, dev,
&stmmac_rings_status_fops);
@@ -3998,9 +4013,9 @@ static int stmmac_init_fs(struct net_device *dev)
}
/* Entry to report the DMA HW features */
- priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", S_IRUGO,
- priv->dbgfs_dir,
- dev, &stmmac_dma_cap_fops);
+ priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", 0444,
+ priv->dbgfs_dir,
+ dev, &stmmac_dma_cap_fops);
if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) {
netdev_err(priv->dev, "ERROR creating stmmac MMC debugfs file\n");
@@ -4436,6 +4451,7 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv)
tx_q->cur_tx = 0;
tx_q->dirty_tx = 0;
+ tx_q->mss = 0;
}
}
@@ -4481,11 +4497,6 @@ int stmmac_resume(struct device *dev)
stmmac_reset_queues_param(priv);
- /* reset private mss value to force mss context settings at
- * next tso xmit (only used for gmac4).
- */
- priv->mss = 0;
-
stmmac_clear_descriptors(priv);
stmmac_hw_setup(ndev, false);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 05f122b8424a..ebd3e5ffa73c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -135,13 +135,14 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
* stmmac_mtl_setup - parse DT parameters for multiple queues configuration
* @pdev: platform device
*/
-static void stmmac_mtl_setup(struct platform_device *pdev,
- struct plat_stmmacenet_data *plat)
+static int stmmac_mtl_setup(struct platform_device *pdev,
+ struct plat_stmmacenet_data *plat)
{
struct device_node *q_node;
struct device_node *rx_node;
struct device_node *tx_node;
u8 queue = 0;
+ int ret = 0;
/* For backwards-compatibility with device trees that don't have any
* snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back
@@ -159,12 +160,12 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
if (!rx_node)
- return;
+ return ret;
tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0);
if (!tx_node) {
of_node_put(rx_node);
- return;
+ return ret;
}
/* Processing RX queues common config */
@@ -220,6 +221,11 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
queue++;
}
+ if (queue != plat->rx_queues_to_use) {
+ ret = -EINVAL;
+ dev_err(&pdev->dev, "Not all RX queues were configured\n");
+ goto out;
+ }
/* Processing TX queues common config */
if (of_property_read_u32(tx_node, "snps,tx-queues-to-use",
@@ -281,10 +287,18 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
queue++;
}
+ if (queue != plat->tx_queues_to_use) {
+ ret = -EINVAL;
+ dev_err(&pdev->dev, "Not all TX queues were configured\n");
+ goto out;
+ }
+out:
of_node_put(rx_node);
of_node_put(tx_node);
of_node_put(q_node);
+
+ return ret;
}
/**
@@ -376,6 +390,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
struct device_node *np = pdev->dev.of_node;
struct plat_stmmacenet_data *plat;
struct stmmac_dma_cfg *dma_cfg;
+ int rc;
plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
if (!plat)
@@ -402,8 +417,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
/* To Configure PHY by using all device-tree supported properties */
- if (stmmac_dt_phy(plat, np, &pdev->dev))
- return ERR_PTR(-ENODEV);
+ rc = stmmac_dt_phy(plat, np, &pdev->dev);
+ if (rc)
+ return ERR_PTR(rc);
of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size);
@@ -499,7 +515,11 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
plat->axi = stmmac_axi_setup(pdev);
- stmmac_mtl_setup(pdev, plat);
+ rc = stmmac_mtl_setup(pdev, plat);
+ if (rc) {
+ stmmac_remove_config_dt(pdev, plat);
+ return ERR_PTR(rc);
+ }
/* clock setup */
plat->stmmac_clk = devm_clk_get(&pdev->dev,
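
stmmac_mtl_setup() now returns an error when the number of queue nodes in the DT disagrees with snps,{rx,tx}-queues-to-use, so probe fails instead of running with a half-configured MTL. The single out: label keeps each of_parse_phandle() paired with exactly one of_node_put(). A reduced sketch of that parse-then-put shape (property name hypothetical):

#include <linux/of.h>

/* Reject a DT configuration whose child-node count disagrees with
 * what the caller expects; every get is paired with exactly one put.
 */
static int my_parse_cfg(struct device_node *np, unsigned int expect)
{
	struct device_node *cfg, *child;
	unsigned int count = 0;
	int ret = 0;

	cfg = of_parse_phandle(np, "vendor,cfg", 0);	/* hypothetical */
	if (!cfg)
		return 0;		/* optional node: keep defaults */

	for_each_child_of_node(cfg, child)
		count++;

	if (count != expect)
		ret = -EINVAL;		/* fail probe on misconfiguration */

	of_node_put(cfg);
	return ret;
}
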
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 8dd545fed30d..f081de4f38d7 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9437,11 +9437,11 @@ static ssize_t show_num_ports(struct device *dev,
}
static struct device_attribute niu_parent_attributes[] = {
- __ATTR(port_phy, S_IRUGO, show_port_phy, NULL),
- __ATTR(plat_type, S_IRUGO, show_plat_type, NULL),
- __ATTR(rxchan_per_port, S_IRUGO, show_rxchan_per_port, NULL),
- __ATTR(txchan_per_port, S_IRUGO, show_txchan_per_port, NULL),
- __ATTR(num_ports, S_IRUGO, show_num_ports, NULL),
+ __ATTR(port_phy, 0444, show_port_phy, NULL),
+ __ATTR(plat_type, 0444, show_plat_type, NULL),
+ __ATTR(rxchan_per_port, 0444, show_rxchan_per_port, NULL),
+ __ATTR(txchan_per_port, 0444, show_txchan_per_port, NULL),
+ __ATTR(num_ports, 0444, show_num_ports, NULL),
{}
};
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 63d3d6b215f3..a94f50442613 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac,
dev->ethtool_ops = &vnet_ethtool_ops;
dev->watchdog_timeo = VNET_TX_TIMEOUT;
- dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE |
+ dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_ALL_TSO |
NETIF_F_HW_CSUM | NETIF_F_SG;
dev->features = dev->hw_features;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 1b1b78fdc138..1b4af54a4968 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -120,14 +120,18 @@ do { \
#define CPDMA_RXCP 0x60
#define CPSW_POLL_WEIGHT 64
+#define CPSW_RX_VLAN_ENCAP_HDR_SIZE 4
#define CPSW_MIN_PACKET_SIZE (VLAN_ETH_ZLEN)
-#define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN +\
+ ETH_FCS_LEN +\
+ CPSW_RX_VLAN_ENCAP_HDR_SIZE)
#define RX_PRIORITY_MAPPING 0x76543210
#define TX_PRIORITY_MAPPING 0x33221100
#define CPDMA_TX_PRIORITY_MAP 0x01234567
#define CPSW_VLAN_AWARE BIT(1)
+#define CPSW_RX_VLAN_ENCAP BIT(2)
#define CPSW_ALE_VLAN_AWARE 1
#define CPSW_FIFO_NORMAL_MODE (0 << 16)
@@ -148,6 +152,18 @@ do { \
#define CPSW_MAX_QUEUES 8
#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT 29
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK GENMASK(2, 0)
+#define CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT 16
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT 8
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK GENMASK(1, 0)
+enum {
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG = 0,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_UNTAG,
+};
+
static int debug_level;
module_param(debug_level, int, 0);
MODULE_PARM_DESC(debug_level, "cpsw debug level (NETIF_MSG bits)");
@@ -718,6 +734,49 @@ static void cpsw_tx_handler(void *token, int len, int status)
dev_kfree_skb_any(skb);
}
+static void cpsw_rx_vlan_encap(struct sk_buff *skb)
+{
+ struct cpsw_priv *priv = netdev_priv(skb->dev);
+ struct cpsw_common *cpsw = priv->cpsw;
+ u32 rx_vlan_encap_hdr = *((u32 *)skb->data);
+ u16 vtag, vid, prio, pkt_type;
+
+ /* Remove VLAN header encapsulation word */
+ skb_pull(skb, CPSW_RX_VLAN_ENCAP_HDR_SIZE);
+
+ pkt_type = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT) &
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK;
+ /* Ignore unknown & priority-tagged packets */
+ if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV ||
+ pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG)
+ return;
+
+ vid = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT) &
+ VLAN_VID_MASK;
+ /* Ignore vid 0 and pass packet as is */
+ if (!vid)
+ return;
+ /* Ignore default VLANs in dual-MAC mode */
+ if (cpsw->data.dual_emac &&
+ vid == cpsw->slaves[priv->emac_port].port_vlan)
+ return;
+
+ prio = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT) &
+ CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK;
+
+ vtag = (prio << VLAN_PRIO_SHIFT) | vid;
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
+
+ /* strip the VLAN tag from a VLAN-tagged packet */
+ if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG) {
+ memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+ skb_pull(skb, VLAN_HLEN);
+ }
+}
+
static void cpsw_rx_handler(void *token, int len, int status)
{
struct cpdma_chan *ch;
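
cpsw_rx_vlan_encap() decodes the 32-bit word the switch prepends to each received frame once CPSW_RX_VLAN_ENCAP is set: priority in bits 31..29, VID in bits 27..16, packet type in bits 9..8. The extraction is plain shift-and-mask, as in this standalone sketch using the same field positions:

#include <linux/bits.h>
#include <linux/kernel.h>

/* Decode the RX VLAN encapsulation word using the field positions
 * defined above (CPSW_RX_VLAN_ENCAP_HDR_*).
 */
static void decode_encap_word(u32 hdr)
{
	u16 prio = (hdr >> 29) & GENMASK(2, 0);	/* bits 31..29 */
	u16 vid  = (hdr >> 16) & 0xfff;		/* bits 27..16, VLAN_VID_MASK */
	u16 type = (hdr >> 8)  & GENMASK(1, 0);	/* bits 9..8 */

	pr_info("prio %u vid %u pkt_type %u\n", prio, vid, type);
}
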
@@ -752,6 +811,8 @@ static void cpsw_rx_handler(void *token, int len, int status)
if (new_skb) {
skb_copy_queue_mapping(new_skb, skb);
skb_put(skb, len);
+ if (status & CPDMA_RX_VLAN_ENCAP)
+ cpsw_rx_vlan_encap(skb);
cpts_rx_timestamp(cpsw->cpts, skb);
skb->protocol = eth_type_trans(skb, ndev);
netif_receive_skb(skb);
@@ -1014,7 +1075,8 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
/* set speed_in input in case RMII mode is used in 100Mbps */
if (phy->speed == 100)
mac_control |= BIT(15);
- else if (phy->speed == 10)
+ /* in-band mode only works at 10Mbps in RGMII mode */
+ else if ((phy->speed == 10) && phy_interface_is_rgmii(phy))
mac_control |= BIT(18); /* In Band mode */
if (priv->rx_pause)
@@ -1406,7 +1468,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
CPSW_ALE_VLAN_AWARE);
control_reg = readl(&cpsw->regs->control);
- control_reg |= CPSW_VLAN_AWARE;
+ control_reg |= CPSW_VLAN_AWARE | CPSW_RX_VLAN_ENCAP;
writel(control_reg, &cpsw->regs->control);
fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
CPSW_FIFO_NORMAL_MODE;
@@ -3122,7 +3184,7 @@ static int cpsw_probe(struct platform_device *pdev)
cpsw->quirk_irq = true;
}
- ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
ndev->netdev_ops = &cpsw_netdev_ops;
ndev->ethtool_ops = &cpsw_ethtool_ops;
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 6f9173ff9414..31ae04117f0a 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -1164,7 +1164,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
outlen -= CPDMA_DESC_CRC_LEN;
status = status & (CPDMA_DESC_EOQ | CPDMA_DESC_TD_COMPLETE |
- CPDMA_DESC_PORT_MASK);
+ CPDMA_DESC_PORT_MASK | CPDMA_RX_VLAN_ENCAP);
chan->head = desc_from_phys(pool, desc_read(desc, hw_next));
chan_write(chan, cp, desc_dma);
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h
index fd65ce2b83de..d399af5389b8 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.h
+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
@@ -19,6 +19,8 @@
#define CPDMA_RX_SOURCE_PORT(__status__) ((__status__ >> 16) & 0x7)
+#define CPDMA_RX_VLAN_ENCAP BIT(19)
+
#define CPDMA_EOI_RX_THRESH 0x0
#define CPDMA_EOI_RX 0x1
#define CPDMA_EOI_TX 0x2
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index 78a6414c5fd9..dfabbae72efd 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -590,8 +590,7 @@ static int bpq_device_event(struct notifier_block *this,
static int __init bpq_init_driver(void)
{
#ifdef CONFIG_PROC_FS
- if (!proc_create("bpqether", S_IRUGO, init_net.proc_net,
- &bpq_info_fops)) {
+ if (!proc_create("bpqether", 0444, init_net.proc_net, &bpq_info_fops)) {
printk(KERN_ERR
"bpq: cannot create /proc/net/bpqether entry.\n");
return -ENOENT;
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 14c3632b8cde..83034eb7ed4f 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -1168,7 +1168,7 @@ static int __init yam_init_driver(void)
yam_timer.expires = jiffies + HZ / 100;
add_timer(&yam_timer);
- proc_create("yam", S_IRUGO, init_net.proc_net, &yam_info_fops);
+ proc_create("yam", 0444, init_net.proc_net, &yam_info_fops);
return 0;
error:
while (--i >= 0) {
diff --git a/drivers/net/hyperv/Makefile b/drivers/net/hyperv/Makefile
index c8a66827100c..3f25b9c8ea59 100644
--- a/drivers/net/hyperv/Makefile
+++ b/drivers/net/hyperv/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o
-hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o
+hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 0db3bd1ea06f..960f06141472 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -173,6 +173,7 @@ struct rndis_device {
struct list_head req_list;
struct work_struct mcast_work;
+ u32 filter;
bool link_state; /* 0 - link up, 1 - link down */
@@ -211,7 +212,6 @@ void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
void rndis_set_subchannel(struct work_struct *w);
-bool rndis_filter_opened(const struct netvsc_device *nvdev);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
@@ -793,6 +793,7 @@ struct netvsc_device {
/* Receive buffer allocated by us but managed by NetVSP */
void *recv_buf;
+ u32 recv_buf_size; /* allocated bytes */
u32 recv_buf_gpadl_handle;
u32 recv_section_cnt;
u32 recv_section_size;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 17e529af79dc..c9910c33e671 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -36,6 +36,7 @@
#include <asm/sync_bitops.h>
#include "hyperv_net.h"
+#include "netvsc_trace.h"
/*
* Switch the data path from the synthetic interface to the VF
@@ -57,6 +58,8 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
init_pkt->msg.v4_msg.active_dp.active_datapath =
NVSP_DATAPATH_SYNTHETIC;
+ trace_nvsp_send(ndev, init_pkt);
+
vmbus_sendpacket(dev->channel, init_pkt,
sizeof(struct nvsp_message),
(unsigned long)init_pkt,
@@ -90,6 +93,11 @@ static void free_netvsc_device(struct rcu_head *head)
= container_of(head, struct netvsc_device, rcu);
int i;
+ kfree(nvdev->extension);
+ vfree(nvdev->recv_buf);
+ vfree(nvdev->send_buf);
+ kfree(nvdev->send_section_map);
+
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
vfree(nvdev->chan_table[i].mrc.slots);
@@ -124,6 +132,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
revoke_packet->msg.v1_msg.
revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
+ trace_nvsp_send(ndev, revoke_packet);
+
ret = vmbus_sendpacket(device->channel,
revoke_packet,
sizeof(struct nvsp_message),
@@ -164,6 +174,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
revoke_packet->msg.v1_msg.revoke_send_buf.id =
NETVSC_SEND_BUFFER_ID;
+ trace_nvsp_send(ndev, revoke_packet);
+
ret = vmbus_sendpacket(device->channel,
revoke_packet,
sizeof(struct nvsp_message),
@@ -211,12 +223,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
net_device->recv_buf_gpadl_handle = 0;
}
- if (net_device->recv_buf) {
- /* Free up the receive buffer */
- vfree(net_device->recv_buf);
- net_device->recv_buf = NULL;
- }
-
if (net_device->send_buf_gpadl_handle) {
ret = vmbus_teardown_gpadl(device->channel,
net_device->send_buf_gpadl_handle);
@@ -231,12 +237,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
}
net_device->send_buf_gpadl_handle = 0;
}
- if (net_device->send_buf) {
- /* Free up the send buffer */
- vfree(net_device->send_buf);
- net_device->send_buf = NULL;
- }
- kfree(net_device->send_section_map);
}
int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
@@ -282,6 +282,8 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
+ net_device->recv_buf_size = buf_size;
+
/*
* Establish the gpadl handle for this buffer on this
* channel. Note: This call uses the vmbus connection rather
@@ -305,6 +307,8 @@ static int netvsc_init_buf(struct hv_device *device,
init_packet->msg.v1_msg.
send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
@@ -384,6 +388,8 @@ static int netvsc_init_buf(struct hv_device *device,
net_device->send_buf_gpadl_handle;
init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
@@ -452,6 +458,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
@@ -484,6 +492,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
}
+ trace_nvsp_send(ndev, init_packet);
+
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
@@ -496,6 +506,7 @@ static int netvsc_connect_vsp(struct hv_device *device,
struct netvsc_device *net_device,
const struct netvsc_device_info *device_info)
{
+ struct net_device *ndev = hv_get_drvdata(device);
static const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
@@ -536,6 +547,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
send_ndis_ver.ndis_minor_ver =
ndis_version & 0xFFFF;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
@@ -562,26 +575,29 @@ void netvsc_device_remove(struct hv_device *device)
= rtnl_dereference(net_device_ctx->nvdev);
int i;
- cancel_work_sync(&net_device->subchan_work);
-
netvsc_revoke_buf(device, net_device);
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+ /* And disassociate NAPI context from device */
+ for (i = 0; i < net_device->num_chn; i++)
+ netif_napi_del(&net_device->chan_table[i].napi);
+
/*
* At this point, no one should be accessing net_device
* except in here
*/
netdev_dbg(ndev, "net device safe to remove\n");
+ /* older versions require that the buffer be revoked before close */
+ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
+ netvsc_teardown_gpadl(device, net_device);
+
/* Now, we can close the channel safely */
vmbus_close(device->channel);
- netvsc_teardown_gpadl(device, net_device);
-
- /* And dissassociate NAPI context from device */
- for (i = 0; i < net_device->num_chn; i++)
- netif_napi_del(&net_device->chan_table[i].napi);
+ if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
+ netvsc_teardown_gpadl(device, net_device);
/* Release all resources */
free_netvsc_device_rcu(net_device);
@@ -645,14 +661,18 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
queue_sends =
atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
- if (net_device->destroy && queue_sends == 0)
- wake_up(&net_device->wait_drain);
+ if (unlikely(net_device->destroy)) {
+ if (queue_sends == 0)
+ wake_up(&net_device->wait_drain);
+ } else {
+ struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
- if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
- (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
- queue_sends < 1)) {
- netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
- ndev_ctx->eth_stats.wake_queue++;
+ if (netif_tx_queue_stopped(txq) &&
+ (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+ queue_sends < 1)) {
+ netif_tx_wake_queue(txq);
+ ndev_ctx->eth_stats.wake_queue++;
+ }
}
}
@@ -747,7 +767,7 @@ static inline int netvsc_send_pkt(
struct sk_buff *skb)
{
struct nvsp_message nvmsg;
- struct nvsp_1_message_send_rndis_packet * const rpkt =
+ struct nvsp_1_message_send_rndis_packet *rpkt =
&nvmsg.msg.v1_msg.send_rndis_pkt;
struct netvsc_channel * const nvchan =
&net_device->chan_table[packet->q_idx];
@@ -776,6 +796,8 @@ static inline int netvsc_send_pkt(
if (out_channel->rescind)
return -ENODEV;
+ trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+
if (packet->page_buf_cnt) {
if (packet->cp_partial)
pb += packet->rmsg_pgcnt;
@@ -852,13 +874,6 @@ int netvsc_send(struct net_device *ndev,
if (unlikely(!net_device || net_device->destroy))
return -ENODEV;
- /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
- * here before the negotiation with the host is finished and
- * send_section_map may not be allocated yet.
- */
- if (unlikely(!net_device->send_section_map))
- return -EAGAIN;
-
nvchan = &net_device->chan_table[packet->q_idx];
packet->send_buf_index = NETVSC_INVALID_INDEX;
packet->cp_partial = false;
@@ -866,10 +881,8 @@ int netvsc_send(struct net_device *ndev,
/* Send control message directly without accessing msd (Multi-Send
* Data) field which may be changed during data packet processing.
*/
- if (!skb) {
- cur_send = packet;
- goto send_now;
- }
+ if (!skb)
+ return netvsc_send_pkt(device, packet, net_device, pb, skb);
/* batch packets in send buffer if possible */
msdp = &nvchan->msd;
@@ -953,7 +966,6 @@ int netvsc_send(struct net_device *ndev,
}
}
-send_now:
if (cur_send)
ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
@@ -1085,13 +1097,30 @@ static int netvsc_receive(struct net_device *ndev,
/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
for (i = 0; i < count; i++) {
- void *data = recv_buf
- + vmxferpage_packet->ranges[i].byte_offset;
+ u32 offset = vmxferpage_packet->ranges[i].byte_offset;
u32 buflen = vmxferpage_packet->ranges[i].byte_count;
+ void *data;
+ int ret;
+
+ if (unlikely(offset + buflen > net_device->recv_buf_size)) {
+ status = NVSP_STAT_FAIL;
+ netif_err(net_device_ctx, rx_err, ndev,
+ "Packet offset:%u + len:%u too big\n",
+ offset, buflen);
+
+ continue;
+ }
+
+ data = recv_buf + offset;
+
+ trace_rndis_recv(ndev, q_idx, data);
/* Pass it to the upper layer */
- status = rndis_filter_receive(ndev, net_device,
- channel, data, buflen);
+ ret = rndis_filter_receive(ndev, net_device,
+ channel, data, buflen);
+
+ if (unlikely(ret != NVSP_STAT_SUCCESS))
+ status = NVSP_STAT_FAIL;
}
enq_receive_complete(ndev, net_device, q_idx,
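
The new offset/length check validates host-supplied ranges before recv_buf is dereferenced, which matters because the ranges arrive from the VSP over VMBus. Note that a u32 sum like offset + buflen can in principle wrap; a wrap-safe formulation of the same predicate looks like this sketch:

#include <linux/types.h>

/* Wrap-safe range validation: rejects pairs whose u32 sum would
 * overflow as well as those that merely run past the buffer.
 */
static bool recv_range_ok(u32 offset, u32 len, u32 buf_size)
{
	return len <= buf_size && offset <= buf_size - len;
}
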
@@ -1153,6 +1182,8 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct nvsp_message *nvmsg = hv_pkt_data(desc);
+ trace_nvsp_recv(ndev, channel, nvmsg);
+
switch (desc->type) {
case VM_PKT_COMP:
netvsc_send_completion(net_device, channel, device,
@@ -1217,9 +1248,10 @@ int netvsc_poll(struct napi_struct *napi, int budget)
if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
work_done < budget &&
napi_complete_done(napi, work_done) &&
- hv_end_read(&channel->inbound)) {
+ hv_end_read(&channel->inbound) &&
+ napi_schedule_prep(napi)) {
hv_begin_read(&channel->inbound);
- napi_reschedule(napi);
+ __napi_schedule(napi);
}
/* Driver may overshoot since multiple packets per descriptor */
@@ -1242,7 +1274,7 @@ void netvsc_channel_cb(void *context)
/* disable interrupts from host */
hv_begin_read(rbi);
- __napi_schedule(&nvchan->napi);
+ __napi_schedule_irqoff(&nvchan->napi);
}
}
@@ -1296,7 +1328,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
netvsc_channel_cb, net_device->chan_table);
if (ret != 0) {
- netif_napi_del(&net_device->chan_table[0].napi);
netdev_err(ndev, "unable to open channel: %d\n", ret);
goto cleanup;
}
@@ -1306,11 +1337,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
napi_enable(&net_device->chan_table[0].napi);
- /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
- * populated.
- */
- rcu_assign_pointer(net_device_ctx->nvdev, net_device);
-
/* Connect with the NetVsp */
ret = netvsc_connect_vsp(device, net_device, device_info);
if (ret != 0) {
@@ -1319,6 +1345,11 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
goto close;
}
+ /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
+ * populated.
+ */
+ rcu_assign_pointer(net_device_ctx->nvdev, net_device);
+
return net_device;
close:
@@ -1329,6 +1360,7 @@ close:
vmbus_close(device->channel);
cleanup:
+ netif_napi_del(&net_device->chan_table[0].napi);
free_netvsc_device(&net_device->rcu);
return ERR_PTR(ret);
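
The trace_nvsp_send()/trace_nvsp_recv()/trace_rndis_recv() calls sprinkled through this file come from the new netvsc_trace.h and netvsc_trace.c added to the Makefile earlier; those files are not shown in this diff. For orientation, kernel tracepoints of this kind are declared with TRACE_EVENT(); the sketch below is illustrative boilerplate, not the actual netvsc definitions, and omits the TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE defines a real header needs when it lives outside include/trace/events:

/* my_trace.h - illustrative only */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM my_drv

#if !defined(_MY_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _MY_TRACE_H

#include <linux/tracepoint.h>

TRACE_EVENT(my_send,
	TP_PROTO(const struct net_device *ndev, u32 msg_type),
	TP_ARGS(ndev, msg_type),
	TP_STRUCT__entry(
		__string(name, ndev->name)
		__field(u32, msg_type)
	),
	TP_fast_assign(
		__assign_str(name, ndev->name);
		__entry->msg_type = msg_type;
	),
	TP_printk("dev=%s msg_type=%u", __get_str(name), __entry->msg_type)
);

#endif /* _MY_TRACE_H */

/* This part must be outside the include guard */
#include <trace/define_trace.h>
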
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c5584c2d440e..ecc84954c511 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -46,13 +46,16 @@
#include "hyperv_net.h"
-#define RING_SIZE_MIN 64
+#define RING_SIZE_MIN 64
+#define RETRY_US_LO 5000
+#define RETRY_US_HI 10000
+#define RETRY_MAX 2000 /* >10 sec */
#define LINKCHANGE_INT (2 * HZ)
#define VF_TAKEOVER_INT (HZ / 10)
static unsigned int ring_size __ro_after_init = 128;
-module_param(ring_size, uint, S_IRUGO);
+module_param(ring_size, uint, 0444);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
unsigned int netvsc_ring_bytes __ro_after_init;
struct reciprocal_value netvsc_ring_reciprocal __ro_after_init;
@@ -63,15 +66,46 @@ static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_TX_ERR;
static int debug = -1;
-module_param(debug, int, S_IRUGO);
+module_param(debug, int, 0444);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-static void netvsc_set_multicast_list(struct net_device *net)
+static void netvsc_change_rx_flags(struct net_device *net, int change)
{
- struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ int inc;
+
+ if (!vf_netdev)
+ return;
+
+ if (change & IFF_PROMISC) {
+ inc = (net->flags & IFF_PROMISC) ? 1 : -1;
+ dev_set_promiscuity(vf_netdev, inc);
+ }
+
+ if (change & IFF_ALLMULTI) {
+ inc = (net->flags & IFF_ALLMULTI) ? 1 : -1;
+ dev_set_allmulti(vf_netdev, inc);
+ }
+}
- rndis_filter_update(nvdev);
+static void netvsc_set_rx_mode(struct net_device *net)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ struct net_device *vf_netdev;
+ struct netvsc_device *nvdev;
+
+ rcu_read_lock();
+ vf_netdev = rcu_dereference(ndev_ctx->vf_netdev);
+ if (vf_netdev) {
+ dev_uc_sync(vf_netdev, net);
+ dev_mc_sync(vf_netdev, net);
+ }
+
+ nvdev = rcu_dereference(ndev_ctx->nvdev);
+ if (nvdev)
+ rndis_filter_update(nvdev);
+ rcu_read_unlock();
}
static int netvsc_open(struct net_device *net)
@@ -91,10 +125,7 @@ static int netvsc_open(struct net_device *net)
return ret;
}
- netif_tx_wake_all_queues(net);
-
rdev = nvdev->extension;
-
if (!rdev->link_state)
netif_carrier_on(net);
@@ -112,36 +143,25 @@ static int netvsc_open(struct net_device *net)
return 0;
}
-static int netvsc_close(struct net_device *net)
+static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
{
- struct net_device_context *net_device_ctx = netdev_priv(net);
- struct net_device *vf_netdev
- = rtnl_dereference(net_device_ctx->vf_netdev);
- struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- int ret = 0;
- u32 aread, i, msec = 10, retry = 0, retry_max = 20;
- struct vmbus_channel *chn;
-
- netif_tx_disable(net);
-
- /* No need to close rndis filter if it is removed already */
- if (!nvdev)
- goto out;
-
- ret = rndis_filter_close(nvdev);
- if (ret != 0) {
- netdev_err(net, "unable to close device (ret %d).\n", ret);
- return ret;
- }
+ unsigned int retry = 0;
+ int i;
/* Ensure pending bytes in ring are read */
- while (true) {
- aread = 0;
+ for (;;) {
+ u32 aread = 0;
+
for (i = 0; i < nvdev->num_chn; i++) {
- chn = nvdev->chan_table[i].channel;
+ struct vmbus_channel *chn
+ = nvdev->chan_table[i].channel;
+
if (!chn)
continue;
+ /* make sure receive is not running now */
+ napi_synchronize(&nvdev->chan_table[i].napi);
+
aread = hv_get_bytes_to_read(&chn->inbound);
if (aread)
break;
@@ -151,22 +171,40 @@ static int netvsc_close(struct net_device *net)
break;
}
- retry++;
- if (retry > retry_max || aread == 0)
- break;
+ if (aread == 0)
+ return 0;
- msleep(msec);
+ if (++retry > RETRY_MAX)
+ return -ETIMEDOUT;
- if (msec < 1000)
- msec *= 2;
+ usleep_range(RETRY_US_LO, RETRY_US_HI);
}
+}
- if (aread) {
- netdev_err(net, "Ring buffer not empty after closing rndis\n");
- ret = -ETIMEDOUT;
+static int netvsc_close(struct net_device *net)
+{
+ struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct net_device *vf_netdev
+ = rtnl_dereference(net_device_ctx->vf_netdev);
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+ int ret;
+
+ netif_tx_disable(net);
+
+ /* No need to close rndis filter if it is removed already */
+ if (!nvdev)
+ return 0;
+
+ ret = rndis_filter_close(nvdev);
+ if (ret != 0) {
+ netdev_err(net, "unable to close device (ret %d).\n", ret);
+ return ret;
}
-out:
+ ret = netvsc_wait_until_empty(nvdev);
+ if (ret)
+ netdev_err(net, "Ring buffer not empty after closing rndis\n");
+
if (vf_netdev)
dev_close(vf_netdev);
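
netvsc_wait_until_empty() bounds the drain wait explicitly: RETRY_MAX iterations of a 5-10 ms usleep_range() give a worst case just over 10 seconds, replacing the old doubling msleep(). The generic shape of such a poll-until-idle helper, as a sketch:

#include <linux/delay.h>
#include <linux/errno.h>

/* Poll a "still busy" predicate until it clears or ~10s elapse. */
static int wait_until_idle(bool (*busy)(void *), void *arg)
{
	unsigned int retry;

	for (retry = 0; retry < 2000; retry++) {	/* RETRY_MAX */
		if (!busy(arg))
			return 0;
		usleep_range(5000, 10000);	/* RETRY_US_LO..RETRY_US_HI */
	}
	return -ETIMEDOUT;
}
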
@@ -299,8 +337,19 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
rcu_read_lock();
vf_netdev = rcu_dereference(ndc->vf_netdev);
if (vf_netdev) {
- txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
- qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+ const struct net_device_ops *vf_ops = vf_netdev->netdev_ops;
+
+ if (vf_ops->ndo_select_queue)
+ txq = vf_ops->ndo_select_queue(vf_netdev, skb,
+ accel_priv, fallback);
+ else
+ txq = fallback(vf_netdev, skb);
+
+		/* Record the queue selected by the VF so that it can be
+		 * used for the common case where the VF has more queues
+		 * than the synthetic device.
+		 */
+ qdisc_skb_cb(skb)->slave_dev_queue_mapping = txq;
} else {
txq = netvsc_pick_tx(ndev, skb);
}
@@ -782,7 +831,7 @@ int netvsc_recv_callback(struct net_device *net,
u64_stats_update_end(&rx_stats->syncp);
napi_gro_receive(&nvchan->napi, skb);
- return 0;
+ return NVSP_STAT_SUCCESS;
}
static void netvsc_get_drvinfo(struct net_device *net,
@@ -804,16 +853,81 @@ static void netvsc_get_channels(struct net_device *net,
}
}
+static int netvsc_detach(struct net_device *ndev,
+ struct netvsc_device *nvdev)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct hv_device *hdev = ndev_ctx->device_ctx;
+ int ret;
+
+	/* Don't try to continue setting up sub channels; if the work was
+	 * still queued, sub-channel setup never completed, so fall back
+	 * to a single channel.
+	 */
+ if (cancel_work_sync(&nvdev->subchan_work))
+ nvdev->num_chn = 1;
+
+ /* If device was up (receiving) then shutdown */
+ if (netif_running(ndev)) {
+ netif_tx_disable(ndev);
+
+ ret = rndis_filter_close(nvdev);
+ if (ret) {
+ netdev_err(ndev,
+ "unable to close device (ret %d).\n", ret);
+ return ret;
+ }
+
+ ret = netvsc_wait_until_empty(nvdev);
+ if (ret) {
+ netdev_err(ndev,
+ "Ring buffer not empty after closing rndis\n");
+ return ret;
+ }
+ }
+
+ netif_device_detach(ndev);
+
+ rndis_filter_device_remove(hdev, nvdev);
+
+ return 0;
+}
+
+static int netvsc_attach(struct net_device *ndev,
+ struct netvsc_device_info *dev_info)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct hv_device *hdev = ndev_ctx->device_ctx;
+ struct netvsc_device *nvdev;
+ struct rndis_device *rdev;
+ int ret;
+
+ nvdev = rndis_filter_device_add(hdev, dev_info);
+ if (IS_ERR(nvdev))
+ return PTR_ERR(nvdev);
+
+ /* Note: enable and attach happen when sub-channels setup */
+
+ netif_carrier_off(ndev);
+
+ if (netif_running(ndev)) {
+ ret = rndis_filter_open(nvdev);
+ if (ret)
+ return ret;
+
+ rdev = nvdev->extension;
+ if (!rdev->link_state)
+ netif_carrier_on(ndev);
+ }
+
+ return 0;
+}
+
static int netvsc_set_channels(struct net_device *net,
struct ethtool_channels *channels)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct hv_device *dev = net_device_ctx->device_ctx;
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
unsigned int orig, count = channels->combined_count;
struct netvsc_device_info device_info;
- bool was_opened;
- int ret = 0;
+ int ret;
/* We do not support separate count for rx, tx, or other */
if (count == 0 ||
@@ -830,9 +944,6 @@ static int netvsc_set_channels(struct net_device *net,
return -EINVAL;
orig = nvdev->num_chn;
- was_opened = rndis_filter_opened(nvdev);
- if (was_opened)
- rndis_filter_close(nvdev);
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = count;
@@ -841,28 +952,17 @@ static int netvsc_set_channels(struct net_device *net,
device_info.recv_sections = nvdev->recv_section_cnt;
device_info.recv_section_size = nvdev->recv_section_size;
- rndis_filter_device_remove(dev, nvdev);
+ ret = netvsc_detach(net, nvdev);
+ if (ret)
+ return ret;
- nvdev = rndis_filter_device_add(dev, &device_info);
- if (IS_ERR(nvdev)) {
- ret = PTR_ERR(nvdev);
+ ret = netvsc_attach(net, &device_info);
+ if (ret) {
device_info.num_chn = orig;
- nvdev = rndis_filter_device_add(dev, &device_info);
-
- if (IS_ERR(nvdev)) {
- netdev_err(net, "restoring channel setting failed: %ld\n",
- PTR_ERR(nvdev));
- return ret;
- }
+ if (netvsc_attach(net, &device_info))
+ netdev_err(net, "restoring channel setting failed\n");
}
- if (was_opened)
- rndis_filter_open(nvdev);
-
- /* We may have missed link change notifications */
- net_device_ctx->last_reconfig = 0;
- schedule_delayed_work(&net_device_ctx->dwork, 0);
-
return ret;
}
@@ -928,10 +1028,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
struct net_device_context *ndevctx = netdev_priv(ndev);
struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
- struct hv_device *hdev = ndevctx->device_ctx;
int orig_mtu = ndev->mtu;
struct netvsc_device_info device_info;
- bool was_opened;
int ret = 0;
if (!nvdev || nvdev->destroy)
@@ -944,11 +1042,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
return ret;
}
- netif_device_detach(ndev);
- was_opened = rndis_filter_opened(nvdev);
- if (was_opened)
- rndis_filter_close(nvdev);
-
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = nvdev->num_chn;
device_info.send_sections = nvdev->send_section_cnt;
@@ -956,35 +1049,27 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
device_info.recv_sections = nvdev->recv_section_cnt;
device_info.recv_section_size = nvdev->recv_section_size;
- rndis_filter_device_remove(hdev, nvdev);
+ ret = netvsc_detach(ndev, nvdev);
+ if (ret)
+ goto rollback_vf;
ndev->mtu = mtu;
- nvdev = rndis_filter_device_add(hdev, &device_info);
- if (IS_ERR(nvdev)) {
- ret = PTR_ERR(nvdev);
-
- /* Attempt rollback to original MTU */
- ndev->mtu = orig_mtu;
- nvdev = rndis_filter_device_add(hdev, &device_info);
-
- if (vf_netdev)
- dev_set_mtu(vf_netdev, orig_mtu);
-
- if (IS_ERR(nvdev)) {
- netdev_err(ndev, "restoring mtu failed: %ld\n",
- PTR_ERR(nvdev));
- return ret;
- }
- }
+ ret = netvsc_attach(ndev, &device_info);
+ if (ret)
+ goto rollback;
- if (was_opened)
- rndis_filter_open(nvdev);
+ return 0;
- netif_device_attach(ndev);
+rollback:
+ /* Attempt rollback to original MTU */
+ ndev->mtu = orig_mtu;
- /* We may have missed link change notifications */
- schedule_delayed_work(&ndevctx->dwork, 0);
+ if (netvsc_attach(ndev, &device_info))
+ netdev_err(ndev, "restoring mtu failed\n");
+rollback_vf:
+ if (vf_netdev)
+ dev_set_mtu(vf_netdev, orig_mtu);
return ret;
}
@@ -1490,11 +1575,9 @@ static int netvsc_set_ringparam(struct net_device *ndev,
{
struct net_device_context *ndevctx = netdev_priv(ndev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
- struct hv_device *hdev = ndevctx->device_ctx;
struct netvsc_device_info device_info;
struct ethtool_ringparam orig;
u32 new_tx, new_rx;
- bool was_opened;
int ret = 0;
if (!nvdev || nvdev->destroy)
@@ -1519,34 +1602,18 @@ static int netvsc_set_ringparam(struct net_device *ndev,
device_info.recv_sections = new_rx;
device_info.recv_section_size = nvdev->recv_section_size;
- netif_device_detach(ndev);
- was_opened = rndis_filter_opened(nvdev);
- if (was_opened)
- rndis_filter_close(nvdev);
-
- rndis_filter_device_remove(hdev, nvdev);
-
- nvdev = rndis_filter_device_add(hdev, &device_info);
- if (IS_ERR(nvdev)) {
- ret = PTR_ERR(nvdev);
+ ret = netvsc_detach(ndev, nvdev);
+ if (ret)
+ return ret;
+ ret = netvsc_attach(ndev, &device_info);
+ if (ret) {
device_info.send_sections = orig.tx_pending;
device_info.recv_sections = orig.rx_pending;
- nvdev = rndis_filter_device_add(hdev, &device_info);
- if (IS_ERR(nvdev)) {
- netdev_err(ndev, "restoring ringparam failed: %ld\n",
- PTR_ERR(nvdev));
- return ret;
- }
- }
- if (was_opened)
- rndis_filter_open(nvdev);
- netif_device_attach(ndev);
-
- /* We may have missed link change notifications */
- ndevctx->last_reconfig = 0;
- schedule_delayed_work(&ndevctx->dwork, 0);
+ if (netvsc_attach(ndev, &device_info))
+			netdev_err(ndev, "restoring ringparam failed\n");
+ }
return ret;
}
@@ -1576,7 +1643,8 @@ static const struct net_device_ops device_ops = {
.ndo_open = netvsc_open,
.ndo_stop = netvsc_close,
.ndo_start_xmit = netvsc_start_xmit,
- .ndo_set_rx_mode = netvsc_set_multicast_list,
+ .ndo_change_rx_flags = netvsc_change_rx_flags,
+ .ndo_set_rx_mode = netvsc_set_rx_mode,
.ndo_change_mtu = netvsc_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr,
@@ -1807,6 +1875,15 @@ static void __netvsc_vf_setup(struct net_device *ndev,
netdev_warn(vf_netdev,
"unable to change mtu to %u\n", ndev->mtu);
+ /* set multicast etc flags on VF */
+ dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE);
+
+ /* sync address list from ndev to VF */
+ netif_addr_lock_bh(ndev);
+ dev_uc_sync(vf_netdev, ndev);
+ dev_mc_sync(vf_netdev, ndev);
+ netif_addr_unlock_bh(ndev);
+
if (netif_running(ndev)) {
ret = dev_open(vf_netdev);
if (ret)
@@ -2021,8 +2098,8 @@ no_net:
static int netvsc_remove(struct hv_device *dev)
{
struct net_device_context *ndev_ctx;
- struct net_device *vf_netdev;
- struct net_device *net;
+ struct net_device *vf_netdev, *net;
+ struct netvsc_device *nvdev;
net = hv_get_drvdata(dev);
if (net == NULL) {
@@ -2032,10 +2109,14 @@ static int netvsc_remove(struct hv_device *dev)
ndev_ctx = netdev_priv(net);
- netif_device_detach(net);
-
cancel_delayed_work_sync(&ndev_ctx->dwork);
+ rcu_read_lock();
+ nvdev = rcu_dereference(ndev_ctx->nvdev);
+
+ if (nvdev)
+ cancel_work_sync(&nvdev->subchan_work);
+
/*
* Call to the vsc driver to let it know that the device is being
* removed. Also blocks mtu and channel changes.
@@ -2045,11 +2126,13 @@ static int netvsc_remove(struct hv_device *dev)
if (vf_netdev)
netvsc_unregister_vf(vf_netdev);
+ if (nvdev)
+ rndis_filter_device_remove(dev, nvdev);
+
unregister_netdevice(net);
- rndis_filter_device_remove(dev,
- rtnl_dereference(ndev_ctx->nvdev));
rtnl_unlock();
+ rcu_read_unlock();
hv_set_drvdata(dev, NULL);
diff --git a/drivers/net/hyperv/netvsc_trace.c b/drivers/net/hyperv/netvsc_trace.c
new file mode 100644
index 000000000000..bb0ce5a2bcd5
--- /dev/null
+++ b/drivers/net/hyperv/netvsc_trace.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/netdevice.h>
+
+#include "hyperv_net.h"
+
+#define CREATE_TRACE_POINTS
+#include "netvsc_trace.h"
diff --git a/drivers/net/hyperv/netvsc_trace.h b/drivers/net/hyperv/netvsc_trace.h
new file mode 100644
index 000000000000..f7585563dea5
--- /dev/null
+++ b/drivers/net/hyperv/netvsc_trace.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if !defined(_NETVSC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _NETVSC_TRACE_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM netvsc
+#define TRACE_INCLUDE_FILE netvsc_trace
+
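+/* TRACE_DEFINE_ENUM() exports each enum value so that user-space tools
+ * can resolve the __print_symbolic() names in the trace format files.
+ */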
+TRACE_DEFINE_ENUM(RNDIS_MSG_PACKET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INDICATE);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INIT);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INIT_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_HALT);
+TRACE_DEFINE_ENUM(RNDIS_MSG_QUERY);
+TRACE_DEFINE_ENUM(RNDIS_MSG_QUERY_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_SET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_SET_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_RESET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_RESET_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_KEEPALIVE);
+TRACE_DEFINE_ENUM(RNDIS_MSG_KEEPALIVE_C);
+
+#define show_rndis_type(type) \
+ __print_symbolic(type, \
+ { RNDIS_MSG_PACKET, "PACKET" }, \
+ { RNDIS_MSG_INDICATE, "INDICATE", }, \
+ { RNDIS_MSG_INIT, "INIT", }, \
+ { RNDIS_MSG_INIT_C, "INIT_C", }, \
+ { RNDIS_MSG_HALT, "HALT", }, \
+ { RNDIS_MSG_QUERY, "QUERY", }, \
+ { RNDIS_MSG_QUERY_C, "QUERY_C", }, \
+ { RNDIS_MSG_SET, "SET", }, \
+ { RNDIS_MSG_SET_C, "SET_C", }, \
+ { RNDIS_MSG_RESET, "RESET", }, \
+ { RNDIS_MSG_RESET_C, "RESET_C", }, \
+ { RNDIS_MSG_KEEPALIVE, "KEEPALIVE", }, \
+ { RNDIS_MSG_KEEPALIVE_C, "KEEPALIVE_C", })
+
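+/* req_id is read through the init_req view below; the request-id field
+ * appears to sit at the same offset in every RNDIS request/completion
+ * layout, so the same accessor serves all message types traced here.
+ */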
+DECLARE_EVENT_CLASS(rndis_msg_class,
+ TP_PROTO(const struct net_device *ndev, u16 q,
+ const struct rndis_message *msg),
+ TP_ARGS(ndev, q, msg),
+ TP_STRUCT__entry(
+ __string( name, ndev->name )
+ __field( u16, queue )
+ __field( u32, req_id )
+ __field( u32, msg_type )
+ __field( u32, msg_len )
+ ),
+ TP_fast_assign(
+ __assign_str(name, ndev->name);
+ __entry->queue = q;
+ __entry->req_id = msg->msg.init_req.req_id;
+ __entry->msg_type = msg->ndis_msg_type;
+ __entry->msg_len = msg->msg_len;
+ ),
+ TP_printk("dev=%s q=%u req=%#x type=%s msg_len=%u",
+ __get_str(name), __entry->queue, __entry->req_id,
+ show_rndis_type(__entry->msg_type), __entry->msg_len)
+);
+
+DEFINE_EVENT(rndis_msg_class, rndis_send,
+ TP_PROTO(const struct net_device *ndev, u16 q,
+ const struct rndis_message *msg),
+ TP_ARGS(ndev, q, msg)
+);
+
+DEFINE_EVENT(rndis_msg_class, rndis_recv,
+ TP_PROTO(const struct net_device *ndev, u16 q,
+ const struct rndis_message *msg),
+ TP_ARGS(ndev, q, msg)
+);
+
+TRACE_DEFINE_ENUM(NVSP_MSG_TYPE_INIT);
+TRACE_DEFINE_ENUM(NVSP_MSG_TYPE_INIT_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_NDIS_VER);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RECV_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_REVOKE_RECV_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_SEND_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_REVOKE_SEND_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RNDIS_PKT);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG2_TYPE_SEND_NDIS_CONFIG);
+
+TRACE_DEFINE_ENUM(NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION);
+TRACE_DEFINE_ENUM(NVSP_MSG4_TYPE_SWITCH_DATA_PATH);
+
+TRACE_DEFINE_ENUM(NVSP_MSG5_TYPE_SUBCHANNEL);
+TRACE_DEFINE_ENUM(NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE);
+
+#define show_nvsp_type(type) \
+ __print_symbolic(type, \
+ { NVSP_MSG_TYPE_INIT, "INIT" }, \
+ { NVSP_MSG_TYPE_INIT_COMPLETE, "INIT_COMPLETE" }, \
+ { NVSP_MSG1_TYPE_SEND_NDIS_VER, "SEND_NDIS_VER" }, \
+ { NVSP_MSG1_TYPE_SEND_RECV_BUF, "SEND_RECV_BUF" }, \
+ { NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE, "SEND_RECV_BUF_COMPLETE" }, \
+ { NVSP_MSG1_TYPE_REVOKE_RECV_BUF, "REVOKE_RECV_BUF" }, \
+ { NVSP_MSG1_TYPE_SEND_SEND_BUF, "SEND_SEND_BUF" }, \
+ { NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE, "SEND_SEND_BUF_COMPLETE" }, \
+ { NVSP_MSG1_TYPE_REVOKE_SEND_BUF, "REVOKE_SEND_BUF" }, \
+ { NVSP_MSG1_TYPE_SEND_RNDIS_PKT, "SEND_RNDIS_PKT" }, \
+ { NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, "SEND_RNDIS_PKT_COMPLETE" },\
+ { NVSP_MSG2_TYPE_SEND_NDIS_CONFIG, "SEND_NDIS_CONFIG" }, \
+ { NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION, "SEND_VF_ASSOCIATION" }, \
+ { NVSP_MSG4_TYPE_SWITCH_DATA_PATH, "SWITCH_DATA_PATH" }, \
+ { NVSP_MSG5_TYPE_SUBCHANNEL, "SUBCHANNEL" }, \
+ { NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, "SEND_INDIRECTION_TABLE" })
+
+TRACE_EVENT(nvsp_send,
+ TP_PROTO(const struct net_device *ndev,
+ const struct nvsp_message *msg),
+ TP_ARGS(ndev, msg),
+ TP_STRUCT__entry(
+ __string( name, ndev->name )
+ __field( u32, msg_type )
+ ),
+ TP_fast_assign(
+ __assign_str(name, ndev->name);
+ __entry->msg_type = msg->hdr.msg_type;
+ ),
+ TP_printk("dev=%s type=%s",
+ __get_str(name),
+ show_nvsp_type(__entry->msg_type))
+);
+
+TRACE_EVENT(nvsp_send_pkt,
+ TP_PROTO(const struct net_device *ndev,
+ const struct vmbus_channel *chan,
+ const struct nvsp_1_message_send_rndis_packet *rpkt),
+ TP_ARGS(ndev, chan, rpkt),
+ TP_STRUCT__entry(
+ __string( name, ndev->name )
+ __field( u16, qid )
+ __field( u32, channel_type )
+ __field( u32, section_index )
+ __field( u32, section_size )
+ ),
+ TP_fast_assign(
+ __assign_str(name, ndev->name);
+ __entry->qid = chan->offermsg.offer.sub_channel_index;
+ __entry->channel_type = rpkt->channel_type;
+ __entry->section_index = rpkt->send_buf_section_index;
+ __entry->section_size = rpkt->send_buf_section_size;
+ ),
+ TP_printk("dev=%s qid=%u type=%s section=%u size=%d",
+ __get_str(name), __entry->qid,
+ __entry->channel_type ? "CONTROL" : "DATA",
+ __entry->section_index, __entry->section_size)
+);
+
+TRACE_EVENT(nvsp_recv,
+ TP_PROTO(const struct net_device *ndev,
+ const struct vmbus_channel *chan,
+ const struct nvsp_message *msg),
+ TP_ARGS(ndev, chan, msg),
+ TP_STRUCT__entry(
+ __string( name, ndev->name )
+ __field( u16, qid )
+ __field( u32, msg_type )
+ ),
+ TP_fast_assign(
+ __assign_str(name, ndev->name);
+ __entry->qid = chan->offermsg.offer.sub_channel_index;
+ __entry->msg_type = msg->hdr.msg_type;
+ ),
+ TP_printk("dev=%s qid=%u type=%s",
+ __get_str(name), __entry->qid,
+ show_nvsp_type(__entry->msg_type))
+);
+
+#endif /* _NETVSC_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/net/hyperv
+#include <trace/define_trace.h>
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index c3ca191fea7f..4a4952363e8a 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -31,6 +31,7 @@
#include <linux/rtnetlink.h>
#include "hyperv_net.h"
+#include "netvsc_trace.h"
static void rndis_set_multicast(struct work_struct *w);
@@ -241,6 +242,8 @@ static int rndis_filter_send_request(struct rndis_device *dev,
pb[0].len;
}
+ trace_rndis_send(dev->ndev, 0, &req->request_msg);
+
rcu_read_lock_bh();
ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
rcu_read_unlock_bh();
@@ -264,13 +267,23 @@ static void rndis_set_link_state(struct rndis_device *rdev,
}
}
-static void rndis_filter_receive_response(struct rndis_device *dev,
- struct rndis_message *resp)
+static void rndis_filter_receive_response(struct net_device *ndev,
+ struct netvsc_device *nvdev,
+ const struct rndis_message *resp)
{
+ struct rndis_device *dev = nvdev->extension;
struct rndis_request *request = NULL;
bool found = false;
unsigned long flags;
- struct net_device *ndev = dev->ndev;
+
+	/* This should never happen: it means a control message
+	 * response was received after the device was removed.
+	 */
+ if (dev->state == RNDIS_DEV_UNINITIALIZED) {
+ netdev_err(ndev,
+ "got rndis message uninitialized\n");
+ return;
+ }
spin_lock_irqsave(&dev->request_lock, flags);
list_for_each_entry(request, &dev->req_list, list_ent) {
@@ -352,7 +365,6 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
static int rndis_filter_receive_data(struct net_device *ndev,
struct netvsc_device *nvdev,
- struct rndis_device *dev,
struct rndis_message *msg,
struct vmbus_channel *channel,
void *data, u32 data_buflen)
@@ -372,7 +384,7 @@ static int rndis_filter_receive_data(struct net_device *ndev,
* should be the data packet size plus the trailer padding size
*/
if (unlikely(data_buflen < rndis_pkt->data_len)) {
- netdev_err(dev->ndev, "rndis message buffer "
+ netdev_err(ndev, "rndis message buffer "
"overflow detected (got %u, min %u)"
"...dropping this message!\n",
data_buflen, rndis_pkt->data_len);
@@ -400,35 +412,20 @@ int rndis_filter_receive(struct net_device *ndev,
void *data, u32 buflen)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct rndis_device *rndis_dev = net_dev->extension;
struct rndis_message *rndis_msg = data;
- /* Make sure the rndis device state is initialized */
- if (unlikely(!rndis_dev)) {
- netif_dbg(net_device_ctx, rx_err, ndev,
- "got rndis message but no rndis device!\n");
- return NVSP_STAT_FAIL;
- }
-
- if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
- netif_dbg(net_device_ctx, rx_err, ndev,
- "got rndis message uninitialized\n");
- return NVSP_STAT_FAIL;
- }
-
if (netif_msg_rx_status(net_device_ctx))
dump_rndis_message(ndev, rndis_msg);
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
- return rndis_filter_receive_data(ndev, net_dev,
- rndis_dev, rndis_msg,
+ return rndis_filter_receive_data(ndev, net_dev, rndis_msg,
channel, data, buflen);
case RNDIS_MSG_INIT_C:
case RNDIS_MSG_QUERY_C:
case RNDIS_MSG_SET_C:
/* completion msgs */
- rndis_filter_receive_response(rndis_dev, rndis_msg);
+ rndis_filter_receive_response(ndev, net_dev, rndis_msg);
break;
case RNDIS_MSG_INDICATE:
@@ -440,10 +437,10 @@ int rndis_filter_receive(struct net_device *ndev,
"unhandled rndis message (type %u len %u)\n",
rndis_msg->ndis_msg_type,
rndis_msg->msg_len);
- break;
+ return NVSP_STAT_FAIL;
}
- return 0;
+ return NVSP_STAT_SUCCESS;
}
static int rndis_filter_query_device(struct rndis_device *dev,
@@ -825,13 +822,15 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev,
struct rndis_set_request *set;
int ret;
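+	/* Avoid an RNDIS set-request round-trip when the filter value is
+	 * unchanged; dev->filter caches the last value successfully set.
+	 */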
+ if (dev->filter == new_filter)
+ return 0;
+
request = get_rndis_request(dev, RNDIS_MSG_SET,
RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
sizeof(u32));
if (!request)
return -ENOMEM;
-
/* Setup the rndis set */
set = &request->request_msg.msg.set_req;
set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
@@ -842,8 +841,10 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev,
&new_filter, sizeof(u32));
ret = rndis_filter_send_request(dev, request);
- if (ret == 0)
+ if (ret == 0) {
wait_for_completion(&request->wait_event);
+ dev->filter = new_filter;
+ }
put_rndis_request(dev, request);
@@ -854,15 +855,19 @@ static void rndis_set_multicast(struct work_struct *w)
{
struct rndis_device *rdev
= container_of(w, struct rndis_device, mcast_work);
+ u32 filter = NDIS_PACKET_TYPE_DIRECTED;
+ unsigned int flags = rdev->ndev->flags;
- if (rdev->ndev->flags & IFF_PROMISC)
- rndis_filter_set_packet_filter(rdev,
- NDIS_PACKET_TYPE_PROMISCUOUS);
- else
- rndis_filter_set_packet_filter(rdev,
- NDIS_PACKET_TYPE_BROADCAST |
- NDIS_PACKET_TYPE_ALL_MULTICAST |
- NDIS_PACKET_TYPE_DIRECTED);
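+	/* Map netdev flags to an NDIS packet filter: promiscuous overrides
+	 * everything else; otherwise accept directed frames plus whatever
+	 * multicast/broadcast reception the flags request.
+	 */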
+ if (flags & IFF_PROMISC) {
+ filter = NDIS_PACKET_TYPE_PROMISCUOUS;
+ } else {
+ if (flags & IFF_ALLMULTI)
+ filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
+ if (flags & IFF_BROADCAST)
+ filter |= NDIS_PACKET_TYPE_BROADCAST;
+ }
+
+ rndis_filter_set_packet_filter(rdev, filter);
}
void rndis_filter_update(struct netvsc_device *nvdev)
@@ -940,12 +945,11 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
return true;
}
-static void rndis_filter_halt_device(struct rndis_device *dev)
+static void rndis_filter_halt_device(struct netvsc_device *nvdev,
+ struct rndis_device *dev)
{
struct rndis_request *request;
struct rndis_halt_request *halt;
- struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
- struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -1084,6 +1088,8 @@ void rndis_set_subchannel(struct work_struct *w)
init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
init_packet->msg.v5_msg.subchn_req.num_subchannels =
nvdev->num_chn - 1;
+ trace_nvsp_send(ndev, init_packet);
+
ret = vmbus_sendpacket(hv_dev->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
@@ -1116,6 +1122,7 @@ void rndis_set_subchannel(struct work_struct *w)
for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
ndev_ctx->tx_table[i] = i % nvdev->num_chn;
+ netif_device_attach(ndev);
rtnl_unlock();
return;
@@ -1126,6 +1133,8 @@ failed:
nvdev->max_chn = 1;
nvdev->num_chn = 1;
+
+ netif_device_attach(ndev);
unlock:
rtnl_unlock();
}
@@ -1328,6 +1337,10 @@ out:
net_device->num_chn = 1;
}
+ /* No sub channels, device is ready */
+ if (net_device->num_chn == 1)
+ netif_device_attach(net);
+
return net_device;
err_dev_remv:
@@ -1341,12 +1354,11 @@ void rndis_filter_device_remove(struct hv_device *dev,
struct rndis_device *rndis_dev = net_dev->extension;
/* Halt and release the rndis device */
- rndis_filter_halt_device(rndis_dev);
+ rndis_filter_halt_device(net_dev, rndis_dev);
net_dev->extension = NULL;
netvsc_device_remove(dev);
- kfree(rndis_dev);
}
int rndis_filter_open(struct netvsc_device *nvdev)
@@ -1364,10 +1376,3 @@ int rndis_filter_close(struct netvsc_device *nvdev)
return rndis_filter_close_device(nvdev->extension);
}
-
-bool rndis_filter_opened(const struct netvsc_device *nvdev)
-{
- const struct rndis_device *dev = nvdev->extension;
-
- return dev->state == RNDIS_DEV_DATAINITIALIZED;
-}
diff --git a/drivers/net/ieee802154/Kconfig b/drivers/net/ieee802154/Kconfig
index 303ba4133920..8782f5655e3f 100644
--- a/drivers/net/ieee802154/Kconfig
+++ b/drivers/net/ieee802154/Kconfig
@@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS
exposes a debugfs node for each CA8210 instance which allows
direct use of the Cascoda API, exposing the 802.15.4 MAC
management entities.
+
+config IEEE802154_MCR20A
+ tristate "MCR20A transceiver driver"
+ depends on IEEE802154_DRIVERS && MAC802154
+ depends on SPI
+ ---help---
+ Say Y here to enable the MCR20A SPI 802.15.4 wireless
+ controller.
+
+	  This driver can also be built as a module. To do so, say M here.
+	  The module will be called 'mcr20a'.
diff --git a/drivers/net/ieee802154/Makefile b/drivers/net/ieee802154/Makefile
index bea1de5e726c..104744d5a668 100644
--- a/drivers/net/ieee802154/Makefile
+++ b/drivers/net/ieee802154/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o
obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 548d9d026a85..77abedf0b524 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -1661,7 +1661,7 @@ static int at86rf230_debugfs_init(struct at86rf230_local *lp)
if (!at86rf230_debugfs_root)
return -ENOMEM;
- stats = debugfs_create_file("trac_stats", S_IRUGO,
+ stats = debugfs_create_file("trac_stats", 0444,
at86rf230_debugfs_root, lp,
&at86rf230_stats_fops);
if (!stats)
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
new file mode 100644
index 000000000000..d9eb22a52551
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -0,0 +1,1413 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define SPI_COMMAND_BUFFER 3
+
+#define REGISTER_READ BIT(7)
+#define REGISTER_WRITE (0 << 7)
+#define REGISTER_ACCESS (0 << 6)
+#define PACKET_BUFF_BURST_ACCESS BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS BIT(5)
+
+#define MCR20A_WRITE_REG(x) (x)
+#define MCR20A_READ_REG(x) (REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF (0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF (0x40)
+
+#define MCR20A_CMD_REG 0x80
+#define MCR20A_CMD_REG_MASK 0x3f
+#define MCR20A_CMD_WRITE 0x40
+#define MCR20A_CMD_FB 0x20
+
+/* Number of Interrupt Request Status Registers */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA Type */
+enum {
+ MCR20A_CCA_ED, // energy detect - CCA bit not active,
+ // not to be used for T and CCCA sequences
+ MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE3
+};
+
+enum {
+ MCR20A_XCVSEQ_IDLE = 0x00,
+ MCR20A_XCVSEQ_RX = 0x01,
+ MCR20A_XCVSEQ_TX = 0x02,
+ MCR20A_XCVSEQ_CCA = 0x03,
+ MCR20A_XCVSEQ_TR = 0x04,
+ MCR20A_XCVSEQ_CCCA = 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define MCR20A_MIN_CHANNEL (11)
+#define MCR20A_MAX_CHANNEL (26)
+#define MCR20A_CHANNEL_SPACING (5)
+
+/* MCR20A CCA Threshold constants */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations */
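+/* One entry per 2.4 GHz channel 11..26, indexed by (channel - 11);
+ * written to DAR_PLL_INT0 / DAR_PLL_FRAC0_* by mcr20a_set_channel().
+ */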
+static const u8 PLL_INT[16] = {
+ /* 2405 */ 0x0B, /* 2410 */ 0x0B, /* 2415 */ 0x0B,
+ /* 2420 */ 0x0B, /* 2425 */ 0x0B, /* 2430 */ 0x0B,
+ /* 2435 */ 0x0C, /* 2440 */ 0x0C, /* 2445 */ 0x0C,
+ /* 2450 */ 0x0C, /* 2455 */ 0x0C, /* 2460 */ 0x0C,
+ /* 2465 */ 0x0D, /* 2470 */ 0x0D, /* 2475 */ 0x0D,
+ /* 2480 */ 0x0D
+};
+
+static const u8 PLL_FRAC[16] = {
+ /* 2405 */ 0x28, /* 2410 */ 0x50, /* 2415 */ 0x78,
+ /* 2420 */ 0xA0, /* 2425 */ 0xC8, /* 2430 */ 0xF0,
+ /* 2435 */ 0x18, /* 2440 */ 0x40, /* 2445 */ 0x68,
+ /* 2450 */ 0x90, /* 2455 */ 0xB8, /* 2460 */ 0xE0,
+ /* 2465 */ 0x08, /* 2470 */ 0x30, /* 2475 */ 0x58,
+ /* 2480 */ 0x80
+};
+
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+ { IAR_MISC_PAD_CTRL, 0x02 },
+ { IAR_VCO_CTRL1, 0xB3 },
+ { IAR_VCO_CTRL2, 0x07 },
+ { IAR_PA_TUNING, 0x71 },
+ { IAR_CHF_IBUF, 0x2F },
+ { IAR_CHF_QBUF, 0x2F },
+ { IAR_CHF_IRIN, 0x24 },
+ { IAR_CHF_QRIN, 0x24 },
+ { IAR_CHF_IL, 0x24 },
+ { IAR_CHF_QL, 0x24 },
+ { IAR_CHF_CC1, 0x32 },
+ { IAR_CHF_CCL, 0x1D },
+ { IAR_CHF_CC2, 0x2D },
+ { IAR_CHF_IROUT, 0x24 },
+ { IAR_CHF_QROUT, 0x24 },
+ { IAR_PA_CAL, 0x28 },
+ { IAR_AGC_THR1, 0x55 },
+ { IAR_AGC_THR2, 0x2D },
+ { IAR_ATT_RSSI1, 0x5F },
+ { IAR_ATT_RSSI2, 0x8F },
+ { IAR_RSSI_OFFSET, 0x61 },
+ { IAR_CHF_PMA_GAIN, 0x03 },
+ { IAR_CCA1_THRESH, 0x50 },
+ { IAR_CORR_NVAL, 0x13 },
+ { IAR_ACKDELAY, 0x3D },
+};
+
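+/* bits 11..26 set: the sixteen IEEE 802.15.4 channels of the 2.4 GHz band */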
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+struct mcr20a_platform_data {
+ int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF (127)
+
+#define printdev(X) (&X->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE 0x01
+#define MCR20A_DAR_READ 0x00
+#define MCR20A_DAR_NUMREGS 0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS 0x80
+#define MCR20A_IAR_NUMREGS 0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
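+/* Short (DAR) access: register number in bits 7:1, write flag in bit 0.
+ * Long (IAR) access: bit 15 set, register number in bits 14:5, write
+ * flag in bit 4.
+ */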
+#define MCR20A_READSHORT(reg) ((reg) << 1)
+#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1)
+#define MCR20A_READLONG(reg) (1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg) (1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Type definitions for link configuration of instantiable layers */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ case DAR_PHY_CTRL1:
+ case DAR_PHY_CTRL2:
+ case DAR_PHY_CTRL3:
+ case DAR_PHY_CTRL4:
+ case DAR_SRC_CTRL:
+ case DAR_SRC_ADDRS_SUM_LSB:
+ case DAR_SRC_ADDRS_SUM_MSB:
+ case DAR_T3CMP_LSB:
+ case DAR_T3CMP_MSB:
+ case DAR_T3CMP_USB:
+ case DAR_T2PRIMECMP_LSB:
+ case DAR_T2PRIMECMP_MSB:
+ case DAR_T1CMP_LSB:
+ case DAR_T1CMP_MSB:
+ case DAR_T1CMP_USB:
+ case DAR_T2CMP_LSB:
+ case DAR_T2CMP_MSB:
+ case DAR_T2CMP_USB:
+ case DAR_T4CMP_LSB:
+ case DAR_T4CMP_MSB:
+ case DAR_T4CMP_USB:
+ case DAR_PLL_INT0:
+ case DAR_PLL_FRAC0_LSB:
+ case DAR_PLL_FRAC0_MSB:
+ case DAR_PA_PWR:
+ /* no DAR_ACM */
+ case DAR_OVERWRITE_VER:
+ case DAR_CLK_OUT_CTRL:
+ case DAR_PWR_MODES:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_dar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case DAR_RX_FRM_LEN:
+ case DAR_CCA1_ED_FNL:
+ case DAR_EVENT_TMR_LSB:
+ case DAR_EVENT_TMR_MSB:
+ case DAR_EVENT_TMR_USB:
+ case DAR_TIMESTAMP_LSB:
+ case DAR_TIMESTAMP_MSB:
+ case DAR_TIMESTAMP_USB:
+ case DAR_SEQ_STATE:
+ case DAR_LQI_VALUE:
+ case DAR_RSSI_CCA_CONT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+ /* can be changed during runtime */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ /* use them in spi_async and regmap so it's volatile */
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+ /* don't clear irq line on read */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config mcr20a_dar_regmap = {
+ .name = "mcr20a_dar",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_dar_writeable,
+ .readable_reg = mcr20a_dar_readable,
+ .volatile_reg = mcr20a_dar_volatile,
+ .precious_reg = mcr20a_dar_precious,
+ .fast_io = true,
+ .can_multi_write = true,
+};
+
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case IAR_XTAL_TRIM:
+ case IAR_PMC_LP_TRIM:
+ case IAR_MACPANID0_LSB:
+ case IAR_MACPANID0_MSB:
+ case IAR_MACSHORTADDRS0_LSB:
+ case IAR_MACSHORTADDRS0_MSB:
+ case IAR_MACLONGADDRS0_0:
+ case IAR_MACLONGADDRS0_8:
+ case IAR_MACLONGADDRS0_16:
+ case IAR_MACLONGADDRS0_24:
+ case IAR_MACLONGADDRS0_32:
+ case IAR_MACLONGADDRS0_40:
+ case IAR_MACLONGADDRS0_48:
+ case IAR_MACLONGADDRS0_56:
+ case IAR_RX_FRAME_FILTER:
+ case IAR_PLL_INT1:
+ case IAR_PLL_FRAC1_LSB:
+ case IAR_PLL_FRAC1_MSB:
+ case IAR_MACPANID1_LSB:
+ case IAR_MACPANID1_MSB:
+ case IAR_MACSHORTADDRS1_LSB:
+ case IAR_MACSHORTADDRS1_MSB:
+ case IAR_MACLONGADDRS1_0:
+ case IAR_MACLONGADDRS1_8:
+ case IAR_MACLONGADDRS1_16:
+ case IAR_MACLONGADDRS1_24:
+ case IAR_MACLONGADDRS1_32:
+ case IAR_MACLONGADDRS1_40:
+ case IAR_MACLONGADDRS1_48:
+ case IAR_MACLONGADDRS1_56:
+ case IAR_DUAL_PAN_CTRL:
+ case IAR_DUAL_PAN_DWELL:
+ case IAR_CCA1_THRESH:
+ case IAR_CCA1_ED_OFFSET_COMP:
+ case IAR_LQI_OFFSET_COMP:
+ case IAR_CCA_CTRL:
+ case IAR_CCA2_CORR_PEAKS:
+ case IAR_CCA2_CORR_THRESH:
+ case IAR_TMR_PRESCALE:
+ case IAR_ANT_PAD_CTRL:
+ case IAR_MISC_PAD_CTRL:
+ case IAR_BSM_CTRL:
+ case IAR_RNG:
+ case IAR_RX_WTR_MARK:
+ case IAR_SOFT_RESET:
+ case IAR_TXDELAY:
+ case IAR_ACKDELAY:
+ case IAR_CORR_NVAL:
+ case IAR_ANT_AGC_CTRL:
+ case IAR_AGC_THR1:
+ case IAR_AGC_THR2:
+ case IAR_PA_CAL:
+ case IAR_ATT_RSSI1:
+ case IAR_ATT_RSSI2:
+ case IAR_RSSI_OFFSET:
+ case IAR_XTAL_CTRL:
+ case IAR_CHF_PMA_GAIN:
+ case IAR_CHF_IBUF:
+ case IAR_CHF_QBUF:
+ case IAR_CHF_IRIN:
+ case IAR_CHF_QRIN:
+ case IAR_CHF_IL:
+ case IAR_CHF_QL:
+ case IAR_CHF_CC1:
+ case IAR_CHF_CCL:
+ case IAR_CHF_CC2:
+ case IAR_CHF_IROUT:
+ case IAR_CHF_QROUT:
+ case IAR_PA_TUNING:
+ case IAR_VCO_CTRL1:
+ case IAR_VCO_CTRL2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_iar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case IAR_PART_ID:
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+	/* can be changed during runtime */
+ switch (reg) {
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config mcr20a_iar_regmap = {
+ .name = "mcr20a_iar",
+ .reg_bits = 16,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ | IAR_INDEX,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_iar_writeable,
+ .readable_reg = mcr20a_iar_readable,
+ .volatile_reg = mcr20a_iar_volatile,
+ .fast_io = true,
+};
+
+struct mcr20a_local {
+ struct spi_device *spi;
+
+ struct ieee802154_hw *hw;
+ struct mcr20a_platform_data *pdata;
+ struct regmap *regmap_dar;
+ struct regmap *regmap_iar;
+
+ u8 *buf;
+
+ bool is_tx;
+
+ /* for writing tx buffer */
+ struct spi_message tx_buf_msg;
+ u8 tx_header[1];
+ /* burst buffer write command */
+ struct spi_transfer tx_xfer_header;
+ u8 tx_len[1];
+ /* len of tx packet */
+ struct spi_transfer tx_xfer_len;
+ /* data of tx packet */
+ struct spi_transfer tx_xfer_buf;
+ struct sk_buff *tx_skb;
+
+ /* for read length rxfifo */
+ struct spi_message reg_msg;
+ u8 reg_cmd[1];
+ u8 reg_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer reg_xfer_cmd;
+ struct spi_transfer reg_xfer_data;
+
+ /* receive handling */
+ struct spi_message rx_buf_msg;
+ u8 rx_header[1];
+ struct spi_transfer rx_xfer_header;
+ u8 rx_lqi[1];
+ struct spi_transfer rx_xfer_lqi;
+ u8 rx_buf[MCR20A_MAX_BUF];
+ struct spi_transfer rx_xfer_buf;
+
+ /* isr handling for reading intstat */
+ struct spi_message irq_msg;
+ u8 irq_header[1];
+ u8 irq_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer irq_xfer_data;
+ struct spi_transfer irq_xfer_header;
+};
+
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
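+	/* frame data is now in the packet buffer; trigger the TX sequence */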
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+ lp->reg_xfer_data.len = 1;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->tx_skb = skb;
+
+ print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ skb->data, skb->len, 0);
+
+ lp->is_tx = 1;
+
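+	/* Force the sequencer idle first; the resulting SEQ interrupt
+	 * leads to mcr20a_handle_tx(), which writes the frame buffer and
+	 * then starts the TX sequence from its SPI completion callback.
+	 */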
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_IDLE;
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
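+	/* No hardware energy-detect scan is implemented here; a fixed
+	 * placeholder level is reported.
+	 */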
+ WARN_ON(!level);
+ *level = 0xbe;
+ return 0;
+}
+
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* frequency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB,
+ PLL_FRAC[channel - 11]);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* No slotted operation */
+	dev_dbg(printdev(lp), "no slotted operation\n");
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+				 DAR_PHY_CTRL1_SLOTTED, 0x0);
+	if (ret < 0)
+		return ret;
+
+	/* enable irq */
+	enable_irq(lp->spi->irq);
+
+	/* Unmask SEQ interrupt */
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+				 DAR_PHY_CTRL2_SEQMSK, 0x0);
+	if (ret < 0)
+		return ret;
+
+	/* Start the RX sequence */
+	dev_dbg(printdev(lp), "start the RX sequence\n");
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+				 DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* stop all running sequence */
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ /* disable irq */
+ disable_irq(lp->spi->irq);
+}
+
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+ struct ieee802154_hw_addr_filt *filt,
+ unsigned long changed)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+ u16 addr = le16_to_cpu(filt->short_addr);
+
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr);
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+ u16 pan = le16_to_cpu(filt->pan_id);
+
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan);
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+ u8 addr[8], i;
+
+ memcpy(addr, &filt->ieee_addr, 8);
+ for (i = 0; i < 8; i++)
+ regmap_write(lp->regmap_iar,
+ IAR_MACLONGADDRS0_0 + i, addr[i]);
+ }
+
+ if (changed & IEEE802154_AFILT_PANC_CHANGED) {
+ if (filt->pan_coord) {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x10);
+ } else {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x00);
+ }
+ }
+
+ return 0;
+}
+
+/* -30 dBm to 10 dBm */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+ -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+ -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+ for (i = 0; i < lp->hw->phy->supported.tx_powers_size; i++) {
+ if (lp->hw->phy->supported.tx_powers[i] == mbm)
+ return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+ ((i + 8) & 0x1F));
+ }
+
+ return -EINVAL;
+}
+
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+ const struct wpan_phy_cca *cca)
+{
+ struct mcr20a_local *lp = hw->priv;
+ unsigned int cca_mode = 0xff;
+ bool cca_mode_and = false;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* mapping 802.15.4 to driver spec */
+ switch (cca->mode) {
+ case NL802154_CCA_ENERGY:
+ cca_mode = MCR20A_CCA_MODE1;
+ break;
+ case NL802154_CCA_CARRIER:
+ cca_mode = MCR20A_CCA_MODE2;
+ break;
+ case NL802154_CCA_ENERGY_CARRIER:
+ switch (cca->opt) {
+ case NL802154_CCA_OPT_ENERGY_CARRIER_AND:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = true;
+ break;
+ case NL802154_CCA_OPT_ENERGY_CARRIER_OR:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_CCATYPE_MASK,
+ cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+ if (ret < 0)
+ return ret;
+
+ if (cca_mode == MCR20A_CCA_MODE3) {
+ if (cca_mode_and) {
+ ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x08);
+ } else {
+ ret = regmap_update_bits(lp->regmap_iar,
+ IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x00);
+ }
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+ if (hw->phy->supported.cca_ed_levels[i] == mbm)
+ return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+ }
+
+ return 0;
+}
+
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+	u8 rx_frame_filter_reg = 0x0;
+
+	dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+	if (on) {
+		/* All frame types accepted */
+ rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+ rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+ IAR_RX_FRAME_FLT_NS_FT);
+
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS,
+ DAR_PHY_CTRL4_PROMISCUOUS);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ rx_frame_filter_reg);
+ if (ret < 0)
+ return ret;
+ } else {
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct ieee802154_ops mcr20a_hw_ops = {
+ .owner = THIS_MODULE,
+ .xmit_async = mcr20a_xmit,
+ .ed = mcr20a_ed,
+ .set_channel = mcr20a_set_channel,
+ .start = mcr20a_start,
+ .stop = mcr20a_stop,
+ .set_hw_addr_filt = mcr20a_set_hw_addr_filt,
+ .set_txpower = mcr20a_set_txpower,
+ .set_cca_mode = mcr20a_set_cca_mode,
+ .set_cca_ed_level = mcr20a_set_cca_ed_level,
+ .set_promiscuous_mode = mcr20a_set_promiscuous_mode,
+};
+
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Start the RX sequence */
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ struct sk_buff *skb;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ dev_dbg(printdev(lp), "RX is done\n");
+
+ if (!ieee802154_is_valid_psdu_len(len)) {
+ dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+ len = IEEE802154_MTU;
+ }
+
+ len = len - 2; /* get rid of frame check field */
+
+ skb = dev_alloc_skb(len);
+	if (!skb) {
+		/* restart RX so a failed allocation does not stall reception */
+		mcr20a_request_rx(lp);
+		return;
+	}
+
+ memcpy(skb_put(skb, len), lp->rx_buf, len);
+ ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+ print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ lp->rx_buf, len, 0);
+ pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+ /* start RX sequence */
+ mcr20a_request_rx(lp);
+}
+
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* get the length of received frame */
+ len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ dev_dbg(printdev(lp), "frame len : %d\n", len);
+
+ /* prepare to read the rx buf */
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+ lp->rx_xfer_buf.len = len;
+
+ ret = spi_async(lp->spi, &lp->rx_buf_msg);
+ if (ret)
+		dev_err(printdev(lp), "failed to read rx buffer\n");
+}
+
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
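+	/* async RX pipeline: read DAR_RX_FRM_LEN here, then fetch the
+	 * packet buffer and LQI from the chained completion handlers
+	 */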
+ lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+ lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+ return mcr20a_request_rx(lp);
+}
+
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* write tx buffer */
+ lp->tx_header[0] = MCR20A_BURST_WRITE_PACKET_BUF;
+ /* add 2 bytes of FCS */
+ lp->tx_len[0] = lp->tx_skb->len + 2;
+ lp->tx_xfer_buf.tx_buf = lp->tx_skb->data;
+ /* add 1 byte psduLength */
+ lp->tx_xfer_buf.len = lp->tx_skb->len + 1;
+
+ ret = spi_async(lp->spi, &lp->tx_buf_msg);
+ if (ret) {
+ dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ enable_irq(lp->spi->irq);
+
+ dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+ lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
+ switch (seq_state) {
+ /* TX IRQ, RX IRQ and SEQ IRQ */
+ case (0x03):
+ if (lp->is_tx) {
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. No ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ }
+ break;
+ case (0x05):
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ break;
+ case (0x07):
+ if (lp->is_tx) {
+ /* tx is done */
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ } else {
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ }
+ break;
+ case (0x01):
+ if (lp->is_tx) {
+ dev_dbg(printdev(lp), "TX is starting\n");
+ mcr20a_handle_tx(lp);
+ } else {
+			dev_dbg(printdev(lp), "MCR20A is stopped\n");
+ }
+ break;
+ }
+}
+
+static void mcr20a_irq_status_complete(void *context)
+{
+ int ret;
+ struct mcr20a_local *lp = context;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ lp->reg_msg.complete = mcr20a_irq_clean_complete;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+ memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+ lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+
+ if (ret)
+ dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+ struct mcr20a_local *lp = data;
+ int ret;
+
+ disable_irq_nosync(irq);
+
+ lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+ /* read IRQSTSx */
+ ret = spi_async(lp->spi, &lp->irq_msg);
+ if (ret) {
+ enable_irq(irq);
+ return IRQ_NONE;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int mcr20a_get_platform_data(struct spi_device *spi,
+ struct mcr20a_platform_data *pdata)
+{
+ int ret = 0;
+
+ if (!spi->dev.of_node)
+ return -EINVAL;
+
+ pdata->rst_gpio = of_get_named_gpio(spi->dev.of_node, "rst_b-gpio", 0);
+ dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+ return ret;
+}
+
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+ u8 i;
+ struct ieee802154_hw *hw = lp->hw;
+ struct wpan_phy *phy = lp->hw->phy;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ phy->symbol_duration = 16;
+ phy->lifs_period = 40;
+ phy->sifs_period = 12;
+
+ hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+ IEEE802154_HW_AFILT |
+ IEEE802154_HW_PROMISCUOUS;
+
+ phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+ WPAN_PHY_FLAG_CCA_MODE;
+
+ phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+ BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
+ phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+ BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+	/* initializing cca_ed_levels */
+ for (i = MCR20A_MAX_CCA_THRESHOLD; i < MCR20A_MIN_CCA_THRESHOLD + 1;
+ ++i) {
+ mcr20a_ed_levels[i] = -i * 100;
+ }
+
+ phy->supported.cca_ed_levels = mcr20a_ed_levels;
+ phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+
+ phy->cca.mode = NL802154_CCA_ENERGY;
+
+ phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+ phy->current_page = 0;
+ /* MCR20A default reset value */
+ phy->current_channel = 20;
+ phy->supported.tx_powers = mcr20a_powers;
+ phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
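+	/* index 75 corresponds to -7500 mbm (-75 dBm), matching the
+	 * IAR_CCA1_THRESH default (0x4B) programmed in mcr20a_phy_init()
+	 */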
+ phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+ phy->transmit_power = phy->supported.tx_powers[0x0F];
+}
+
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->tx_buf_msg);
+ lp->tx_buf_msg.context = lp;
+ lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+ lp->tx_xfer_header.len = 1;
+ lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+ lp->tx_xfer_len.len = 1;
+ lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+ spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->reg_msg);
+ lp->reg_msg.context = lp;
+
+ lp->reg_xfer_cmd.len = 1;
+ lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+ lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+ lp->reg_xfer_data.rx_buf = lp->reg_data;
+ lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+ spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+ spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+ spi_message_init(&lp->rx_buf_msg);
+ lp->rx_buf_msg.context = lp;
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_xfer_header.len = 1;
+ lp->rx_xfer_header.tx_buf = lp->rx_header;
+ lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+ lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+ lp->rx_xfer_lqi.len = 1;
+ lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+ spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->irq_msg);
+ lp->irq_msg.context = lp;
+ lp->irq_msg.complete = mcr20a_irq_status_complete;
+ lp->irq_xfer_header.len = 1;
+ lp->irq_xfer_header.tx_buf = lp->irq_header;
+ lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+ lp->irq_xfer_data.len = MCR20A_IRQSTS_NUM;
+ lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+ spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+ spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+ u8 index;
+ unsigned int phy_reg = 0;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Disable Tristate on COCO MISO for SPI reads */
+ ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+ * immediately after init
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS2 */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+ DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+ DAR_IRQSTS2_WAKE_IRQ);
+ if (ret)
+ goto err_ret;
+
+ /* Disable all timer interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL1 : default HW settings + AUTOACK enabled */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+
+ /* PHY_CTRL2 : disable all interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL3 : disable all timers and remaining interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+ DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+ DAR_PHY_CTRL3_WAKE_MSK);
+ if (ret)
+ goto err_ret;
+
+ /* SRC_CTRL : enable Acknowledge Frame Pending and
+ * Source Address Matching Enable
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+ DAR_SRC_CTRL_ACK_FRM_PND |
+ (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+ if (ret)
+ goto err_ret;
+
+ /* RX_FRAME_FILTER */
+ /* FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret)
+ goto err_ret;
+
+ dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+ MCR20A_OVERWRITE_VERSION);
+
+ /* Overwrites direct registers */
+ ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+ MCR20A_OVERWRITE_VERSION);
+ if (ret)
+ goto err_ret;
+
+ /* Overwrites indirect registers */
+ ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+ ARRAY_SIZE(mar20a_iar_overwrites));
+ if (ret)
+ goto err_ret;
+
+ /* Clear HW indirect queue */
+ dev_dbg(printdev(lp), "clear HW indirect queue\n");
+ for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+ phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+ DAR_SRC_CTRL_INDEX_SHIFT)
+ | (DAR_SRC_CTRL_SRCADDR_EN)
+ | (DAR_SRC_CTRL_INDEX_DISABLE));
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+ phy_reg = 0;
+ }
+
+ /* Assign HW Indirect hash table to PAN0 */
+ ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Clear current lvl */
+ phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+ /* Set new lvl */
+ phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+ IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+ ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Set CCA threshold to -75 dBm */
+ ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+ if (ret)
+ goto err_ret;
+
+	/* Set prescaler to obtain 1 symbol (16us) timebase */
+ ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+ if (ret)
+ goto err_ret;
+
+ /* Enable autodoze mode. */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+ DAR_PWR_MODES_AUTODOZE,
+ DAR_PWR_MODES_AUTODOZE);
+ if (ret)
+ goto err_ret;
+
+ /* Disable clk_out */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+ DAR_CLK_OUT_CTRL_EN, 0x0);
+ if (ret)
+ goto err_ret;
+
+ return 0;
+
+err_ret:
+ return ret;
+}
+
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+ struct ieee802154_hw *hw;
+ struct mcr20a_local *lp;
+ struct mcr20a_platform_data *pdata;
+ int irq_type;
+ int ret = -ENOMEM;
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ if (!spi->irq) {
+ dev_err(&spi->dev, "no IRQ specified\n");
+ return -EINVAL;
+ }
+
+ /* devm allocation avoids leaking pdata on the error paths below */
+ pdata = devm_kzalloc(&spi->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ /* set mcr20a platform data */
+ ret = mcr20a_get_platform_data(spi, pdata);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+ return ret;
+ }
+
+ /* init reset gpio */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+ GPIOF_OUT_INIT_HIGH, "reset");
+ if (ret)
+ return ret;
+ }
+
+ /* reset mcr20a */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 0);
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 1);
+ usleep_range(120, 240);
+ }
+
+ /* allocate ieee802154_hw and private data */
+ hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+ if (!hw) {
+ dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+ return -ENOMEM;
+ }
+
+ /* init mcr20a local data */
+ lp = hw->priv;
+ lp->hw = hw;
+ lp->spi = spi;
+ lp->spi->dev.platform_data = pdata;
+ lp->pdata = pdata;
+
+ /* init ieee802154_hw */
+ hw->parent = &spi->dev;
+ ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+ /* init buf */
+ lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+
+ if (!lp->buf)
+ return -ENOMEM;
+
+ mcr20a_setup_tx_spi_messages(lp);
+ mcr20a_setup_rx_spi_messages(lp);
+ mcr20a_setup_irq_spi_messages(lp);
+
+ /* setup regmap */
+ lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+ if (IS_ERR(lp->regmap_dar)) {
+ ret = PTR_ERR(lp->regmap_dar);
+ dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+ ret);
+ goto free_dev;
+ }
+
+ lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+ if (IS_ERR(lp->regmap_iar)) {
+ ret = PTR_ERR(lp->regmap_iar);
+ dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+ goto free_dev;
+ }
+
+ mcr20a_hw_setup(lp);
+
+ spi_set_drvdata(spi, lp);
+
+ ret = mcr20a_phy_init(lp);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+ goto free_dev;
+ }
+
+ irq_type = irq_get_trigger_type(spi->irq);
+ if (!irq_type)
+ irq_type = IRQF_TRIGGER_FALLING;
+
+ ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+ irq_type, dev_name(&spi->dev), lp);
+ if (ret) {
+ dev_err(&spi->dev, "could not request_irq for mcr20a\n");
+ ret = -ENODEV;
+ goto free_dev;
+ }
+
+ /* keep the IRQ disabled by default until the hardware is started */
+ disable_irq(spi->irq);
+
+ ret = ieee802154_register_hw(hw);
+ if (ret) {
+ dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+ goto free_dev;
+ }
+
+ return ret;
+
+free_dev:
+ ieee802154_free_hw(lp->hw);
+
+ return ret;
+}
+
+static int mcr20a_remove(struct spi_device *spi)
+{
+ struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ ieee802154_unregister_hw(lp->hw);
+ ieee802154_free_hw(lp->hw);
+
+ return 0;
+}
+
+static const struct of_device_id mcr20a_of_match[] = {
+ { .compatible = "nxp,mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+static const struct spi_device_id mcr20a_device_id[] = {
+ { .name = "mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+static struct spi_driver mcr20a_driver = {
+ .id_table = mcr20a_device_id,
+ .driver = {
+ .of_match_table = of_match_ptr(mcr20a_of_match),
+ .name = "mcr20a",
+ },
+ .probe = mcr20a_probe,
+ .remove = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail>");
diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h
new file mode 100644
index 000000000000..6da4fd00b3c5
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.h
@@ -0,0 +1,498 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Access Register */
+#define DAR_IRQ_STS1 0x00
+#define DAR_IRQ_STS2 0x01
+#define DAR_IRQ_STS3 0x02
+#define DAR_PHY_CTRL1 0x03
+#define DAR_PHY_CTRL2 0x04
+#define DAR_PHY_CTRL3 0x05
+#define DAR_RX_FRM_LEN 0x06
+#define DAR_PHY_CTRL4 0x07
+#define DAR_SRC_CTRL 0x08
+#define DAR_SRC_ADDRS_SUM_LSB 0x09
+#define DAR_SRC_ADDRS_SUM_MSB 0x0A
+#define DAR_CCA1_ED_FNL 0x0B
+#define DAR_EVENT_TMR_LSB 0x0C
+#define DAR_EVENT_TMR_MSB 0x0D
+#define DAR_EVENT_TMR_USB 0x0E
+#define DAR_TIMESTAMP_LSB 0x0F
+#define DAR_TIMESTAMP_MSB 0x10
+#define DAR_TIMESTAMP_USB 0x11
+#define DAR_T3CMP_LSB 0x12
+#define DAR_T3CMP_MSB 0x13
+#define DAR_T3CMP_USB 0x14
+#define DAR_T2PRIMECMP_LSB 0x15
+#define DAR_T2PRIMECMP_MSB 0x16
+#define DAR_T1CMP_LSB 0x17
+#define DAR_T1CMP_MSB 0x18
+#define DAR_T1CMP_USB 0x19
+#define DAR_T2CMP_LSB 0x1A
+#define DAR_T2CMP_MSB 0x1B
+#define DAR_T2CMP_USB 0x1C
+#define DAR_T4CMP_LSB 0x1D
+#define DAR_T4CMP_MSB 0x1E
+#define DAR_T4CMP_USB 0x1F
+#define DAR_PLL_INT0 0x20
+#define DAR_PLL_FRAC0_LSB 0x21
+#define DAR_PLL_FRAC0_MSB 0x22
+#define DAR_PA_PWR 0x23
+#define DAR_SEQ_STATE 0x24
+#define DAR_LQI_VALUE 0x25
+#define DAR_RSSI_CCA_CONT 0x26
+/*------------------ 0x27 */
+#define DAR_ASM_CTRL1 0x28
+#define DAR_ASM_CTRL2 0x29
+#define DAR_ASM_DATA_0 0x2A
+#define DAR_ASM_DATA_1 0x2B
+#define DAR_ASM_DATA_2 0x2C
+#define DAR_ASM_DATA_3 0x2D
+#define DAR_ASM_DATA_4 0x2E
+#define DAR_ASM_DATA_5 0x2F
+#define DAR_ASM_DATA_6 0x30
+#define DAR_ASM_DATA_7 0x31
+#define DAR_ASM_DATA_8 0x32
+#define DAR_ASM_DATA_9 0x33
+#define DAR_ASM_DATA_A 0x34
+#define DAR_ASM_DATA_B 0x35
+#define DAR_ASM_DATA_C 0x36
+#define DAR_ASM_DATA_D 0x37
+#define DAR_ASM_DATA_E 0x38
+#define DAR_ASM_DATA_F 0x39
+/*----------------------- 0x3A */
+#define DAR_OVERWRITE_VER 0x3B
+#define DAR_CLK_OUT_CTRL 0x3C
+#define DAR_PWR_MODES 0x3D
+#define IAR_INDEX 0x3E
+#define IAR_DATA 0x3F
+
+/* Indirect Register Memory */
+#define IAR_PART_ID 0x00
+#define IAR_XTAL_TRIM 0x01
+#define IAR_PMC_LP_TRIM 0x02
+#define IAR_MACPANID0_LSB 0x03
+#define IAR_MACPANID0_MSB 0x04
+#define IAR_MACSHORTADDRS0_LSB 0x05
+#define IAR_MACSHORTADDRS0_MSB 0x06
+#define IAR_MACLONGADDRS0_0 0x07
+#define IAR_MACLONGADDRS0_8 0x08
+#define IAR_MACLONGADDRS0_16 0x09
+#define IAR_MACLONGADDRS0_24 0x0A
+#define IAR_MACLONGADDRS0_32 0x0B
+#define IAR_MACLONGADDRS0_40 0x0C
+#define IAR_MACLONGADDRS0_48 0x0D
+#define IAR_MACLONGADDRS0_56 0x0E
+#define IAR_RX_FRAME_FILTER 0x0F
+#define IAR_PLL_INT1 0x10
+#define IAR_PLL_FRAC1_LSB 0x11
+#define IAR_PLL_FRAC1_MSB 0x12
+#define IAR_MACPANID1_LSB 0x13
+#define IAR_MACPANID1_MSB 0x14
+#define IAR_MACSHORTADDRS1_LSB 0x15
+#define IAR_MACSHORTADDRS1_MSB 0x16
+#define IAR_MACLONGADDRS1_0 0x17
+#define IAR_MACLONGADDRS1_8 0x18
+#define IAR_MACLONGADDRS1_16 0x19
+#define IAR_MACLONGADDRS1_24 0x1A
+#define IAR_MACLONGADDRS1_32 0x1B
+#define IAR_MACLONGADDRS1_40 0x1C
+#define IAR_MACLONGADDRS1_48 0x1D
+#define IAR_MACLONGADDRS1_56 0x1E
+#define IAR_DUAL_PAN_CTRL 0x1F
+#define IAR_DUAL_PAN_DWELL 0x20
+#define IAR_DUAL_PAN_STS 0x21
+#define IAR_CCA1_THRESH 0x22
+#define IAR_CCA1_ED_OFFSET_COMP 0x23
+#define IAR_LQI_OFFSET_COMP 0x24
+#define IAR_CCA_CTRL 0x25
+#define IAR_CCA2_CORR_PEAKS 0x26
+#define IAR_CCA2_CORR_THRESH 0x27
+#define IAR_TMR_PRESCALE 0x28
+/*-------------------- 0x29 */
+#define IAR_GPIO_DATA 0x2A
+#define IAR_GPIO_DIR 0x2B
+#define IAR_GPIO_PUL_EN 0x2C
+#define IAR_GPIO_PUL_SEL 0x2D
+#define IAR_GPIO_DS 0x2E
+/*------------------ 0x2F */
+#define IAR_ANT_PAD_CTRL 0x30
+#define IAR_MISC_PAD_CTRL 0x31
+#define IAR_BSM_CTRL 0x32
+/*------------------- 0x33 */
+#define IAR_RNG 0x34
+#define IAR_RX_BYTE_COUNT 0x35
+#define IAR_RX_WTR_MARK 0x36
+#define IAR_SOFT_RESET 0x37
+#define IAR_TXDELAY 0x38
+#define IAR_ACKDELAY 0x39
+#define IAR_SEQ_MGR_CTRL 0x3A
+#define IAR_SEQ_MGR_STS 0x3B
+#define IAR_SEQ_T_STS 0x3C
+#define IAR_ABORT_STS 0x3D
+#define IAR_CCCA_BUSY_CNT 0x3E
+#define IAR_SRC_ADDR_CHECKSUM1 0x3F
+#define IAR_SRC_ADDR_CHECKSUM2 0x40
+#define IAR_SRC_TBL_VALID1 0x41
+#define IAR_SRC_TBL_VALID2 0x42
+#define IAR_FILTERFAIL_CODE1 0x43
+#define IAR_FILTERFAIL_CODE2 0x44
+#define IAR_SLOT_PRELOAD 0x45
+/*-------------------- 0x46 */
+#define IAR_CORR_VT 0x47
+#define IAR_SYNC_CTRL 0x48
+#define IAR_PN_LSB_0 0x49
+#define IAR_PN_LSB_1 0x4A
+#define IAR_PN_MSB_0 0x4B
+#define IAR_PN_MSB_1 0x4C
+#define IAR_CORR_NVAL 0x4D
+#define IAR_TX_MODE_CTRL 0x4E
+#define IAR_SNF_THR 0x4F
+#define IAR_FAD_THR 0x50
+#define IAR_ANT_AGC_CTRL 0x51
+#define IAR_AGC_THR1 0x52
+#define IAR_AGC_THR2 0x53
+#define IAR_AGC_HYS 0x54
+#define IAR_AFC 0x55
+/*------------------- 0x56 */
+/*------------------- 0x57 */
+#define IAR_PHY_STS 0x58
+#define IAR_RX_MAX_CORR 0x59
+#define IAR_RX_MAX_PREAMBLE 0x5A
+#define IAR_RSSI 0x5B
+/*------------------- 0x5C */
+/*------------------- 0x5D */
+#define IAR_PLL_DIG_CTRL 0x5E
+#define IAR_VCO_CAL 0x5F
+#define IAR_VCO_BEST_DIFF 0x60
+#define IAR_VCO_BIAS 0x61
+#define IAR_KMOD_CTRL 0x62
+#define IAR_KMOD_CAL 0x63
+#define IAR_PA_CAL 0x64
+#define IAR_PA_PWRCAL 0x65
+#define IAR_ATT_RSSI1 0x66
+#define IAR_ATT_RSSI2 0x67
+#define IAR_RSSI_OFFSET 0x68
+#define IAR_RSSI_SLOPE 0x69
+#define IAR_RSSI_CAL1 0x6A
+#define IAR_RSSI_CAL2 0x6B
+/*------------------- 0x6C */
+/*------------------- 0x6D */
+#define IAR_XTAL_CTRL 0x6E
+#define IAR_XTAL_COMP_MIN 0x6F
+#define IAR_XTAL_COMP_MAX 0x70
+#define IAR_XTAL_GM 0x71
+/*------------------- 0x72 */
+/*------------------- 0x73 */
+#define IAR_LNA_TUNE 0x74
+#define IAR_LNA_AGCGAIN 0x75
+/*------------------- 0x76 */
+/*------------------- 0x77 */
+#define IAR_CHF_PMA_GAIN 0x78
+#define IAR_CHF_IBUF 0x79
+#define IAR_CHF_QBUF 0x7A
+#define IAR_CHF_IRIN 0x7B
+#define IAR_CHF_QRIN 0x7C
+#define IAR_CHF_IL 0x7D
+#define IAR_CHF_QL 0x7E
+#define IAR_CHF_CC1 0x7F
+#define IAR_CHF_CCL 0x80
+#define IAR_CHF_CC2 0x81
+#define IAR_CHF_IROUT 0x82
+#define IAR_CHF_QROUT 0x83
+/*------------------- 0x84 */
+/*------------------- 0x85 */
+#define IAR_RSSI_CTRL 0x86
+/*------------------- 0x87 */
+/*------------------- 0x88 */
+#define IAR_PA_BIAS 0x89
+#define IAR_PA_TUNING 0x8A
+/*------------------- 0x8B */
+/*------------------- 0x8C */
+#define IAR_PMC_HP_TRIM 0x8D
+#define IAR_VREGA_TRIM 0x8E
+/*------------------- 0x8F */
+/*------------------- 0x90 */
+#define IAR_VCO_CTRL1 0x91
+#define IAR_VCO_CTRL2 0x92
+/*------------------- 0x93 */
+/*------------------- 0x94 */
+#define IAR_ANA_SPARE_OUT1 0x95
+#define IAR_ANA_SPARE_OUT2 0x96
+#define IAR_ANA_SPARE_IN 0x97
+#define IAR_MISCELLANEOUS 0x98
+/*------------------- 0x99 */
+#define IAR_SEQ_MGR_OVRD0 0x9A
+#define IAR_SEQ_MGR_OVRD1 0x9B
+#define IAR_SEQ_MGR_OVRD2 0x9C
+#define IAR_SEQ_MGR_OVRD3 0x9D
+#define IAR_SEQ_MGR_OVRD4 0x9E
+#define IAR_SEQ_MGR_OVRD5 0x9F
+#define IAR_SEQ_MGR_OVRD6 0xA0
+#define IAR_SEQ_MGR_OVRD7 0xA1
+/*------------------- 0xA2 */
+#define IAR_TESTMODE_CTRL 0xA3
+#define IAR_DTM_CTRL1 0xA4
+#define IAR_DTM_CTRL2 0xA5
+#define IAR_ATM_CTRL1 0xA6
+#define IAR_ATM_CTRL2 0xA7
+#define IAR_ATM_CTRL3 0xA8
+/*------------------- 0xA9 */
+#define IAR_LIM_FE_TEST_CTRL 0xAA
+#define IAR_CHF_TEST_CTRL 0xAB
+#define IAR_VCO_TEST_CTRL 0xAC
+#define IAR_PLL_TEST_CTRL 0xAD
+#define IAR_PA_TEST_CTRL 0xAE
+#define IAR_PMC_TEST_CTRL 0xAF
+#define IAR_SCAN_DTM_PROTECT_1 0xFE
+#define IAR_SCAN_DTM_PROTECT_0 0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ BIT(4)
+#define DAR_IRQSTS1_CCAIRQ BIT(3)
+#define DAR_IRQSTS1_RXIRQ BIT(2)
+#define DAR_IRQSTS1_TXIRQ BIT(1)
+#define DAR_IRQSTS1_SEQIRQ BIT(0)
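+/* The driver treats these status flags as write-1-to-clear: e.g.
+ * mcr20a_phy_init() writes 0xEF to DAR_IRQ_STS1, acking every flag
+ * except DAR_IRQSTS1_RXWTRMRKIRQ (BIT(4)).
+ */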
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID BIT(7)
+#define DAR_IRQSTS2_CCA BIT(6)
+#define DAR_IRQSTS2_SRCADDR BIT(5)
+#define DAR_IRQSTS2_PI BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK BIT(7)
+#define DAR_IRQSTS3_TMR3MSK BIT(6)
+#define DAR_IRQSTS3_TMR2MSK BIT(5)
+#define DAR_IRQSTS3_TMR1MSK BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT 5
+#define DAR_PHY_CTRL1_RXACKRQD BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK 0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK BIT(3)
+#define DAR_PHY_CTRL2_RXMSK BIT(2)
+#define DAR_PHY_CTRL2_TXMSK BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK (0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0 BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE (3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT (3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK (0x18)
+#define DAR_PHY_CTRL4_TMRLOAD BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN BIT(0)
+
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX (0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT (4)
+#define DAR_SRC_CTRL_ACK_FRM_PND BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR BIT(7)
+#define DAR_ASM_CTRL1_START BIT(6)
+#define DAR_ASM_CTRL1_SELFTST BIT(5)
+#define DAR_ASM_CTRL1_CTR BIT(4)
+#define DAR_ASM_CTRL1_CBC BIT(3)
+#define DAR_ASM_CTRL1_AES BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL (7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT (5)
+#define DAR_ASM_CTRL2_TSTPAS BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ BIT(6)
+#define DAR_CLK_OUT_CTRL_SR BIT(5)
+#define DAR_CLK_OUT_CTRL_DS BIT(4)
+#define DAR_CLK_OUT_CTRL_EN BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV (7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY BIT(5)
+#define DAR_PWR_MODES_XTALEN BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN BIT(3)
+#define DAR_PWR_MODES_AUTODOZE BIT(1)
+#define DAR_PWR_MODES_PMC_MODE BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER (0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT (6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT (4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1 BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1 BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0 BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN (0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1 BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL (0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT (4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN (3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR (1)
+
+/* ANT_AGC_CTRL bits */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT (0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK (1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT (1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK BIT(IAR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN (1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST BIT(7)
+#define IAR_SOFT_RESET_REGS_RST BIT(4)
+#define IAR_SOFT_RESET_PLL_RST BIT(3)
+#define IAR_SOFT_RESET_TX_RST BIT(2)
+#define IAR_SOFT_RESET_RX_RST BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL (3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT (6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL (7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8 (3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR BIT(6)
+#define IAR_PHY_STS_PLL_LOCK BIT(5)
+#define IAR_PHY_STS_CRCVALID BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL BIT(2)
+#define IAR_PHY_STS_SFD_DET BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN BIT(6)
+#define IAR_DTM_CTRL1_PAGE5 BIT(5)
+#define IAR_DTM_CTRL1_PAGE4 BIT(4)
+#define IAR_DTM_CTRL1_PAGE3 BIT(3)
+#define IAR_DTM_CTRL1_PAGE2 BIT(2)
+#define IAR_DTM_CTRL1_PAGE1 BIT(1)
+#define IAR_DTM_CTRL1_PAGE0 BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2 BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1 BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0 BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK (7)
+
+#endif /* _MCR20A_H */
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 5166575a164d..adb826f55e60 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -74,6 +74,7 @@ struct ipvl_dev {
DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
netdev_features_t sfeatures;
u32 msg_enable;
+ spinlock_t addrs_lock;
};
struct ipvl_addr {
@@ -176,4 +177,10 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
void ipvlan_link_delete(struct net_device *dev, struct list_head *head);
void ipvlan_link_setup(struct net_device *dev);
int ipvlan_link_register(struct rtnl_link_ops *ops);
+
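+/* An ipvlan master is now identified by the rx_handler registered on the
+ * lower device rather than by a dedicated priv_flag; IFF_IPVLAN_MASTER is
+ * dropped elsewhere in this patch.
+ */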
+static inline bool netif_is_ipvlan_port(const struct net_device *dev)
+{
+ return rcu_access_pointer(dev->rx_handler) == ipvlan_handle_frame;
+}
+
#endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index c1f008fe4e1d..1a8132eb2a3e 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -35,6 +35,7 @@ void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);
+#if IS_ENABLED(CONFIG_IPV6)
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
const struct in6_addr *ip6_addr = iaddr;
@@ -42,6 +43,12 @@ static u8 ipvlan_get_v6_hash(const void *iaddr)
return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
IPVLAN_HASH_MASK;
}
+#else
+static u8 ipvlan_get_v6_hash(const void *iaddr)
+{
+ return 0;
+}
+#endif
static u8 ipvlan_get_v4_hash(const void *iaddr)
{
@@ -51,6 +58,23 @@ static u8 ipvlan_get_v4_hash(const void *iaddr)
IPVLAN_HASH_MASK;
}
+static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
+{
+ if (!is_v6 && addr->atype == IPVL_IPV4) {
+ struct in_addr *i4addr = (struct in_addr *)iaddr;
+
+ return addr->ip4addr.s_addr == i4addr->s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (is_v6 && addr->atype == IPVL_IPV6) {
+ struct in6_addr *i6addr = (struct in6_addr *)iaddr;
+
+ return ipv6_addr_equal(&addr->ip6addr, i6addr);
+#endif
+ }
+
+ return false;
+}
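+/* e.g. addr_equal(true, addr, &ip6h->saddr) matches an IPv6 entry; with
+ * CONFIG_IPV6 disabled the IPv6 arm compiles out and any is_v6 lookup
+ * returns false.
+ */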
+
static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
const void *iaddr, bool is_v6)
{
@@ -59,15 +83,9 @@ static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
ipvlan_get_v4_hash(iaddr);
- hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
- if (is_v6 && addr->atype == IPVL_IPV6 &&
- ipv6_addr_equal(&addr->ip6addr, iaddr))
- return addr;
- else if (!is_v6 && addr->atype == IPVL_IPV4 &&
- addr->ip4addr.s_addr ==
- ((struct in_addr *)iaddr)->s_addr)
+ hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
+ if (addr_equal(is_v6, addr, iaddr))
return addr;
- }
return NULL;
}
@@ -91,29 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
const void *iaddr, bool is_v6)
{
- struct ipvl_addr *addr;
+ struct ipvl_addr *addr, *ret = NULL;
- list_for_each_entry(addr, &ipvlan->addrs, anode) {
- if ((is_v6 && addr->atype == IPVL_IPV6 &&
- ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
- (!is_v6 && addr->atype == IPVL_IPV4 &&
- addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
- return addr;
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+ if (addr_equal(is_v6, addr, iaddr)) {
+ ret = addr;
+ break;
+ }
}
- return NULL;
+ rcu_read_unlock();
+ return ret;
}
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
struct ipvl_dev *ipvlan;
+ bool ret = false;
- ASSERT_RTNL();
-
- list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
- if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
- return true;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+ if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+ ret = true;
+ break;
+ }
}
- return false;
+ rcu_read_unlock();
+ return ret;
}
static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
@@ -150,6 +172,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
lyr3h = ip4h;
break;
}
+#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6): {
struct ipv6hdr *ip6h;
@@ -188,6 +211,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
}
break;
}
+#endif
default:
return NULL;
}
@@ -337,14 +361,18 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
{
struct ipvl_addr *addr = NULL;
- if (addr_type == IPVL_IPV6) {
+ switch (addr_type) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPVL_IPV6: {
struct ipv6hdr *ip6h;
struct in6_addr *i6addr;
ip6h = (struct ipv6hdr *)lyr3h;
i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
addr = ipvlan_ht_addr_lookup(port, i6addr, true);
- } else if (addr_type == IPVL_ICMPV6) {
+ break;
+ }
+ case IPVL_ICMPV6: {
struct nd_msg *ndmh;
struct in6_addr *i6addr;
@@ -356,14 +384,19 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
i6addr = &ndmh->target;
addr = ipvlan_ht_addr_lookup(port, i6addr, true);
}
- } else if (addr_type == IPVL_IPV4) {
+ break;
+ }
+#endif
+ case IPVL_IPV4: {
struct iphdr *ip4h;
__be32 *i4addr;
ip4h = (struct iphdr *)lyr3h;
i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
addr = ipvlan_ht_addr_lookup(port, i4addr, false);
- } else if (addr_type == IPVL_ARP) {
+ break;
+ }
+ case IPVL_ARP: {
struct arphdr *arph;
unsigned char *arp_ptr;
__be32 dip;
@@ -377,6 +410,8 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
memcpy(&dip, arp_ptr, 4);
addr = ipvlan_ht_addr_lookup(port, &dip, false);
+ break;
+ }
}
return addr;
@@ -420,6 +455,7 @@ out:
return ret;
}
+#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -456,6 +492,12 @@ err:
out:
return ret;
}
+#else
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+ return NET_XMIT_DROP;
+}
+#endif
static int ipvlan_process_outbound(struct sk_buff *skb)
{
@@ -464,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
/* In this mode we don't care about multicast and broadcast traffic */
if (is_multicast_ether_addr(ethh->h_dest)) {
- pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
- ntohs(skb->protocol));
+ pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+ ntohs(skb->protocol));
kfree_skb(skb);
goto out;
}
@@ -759,6 +801,7 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
goto out;
break;
}
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
{
struct dst_entry *dst;
@@ -774,10 +817,12 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
};
skb_dst_drop(skb);
- dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+ dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+ skb, flags);
skb_dst_set(skb, dst);
break;
}
+#endif
default:
break;
}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 2469df118fbf..450eec264a5e 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -22,12 +22,14 @@ static const struct nf_hook_ops ipvl_nfops[] = {
.hooknum = NF_INET_LOCAL_IN,
.priority = INT_MAX,
},
+#if IS_ENABLED(CONFIG_IPV6)
{
.hook = ipvlan_nf_input,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = INT_MAX,
},
+#endif
};
static const struct l3mdev_ops ipvl_l3mdev_ops = {
@@ -127,7 +129,6 @@ static int ipvlan_port_create(struct net_device *dev)
if (err)
goto err;
- dev->priv_flags |= IFF_IPVLAN_MASTER;
return 0;
err:
@@ -140,7 +141,6 @@ static void ipvlan_port_destroy(struct net_device *dev)
struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
struct sk_buff *skb;
- dev->priv_flags &= ~IFF_IPVLAN_MASTER;
if (port->mode == IPVLAN_MODE_L3S) {
dev->priv_flags &= ~IFF_L3MDEV_MASTER;
ipvlan_unregister_nf_hook(dev_net(dev));
@@ -176,7 +176,7 @@ static int ipvlan_init(struct net_device *dev)
dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
(phy_dev->state & IPVLAN_STATE_MASK);
dev->features = phy_dev->features & IPVLAN_FEATURES;
- dev->features |= NETIF_F_LLTX;
+ dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED;
dev->gso_max_size = phy_dev->gso_max_size;
dev->gso_max_segs = phy_dev->gso_max_segs;
dev->hard_header_len = phy_dev->hard_header_len;
@@ -225,8 +225,10 @@ static int ipvlan_open(struct net_device *dev)
else
dev->flags &= ~IFF_NOARP;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_add(ipvlan, addr);
+ rcu_read_unlock();
return dev_uc_add(phy_dev, phy_dev->dev_addr);
}
@@ -242,8 +244,10 @@ static int ipvlan_stop(struct net_device *dev)
dev_uc_del(phy_dev, phy_dev->dev_addr);
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
+ rcu_read_unlock();
return 0;
}
@@ -417,6 +421,12 @@ static const struct header_ops ipvlan_header_ops = {
.cache_update = eth_header_cache_update,
};
+static bool netif_is_ipvlan(const struct net_device *dev)
+{
+ /* both ipvlan and ipvtap devices use the same netdev_ops */
+ return dev->netdev_ops == &ipvlan_netdev_ops;
+}
+
static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
@@ -586,6 +596,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
ipvlan->sfeatures = IPVLAN_FEATURES;
ipvlan_adjust_mtu(ipvlan, phy_dev);
INIT_LIST_HEAD(&ipvlan->addrs);
+ spin_lock_init(&ipvlan->addrs_lock);
/* TODO Probably put random address here to be presented to the
* world but keep using the physical-dev address for the outgoing
@@ -593,7 +604,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
*/
memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
- dev->priv_flags |= IFF_IPVLAN_SLAVE;
+ dev->priv_flags |= IFF_NO_RX_HANDLER;
err = register_netdevice(dev);
if (err < 0)
@@ -663,11 +674,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_addr *addr, *next;
+ spin_lock_bh(&ipvlan->addrs_lock);
list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
kfree_rcu(addr, rcu);
}
+ spin_unlock_bh(&ipvlan->addrs_lock);
ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
list_del_rcu(&ipvlan->pnode);
@@ -758,8 +771,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
if (dev->reg_state != NETREG_UNREGISTERING)
break;
- list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
- pnode)
+ list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
&lst_kill);
unregister_netdevice_many(&lst_kill);
@@ -791,6 +803,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
return NOTIFY_DONE;
}
+/* the caller must hold the addrs lock */
static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
@@ -800,14 +813,17 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
return -ENOMEM;
addr->master = ipvlan;
- if (is_v6) {
- memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
- addr->atype = IPVL_IPV6;
- } else {
+ if (!is_v6) {
memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
addr->atype = IPVL_IPV4;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
+ addr->atype = IPVL_IPV6;
+#endif
}
- list_add_tail(&addr->anode, &ipvlan->addrs);
+
+ list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
/* If the interface is not up, the address will be added to the hash
* list by ipvlan_open.
@@ -822,32 +838,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
+ spin_lock_bh(&ipvlan->addrs_lock);
addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
- if (!addr)
+ if (!addr) {
+ spin_unlock_bh(&ipvlan->addrs_lock);
return;
+ }
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
+ spin_unlock_bh(&ipvlan->addrs_lock);
kfree_rcu(addr, rcu);
-
- return;
-}
-
-static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
-{
- if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
- netif_err(ipvlan, ifup, ipvlan->dev,
- "Failed to add IPv6=%pI6c addr for %s intf\n",
- ip6_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip6_addr, true);
-}
-
-static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
-{
- return ipvlan_del_addr(ipvlan, ip6_addr, true);
}
static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -863,6 +864,27 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
return true;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
+ netif_err(ipvlan, ifup, ipvlan->dev,
+ "Failed to add IPv6=%pI6c addr for %s intf\n",
+ ip6_addr, ipvlan->dev->name);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
+}
+
+static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+ return ipvlan_del_addr(ipvlan, ip6_addr, true);
+}
+
static int ipvlan_addr6_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -894,10 +916,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- /* FIXME IPv6 autoconf calls us from bh without RTNL */
- if (in_softirq())
- return NOTIFY_DONE;
-
if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
@@ -913,17 +931,21 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
return NOTIFY_OK;
}
+#endif
static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv4=%pI4 on %s intf.\n",
ip4_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip4_addr, false);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
@@ -993,6 +1015,7 @@ static struct notifier_block ipvlan_notifier_block __read_mostly = {
.notifier_call = ipvlan_device_event,
};
+#if IS_ENABLED(CONFIG_IPV6)
static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
.notifier_call = ipvlan_addr6_event,
};
@@ -1000,6 +1023,7 @@ static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
.notifier_call = ipvlan_addr6_validator_event,
};
+#endif
static void ipvlan_ns_exit(struct net *net)
{
@@ -1024,9 +1048,11 @@ static int __init ipvlan_init_module(void)
ipvlan_init_secret();
register_netdevice_notifier(&ipvlan_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
register_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
@@ -1045,9 +1071,11 @@ error:
unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
unregister_inetaddr_validator_notifier(
&ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
unregister_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
unregister_netdevice_notifier(&ipvlan_notifier_block);
return err;
}
@@ -1060,9 +1088,11 @@ static void __exit ipvlan_cleanup_module(void)
unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
unregister_inetaddr_validator_notifier(
&ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
unregister_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
}
module_init(ipvlan_init_module);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 7de88b33d5b9..9cbb0c8a896a 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3277,7 +3277,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
err = netdev_upper_dev_link(real_dev, dev, extack);
if (err < 0)
- goto unregister;
+ goto put_dev;
/* need to be already registered so that ->init has run and
* the MAC addr is set
@@ -3316,7 +3316,8 @@ del_dev:
macsec_del_dev(macsec);
unlink:
netdev_upper_dev_unlink(real_dev, dev);
-unregister:
+put_dev:
+ dev_put(real_dev);
unregister_netdevice(dev);
return err;
}
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index a0f2be81d52e..725f4b4afc6d 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1036,7 +1036,7 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev,
lowerdev_features &= (features | ~NETIF_F_LRO);
features = netdev_increment_features(lowerdev_features, features, mask);
features |= ALWAYS_ON_FEATURES;
- features &= ~NETIF_F_NETNS_LOCAL;
+ features &= (ALWAYS_ON_FEATURES | MACVLAN_FEATURES);
return features;
}
@@ -1451,7 +1451,7 @@ destroy_macvlan_port:
/* the macvlan port may be freed by macvlan_uninit when registration
 * fails, so we destroy the macvlan port only when it's still valid.
 */
- if (create && macvlan_port_get_rtnl(dev))
+ if (create && macvlan_port_get_rtnl(lowerdev))
macvlan_port_destroy(port->dev);
return err;
}
diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c
index e8ae50e1255e..319edc9c8ec7 100644
--- a/drivers/net/phy/aquantia.c
+++ b/drivers/net/phy/aquantia.c
@@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev)
return 0;
}
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
- int reg;
-
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
static int aquantia_config_intr(struct phy_device *phydev)
{
int err;
@@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQ1202",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQ2104",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR105",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR106",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR107",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR405",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index 171010eb4d9c..5ad130c3da43 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -341,8 +341,8 @@ void bcm_phy_get_strings(struct phy_device *phydev, u8 *data)
unsigned int i;
for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++)
- memcpy(data + i * ETH_GSTRING_LEN,
- bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN);
+ strlcpy(data + i * ETH_GSTRING_LEN,
+ bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN);
}
EXPORT_SYMBOL_GPL(bcm_phy_get_strings);
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 421feb8f92fe..29b1c88b55cc 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -565,7 +565,7 @@ static int bcm7xxx_28nm_set_tunable(struct phy_device *phydev,
if (ret)
return ret;
- /* Disable EEE advertisment since this prevents the PHY
+ /* Disable EEE advertisement since this prevents the PHY
* from successfully linking up, trigger auto-negotiation restart
* to let the MAC decide what to do.
*/
diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index 9442db221834..8022cd317f62 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
MII_ADDR_C45 | regnum);
}
-static int cortina_config_aneg(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
static int cortina_read_status(struct phy_device *phydev)
{
int gpio_int_status, ret = 0;
@@ -61,11 +53,6 @@ err:
return ret;
}
-static int cortina_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int cortina_probe(struct phy_device *phydev)
{
u32 phy_id = 0;
@@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = {
.phy_id = PHY_ID_CS4340,
.phy_id_mask = 0xffffffff,
.name = "Cortina CS4340",
- .config_aneg = cortina_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = cortina_read_status,
- .soft_reset = cortina_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.probe = cortina_probe,
},
};
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index ab58224f897f..b3935778b19f 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -75,6 +75,8 @@
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK (0x1f << 8)
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT 8
/* CFG4 bits */
#define DP83867_CFG4_PORT_MIRROR_EN BIT(0)
@@ -92,6 +94,7 @@ struct dp83867_private {
int io_impedance;
int port_mirroring;
bool rxctrl_strap_quirk;
+ int clk_output_sel;
};
static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -160,6 +163,14 @@ static int dp83867_of_init(struct phy_device *phydev)
dp83867->io_impedance = -EINVAL;
/* Optional configuration */
+ ret = of_property_read_u32(of_node, "ti,clk-output-sel",
+ &dp83867->clk_output_sel);
+ if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK)
+ /* Keep the default value if ti,clk-output-sel is not set
+ * or too high
+ */
+ dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK;
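+ /* Device-tree usage sketch (constant assumed from
+ * include/dt-bindings/net/ti-dp83867.h):
+ * ti,clk-output-sel = <DP83867_CLK_O_SEL_CHN_A_RCLK>;
+ */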
+
if (of_property_read_bool(of_node, "ti,max-output-impedance"))
dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
@@ -295,6 +306,14 @@ static int dp83867_config_init(struct phy_device *phydev)
if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP)
dp83867_config_port_mirroring(phydev);
+ /* Clock output selection if muxing property is set */
+ if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) {
+ val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG);
+ val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK;
+ val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT);
+ phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val);
+ }
+
return 0;
}
diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index a11f80cb5388..7d936fb61c22 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -149,8 +149,10 @@
#define PHY_ID_PHY22F_1_4 0xD565A410
#define PHY_ID_PHY11G_1_5 0xD565A401
#define PHY_ID_PHY22F_1_5 0xD565A411
-#define PHY_ID_PHY11G_VR9 0xD565A409
-#define PHY_ID_PHY22F_VR9 0xD565A419
+#define PHY_ID_PHY11G_VR9_1_1 0xD565A408
+#define PHY_ID_PHY22F_VR9_1_1 0xD565A418
+#define PHY_ID_PHY11G_VR9_1_2 0xD565A409
+#define PHY_ID_PHY22F_VR9_1_2 0xD565A419
static int xway_gphy_config_init(struct phy_device *phydev)
{
@@ -312,9 +314,9 @@ static struct phy_driver xway_gphy[] = {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
- .phy_id = PHY_ID_PHY11G_VR9,
+ .phy_id = PHY_ID_PHY11G_VR9_1_1,
.phy_id_mask = 0xffffffff,
- .name = "Intel XWAY PHY11G (xRX integrated)",
+ .name = "Intel XWAY PHY11G (xRX v1.1 integrated)",
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
@@ -324,9 +326,33 @@ static struct phy_driver xway_gphy[] = {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
- .phy_id = PHY_ID_PHY22F_VR9,
+ .phy_id = PHY_ID_PHY22F_VR9_1_1,
.phy_id_mask = 0xffffffff,
- .name = "Intel XWAY PHY22F (xRX integrated)",
+ .name = "Intel XWAY PHY22F (xRX v1.1 integrated)",
+ .features = PHY_BASIC_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_init = xway_gphy_config_init,
+ .ack_interrupt = xway_gphy_ack_interrupt,
+ .did_interrupt = xway_gphy_did_interrupt,
+ .config_intr = xway_gphy_config_intr,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ }, {
+ .phy_id = PHY_ID_PHY11G_VR9_1_2,
+ .phy_id_mask = 0xffffffff,
+ .name = "Intel XWAY PHY11G (xRX v1.2 integrated)",
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_init = xway_gphy_config_init,
+ .ack_interrupt = xway_gphy_ack_interrupt,
+ .did_interrupt = xway_gphy_did_interrupt,
+ .config_intr = xway_gphy_config_intr,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ }, {
+ .phy_id = PHY_ID_PHY22F_VR9_1_2,
+ .phy_id_mask = 0xffffffff,
+ .name = "Intel XWAY PHY22F (xRX v1.2 integrated)",
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
@@ -346,8 +372,10 @@ static struct mdio_device_id __maybe_unused xway_gphy_tbl[] = {
{ PHY_ID_PHY22F_1_4, 0xffffffff },
{ PHY_ID_PHY11G_1_5, 0xffffffff },
{ PHY_ID_PHY22F_1_5, 0xffffffff },
- { PHY_ID_PHY11G_VR9, 0xffffffff },
- { PHY_ID_PHY22F_VR9, 0xffffffff },
+ { PHY_ID_PHY11G_VR9_1_1, 0xffffffff },
+ { PHY_ID_PHY22F_VR9_1_1, 0xffffffff },
+ { PHY_ID_PHY11G_VR9_1_2, 0xffffffff },
+ { PHY_ID_PHY22F_VR9_1_2, 0xffffffff },
{ }
};
MODULE_DEVICE_TABLE(mdio, xway_gphy_tbl);
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 22d9bc9c33a4..a75c511950c3 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -860,7 +860,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
return err;
/* There appears to be a bug in the 88e1512 when used in
- * SGMII to copper mode, where the AN advertisment register
+ * SGMII to copper mode, where the AN advertisement register
* clears the pause bits each time a negotiation occurs.
* This means we can never be truly sure what was advertised,
* so disable Pause support.
@@ -1452,8 +1452,8 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data)
int i;
for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) {
- memcpy(data + i * ETH_GSTRING_LEN,
- marvell_hw_stats[i].string, ETH_GSTRING_LEN);
+ strlcpy(data + i * ETH_GSTRING_LEN,
+ marvell_hw_stats[i].string, ETH_GSTRING_LEN);
}
}
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 8a0bd98fdec7..9564916d2d7b 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev)
return 0;
}
-/*
- * Resetting the MV88X3310 causes it to become non-responsive. Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int mv3310_config_init(struct phy_device *phydev)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
@@ -317,7 +308,7 @@ static int mv3310_read_status(struct phy_device *phydev)
if (val < 0)
return val;
- /* Read the link partner's 1G advertisment */
+ /* Read the link partner's 1G advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MV_AN_STAT1000);
if (val < 0)
return val;
@@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = {
SUPPORTED_10000baseT_Full |
SUPPORTED_Backplane,
.probe = mv3310_probe,
- .soft_reset = mv3310_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = mv3310_config_init,
.config_aneg = mv3310_config_aneg,
.aneg_done = mv3310_aneg_done,
diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c
index 2573ab012f16..70f6115530af 100644
--- a/drivers/net/phy/mdio-mux-mmioreg.c
+++ b/drivers/net/phy/mdio-mux-mmioreg.c
@@ -163,8 +163,9 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
mdio_mux_mmioreg_switch_fn,
&s->mux_handle, s, NULL);
if (ret) {
- dev_err(&pdev->dev, "failed to register mdio-mux bus %pOF\n",
- np);
+ if (ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "failed to register mdio-mux bus %pOF\n", np);
return ret;
}
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 0f45310300f6..f41b224a9cdb 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -635,25 +635,6 @@ static int ksz8873mll_config_aneg(struct phy_device *phydev)
return 0;
}
-/* This routine returns -1 as an indication to the caller that the
- * Micrel ksz9021 10/100/1000 PHY does not support standard IEEE
- * MMD extended PHY registers.
- */
-static int
-ksz9021_rd_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum)
-{
- return -1;
-}
-
-/* This routine does nothing since the Micrel ksz9021 does not support
- * standard IEEE MMD extended PHY registers.
- */
-static int
-ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum, u16 val)
-{
- return -1;
-}
-
static int kszphy_get_sset_count(struct phy_device *phydev)
{
return ARRAY_SIZE(kszphy_hw_stats);
@@ -664,8 +645,8 @@ static void kszphy_get_strings(struct phy_device *phydev, u8 *data)
int i;
for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++) {
- memcpy(data + i * ETH_GSTRING_LEN,
- kszphy_hw_stats[i].string, ETH_GSTRING_LEN);
+ strlcpy(data + i * ETH_GSTRING_LEN,
+ kszphy_hw_stats[i].string, ETH_GSTRING_LEN);
}
}
@@ -946,8 +927,8 @@ static struct phy_driver ksphy_driver[] = {
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
- .read_mmd = ksz9021_rd_mmd_phyreg,
- .write_mmd = ksz9021_wr_mmd_phyreg,
+ .read_mmd = genphy_read_mmd_unsupported,
+ .write_mmd = genphy_write_mmd_unsupported,
}, {
.phy_id = PHY_ID_KSZ9031,
.phy_id_mask = MICREL_PHY_ID_MASK,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index a4576859afae..e1225545362d 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -163,11 +163,11 @@ int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask)
EXPORT_SYMBOL_GPL(genphy_c45_read_link);
/**
- * genphy_c45_read_lpa - read the link partner advertisment and pause
+ * genphy_c45_read_lpa - read the link partner advertisement and pause
* @phydev: target phy_device struct
*
* Read the Clause 45 defined base (7.19) and 10G (7.33) status registers,
- * filling in the link partner advertisment, pause and asym_pause members
+ * filling in the link partner advertisement, pause and asym_pause members
* in @phydev. This assumes that the auto-negotiation MMD is present, and
* the backplane bit (7.48.0) is clear. Clause 45 PHY drivers are expected
* to fill in the remainder of the link partner advert from vendor registers.
@@ -176,7 +176,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
{
int val;
- /* Read the link partner's base page advertisment */
+ /* Read the link partner's base page advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
if (val < 0)
return val;
@@ -185,7 +185,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0;
phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0;
- /* Read the link partner's 10G advertisment */
+ /* Read the link partner's 10G advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
if (val < 0)
return val;
@@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
/* The gen10g_* functions are the old Clause 45 stub */
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
{
u32 mmd_mask = phydev->c45_ids.devices_in_package;
int ret;
@@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev)
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_read_status);
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
{
/* Do nothing for now */
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
{
/* Temporarily just say we support everything */
phydev->supported = SUPPORTED_10000baseT_Full;
@@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev)
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_init);
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_suspend);
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_resume);
struct phy_driver genphy_10g_driver = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
.name = "Generic 10G PHY",
- .soft_reset = gen10g_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = gen10g_config_init,
.features = 0,
.config_aneg = gen10g_config_aneg,
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 4083f00c97a5..c7da4cbb1103 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -190,10 +190,10 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
}
/**
- * phy_resolve_aneg_linkmode - resolve the advertisments into phy settings
+ * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
* @phydev: The phy_device struct
*
- * Resolve our and the link partner advertisments into their corresponding
+ * Resolve our and the link partner advertisements into their corresponding
* speed and duplex. If full duplex was negotiated, extract the pause mode
* from the link partner mask.
*/
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e3e29c2b028b..05c1e8ef15e6 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -618,6 +618,68 @@ static void phy_error(struct phy_device *phydev)
}
/**
+ * phy_disable_interrupts - Disable the PHY interrupts from the PHY side
+ * @phydev: target phy_device struct
+ */
+static int phy_disable_interrupts(struct phy_device *phydev)
+{
+ int err;
+
+ /* Disable PHY interrupts */
+ err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
+ if (err)
+ return err;
+
+ /* Clear the interrupt */
+ return phy_clear_interrupt(phydev);
+}
+
+/**
+ * phy_change - Called by the phy_interrupt to handle PHY changes
+ * @phydev: phy_device struct that interrupted
+ */
+static irqreturn_t phy_change(struct phy_device *phydev)
+{
+ if (phy_interrupt_is_valid(phydev)) {
+ if (phydev->drv->did_interrupt &&
+ !phydev->drv->did_interrupt(phydev))
+ return IRQ_NONE;
+
+ if (phydev->state == PHY_HALTED)
+ if (phy_disable_interrupts(phydev))
+ goto phy_err;
+ }
+
+ mutex_lock(&phydev->lock);
+ if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
+ phydev->state = PHY_CHANGELINK;
+ mutex_unlock(&phydev->lock);
+
+ /* reschedule state queue work to run as soon as possible */
+ phy_trigger_machine(phydev, true);
+
+ if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
+ goto phy_err;
+ return IRQ_HANDLED;
+
+phy_err:
+ phy_error(phydev);
+ return IRQ_NONE;
+}
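+
+/* Note: phy_interrupt() now propagates phy_change()'s irqreturn_t, so a
+ * shared IRQ line is reported as IRQ_NONE when did_interrupt() says this
+ * PHY did not fire.
+ */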
+
+/**
+ * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes
+ * @work: work_struct that describes the work to be done
+ */
+void phy_change_work(struct work_struct *work)
+{
+ struct phy_device *phydev =
+ container_of(work, struct phy_device, phy_queue);
+
+ phy_change(phydev);
+}
+
+/**
* phy_interrupt - PHY interrupt handler
* @irq: interrupt line
* @phy_dat: phy_device pointer
@@ -632,9 +694,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
if (PHY_HALTED == phydev->state)
return IRQ_NONE; /* It can't be ours. */
- phy_change(phydev);
-
- return IRQ_HANDLED;
+ return phy_change(phydev);
}
/**
@@ -652,32 +712,6 @@ static int phy_enable_interrupts(struct phy_device *phydev)
}
/**
- * phy_disable_interrupts - Disable the PHY interrupts from the PHY side
- * @phydev: target phy_device struct
- */
-static int phy_disable_interrupts(struct phy_device *phydev)
-{
- int err;
-
- /* Disable PHY interrupts */
- err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
- if (err)
- goto phy_err;
-
- /* Clear the interrupt */
- err = phy_clear_interrupt(phydev);
- if (err)
- goto phy_err;
-
- return 0;
-
-phy_err:
- phy_error(phydev);
-
- return err;
-}
-
-/**
* phy_start_interrupts - request and enable interrupts for a PHY device
* @phydev: target phy_device struct
*
@@ -720,50 +754,6 @@ int phy_stop_interrupts(struct phy_device *phydev)
EXPORT_SYMBOL(phy_stop_interrupts);
/**
- * phy_change - Called by the phy_interrupt to handle PHY changes
- * @phydev: phy_device struct that interrupted
- */
-void phy_change(struct phy_device *phydev)
-{
- if (phy_interrupt_is_valid(phydev)) {
- if (phydev->drv->did_interrupt &&
- !phydev->drv->did_interrupt(phydev))
- return;
-
- if (phydev->state == PHY_HALTED)
- if (phy_disable_interrupts(phydev))
- goto phy_err;
- }
-
- mutex_lock(&phydev->lock);
- if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
- phydev->state = PHY_CHANGELINK;
- mutex_unlock(&phydev->lock);
-
- /* reschedule state queue work to run as soon as possible */
- phy_trigger_machine(phydev, true);
-
- if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
- goto phy_err;
- return;
-
-phy_err:
- phy_error(phydev);
-}
-
-/**
- * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes
- * @work: work_struct that describes the work to be done
- */
-void phy_change_work(struct work_struct *work)
-{
- struct phy_device *phydev =
- container_of(work, struct phy_device, phy_queue);
-
- phy_change(phydev);
-}
-
-/**
* phy_stop - Bring down the PHY link, and stop checking the status
* @phydev: target phy_device struct
*/
@@ -774,13 +764,8 @@ void phy_stop(struct phy_device *phydev)
if (PHY_HALTED == phydev->state)
goto out_unlock;
- if (phy_interrupt_is_valid(phydev)) {
- /* Disable PHY Interrupts */
- phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
-
- /* Clear any pending interrupts */
- phy_clear_interrupt(phydev);
- }
+ if (phy_interrupt_is_valid(phydev))
+ phy_disable_interrupts(phydev);
phydev->state = PHY_HALTED;
@@ -819,7 +804,7 @@ void phy_start(struct phy_device *phydev)
break;
case PHY_HALTED:
/* if phy was suspended, bring the physical link up again */
- phy_resume(phydev);
+ __phy_resume(phydev);
/* make sure interrupts are re-enabled for the PHY */
if (phy_interrupt_is_valid(phydev)) {
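phy_change() now returns irqreturn_t so that phy_interrupt() can forward its result straight to the IRQ core. A sketch of the contract this leans on for shared interrupt lines — everything example_* here is illustrative, not from this patch:

#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>

struct example_dev {
	void __iomem *regs;	/* hypothetical status/ack register */
};

static irqreturn_t example_handler(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	if (!(readl(dev->regs) & BIT(0)))	/* hypothetical pending bit */
		return IRQ_NONE;	/* not ours: core tries the next handler */

	writel(BIT(0), dev->regs);		/* hypothetical ack */
	return IRQ_HANDLED;	/* consumed: keeps spurious-IRQ detection quiet */
}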
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b13eed21c87d..ac23322a32e1 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -135,9 +135,7 @@ static int mdio_bus_phy_resume(struct device *dev)
if (!mdio_bus_phy_may_suspend(phydev))
goto no_resume;
- mutex_lock(&phydev->lock);
ret = phy_resume(phydev);
- mutex_unlock(&phydev->lock);
if (ret < 0)
return ret;
@@ -376,7 +374,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
dev->duplex = -1;
dev->pause = 0;
dev->asym_pause = 0;
- dev->link = 1;
+ dev->link = 0;
dev->interface = PHY_INTERFACE_MODE_GMII;
dev->autoneg = AUTONEG_ENABLE;
@@ -1014,10 +1012,17 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
"attached_dev");
if (!err) {
- err = sysfs_create_link(&dev->dev.kobj, &phydev->mdio.dev.kobj,
- "phydev");
- if (err)
- goto error;
+ err = sysfs_create_link_nowarn(&dev->dev.kobj,
+ &phydev->mdio.dev.kobj,
+ "phydev");
+ if (err) {
+ dev_err(&dev->dev, "could not add device link to %s err %d\n",
+ kobject_name(&phydev->mdio.dev.kobj),
+ err);
+ /* non-fatal - some net drivers can use one netdevice
+ * with more than one PHY
+ */
+ }
phydev->sysfs_links = true;
}
@@ -1041,9 +1046,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
if (err)
goto error;
- mutex_lock(&phydev->lock);
phy_resume(phydev);
- mutex_unlock(&phydev->lock);
phy_led_triggers_register(phydev);
return err;
@@ -1172,7 +1175,7 @@ int phy_suspend(struct phy_device *phydev)
}
EXPORT_SYMBOL(phy_suspend);
-int phy_resume(struct phy_device *phydev)
+int __phy_resume(struct phy_device *phydev)
{
struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
int ret = 0;
@@ -1189,6 +1192,18 @@ int phy_resume(struct phy_device *phydev)
return ret;
}
+EXPORT_SYMBOL(__phy_resume);
+
+int phy_resume(struct phy_device *phydev)
+{
+ int ret;
+
+ mutex_lock(&phydev->lock);
+ ret = __phy_resume(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
+}
EXPORT_SYMBOL(phy_resume);
int phy_loopback(struct phy_device *phydev, bool enable)
@@ -1382,7 +1397,7 @@ int genphy_setup_forced(struct phy_device *phydev)
ctl |= BMCR_FULLDPLX;
return phy_modify(phydev, MII_BMCR,
- BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN, ctl);
+ ~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl);
}
EXPORT_SYMBOL(genphy_setup_forced);
@@ -1658,6 +1673,23 @@ int genphy_config_init(struct phy_device *phydev)
}
EXPORT_SYMBOL(genphy_config_init);
+/* These stubs are for PHY devices that do not support MMD extended
+ * register access at all, where attempting the indirect access method
+ * would have unwanted side effects.
+ */
+int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum)
+{
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(genphy_read_mmd_unsupported);
+
+int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
+ u16 regnum, u16 val)
+{
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(genphy_write_mmd_unsupported);
+
int genphy_suspend(struct phy_device *phydev)
{
return phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN);
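The phy_resume() split a few hunks up follows the usual locked/unlocked kernel convention: __phy_resume() assumes the caller already holds phydev->lock (as phy_start() does), while phy_resume() is the self-locking entry point for callers such as mdio_bus_phy_resume(). A generic sketch of the pattern with illustrative names:

#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);

static int __example_resume(void)
{
	/* caller must hold example_lock */
	return 0;	/* the real resume work would go here */
}

int example_resume(void)
{
	int ret;

	mutex_lock(&example_lock);
	ret = __example_resume();
	mutex_unlock(&example_lock);

	return ret;
}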
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6ac8b29b2dc3..51a011a349fe 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -364,7 +364,7 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat
}
/* Flow control is resolved according to our and the link partners
- * advertisments using the following drawn from the 802.3 specs:
+ * advertisements using the following drawn from the 802.3 specs:
* Local device Link partner
* Pause AsymDir Pause AsymDir Result
* 1 X 1 X TX+RX
@@ -679,12 +679,11 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = phy;
pl->phydev = phy;
linkmode_copy(pl->supported, supported);
linkmode_copy(pl->link_config.advertising, config.advertising);
- /* Restrict the phy advertisment according to the MAC support. */
+ /* Restrict the phy advertisement according to the MAC support. */
ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising);
phy->advertising = advertising;
mutex_unlock(&pl->state_mutex);
@@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl)
if (phy) {
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = NULL;
pl->phydev = NULL;
mutex_unlock(&pl->state_mutex);
mutex_unlock(&phy->lock);
@@ -889,7 +887,7 @@ void phylink_start(struct phylink *pl)
/* Apply the link configuration to the MAC when starting. This allows
* a fixed-link to start with the correct parameters, and also
- * ensures that we set the appropriate advertisment for Serdes links.
+ * ensures that we set the appropriate advertisement for Serdes links.
*/
phylink_resolve_flow(pl, &pl->link_config);
phylink_mac_config(pl, &pl->link_config);
@@ -1076,7 +1074,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
config = pl->link_config;
- /* Mask out unsupported advertisments */
+ /* Mask out unsupported advertisements */
linkmode_and(config.advertising, kset->link_modes.advertising,
pl->supported);
@@ -1121,7 +1119,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
if (phylink_validate(pl, pl->supported, &config))
return -EINVAL;
- /* If autonegotiation is enabled, we must have an advertisment */
+ /* If autonegotiation is enabled, we must have an advertisement */
if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
return -EINVAL;
@@ -1584,25 +1582,14 @@ static int phylink_sfp_module_insert(void *upstream,
bool changed;
u8 port;
- sfp_parse_support(pl->sfp_bus, id, support);
- port = sfp_parse_port(pl->sfp_bus, id, support);
- iface = sfp_parse_interface(pl->sfp_bus, id);
-
ASSERT_RTNL();
- switch (iface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- case PHY_INTERFACE_MODE_10GKR:
- break;
- default:
- return -EINVAL;
- }
+ sfp_parse_support(pl->sfp_bus, id, support);
+ port = sfp_parse_port(pl->sfp_bus, id, support);
memset(&config, 0, sizeof(config));
linkmode_copy(config.advertising, support);
- config.interface = iface;
+ config.interface = PHY_INTERFACE_MODE_NA;
config.speed = SPEED_UNKNOWN;
config.duplex = DUPLEX_UNKNOWN;
config.pause = MLO_PAUSE_AN;
@@ -1611,6 +1598,22 @@ static int phylink_sfp_module_insert(void *upstream,
/* Ignore errors if we're expecting a PHY to attach later */
ret = phylink_validate(pl, support, &config);
if (ret) {
+ netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ return ret;
+ }
+
+ iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+ if (iface == PHY_INTERFACE_MODE_NA) {
+ netdev_err(pl->netdev,
+ "selection of interface failed, advertisement %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+ return -EINVAL;
+ }
+
+ config.interface = iface;
+ ret = phylink_validate(pl, support, &config);
+ if (ret) {
netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
phylink_an_mode_str(MLO_AN_INBAND),
phy_modes(config.interface),
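Condensed, the reworked insert path above is: parse the module's supported modes, validate them with the interface still unknown, let the new sfp_select_interface() pick the MAC interface from the advertisement, then validate once more with that interface. A simplified sketch of the flow — phylink_validate() is internal to phylink.c and struct phylink is opaque elsewhere, so this is a paraphrase of the hunk rather than a drop-in function:

#include <linux/ethtool.h>
#include <linux/phylink.h>
#include <linux/sfp.h>

static int example_sfp_insert(struct phylink *pl,
			      const struct sfp_eeprom_id *id)
{
	__ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, };
	struct phylink_link_state config = {
		.interface = PHY_INTERFACE_MODE_NA,
		.speed = SPEED_UNKNOWN,
		.duplex = DUPLEX_UNKNOWN,
	};
	phy_interface_t iface;
	int ret;

	sfp_parse_support(pl->sfp_bus, id, support);
	bitmap_copy(config.advertising, support,
		    __ETHTOOL_LINK_MODE_MASK_NBITS);

	ret = phylink_validate(pl, support, &config);	/* pass 1: any iface */
	if (ret)
		return ret;

	iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
	if (iface == PHY_INTERFACE_MODE_NA)
		return -EINVAL;

	config.interface = iface;
	return phylink_validate(pl, support, &config);	/* pass 2: chosen iface */
}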
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index ee3ca4a2f12b..9f48ecf9c627 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -172,6 +172,8 @@ static struct phy_driver realtek_drvs[] = {
.flags = PHY_HAS_INTERRUPT,
.ack_interrupt = &rtl821x_ack_interrupt,
.config_intr = &rtl8211b_config_intr,
+ .read_mmd = &genphy_read_mmd_unsupported,
+ .write_mmd = &genphy_write_mmd_unsupported,
}, {
.phy_id = 0x001cc914,
.name = "RTL8211DN Gigabit Ethernet",
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8961209ee949..3d4ff5d0d2a6 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -106,68 +106,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
EXPORT_SYMBOL_GPL(sfp_parse_port);
/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- phy_interface_t iface;
-
- /* Setting the serdes link mode is guesswork: there's no field in
- * the EEPROM which indicates what mode should be used.
- *
- * If the module wants 64b66b, then it must be >= 10G.
- *
- * If it's a gigabit-only fiber module, it probably does not have
- * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
- * to SGMII mode (which is required to support non-gigabit speeds).
- */
- switch (id->base.encoding) {
- case SFP_ENCODING_8472_64B66B:
- iface = PHY_INTERFACE_MODE_10GKR;
- break;
-
- case SFP_ENCODING_8B10B:
- if (!id->base.e1000_base_t &&
- !id->base.e100_base_lx &&
- !id->base.e100_base_fx)
- iface = PHY_INTERFACE_MODE_1000BASEX;
- else
- iface = PHY_INTERFACE_MODE_SGMII;
- break;
-
- default:
- if (id->base.e1000_base_cx) {
- iface = PHY_INTERFACE_MODE_1000BASEX;
- break;
- }
-
- iface = PHY_INTERFACE_MODE_NA;
- dev_err(bus->sfp_dev,
- "SFP module encoding does not support 8b10b nor 64b66b\n");
- break;
- }
-
- return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
-/**
* sfp_parse_support() - Parse the eeprom id for supported link modes
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
* @id: a pointer to the module's &struct sfp_eeprom_id
@@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support)
{
unsigned int br_min, br_nom, br_max;
-
- phylink_set(support, Autoneg);
- phylink_set(support, Pause);
- phylink_set(support, Asym_Pause);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
/* Decode the bitrate information to MBd */
br_min = br_nom = br_max = 0;
@@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
/* Set ethtool support from the compliance fields. */
if (id->base.e10g_base_sr)
- phylink_set(support, 10000baseSR_Full);
+ phylink_set(modes, 10000baseSR_Full);
if (id->base.e10g_base_lr)
- phylink_set(support, 10000baseLR_Full);
+ phylink_set(modes, 10000baseLR_Full);
if (id->base.e10g_base_lrm)
- phylink_set(support, 10000baseLRM_Full);
+ phylink_set(modes, 10000baseLRM_Full);
if (id->base.e10g_base_er)
- phylink_set(support, 10000baseER_Full);
+ phylink_set(modes, 10000baseER_Full);
if (id->base.e1000_base_sx ||
id->base.e1000_base_lx ||
id->base.e1000_base_cx)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
if (id->base.e1000_base_t) {
- phylink_set(support, 1000baseT_Half);
- phylink_set(support, 1000baseT_Full);
+ phylink_set(modes, 1000baseT_Half);
+ phylink_set(modes, 1000baseT_Full);
}
/* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
/* This may look odd, but some manufacturers use 12000MBd */
if (br_min <= 12000 && br_max >= 10300)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
if (br_min <= 3200 && br_max >= 3100)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (br_min <= 1300 && br_max >= 1200)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
if (id->base.sfp_ct_passive) {
if (id->base.passive.sff8431_app_e)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
if (id->base.sfp_ct_active) {
if (id->base.active.sff8431_app_e ||
id->base.active.sff8431_lim) {
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
}
@@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
case 0x00: /* Unspecified */
break;
case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
- phylink_set(support, 100000baseSR4_Full);
- phylink_set(support, 25000baseSR_Full);
+ phylink_set(modes, 100000baseSR4_Full);
+ phylink_set(modes, 25000baseSR_Full);
break;
case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
- phylink_set(support, 100000baseLR4_ER4_Full);
+ phylink_set(modes, 100000baseLR4_ER4_Full);
break;
case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
case 0x0c: /* 25Gbase-CR CA-S */
case 0x0d: /* 25Gbase-CR CA-N */
- phylink_set(support, 100000baseCR4_Full);
- phylink_set(support, 25000baseCR_Full);
+ phylink_set(modes, 100000baseCR4_Full);
+ phylink_set(modes, 25000baseCR_Full);
break;
default:
dev_warn(bus->sfp_dev,
@@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
id->base.fc_speed_200 ||
id->base.fc_speed_400) {
if (id->base.br_nominal >= 31)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (id->base.br_nominal >= 12)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
+
+ /* If we haven't discovered any modes that this module supports, try
+ * the encoding and bitrate to determine supported modes. Some BiDi
+ * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+ * the differing wavelengths, so do not set any transceiver bits.
+ */
+ if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ /* If the encoding and bit rate allows 1000baseX */
+ if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+ br_min <= 1300 && br_max >= 1200)
+ phylink_set(modes, 1000baseX_Full);
+ }
+
+ bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ phylink_set(support, Autoneg);
+ phylink_set(support, Pause);
+ phylink_set(support, Asym_Pause);
}
EXPORT_SYMBOL_GPL(sfp_parse_support);
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ if (phylink_test(link_modes, 10000baseCR_Full) ||
+ phylink_test(link_modes, 10000baseSR_Full) ||
+ phylink_test(link_modes, 10000baseLR_Full) ||
+ phylink_test(link_modes, 10000baseLRM_Full) ||
+ phylink_test(link_modes, 10000baseER_Full))
+ return PHY_INTERFACE_MODE_10GKR;
+
+ if (phylink_test(link_modes, 2500baseX_Full))
+ return PHY_INTERFACE_MODE_2500BASEX;
+
+ if (id->base.e1000_base_t ||
+ id->base.e100_base_lx ||
+ id->base.e100_base_fx)
+ return PHY_INTERFACE_MODE_SGMII;
+
+ if (phylink_test(link_modes, 1000baseX_Full))
+ return PHY_INTERFACE_MODE_1000BASEX;
+
+ dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+ return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
static LIST_HEAD(sfp_buses);
static DEFINE_MUTEX(sfp_mutex);
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 6c7d9289078d..83bf4959b043 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -42,6 +42,7 @@ enum {
SFP_MOD_EMPTY = 0,
SFP_MOD_PROBE,
+ SFP_MOD_HPOWER,
SFP_MOD_PRESENT,
SFP_MOD_ERROR,
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
* access the I2C EEPROM. However, Avago modules require 300ms.
*/
#define T_PROBE_INIT msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
#define T_PROBE_RETRY msecs_to_jiffies(100)
/* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
struct sfp_bus *sfp_bus;
struct phy_device *mod_phy;
const struct sff_data *type;
+ u32 max_power_mW;
unsigned int (*get_state)(struct sfp *);
void (*set_state)(struct sfp *, unsigned int);
int (*read)(struct sfp *, bool, u8, void *, size_t);
+ int (*write)(struct sfp *, bool, u8, void *, size_t);
struct gpio_desc *gpio[GPIO_MAX];
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
}
}
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
- void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
struct i2c_msg msgs[2];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
int ret;
msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
msgs[1].len = len;
msgs[1].buf = buf;
- ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
if (ret < 0)
return ret;
return ret == ARRAY_SIZE(msgs) ? len : 0;
}
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
- size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
- return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+ struct i2c_msg msgs[1];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
+ int ret;
+
+ msgs[0].addr = bus_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = 1 + len;
+ msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+ if (!msgs[0].buf)
+ return -ENOMEM;
+
+ msgs[0].buf[0] = dev_addr;
+ memcpy(&msgs[0].buf[1], buf, len);
+
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+ kfree(msgs[0].buf);
+
+ if (ret < 0)
+ return ret;
+
+ return ret == ARRAY_SIZE(msgs) ? len : 0;
}
static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
sfp->i2c = i2c;
sfp->read = sfp_i2c_read;
+ sfp->write = sfp_i2c_write;
i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
return sfp->read(sfp, a2, addr, buf, len);
}
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+ return sfp->write(sfp, a2, addr, buf, len);
+}
+
static unsigned int sfp_check(void *buf, size_t len)
{
u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
sfp_sm_probe_phy(sfp);
}
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+ u32 power;
+ u8 val;
+ int err;
+
+ power = 1000;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+ power = 1500;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+ power = 2000;
+
+ if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+ (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+ SFP_DIAGMON_DDM) {
+ /* The module appears not to implement bus address 0xa2,
+ * or requires an address change sequence, so assume that
+ * the module powers up in the indicated power mode.
+ */
+ if (power > sfp->max_power_mW) {
+ dev_err(sfp->dev,
+ "Host does not support %u.%uW modules\n",
+ power / 1000, (power / 100) % 10);
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ if (power > sfp->max_power_mW) {
+ dev_warn(sfp->dev,
+ "Host does not support %u.%uW modules, module left in power mode 1\n",
+ power / 1000, (power / 100) % 10);
+ return 0;
+ }
+
+ if (power <= 1000)
+ return 0;
+
+ err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ val |= BIT(0);
+
+ err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+ power / 1000, (power / 100) % 10);
+ return T_HPOWER_LEVEL;
+
+err:
+ return err;
+}
+
static int sfp_sm_mod_probe(struct sfp *sfp)
{
/* SFP module inserted - read I2C data */
struct sfp_eeprom_id id;
u8 check;
- int err;
+ int ret;
- err = sfp_read(sfp, false, 0, &id, sizeof(id));
- if (err < 0) {
- dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+ ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+ if (ret < 0) {
+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
return -EAGAIN;
}
- if (err != sizeof(id)) {
- dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+ if (ret != sizeof(id)) {
+ dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
return -EAGAIN;
}
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
dev_warn(sfp->dev,
"module address swap to access page 0xA2 is not supported.\n");
- return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ if (ret < 0)
+ return ret;
+
+ return sfp_sm_mod_hpower(sfp);
}
static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
if (event == SFP_E_REMOVE) {
sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
} else if (event == SFP_E_TIMEOUT) {
- int err = sfp_sm_mod_probe(sfp);
+ int val = sfp_sm_mod_probe(sfp);
- if (err == 0)
+ if (val == 0)
sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
- else if (err == -EAGAIN)
- sfp_sm_set_timer(sfp, T_PROBE_RETRY);
- else
+ else if (val > 0)
+ sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+ else if (val != -EAGAIN)
sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+ else
+ sfp_sm_set_timer(sfp, T_PROBE_RETRY);
}
break;
+ case SFP_MOD_HPOWER:
+ if (event == SFP_E_TIMEOUT) {
+ sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+ break;
+ }
+ /* fallthrough */
case SFP_MOD_PRESENT:
case SFP_MOD_ERROR:
if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
if (!(sfp->gpio[GPIO_MODDEF0]))
sfp->get_state = sff_gpio_get_state;
+ device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+ &sfp->max_power_mW);
+ if (!sfp->max_power_mW)
+ sfp->max_power_mW = 1000;
+
+ dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+ sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
if (!sfp->sfp_bus)
return -ENOMEM;
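sfp_sm_mod_probe() now returns three kinds of values: 0 (module ready), a negative errno (-EAGAIN re-arms the probe timer, anything else is fatal), or a positive jiffies delay requesting a dwell in the new SFP_MOD_HPOWER state while the module settles at the higher power level. The power class itself is decoded from the SFF-8472 option bits; a hypothetical standalone helper mirroring that decode from sfp_sm_mod_hpower() above:

#include <linux/sfp.h>

static u32 example_module_power_mW(const struct sfp_eeprom_id *id)
{
	u32 power = 1000;			/* power class 1: 1.0 W */

	if (id->ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
		power = 1500;			/* power class 2: 1.5 W */
	if (id->ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
		power = 2000;			/* power class 3: 2.0 W */

	return power;
}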
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 1e2d4f1179da..f17b3441779b 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -417,7 +417,7 @@ static void ks8995_parse_dt(struct ks8995_switch *ks)
static const struct bin_attribute ks8995_registers_attr = {
.attr = {
.name = "registers",
- .mode = S_IRUSR | S_IWUSR,
+ .mode = 0600,
},
.size = KS8995_REGS_SIZE,
.read = ks8995_registers_read,
diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c
index fb2cef764e9a..22f3bdd8206c 100644
--- a/drivers/net/phy/teranetics.c
+++ b/drivers/net/phy/teranetics.c
@@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2");
MDIO_PHYXS_LNSTAT_SYNC3 | \
MDIO_PHYXS_LNSTAT_ALIGN)
-static int teranetics_config_init(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_aneg_done(struct phy_device *phydev)
{
- int reg;
-
/* Autonegotiation state can only be checked when using the copper
* port; when using the fiber port, just report it as done.
*/
- if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
- }
+ if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+ return genphy_c45_aneg_done(phydev);
return 1;
}
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_read_status(struct phy_device *phydev)
{
int reg;
@@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = {
.phy_id = PHY_ID_TN2020,
.phy_id_mask = 0xffffffff,
.name = "Teranetics TN2020",
- .soft_reset = teranetics_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.aneg_done = teranetics_aneg_done,
- .config_init = teranetics_config_init,
- .config_aneg = teranetics_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = teranetics_read_status,
.match_phy_device = teranetics_match_phy_device,
},
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 255a5def56e9..dc7c7ec43202 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -257,7 +257,7 @@ struct ppp_net {
/* Prototypes. */
static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
struct file *file, unsigned int cmd, unsigned long arg);
-static void ppp_xmit_process(struct ppp *ppp);
+static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb);
static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
static void ppp_push(struct ppp *ppp);
static void ppp_channel_push(struct channel *pch);
@@ -513,13 +513,12 @@ static ssize_t ppp_write(struct file *file, const char __user *buf,
goto out;
}
- skb_queue_tail(&pf->xq, skb);
-
switch (pf->kind) {
case INTERFACE:
- ppp_xmit_process(PF_TO_PPP(pf));
+ ppp_xmit_process(PF_TO_PPP(pf), skb);
break;
case CHANNEL:
+ skb_queue_tail(&pf->xq, skb);
ppp_channel_push(PF_TO_CHANNEL(pf));
break;
}
@@ -1267,8 +1266,8 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
put_unaligned_be16(proto, pp);
skb_scrub_packet(skb, !net_eq(ppp->ppp_net, dev_net(dev)));
- skb_queue_tail(&ppp->file.xq, skb);
- ppp_xmit_process(ppp);
+ ppp_xmit_process(ppp, skb);
+
return NETDEV_TX_OK;
outf:
@@ -1420,13 +1419,14 @@ static void ppp_setup(struct net_device *dev)
*/
/* Called to do any work queued up on the transmit side that can now be done */
-static void __ppp_xmit_process(struct ppp *ppp)
+static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
{
- struct sk_buff *skb;
-
ppp_xmit_lock(ppp);
if (!ppp->closing) {
ppp_push(ppp);
+
+ if (skb)
+ skb_queue_tail(&ppp->file.xq, skb);
while (!ppp->xmit_pending &&
(skb = skb_dequeue(&ppp->file.xq)))
ppp_send_frame(ppp, skb);
@@ -1440,7 +1440,7 @@ static void __ppp_xmit_process(struct ppp *ppp)
ppp_xmit_unlock(ppp);
}
-static void ppp_xmit_process(struct ppp *ppp)
+static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
{
local_bh_disable();
@@ -1448,7 +1448,7 @@ static void ppp_xmit_process(struct ppp *ppp)
goto err;
(*this_cpu_ptr(ppp->xmit_recursion))++;
- __ppp_xmit_process(ppp);
+ __ppp_xmit_process(ppp, skb);
(*this_cpu_ptr(ppp->xmit_recursion))--;
local_bh_enable();
@@ -1458,6 +1458,8 @@ static void ppp_xmit_process(struct ppp *ppp)
err:
local_bh_enable();
+ kfree_skb(skb);
+
if (net_ratelimit())
netdev_err(ppp->dev, "recursion detected\n");
}
@@ -1684,7 +1686,7 @@ ppp_push(struct ppp *ppp)
#ifdef CONFIG_PPP_MULTILINK
static bool mp_protocol_compress __read_mostly = true;
-module_param(mp_protocol_compress, bool, S_IRUGO | S_IWUSR);
+module_param(mp_protocol_compress, bool, 0644);
MODULE_PARM_DESC(mp_protocol_compress,
"compress protocol id in multilink fragments");
@@ -1942,7 +1944,7 @@ static void __ppp_channel_push(struct channel *pch)
if (skb_queue_empty(&pch->file.xq)) {
ppp = pch->ppp;
if (ppp)
- __ppp_xmit_process(ppp);
+ __ppp_xmit_process(ppp, NULL);
}
}
@@ -3161,6 +3163,15 @@ ppp_connect_channel(struct channel *pch, int unit)
goto outl;
ppp_lock(ppp);
+ spin_lock_bh(&pch->downl);
+ if (!pch->chan) {
+ /* Don't connect unregistered channels */
+ spin_unlock_bh(&pch->downl);
+ ppp_unlock(ppp);
+ ret = -ENOTCONN;
+ goto outl;
+ }
+ spin_unlock_bh(&pch->downl);
if (pch->file.hdrlen > ppp->file.hdrlen)
ppp->file.hdrlen = pch->file.hdrlen;
hdrlen = pch->file.hdrlen + 2; /* for protocol bytes */
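ppp_write() and ppp_start_xmit() now hand the skb to ppp_xmit_process(), which queues it under the xmit lock and frees it on the recursion error path instead of leaving it stranded on the queue. A generic sketch of the per-CPU recursion guard that error path relies on — names are illustrative, and PPP itself keeps the counter per device via alloc_percpu():

#include <linux/percpu.h>
#include <linux/skbuff.h>

static DEFINE_PER_CPU(int, example_xmit_recursion);

static void example_xmit(struct sk_buff *skb)
{
	local_bh_disable();
	if (this_cpu_read(example_xmit_recursion)) {
		local_bh_enable();
		kfree_skb(skb);		/* drop rather than recurse */
		return;
	}

	this_cpu_inc(example_xmit_recursion);
	/* ... queue skb and push pending frames ... */
	this_cpu_dec(example_xmit_recursion);
	local_bh_enable();
}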
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index bd89d1c559ce..1483bc7b01e1 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1142,7 +1142,7 @@ static __net_init int pppoe_init_net(struct net *net)
rwlock_init(&pn->hash_lock);
- pde = proc_create("pppoe", S_IRUGO, net->proc_net, &pppoe_seq_fops);
+ pde = proc_create("pppoe", 0444, net->proc_net, &pppoe_seq_fops);
#ifdef CONFIG_PROC_FS
if (!pde)
return -ENOMEM;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a468439969df..222093e878a8 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
}
#endif
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
int err;
lag_upper_info.tx_type = team->mode->lag_tx_type;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
- &lag_upper_info, NULL);
+ &lag_upper_info, extack);
if (err)
return err;
port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
static int team_dev_type_check_change(struct net_device *dev,
struct net_device *port_dev);
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = team->dev;
struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
int err;
if (port_dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
portname);
return -EINVAL;
}
if (team_port_exists(port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
netdev_err(dev, "Device %s is already a port "
"of a team device\n", portname);
return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
portname);
return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
return err;
if (port_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
portname);
return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
goto err_handler_register;
}
- err = team_upper_dev_link(team, port);
+ err = team_upper_dev_link(team, port, extack);
if (err) {
netdev_err(dev, "Device %s failed to set upper link\n",
portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
int err;
mutex_lock(&team->lock);
- err = team_port_add(team, port_dev);
+ err = team_port_add(team, port_dev, extack);
mutex_unlock(&team->lock);
if (!err)
@@ -2395,7 +2401,7 @@ send_done:
if (!nlh) {
err = __send_and_alloc_skb(&skb, team, portid, send_func);
if (err)
- goto errout;
+ return err;
goto send_done;
}
@@ -2681,7 +2687,7 @@ send_done:
if (!nlh) {
err = __send_and_alloc_skb(&skb, team, portid, send_func);
if (err)
- goto errout;
+ return err;
goto send_done;
}
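The team changes above thread a struct netlink_ext_ack through the port-add path so each failure reason reaches the netlink caller as well as the kernel log. A minimal sketch of the pattern, with an illustrative function name:

#include <linux/netdevice.h>
#include <linux/netlink.h>

static int example_port_check(struct net_device *port_dev,
			      struct netlink_ext_ack *extack)
{
	if (port_dev->flags & IFF_LOOPBACK) {
		/* the string travels back to the caller with the errno */
		NL_SET_ERR_MSG(extack, "Loopback device can't be enslaved");
		return -EINVAL;
	}

	return 0;
}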
diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
index ca5e375de27c..e0d6760f3219 100644
--- a/drivers/net/thunderbolt.c
+++ b/drivers/net/thunderbolt.c
@@ -166,6 +166,8 @@ struct tbnet_ring {
* @connected_work: Worker that finalizes the ThunderboltIP connection
* setup and enables DMA paths for high speed data
* transfers
+ * @disconnect_work: Worker that handles tearing down the ThunderboltIP
+ * connection
* @rx_hdr: Copy of the currently processed Rx frame. Used when a
* network packet consists of multiple Thunderbolt frames.
* In host byte order.
@@ -190,6 +192,7 @@ struct tbnet {
int login_retries;
struct delayed_work login_work;
struct work_struct connected_work;
+ struct work_struct disconnect_work;
struct thunderbolt_ip_frame_header rx_hdr;
struct tbnet_ring rx_ring;
atomic_t frame_id;
@@ -445,7 +448,7 @@ static int tbnet_handle_packet(const void *buf, size_t size, void *data)
case TBIP_LOGOUT:
ret = tbnet_logout_response(net, route, sequence, command_id);
if (!ret)
- tbnet_tear_down(net, false);
+ queue_work(system_long_wq, &net->disconnect_work);
break;
default:
@@ -659,6 +662,13 @@ static void tbnet_login_work(struct work_struct *work)
}
}
+static void tbnet_disconnect_work(struct work_struct *work)
+{
+ struct tbnet *net = container_of(work, typeof(*net), disconnect_work);
+
+ tbnet_tear_down(net, false);
+}
+
static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf,
const struct thunderbolt_ip_frame_header *hdr)
{
@@ -881,6 +891,7 @@ static int tbnet_stop(struct net_device *dev)
napi_disable(&net->napi);
+ cancel_work_sync(&net->disconnect_work);
tbnet_tear_down(net, true);
tb_ring_free(net->rx_ring.ring);
@@ -1195,6 +1206,7 @@ static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id)
net = netdev_priv(dev);
INIT_DELAYED_WORK(&net->login_work, tbnet_login_work);
INIT_WORK(&net->connected_work, tbnet_connected_work);
+ INIT_WORK(&net->disconnect_work, tbnet_disconnect_work);
mutex_init(&net->connection_lock);
atomic_set(&net->command_id, 0);
atomic_set(&net->frame_id, 0);
@@ -1270,10 +1282,7 @@ static int __maybe_unused tbnet_suspend(struct device *dev)
stop_login(net);
if (netif_running(net->dev)) {
netif_device_detach(net->dev);
- tb_ring_stop(net->rx_ring.ring);
- tb_ring_stop(net->tx_ring.ring);
- tbnet_free_buffers(&net->rx_ring);
- tbnet_free_buffers(&net->tx_ring);
+ tbnet_tear_down(net, true);
}
return 0;
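The ThunderboltIP LOGOUT handler must not tear the connection down inline in the protocol-callback context, so it queues disconnect_work instead, and tbnet_stop() cancels that work synchronously before its own teardown so the two paths cannot race. A minimal sketch of the defer-and-cancel pattern with illustrative names:

#include <linux/workqueue.h>

struct example_net {
	struct work_struct disconnect_work;	/* INIT_WORK() at probe time */
	/* ... rings, locks, ... */
};

static void example_disconnect_work(struct work_struct *work)
{
	struct example_net *net =
		container_of(work, struct example_net, disconnect_work);

	/* tear down DMA paths; sleeping is fine in work context */
}

static void example_on_logout(struct example_net *net)
{
	/* protocol callback: defer rather than tear down inline */
	queue_work(system_long_wq, &net->disconnect_work);
}

static void example_stop(struct example_net *net)
{
	/* make sure no deferred teardown is still in flight */
	cancel_work_sync(&net->disconnect_work);
	/* ... final teardown ... */
}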
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 81e6cc951e7f..a1ba262f40ad 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -78,6 +78,7 @@
#include <linux/mutex.h>
#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
/* Uncomment to enable debugging */
/* #define TUN_DEBUG 1 */
@@ -181,7 +182,6 @@ struct tun_file {
struct tun_struct *detached;
struct ptr_ring tx_ring;
struct xdp_rxq_info xdp_rxq;
- int xdp_pending_pkts;
};
struct tun_flow_entry {
@@ -656,7 +656,7 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
return tun;
}
-static void tun_ptr_free(void *ptr)
+void tun_ptr_free(void *ptr)
{
if (!ptr)
return;
@@ -668,6 +668,7 @@ static void tun_ptr_free(void *ptr)
__skb_array_destroy_skb(ptr);
}
}
+EXPORT_SYMBOL_GPL(tun_ptr_free);
static void tun_queue_purge(struct tun_file *tfile)
{
@@ -1489,27 +1490,23 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
skb->truesize += skb->data_len;
for (i = 1; i < it->nr_segs; i++) {
+ struct page_frag *pfrag = &current->task_frag;
size_t fragsz = it->iov[i].iov_len;
- unsigned long offset;
- struct page *page;
- void *data;
if (fragsz == 0 || fragsz > PAGE_SIZE) {
err = -EINVAL;
goto free;
}
- local_bh_disable();
- data = napi_alloc_frag(fragsz);
- local_bh_enable();
- if (!data) {
+ if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
err = -ENOMEM;
goto free;
}
- page = virt_to_head_page(data);
- offset = data - page_address(page);
- skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+ skb_fill_page_desc(skb, i - 1, pfrag->page,
+ pfrag->offset, fragsz);
+ page_ref_inc(pfrag->page);
+ pfrag->offset += fragsz;
}
return skb;
@@ -1617,7 +1614,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
unsigned int delta = 0;
char *buf;
size_t copied;
- bool xdp_xmit = false;
int err, pad = TUN_RX_PAD;
rcu_read_lock();
@@ -1647,6 +1643,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
else
*skb_xdp = 0;
+ preempt_disable();
rcu_read_lock();
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog && !*skb_xdp) {
@@ -1666,15 +1663,22 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
case XDP_REDIRECT:
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
- ++tfile->xdp_pending_pkts;
err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+ xdp_do_flush_map();
if (err)
goto err_redirect;
rcu_read_unlock();
+ preempt_enable();
return NULL;
case XDP_TX:
- xdp_xmit = true;
- /* fall through */
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+ if (tun_xdp_xmit(tun->dev, &xdp))
+ goto err_redirect;
+ tun_xdp_flush(tun->dev);
+ rcu_read_unlock();
+ preempt_enable();
+ return NULL;
case XDP_PASS:
delta = orig_data - xdp.data;
break;
@@ -1692,6 +1696,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
skb = build_skb(buf, buflen);
if (!skb) {
rcu_read_unlock();
+ preempt_enable();
return ERR_PTR(-ENOMEM);
}
@@ -1700,14 +1705,8 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
- if (xdp_xmit) {
- skb->dev = tun->dev;
- generic_xdp_tx(skb, xdp_prog);
- rcu_read_unlock();
- return NULL;
- }
-
rcu_read_unlock();
+ preempt_enable();
return skb;
@@ -1715,6 +1714,7 @@ err_redirect:
put_page(alloc_frag->page);
err_xdp:
rcu_read_unlock();
+ preempt_enable();
this_cpu_inc(tun->pcpu_stats->rx_dropped);
return NULL;
}
@@ -1988,11 +1988,6 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
result = tun_get_user(tun, tfile, NULL, from,
file->f_flags & O_NONBLOCK, false);
- if (tfile->xdp_pending_pkts) {
- tfile->xdp_pending_pkts = 0;
- xdp_do_flush_map();
- }
-
tun_put(tun);
return result;
}
@@ -2290,11 +2285,67 @@ static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
return -EINVAL;
}
+static size_t tun_get_size(const struct net_device *dev)
+{
+ BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
+ BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
+
+ return nla_total_size(sizeof(uid_t)) + /* OWNER */
+ nla_total_size(sizeof(gid_t)) + /* GROUP */
+ nla_total_size(sizeof(u8)) + /* TYPE */
+ nla_total_size(sizeof(u8)) + /* PI */
+ nla_total_size(sizeof(u8)) + /* VNET_HDR */
+ nla_total_size(sizeof(u8)) + /* PERSIST */
+ nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
+ nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
+ nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
+ 0;
+}
+
+static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
+ goto nla_put_failure;
+ if (uid_valid(tun->owner) &&
+ nla_put_u32(skb, IFLA_TUN_OWNER,
+ from_kuid_munged(current_user_ns(), tun->owner)))
+ goto nla_put_failure;
+ if (gid_valid(tun->group) &&
+ nla_put_u32(skb, IFLA_TUN_GROUP,
+ from_kgid_munged(current_user_ns(), tun->group)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
+ !!(tun->flags & IFF_MULTI_QUEUE)))
+ goto nla_put_failure;
+ if (tun->flags & IFF_MULTI_QUEUE) {
+ if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
+ tun->numdisabled))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static struct rtnl_link_ops tun_link_ops __read_mostly = {
.kind = DRV_NAME,
.priv_size = sizeof(struct tun_struct),
.setup = tun_setup,
.validate = tun_validate,
+ .get_size = tun_get_size,
+ .fill_info = tun_fill_info,
};
static void tun_sock_write_space(struct sock *sk)
@@ -2329,13 +2380,6 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
m->msg_flags & MSG_DONTWAIT,
m->msg_flags & MSG_MORE);
-
- if (tfile->xdp_pending_pkts >= NAPI_POLL_WEIGHT ||
- !(m->msg_flags & MSG_MORE)) {
- tfile->xdp_pending_pkts = 0;
- xdp_do_flush_map();
- }
-
tun_put(tun);
return ret;
}
@@ -2793,6 +2837,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
struct tun_struct *tun;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
+ struct net *net;
kuid_t owner;
kgid_t group;
int sndbuf;
@@ -2801,7 +2846,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
int le;
int ret;
- if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
+ if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
+ (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
if (copy_from_user(&ifr, argp, ifreq_len))
return -EFAULT;
} else {
@@ -2821,6 +2867,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
rtnl_lock();
tun = tun_get(tfile);
+ net = sock_net(&tfile->sk);
if (cmd == TUNSETIFF) {
ret = -EEXIST;
if (tun)
@@ -2828,7 +2875,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ifr.ifr_name[IFNAMSIZ-1] = '\0';
- ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
+ ret = tun_set_iff(net, file, &ifr);
if (ret)
goto unlock;
@@ -2850,6 +2897,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
tfile->ifindex = ifindex;
goto unlock;
}
+ if (cmd == SIOCGSKNS) {
+ ret = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto unlock;
+
+ ret = open_related_ns(&net->ns, get_net_ns);
+ goto unlock;
+ }
ret = -EBADFD;
if (!tun)
@@ -3167,7 +3222,6 @@ static int tun_chr_open(struct inode *inode, struct file * file)
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
- tfile->xdp_pending_pkts = 0;
return 0;
}
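tun_napi_alloc_frags() above switches from per-call napi_alloc_frag() pages to the task's page_frag allocator. The refill-attach-reference sequence, isolated into a hypothetical helper:

#include <linux/sched.h>
#include <linux/skbuff.h>

static int example_add_frag(struct sk_buff *skb, int i, size_t fragsz)
{
	struct page_frag *pfrag = &current->task_frag;

	if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL))
		return -ENOMEM;

	skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, fragsz);
	page_ref_inc(pfrag->page);	/* the skb holds its own reference */
	pfrag->offset += fragsz;	/* advance past the consumed chunk */

	return 0;
}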
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index f32261ecd215..fb1b78d4b9ef 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1223,7 +1223,7 @@ static int ax88179_led_setting(struct usbnet *dev)
return 0;
}
-static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
+static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
{
u8 buf[5];
u16 *tmp16;
@@ -1231,12 +1231,11 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
struct ethtool_eee eee_data;
- usbnet_get_endpoints(dev, intf);
-
tmp16 = (u16 *)buf;
tmp = (u8 *)buf;
- memset(ax179_data, 0, sizeof(*ax179_data));
+ if (!do_reset)
+ memset(ax179_data, 0, sizeof(*ax179_data));
/* Power up ethernet PHY */
*tmp16 = 0;
@@ -1249,9 +1248,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp);
msleep(100);
+ if (do_reset)
+ ax88179_auto_detach(dev, 0);
+
ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
ETH_ALEN, dev->net->dev_addr);
- memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
+ if (!do_reset)
+ memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
/* RX bulk configuration */
memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
@@ -1266,19 +1269,21 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH,
1, 1, tmp);
- dev->net->netdev_ops = &ax88179_netdev_ops;
- dev->net->ethtool_ops = &ax88179_ethtool_ops;
- dev->net->needed_headroom = 8;
- dev->net->max_mtu = 4088;
-
- /* Initialize MII structure */
- dev->mii.dev = dev->net;
- dev->mii.mdio_read = ax88179_mdio_read;
- dev->mii.mdio_write = ax88179_mdio_write;
- dev->mii.phy_id_mask = 0xff;
- dev->mii.reg_num_mask = 0xff;
- dev->mii.phy_id = 0x03;
- dev->mii.supports_gmii = 1;
+ if (!do_reset) {
+ dev->net->netdev_ops = &ax88179_netdev_ops;
+ dev->net->ethtool_ops = &ax88179_ethtool_ops;
+ dev->net->needed_headroom = 8;
+ dev->net->max_mtu = 4088;
+
+ /* Initialize MII structure */
+ dev->mii.dev = dev->net;
+ dev->mii.mdio_read = ax88179_mdio_read;
+ dev->mii.mdio_write = ax88179_mdio_write;
+ dev->mii.phy_id_mask = 0xff;
+ dev->mii.reg_num_mask = 0xff;
+ dev->mii.phy_id = 0x03;
+ dev->mii.supports_gmii = 1;
+ }
dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_RXCSUM;
@@ -1330,6 +1335,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
return 0;
}
+static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+ usbnet_get_endpoints(dev, intf);
+
+ return ax88179_link_bind_or_reset(dev, false);
+}
+
static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf)
{
u16 tmp16;
@@ -1458,74 +1470,7 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
static int ax88179_link_reset(struct usbnet *dev)
{
- struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
- u8 tmp[5], link_sts;
- u16 mode, tmp16, delay = HZ / 10;
- u32 tmp32 = 0x40000000;
- unsigned long jtimeout;
-
- jtimeout = jiffies + delay;
- while (tmp32 & 0x40000000) {
- mode = 0;
- ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &mode);
- ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2,
- &ax179_data->rxctl);
-
- /*link up, check the usb device control TX FIFO full or empty*/
- ax88179_read_cmd(dev, 0x81, 0x8c, 0, 4, &tmp32);
-
- if (time_after(jiffies, jtimeout))
- return 0;
- }
-
- mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN |
- AX_MEDIUM_RXFLOW_CTRLEN;
-
- ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS,
- 1, 1, &link_sts);
-
- ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID,
- GMII_PHY_PHYSR, 2, &tmp16);
-
- if (!(tmp16 & GMII_PHY_PHYSR_LINK)) {
- return 0;
- } else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
- mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ;
- if (dev->net->mtu > 1500)
- mode |= AX_MEDIUM_JUMBO_EN;
-
- if (link_sts & AX_USB_SS)
- memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
- else if (link_sts & AX_USB_HS)
- memcpy(tmp, &AX88179_BULKIN_SIZE[1], 5);
- else
- memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
- } else if (GMII_PHY_PHYSR_100 == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
- mode |= AX_MEDIUM_PS;
-
- if (link_sts & (AX_USB_SS | AX_USB_HS))
- memcpy(tmp, &AX88179_BULKIN_SIZE[2], 5);
- else
- memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
- } else {
- memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
- }
-
- /* RX bulk configuration */
- ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp);
-
- dev->rx_urb_size = (1024 * (tmp[3] + 2));
-
- if (tmp16 & GMII_PHY_PHYSR_FULL)
- mode |= AX_MEDIUM_FULL_DUPLEX;
- ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
- 2, 2, &mode);
-
- ax179_data->eee_enabled = ax88179_chk_eee(dev);
-
- netif_carrier_on(dev->net);
-
- return 0;
+ return ax88179_link_bind_or_reset(dev, true);
}
static int ax88179_reset(struct usbnet *dev)
@@ -1556,7 +1501,6 @@ static int ax88179_reset(struct usbnet *dev)
ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN,
dev->net->dev_addr);
- memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
/* RX bulk configuration */
memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
index f7180f8db39e..61ea4eaace5d 100644
--- a/drivers/net/usb/cdc_eem.c
+++ b/drivers/net/usb/cdc_eem.c
@@ -83,11 +83,8 @@ static int eem_bind(struct usbnet *dev, struct usb_interface *intf)
int status = 0;
status = usbnet_get_endpoints(dev, intf);
- if (status < 0) {
- usb_set_intfdata(intf, NULL);
- usb_driver_release_interface(driver_of(intf), intf);
+ if (status < 0)
return status;
- }
/* no jumbogram (16K) support for now */
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 05dca3e5c93d..fff4b13eece2 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -896,6 +896,12 @@ static const struct usb_device_id products[] = {
USB_CDC_PROTO_NONE),
.driver_info = (unsigned long)&wwan_info,
}, {
+ /* Cinterion PLS8 modem by GEMALTO */
+ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0061, USB_CLASS_COMM,
+ USB_CDC_SUBCLASS_ETHERNET,
+ USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&wwan_info,
+}, {
USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET,
USB_CDC_PROTO_NONE),
.driver_info = (unsigned long) &cdc_info,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9e1b74590682..90d07ed224d5 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -58,7 +58,7 @@ static bool prefer_mbim = true;
#else
static bool prefer_mbim;
#endif
-module_param(prefer_mbim, bool, S_IRUGO | S_IWUSR);
+module_param(prefer_mbim, bool, 0644);
MODULE_PARM_DESC(prefer_mbim, "Prefer MBIM setting on dual NCM/MBIM functions");
static void cdc_ncm_txpath_bh(unsigned long param);
@@ -281,10 +281,10 @@ static ssize_t cdc_ncm_store_tx_timer_usecs(struct device *d, struct device_att
return len;
}
-static DEVICE_ATTR(min_tx_pkt, S_IRUGO | S_IWUSR, cdc_ncm_show_min_tx_pkt, cdc_ncm_store_min_tx_pkt);
-static DEVICE_ATTR(rx_max, S_IRUGO | S_IWUSR, cdc_ncm_show_rx_max, cdc_ncm_store_rx_max);
-static DEVICE_ATTR(tx_max, S_IRUGO | S_IWUSR, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max);
-static DEVICE_ATTR(tx_timer_usecs, S_IRUGO | S_IWUSR, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs);
+static DEVICE_ATTR(min_tx_pkt, 0644, cdc_ncm_show_min_tx_pkt, cdc_ncm_store_min_tx_pkt);
+static DEVICE_ATTR(rx_max, 0644, cdc_ncm_show_rx_max, cdc_ncm_store_rx_max);
+static DEVICE_ATTR(tx_max, 0644, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max);
+static DEVICE_ATTR(tx_timer_usecs, 0644, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs);
static ssize_t ndp_to_end_show(struct device *d, struct device_attribute *attr, char *buf)
{
@@ -335,7 +335,7 @@ static ssize_t cdc_ncm_show_##name(struct device *d, struct device_attribute *at
struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; \
return sprintf(buf, format "\n", tocpu(ctx->ncm_parm.name)); \
} \
-static DEVICE_ATTR(name, S_IRUGO, cdc_ncm_show_##name, NULL)
+static DEVICE_ATTR(name, 0444, cdc_ncm_show_##name, NULL)
NCM_PARM_ATTR(bmNtbFormatsSupported, "0x%04x", le16_to_cpu);
NCM_PARM_ATTR(dwNtbInMaxSize, "%u", le32_to_cpu);
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 981c931a7a1f..e53883ad6107 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -519,7 +519,7 @@ static ssize_t hso_sysfs_show_porttype(struct device *dev,
return sprintf(buf, "%s\n", port_name);
}
-static DEVICE_ATTR(hsotype, S_IRUGO, hso_sysfs_show_porttype, NULL);
+static DEVICE_ATTR(hsotype, 0444, hso_sysfs_show_porttype, NULL);
static struct attribute *hso_serial_dev_attrs[] = {
&dev_attr_hsotype.attr,
@@ -3289,12 +3289,12 @@ MODULE_LICENSE("GPL");
/* change the debug level (eg: insmod hso.ko debug=0x04) */
MODULE_PARM_DESC(debug, "debug level mask [0x01 | 0x02 | 0x04 | 0x08 | 0x10]");
-module_param(debug, int, S_IRUGO | S_IWUSR);
+module_param(debug, int, 0644);
/* set the major tty number (eg: insmod hso.ko tty_major=245) */
MODULE_PARM_DESC(tty_major, "Set the major tty number");
-module_param(tty_major, int, S_IRUGO | S_IWUSR);
+module_param(tty_major, int, 0644);
/* disable network interface (eg: insmod hso.ko disable_net=1) */
MODULE_PARM_DESC(disable_net, "Disable the network interface");
-module_param(disable_net, int, S_IRUGO | S_IWUSR);
+module_param(disable_net, int, 0644);
diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c
index ce0b0b4e3a57..bd2ba3659028 100644
--- a/drivers/net/usb/kalmia.c
+++ b/drivers/net/usb/kalmia.c
@@ -114,14 +114,14 @@ kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr)
return -ENOMEM;
memcpy(usb_buf, init_msg_1, 12);
- status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_1)
- / sizeof(init_msg_1[0]), usb_buf, 24);
+ status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1),
+ usb_buf, 24);
if (status != 0)
return status;
memcpy(usb_buf, init_msg_2, 12);
- status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_2)
- / sizeof(init_msg_2[0]), usb_buf, 28);
+ status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2),
+ usb_buf, 28);
if (status != 0)
return status;
@@ -150,12 +150,8 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf)
dev->rx_urb_size = dev->hard_mtu * 10; // Found as optimal after testing
status = kalmia_init_and_get_ethernet_addr(dev, ethernet_addr);
-
- if (status) {
- usb_set_intfdata(intf, NULL);
- usb_driver_release_interface(driver_of(intf), intf);
+ if (status)
return status;
- }
memcpy(dev->net->dev_addr, ethernet_addr, ETH_ALEN);
diff --git a/drivers/net/usb/lg-vl600.c b/drivers/net/usb/lg-vl600.c
index dbabd7ca5268..257916f172cd 100644
--- a/drivers/net/usb/lg-vl600.c
+++ b/drivers/net/usb/lg-vl600.c
@@ -157,12 +157,8 @@ static int vl600_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
s->current_rx_buf = skb_copy_expand(skb, 0,
le32_to_cpup(&frame->len), GFP_ATOMIC);
- if (!s->current_rx_buf) {
- netif_err(dev, ifup, dev->net, "Reserving %i bytes "
- "for packet assembly failed.\n",
- le32_to_cpup(&frame->len));
+ if (!s->current_rx_buf)
dev->net->stats.rx_errors++;
- }
return 0;
}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 958b2e8b90f6..86f7196f9d91 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1794,7 +1794,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
tx_data += len;
agg->skb_len += len;
- agg->skb_num++;
+ agg->skb_num += skb_shinfo(skb)->gso_segs ?: 1;
dev_kfree_skb_any(skb);
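For a TSO skb, skb_shinfo(skb)->gso_segs holds the number of segments the hardware will actually put on the wire, so the aggregate's packet count must grow by that many; plain skbs carry gso_segs == 0 and still count as one. The GNU ?: shorthand above returns its left operand when it is nonzero; spelled out in plain C:

u16 segs = skb_shinfo(skb)->gso_segs;
agg->skb_num += segs ? segs : 1;   /* non-GSO packets count once */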
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index d0a113743195..7a6a1fe79309 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -954,10 +954,11 @@ static int smsc75xx_set_features(struct net_device *netdev,
/* it's racing here! */
ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
- if (ret < 0)
+ if (ret < 0) {
netdev_warn(dev->net, "Error writing RFE_CTL\n");
-
- return ret;
+ return ret;
+ }
+ return 0;
}
static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 8a22ff67b026..d9eea8cfe6cb 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -315,6 +315,7 @@ static void __usbnet_status_stop_force(struct usbnet *dev)
void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
{
struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64);
+ unsigned long flags;
int status;
if (test_bit(EVENT_RX_PAUSED, &dev->flags)) {
@@ -326,10 +327,10 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
if (skb->protocol == 0)
skb->protocol = eth_type_trans (skb, dev->net);
- u64_stats_update_begin(&stats64->syncp);
+ flags = u64_stats_update_begin_irqsave(&stats64->syncp);
stats64->rx_packets++;
stats64->rx_bytes += skb->len;
- u64_stats_update_end(&stats64->syncp);
+ u64_stats_update_end_irqrestore(&stats64->syncp, flags);
netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n",
skb->len + sizeof (struct ethhdr), skb->protocol);
@@ -1248,11 +1249,12 @@ static void tx_complete (struct urb *urb)
if (urb->status == 0) {
struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64);
+ unsigned long flags;
- u64_stats_update_begin(&stats64->syncp);
+ flags = u64_stats_update_begin_irqsave(&stats64->syncp);
stats64->tx_packets += entry->packets;
stats64->tx_bytes += entry->length;
- u64_stats_update_end(&stats64->syncp);
+ u64_stats_update_end_irqrestore(&stats64->syncp, flags);
} else {
dev->net->stats.tx_errors++;
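Both stats updates can run in contexts that interrupt each other, and on 32-bit kernels the syncp seqcount must not be re-entered mid-update, which is exactly what the _irqsave/_irqrestore variants guarantee. For completeness, a sketch of the matching reader side of the u64_stats_sync API as it existed at the time:

unsigned int start;
u64 packets, bytes;

do {
	start = u64_stats_fetch_begin_irq(&stats64->syncp);
	packets = stats64->rx_packets;
	bytes = stats64->rx_bytes;
} while (u64_stats_fetch_retry_irq(&stats64->syncp, start));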
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 626c27352ae2..7b187ec7411e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -443,12 +443,8 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
- if (unlikely(err)) {
- struct page *page = virt_to_head_page(xdp->data);
-
- put_page(page);
- return false;
- }
+ if (unlikely(err))
+ return false; /* Caller handles free/refcnt */
return true;
}
@@ -456,8 +452,18 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
{
struct virtnet_info *vi = netdev_priv(dev);
- bool sent = __virtnet_xdp_xmit(vi, xdp);
+ struct receive_queue *rq = vi->rq;
+ struct bpf_prog *xdp_prog;
+ bool sent;
+
+ /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+ * indicates XDP resources have been successfully allocated.
+ */
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+ if (!xdp_prog)
+ return -ENXIO;
+ sent = __virtnet_xdp_xmit(vi, xdp);
if (!sent)
return -ENOSPC;
return 0;
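A bare rcu_dereference() is safe here because ndo_xdp_xmit is only invoked from XDP receive/redirect paths that already run under rcu_read_lock(), and the presence of a program doubles as proof that the XDP TX resources were allocated. The pattern, sketched with the locking made explicit:

rcu_read_lock();                          /* held by the caller in the real path */
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog)                             /* resources live as long as the prog */
	sent = __virtnet_xdp_xmit(vi, xdp);
rcu_read_unlock();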
@@ -498,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
page_off += *len;
while (--*num_buf) {
+ int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
unsigned int buflen;
void *buf;
int off;
@@ -512,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
/* guard against a misconfigured or uncooperative backend that
* is sending packets larger than the MTU.
*/
- if ((page_off + buflen) > PAGE_SIZE) {
+ if ((page_off + buflen + tailroom) > PAGE_SIZE) {
put_page(p);
goto err_buf;
}
@@ -546,8 +553,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct page *page = virt_to_head_page(buf);
- unsigned int delta = 0, err;
+ unsigned int delta = 0;
struct page *xdp_page;
+ bool sent;
+ int err;
+
len -= vi->hdr_len;
rcu_read_lock();
@@ -558,7 +568,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
void *orig_data;
u32 act;
- if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+ if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
@@ -596,16 +606,19 @@ static struct sk_buff *receive_small(struct net_device *dev,
delta = orig_data - xdp.data;
break;
case XDP_TX:
- if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+ sent = __virtnet_xdp_xmit(vi, &xdp);
+ if (unlikely(!sent)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
- else
- *xdp_xmit = true;
+ goto err_xdp;
+ }
+ *xdp_xmit = true;
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
err = xdp_do_redirect(dev, &xdp, xdp_prog);
- if (!err)
- *xdp_xmit = true;
+ if (err)
+ goto err_xdp;
+ *xdp_xmit = true;
rcu_read_unlock();
goto xdp_xmit;
default:
@@ -677,6 +690,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct bpf_prog *xdp_prog;
unsigned int truesize;
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+ bool sent;
int err;
head_skb = NULL;
@@ -689,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
void *data;
u32 act;
- /* This happens when rx buffer size is underestimated */
+ /* This happens when rx buffer size is underestimated
+ * or headroom is not enough because the buffer
+ * was refilled before XDP was set. This should only
+ * happen for the first several packets, so we don't
+ * care much about its performance.
+ */
if (unlikely(num_buf > 1 ||
headroom < virtnet_get_headroom(vi))) {
/* linearize data for XDP */
@@ -724,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
act = bpf_prog_run_xdp(xdp_prog, &xdp);
- if (act != XDP_PASS)
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
switch (act) {
case XDP_PASS:
/* recalculate offset to account for any header
@@ -746,18 +762,28 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
break;
case XDP_TX:
- if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+ sent = __virtnet_xdp_xmit(vi, &xdp);
+ if (unlikely(!sent)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
- else
- *xdp_xmit = true;
+ if (unlikely(xdp_page != page))
+ put_page(xdp_page);
+ goto err_xdp;
+ }
+ *xdp_xmit = true;
if (unlikely(xdp_page != page))
goto err_xdp;
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
err = xdp_do_redirect(dev, &xdp, xdp_prog);
- if (!err)
- *xdp_xmit = true;
+ if (err) {
+ if (unlikely(xdp_page != page))
+ put_page(xdp_page);
+ goto err_xdp;
+ }
+ *xdp_xmit = true;
+ if (unlikely(xdp_page != page))
+ goto err_xdp;
rcu_read_unlock();
goto xdp_xmit;
default:
@@ -1003,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
}
static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
- struct ewma_pkt_len *avg_pkt_len)
+ struct ewma_pkt_len *avg_pkt_len,
+ unsigned int room)
{
const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
unsigned int len;
- len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+ if (room)
+ return PAGE_SIZE - room;
+
+ len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
rq->min_buf_len, PAGE_SIZE - hdr_len);
+
return ALIGN(len, L1_CACHE_BYTES);
}
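With XDP enabled, room is nonzero and each buffer becomes a full page minus the reserved head- and tailroom, guaranteeing XDP a linear, page-backed frame; otherwise the EWMA estimate still sizes the buffer. A worked example under assumed x86-64 values:

/* hypothetical: PAGE_SIZE 4096, 256 bytes of XDP headroom */
unsigned int headroom = 256;
unsigned int tailroom = sizeof(struct skb_shared_info);  /* ~320 on x86-64 */
unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); /* 576 with those sizes */
unsigned int len = 4096 - room;                          /* one 3520-byte buffer per page */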
@@ -1018,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
{
struct page_frag *alloc_frag = &rq->alloc_frag;
unsigned int headroom = virtnet_get_headroom(vi);
+ unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+ unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
char *buf;
void *ctx;
int err;
unsigned int len, hole;
- len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
- if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+ /* Extra tailroom is needed to satisfy XDP's assumption. This
+ * means rx frags coalescing won't work, but since we've
+ * disabled GSO for XDP, it won't be a big issue.
+ */
+ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+ if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
return -ENOMEM;
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
buf += headroom; /* advance address leaving hole at front of pkt */
get_page(alloc_frag->page);
- alloc_frag->offset += len + headroom;
+ alloc_frag->offset += len + room;
hole = alloc_frag->size - alloc_frag->offset;
- if (hole < len + headroom) {
+ if (hole < len + room) {
/* To avoid internal fragmentation, if there is very likely not
* enough space for another buffer, add the remaining space to
* the current buffer.
@@ -2175,8 +2212,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}
/* Make sure NAPI is not using any XDP TX queues for RX. */
- for (i = 0; i < vi->max_queue_pairs; i++)
- napi_disable(&vi->rq[i].napi);
+ if (netif_running(dev))
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ napi_disable(&vi->rq[i].napi);
netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
@@ -2195,7 +2233,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}
if (old_prog)
bpf_prog_put(old_prog);
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+ if (netif_running(dev))
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
}
return 0;
@@ -2566,12 +2605,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
{
struct virtnet_info *vi = netdev_priv(queue->dev);
unsigned int queue_index = get_netdev_rx_queue_index(queue);
+ unsigned int headroom = virtnet_get_headroom(vi);
+ unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
struct ewma_pkt_len *avg;
BUG_ON(queue_index >= vi->max_queue_pairs);
avg = &vi->rq[queue_index].mrg_avg_pkt_len;
return sprintf(buf, "%u\n",
- get_mergeable_buf_len(&vi->rq[queue_index], avg));
+ get_mergeable_buf_len(&vi->rq[queue_index], avg,
+ SKB_DATA_ALIGN(headroom + tailroom)));
}
static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
@@ -2815,8 +2857,8 @@ static int virtnet_probe(struct virtio_device *vdev)
/* Assume link up if device can't report link status,
otherwise get link status from config. */
+ netif_carrier_off(dev);
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
- netif_carrier_off(dev);
schedule_work(&vi->config_work);
} else {
vi->status = VIRTIO_NET_S_LINK_UP;
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 8b39c160743d..e04937f44f33 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -977,6 +977,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
{
int ret;
u32 count;
+ int num_pkts;
+ int tx_num_deferred;
unsigned long flags;
struct vmxnet3_tx_ctx ctx;
union Vmxnet3_GenericDesc *gdesc;
@@ -1075,12 +1077,12 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
#else
gdesc = ctx.sop_txd;
#endif
+ tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
if (ctx.mss) {
gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
gdesc->txd.om = VMXNET3_OM_TSO;
gdesc->txd.msscof = ctx.mss;
- le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
- gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
+ num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss;
} else {
if (skb->ip_summed == CHECKSUM_PARTIAL) {
gdesc->txd.hlen = ctx.eth_ip_hdr_size;
@@ -1091,8 +1093,10 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
gdesc->txd.om = 0;
gdesc->txd.msscof = 0;
}
- le32_add_cpu(&tq->shared->txNumDeferred, 1);
+ num_pkts = 1;
}
+ le32_add_cpu(&tq->shared->txNumDeferred, num_pkts);
+ tx_num_deferred += num_pkts;
if (skb_vlan_tag_present(skb)) {
gdesc->txd.ti = 1;
@@ -1118,8 +1122,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
spin_unlock_irqrestore(&tq->tx_lock, flags);
- if (le32_to_cpu(tq->shared->txNumDeferred) >=
- le32_to_cpu(tq->shared->txThreshold)) {
+ if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) {
tq->shared->txNumDeferred = 0;
VMXNET3_WRITE_BAR0_REG(adapter,
VMXNET3_REG_TXPROD + tq->qid * 8,
@@ -1470,7 +1473,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
vmxnet3_rx_csum(adapter, skb,
(union Vmxnet3_GenericDesc *)rcd);
skb->protocol = eth_type_trans(skb, adapter->netdev);
- if (!rcd->tcp || !adapter->lro)
+ if (!rcd->tcp ||
+ !(adapter->netdev->features & NETIF_F_LRO))
goto not_lro;
if (segCnt != 0 && mss != 0) {
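In the TSO branch earlier in this file, num_pkts is an integer ceiling division: the TCP payload split into MSS-sized pieces, rounded up, via the standard (x + d - 1) / d idiom (DIV_ROUND_UP in kernel terms). A worked example with hypothetical numbers:

u32 skb_len = 9014, hlen = 54, mss = 1460;       /* assumed jumbo TSO frame */
u32 num_pkts = (skb_len - hlen + mss - 1) / mss; /* (8960 + 1459) / 1460 == 7 */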
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 5ba222920e80..59ec34052a65 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.4.11.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.4.13.0-k"
/* a 32-bit int, each byte encodes a version number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01040b00
+#define VMXNET3_DRIVER_VERSION_NUM 0x01040d00
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
@@ -342,9 +342,6 @@ struct vmxnet3_adapter {
u8 __iomem *hw_addr1; /* for BAR 1 */
u8 version;
- bool rxcsum;
- bool lro;
-
#ifdef VMXNET3_RSS
struct UPT1_RSSConf *rss_conf;
bool rss;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 139c61c8244a..102582459bef 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -736,7 +736,6 @@ static int vrf_rtable_create(struct net_device *dev)
return -ENOMEM;
rth->dst.output = vrf_output;
- rth->rt_table_id = vrf->tb_id;
rcu_assign_pointer(vrf->rth, rth);
@@ -942,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
const struct net_device *dev,
struct flowi6 *fl6,
int ifindex,
+ const struct sk_buff *skb,
int flags)
{
struct net_vrf *vrf = netdev_priv(dev);
@@ -960,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
if (!table)
return NULL;
- return ip6_pol_route(net, table, ifindex, fl6, flags);
+ return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
}
static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
@@ -978,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
struct net *net = dev_net(vrf_dev);
struct rt6_info *rt6;
- rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+ rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
if (unlikely(!rt6))
return;
@@ -1111,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
if (!ipv6_addr_any(&fl6->saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+ rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
if (rt)
dst = &rt->dst;
@@ -1146,6 +1146,7 @@ static inline size_t vrf_fib_rule_nl_size(void)
sz = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
sz += nla_total_size(sizeof(u8)); /* FRA_L3MDEV */
sz += nla_total_size(sizeof(u32)); /* FRA_PRIORITY */
+ sz += nla_total_size(sizeof(u8)); /* FRA_PROTOCOL */
return sz;
}
@@ -1176,6 +1177,9 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
frh->family = family;
frh->action = FR_ACT_TO_TBL;
+ if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL))
+ goto nla_put_failure;
+
if (nla_put_u8(skb, FRA_L3MDEV, 1))
goto nla_put_failure;
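Every attribute emitted in vrf_fib_rule() must be mirrored in vrf_fib_rule_nl_size(), which is why FRA_PROTOCOL touches both places. The sizing arithmetic for a u8 attribute, worked out:

/* nla_total_size(n) == NLA_ALIGN(NLA_HDRLEN + n), with NLA_HDRLEN == 4 */
size_t one_u8 = NLA_ALIGN(4 + sizeof(u8));  /* rounds 5 up to 8 bytes */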
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index afeca6bcdade..ab8b3cbbb205 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -574,7 +574,10 @@ static void ppp_timer(struct timer_list *t)
ppp_cp_event(proto->dev, proto->pid, TO_GOOD, 0, 0,
0, NULL);
proto->restart_counter--;
- } else
+ } else if (netif_carrier_ok(proto->dev))
+ ppp_cp_event(proto->dev, proto->pid, TO_GOOD, 0, 0,
+ 0, NULL);
+ else
ppp_cp_event(proto->dev, proto->pid, TO_BAD, 0, 0,
0, NULL);
break;
diff --git a/drivers/net/wimax/i2400m/usb-rx.c b/drivers/net/wimax/i2400m/usb-rx.c
index b78ee676e102..5b64bda7d9e7 100644
--- a/drivers/net/wimax/i2400m/usb-rx.c
+++ b/drivers/net/wimax/i2400m/usb-rx.c
@@ -263,9 +263,6 @@ retry:
new_skb = skb_copy_expand(rx_skb, 0, rx_size - rx_skb->len,
GFP_KERNEL);
if (new_skb == NULL) {
- if (printk_ratelimit())
- dev_err(dev, "RX: Can't reallocate skb to %d; "
- "RX dropped\n", rx_size);
kfree_skb(rx_skb);
rx_skb = NULL;
goto out; /* drop it...*/
diff --git a/drivers/net/wireless/admtek/Kconfig b/drivers/net/wireless/admtek/Kconfig
index d5a2dc728078..9317367e37f0 100644
--- a/drivers/net/wireless/admtek/Kconfig
+++ b/drivers/net/wireless/admtek/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ADMTEK
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_ADMTEK
diff --git a/drivers/net/wireless/ath/Kconfig b/drivers/net/wireless/ath/Kconfig
index 44b2470af81d..82ab7c33cf97 100644
--- a/drivers/net/wireless/ath/Kconfig
+++ b/drivers/net/wireless/ath/Kconfig
@@ -8,8 +8,8 @@ config WLAN_VENDOR_ATH
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
For more information and documentation on this module you can visit:
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index e89e5ef2c2a4..f246e9ed4a81 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -729,6 +729,7 @@ static void ath9k_set_hw_capab(struct ath9k_htc_priv *priv,
ieee80211_hw_set(hw, SPECTRUM_MGMT);
ieee80211_hw_set(hw, SIGNAL_DBM);
ieee80211_hw_set(hw, AMPDU_AGGREGATION);
+ ieee80211_hw_set(hw, DOESNT_SUPPORT_QOS_NDP);
if (ath9k_ps_enable)
ieee80211_hw_set(hw, SUPPORTS_PS);
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 768f63f38341..b799a5384abb 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -1599,7 +1599,8 @@ static void wil_probe_client_handle(struct wil6210_priv *wil,
*/
bool alive = (sta->status == wil_sta_connected);
- cfg80211_probe_status(ndev, sta->addr, req->cookie, alive, GFP_KERNEL);
+ cfg80211_probe_status(ndev, sta->addr, req->cookie, alive,
+ 0, false, GFP_KERNEL);
}
static struct list_head *next_probe_client(struct wil6210_priv *wil)
diff --git a/drivers/net/wireless/atmel/Kconfig b/drivers/net/wireless/atmel/Kconfig
index a43cfd163254..3e684f8c1f93 100644
--- a/drivers/net/wireless/atmel/Kconfig
+++ b/drivers/net/wireless/atmel/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ATMEL
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_ATMEL
diff --git a/drivers/net/wireless/broadcom/Kconfig b/drivers/net/wireless/broadcom/Kconfig
index d3651ceb5046..eebe2864835f 100644
--- a/drivers/net/wireless/broadcom/Kconfig
+++ b/drivers/net/wireless/broadcom/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_BROADCOM
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_BROADCOM
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
index 0b76a615708e..0b90a63bdeb1 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
@@ -253,7 +253,6 @@ void brcmf_dev_reset(struct device *dev);
/* Configure the "global" bus state used by upper layers */
void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state);
-int brcmf_bus_started(struct device *dev);
s32 brcmf_iovar_data_set(struct device *dev, char *name, void *data, u32 len);
void brcmf_bus_add_txhdrlen(struct device *dev, uint len);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 15fa00d79fc6..74a83020c073 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -5124,6 +5124,9 @@ static int brcmf_cfg80211_set_pmk(struct wiphy *wiphy, struct net_device *dev,
if (WARN_ON(ifp->vif->profile.use_fwsup != BRCMF_PROFILE_FWSUP_1X))
return -EINVAL;
+ if (conf->pmk_len > BRCMF_WSEC_MAX_PSK_LEN)
+ return -ERANGE;
+
return brcmf_set_pmk(ifp, conf->pmk, conf->pmk_len);
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 9be0b051066a..70ef9835b647 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -365,9 +365,6 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
/* Enable tx beamforming, errors can be ignored (not supported) */
(void)brcmf_fil_iovar_int_set(ifp, "txbf", 1);
-
- /* do bus specific preinit here */
- err = brcmf_bus_preinit(ifp->drvr->bus_if);
done:
return err;
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 930e423f83a8..19048526b4af 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -914,55 +914,6 @@ static int brcmf_inet6addr_changed(struct notifier_block *nb,
}
#endif
-int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
-{
- struct brcmf_pub *drvr = NULL;
- int ret = 0;
- int i;
-
- brcmf_dbg(TRACE, "Enter\n");
-
- /* Allocate primary brcmf_info */
- drvr = kzalloc(sizeof(struct brcmf_pub), GFP_ATOMIC);
- if (!drvr)
- return -ENOMEM;
-
- for (i = 0; i < ARRAY_SIZE(drvr->if2bss); i++)
- drvr->if2bss[i] = BRCMF_BSSIDX_INVALID;
-
- mutex_init(&drvr->proto_block);
-
- /* Link to bus module */
- drvr->hdrlen = 0;
- drvr->bus_if = dev_get_drvdata(dev);
- drvr->bus_if->drvr = drvr;
- drvr->settings = settings;
-
- /* attach debug facilities */
- brcmf_debug_attach(drvr);
-
- /* Attach and link in the protocol */
- ret = brcmf_proto_attach(drvr);
- if (ret != 0) {
- brcmf_err("brcmf_prot_attach failed\n");
- goto fail;
- }
-
- /* Attach to events important for core code */
- brcmf_fweh_register(drvr, BRCMF_E_PSM_WATCHDOG,
- brcmf_psm_watchdog_notify);
-
- /* attach firmware event handler */
- brcmf_fweh_attach(drvr);
-
- return ret;
-
-fail:
- brcmf_detach(dev);
-
- return ret;
-}
-
static int brcmf_revinfo_read(struct seq_file *s, void *data)
{
struct brcmf_bus *bus_if = dev_get_drvdata(s->private);
@@ -993,11 +944,10 @@ static int brcmf_revinfo_read(struct seq_file *s, void *data)
return 0;
}
-int brcmf_bus_started(struct device *dev)
+static int brcmf_bus_started(struct brcmf_pub *drvr)
{
int ret = -1;
- struct brcmf_bus *bus_if = dev_get_drvdata(dev);
- struct brcmf_pub *drvr = bus_if->drvr;
+ struct brcmf_bus *bus_if = drvr->bus_if;
struct brcmf_if *ifp;
struct brcmf_if *p2p_ifp;
@@ -1013,6 +963,11 @@ int brcmf_bus_started(struct device *dev)
/* signal bus ready */
brcmf_bus_change_state(bus_if, BRCMF_BUS_UP);
+ /* do bus specific preinit here */
+ ret = brcmf_bus_preinit(bus_if);
+ if (ret < 0)
+ goto fail;
+
/* Bus is ready, do any initialization */
ret = brcmf_c_preinit_dcmds(ifp);
if (ret < 0)
@@ -1088,6 +1043,60 @@ fail:
return ret;
}
+int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
+{
+ struct brcmf_pub *drvr = NULL;
+ int ret = 0;
+ int i;
+
+ brcmf_dbg(TRACE, "Enter\n");
+
+ /* Allocate primary brcmf_info */
+ drvr = kzalloc(sizeof(*drvr), GFP_ATOMIC);
+ if (!drvr)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(drvr->if2bss); i++)
+ drvr->if2bss[i] = BRCMF_BSSIDX_INVALID;
+
+ mutex_init(&drvr->proto_block);
+
+ /* Link to bus module */
+ drvr->hdrlen = 0;
+ drvr->bus_if = dev_get_drvdata(dev);
+ drvr->bus_if->drvr = drvr;
+ drvr->settings = settings;
+
+ /* attach debug facilities */
+ brcmf_debug_attach(drvr);
+
+ /* Attach and link in the protocol */
+ ret = brcmf_proto_attach(drvr);
+ if (ret != 0) {
+ brcmf_err("brcmf_prot_attach failed\n");
+ goto fail;
+ }
+
+ /* Attach to events important for core code */
+ brcmf_fweh_register(drvr, BRCMF_E_PSM_WATCHDOG,
+ brcmf_psm_watchdog_notify);
+
+ /* attach firmware event handler */
+ brcmf_fweh_attach(drvr);
+
+ ret = brcmf_bus_started(drvr);
+ if (ret != 0) {
+ brcmf_err("dongle is not responding: err=%d\n", ret);
+ goto fail;
+ }
+ return 0;
+
+fail:
+ brcmf_detach(dev);
+
+ return ret;
+}
+
void brcmf_bus_add_txhdrlen(struct device *dev, uint len)
{
struct brcmf_bus *bus_if = dev_get_drvdata(dev);
@@ -1185,6 +1194,12 @@ void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state)
int ifidx;
brcmf_dbg(TRACE, "%d -> %d\n", bus->state, state);
+
+ if (!drvr) {
+ brcmf_dbg(INFO, "ignoring transition, bus not attached yet\n");
+ return;
+ }
+
bus->state = state;
if (state == BRCMF_BUS_UP) {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
index df8a1ecb9924..232dcbb83311 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
@@ -181,6 +181,7 @@ enum brcmf_netif_stop_reason {
* @netif_stop_lock: spinlock for update netif_stop from multiple sources.
* @pend_8021x_cnt: tracks outstanding number of 802.1x frames.
* @pend_8021x_wait: used for signalling change in count.
+ * @fwil_fwerr: flag indicating fwil layer should return firmware error codes.
*/
struct brcmf_if {
struct brcmf_pub *drvr;
@@ -198,6 +199,7 @@ struct brcmf_if {
wait_queue_head_t pend_8021x_wait;
struct in6_addr ipv6_addr_tbl[NDOL_MAX_ENTRIES];
u8 ipv6addr_idx;
+ bool fwil_fwerr;
};
int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
index 47de35a33853..bede7b7fd996 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
@@ -104,6 +104,9 @@ static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp,
u32 data;
int err;
+ /* we need to know the firmware error */
+ ifp->fwil_fwerr = true;
+
err = brcmf_fil_iovar_int_get(ifp, name, &data);
if (err == 0) {
brcmf_dbg(INFO, "enabling feature: %s\n", brcmf_feat_names[id]);
@@ -112,6 +115,8 @@ static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp,
brcmf_dbg(TRACE, "%s feature check failed: %d\n",
brcmf_feat_names[id], err);
}
+
+ ifp->fwil_fwerr = false;
}
static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp,
@@ -120,6 +125,9 @@ static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp,
{
int err;
+ /* we need to know the firmware error */
+ ifp->fwil_fwerr = true;
+
err = brcmf_fil_iovar_data_set(ifp, name, data, len);
if (err != -BRCMF_FW_UNSUPPORTED) {
brcmf_dbg(INFO, "enabling feature: %s\n", brcmf_feat_names[id]);
@@ -128,6 +136,8 @@ static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp,
brcmf_dbg(TRACE, "%s feature check failed: %d\n",
brcmf_feat_names[id], err);
}
+
+ ifp->fwil_fwerr = false;
}
#define MAX_CAPS_BUFFER_SIZE 512
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
index f2cfdd3b2bf1..fc5751116d99 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
@@ -131,6 +131,9 @@ brcmf_fil_cmd_data(struct brcmf_if *ifp, u32 cmd, void *data, u32 len, bool set)
brcmf_fil_get_errstr((u32)(-fwerr)), fwerr);
err = -EBADE;
}
+ if (ifp->fwil_fwerr)
+ return fwerr;
+
return err;
}
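The fwil layer normally folds firmware error codes (fwerr) into a generic -EBADE; the new per-ifp flag lets callers see the raw code so that feature probing can tell "iovar unsupported" apart from a real failure. A sketch of the intended usage, mirroring feature.c above (the iovar name is hypothetical):

ifp->fwil_fwerr = true;                 /* request raw firmware error codes */
err = brcmf_fil_iovar_int_get(ifp, "some_iovar", &val);
ifp->fwil_fwerr = false;
if (err == -BRCMF_FW_UNSUPPORTED)
	return;                         /* firmware simply lacks the feature */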
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 2ee54133efa1..82064e909784 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -462,25 +462,23 @@ static int brcmf_p2p_set_firmware(struct brcmf_if *ifp, u8 *p2p_mac)
* @dev_addr: optional device address.
*
* P2P needs mac addresses for P2P device and interface. If no device
- * address it specified, these are derived from the primary net device, ie.
- * the permanent ethernet address of the device.
+ * address is specified, these are derived from a random ethernet
+ * address.
*/
static void brcmf_p2p_generate_bss_mac(struct brcmf_p2p_info *p2p, u8 *dev_addr)
{
- struct brcmf_if *pri_ifp = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif->ifp;
- bool local_admin = false;
+ bool random_addr = false;
- if (!dev_addr || is_zero_ether_addr(dev_addr)) {
- dev_addr = pri_ifp->mac_addr;
- local_admin = true;
- }
+ if (!dev_addr || is_zero_ether_addr(dev_addr))
+ random_addr = true;
- /* Generate the P2P Device Address. This consists of the device's
- * primary MAC address with the locally administered bit set.
+ /* Generate the P2P Device Address by obtaining a random ethernet
+ * address with the locally administered bit set.
*/
- memcpy(p2p->dev_addr, dev_addr, ETH_ALEN);
- if (local_admin)
- p2p->dev_addr[0] |= 0x02;
+ if (random_addr)
+ eth_random_addr(p2p->dev_addr);
+ else
+ memcpy(p2p->dev_addr, dev_addr, ETH_ALEN);
/* Generate the P2P Interface Address. If the discovery and connection
* BSSCFGs need to simultaneously co-exist, then this address must be
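eth_random_addr() from include/linux/etherdevice.h already yields a valid locally-administered unicast address, so no manual bit manipulation is needed on the randomized path; its effect is roughly:

static inline void eth_random_addr_sketch(u8 *addr)
{
	get_random_bytes(addr, ETH_ALEN); /* six random bytes */
	addr[0] &= 0xfe;                  /* clear the multicast bit */
	addr[0] |= 0x02;                  /* set the locally administered bit */
}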
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 8752707557bf..a7d827ce1684 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1581,24 +1581,6 @@ static void brcmf_pcie_release_resource(struct brcmf_pciedev_info *devinfo)
}
-static int brcmf_pcie_attach_bus(struct brcmf_pciedev_info *devinfo)
-{
- int ret;
-
- /* Attach to the common driver interface */
- ret = brcmf_attach(&devinfo->pdev->dev, devinfo->settings);
- if (ret) {
- brcmf_err("brcmf_attach failed\n");
- } else {
- ret = brcmf_bus_started(&devinfo->pdev->dev);
- if (ret)
- brcmf_err("dongle is not responding\n");
- }
-
- return ret;
-}
-
-
static u32 brcmf_pcie_buscore_prep_addr(const struct pci_dev *pdev, u32 addr)
{
u32 ret_addr;
@@ -1735,7 +1717,7 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
init_waitqueue_head(&devinfo->mbdata_resp_wait);
brcmf_pcie_intr_enable(devinfo);
- if (brcmf_pcie_attach_bus(devinfo) == 0)
+ if (brcmf_attach(&devinfo->pdev->dev, devinfo->settings) == 0)
return;
brcmf_pcie_bus_console_read(devinfo);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 08686147b59d..4a6459a429ec 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -1706,8 +1706,7 @@ brcmf_sdio_read_control(struct brcmf_sdio *bus, u8 *hdr, uint len, uint doff)
u8 *buf = NULL, *rbuf;
int sdret;
- brcmf_dbg(TRACE, "Enter\n");
-
+ brcmf_dbg(SDIO, "Enter\n");
if (bus->rxblen)
buf = vzalloc(bus->rxblen);
if (!buf)
@@ -1810,7 +1809,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes)
struct brcmf_sdio_hdrinfo *rd = &bus->cur_read, rd_new;
u8 head_read = 0;
- brcmf_dbg(TRACE, "Enter\n");
+ brcmf_dbg(SDIO, "Enter\n");
/* Not finished unless we encounter no more frames indication */
bus->rxpending = true;
@@ -2345,7 +2344,7 @@ static int brcmf_sdio_tx_ctrlframe(struct brcmf_sdio *bus, u8 *frame, u16 len)
struct brcmf_sdio_hdrinfo hd_info = {0};
int ret;
- brcmf_dbg(TRACE, "Enter\n");
+ brcmf_dbg(SDIO, "Enter\n");
/* Back the pointer to make room for bus header */
frame -= bus->tx_hdrlen;
@@ -2521,7 +2520,7 @@ static void brcmf_sdio_dpc(struct brcmf_sdio *bus)
uint framecnt; /* Temporary counter of tx/rx frames */
int err = 0;
- brcmf_dbg(TRACE, "Enter\n");
+ brcmf_dbg(SDIO, "Enter\n");
sdio_claim_host(bus->sdiodev->func1);
@@ -2606,7 +2605,7 @@ static void brcmf_sdio_dpc(struct brcmf_sdio *bus)
/* Would be active due to wake-wlan in gSPI */
if (intstatus & I_CHIPACTIVE) {
- brcmf_dbg(INFO, "Dongle reports CHIPACTIVE\n");
+ brcmf_dbg(SDIO, "Dongle reports CHIPACTIVE\n");
intstatus &= ~I_CHIPACTIVE;
}
@@ -3411,6 +3410,20 @@ static int brcmf_sdio_bus_preinit(struct device *dev)
u32 value;
int err;
+ /* maxctl provided by common layer */
+ if (WARN_ON(!bus_if->maxctl))
+ return -EINVAL;
+
+ /* Allocate control receive buffer */
+ bus_if->maxctl += bus->roundup;
+ value = roundup((bus_if->maxctl + SDPCM_HDRLEN), ALIGNMENT);
+ value += bus->head_align;
+ bus->rxbuf = kmalloc(value, GFP_ATOMIC);
+ if (bus->rxbuf)
+ bus->rxblen = value;
+
+ brcmf_sdio_debugfs_create(bus);
+
/* the commands below use the terms tx and rx from
* a device perspective, ie. bus:txglom affects the
* bus transfers from device to host.
@@ -4026,9 +4039,8 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
void *nvram, u32 nvram_len)
{
struct brcmf_bus *bus_if = dev_get_drvdata(dev);
- struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
- struct brcmf_sdio *bus = sdiodev->bus;
- struct brcmf_sdio_dev *sdiod = bus->sdiodev;
+ struct brcmf_sdio_dev *sdiod = bus_if->bus_priv.sdio;
+ struct brcmf_sdio *bus = sdiod->bus;
struct brcmf_core *core = bus->sdio_core;
u8 saveclk;
@@ -4037,9 +4049,6 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
if (err)
goto fail;
- if (!bus_if->drvr)
- return;
-
/* try to download image and nvram to the dongle */
bus->alp_only = true;
err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
@@ -4051,7 +4060,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
bus->sdcnt.tickcnt = 0;
brcmf_sdio_wd_timer(bus, true);
- sdio_claim_host(sdiodev->func1);
+ sdio_claim_host(sdiod->func1);
/* Make sure backplane clock is on, needed to generate F2 interrupt */
brcmf_sdio_clkctl(bus, CLK_AVAIL, false);
@@ -4059,9 +4068,9 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
goto release;
/* Force clocks on backplane to be sure F2 interrupt propagates */
- saveclk = brcmf_sdiod_readb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR, &err);
+ saveclk = brcmf_sdiod_readb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR, &err);
if (!err) {
- brcmf_sdiod_writeb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
+ brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR,
(saveclk | SBSDIO_FORCE_HT), &err);
}
if (err) {
@@ -4073,7 +4082,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
brcmf_sdiod_writel(sdiod, core->base + SD_REG(tosbmailboxdata),
SDPCM_PROT_VERSION << SMB_DATA_VERSION_SHIFT, NULL);
- err = sdio_enable_func(sdiodev->func2);
+ err = sdio_enable_func(sdiod->func2);
brcmf_dbg(INFO, "enable F2: err=%d\n", err);
@@ -4085,10 +4094,10 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
bus->hostintmask, NULL);
- brcmf_sdiod_writeb(sdiodev, SBSDIO_WATERMARK, 8, &err);
+ brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK, 8, &err);
} else {
/* Disable F2 again */
- sdio_disable_func(sdiodev->func2);
+ sdio_disable_func(sdiod->func2);
goto release;
}
@@ -4096,7 +4105,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
brcmf_sdio_sr_init(bus);
} else {
/* Restore previous clock setting */
- brcmf_sdiod_writeb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
+ brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR,
saveclk, &err);
}
@@ -4104,7 +4113,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
/* Allow full data communication using DPC from now on. */
brcmf_sdiod_change_state(bus->sdiodev, BRCMF_SDIOD_DATA);
- err = brcmf_sdiod_intr_register(sdiodev);
+ err = brcmf_sdiod_intr_register(sdiod);
if (err != 0)
brcmf_err("intr register failed:%d\n", err);
}
@@ -4113,20 +4122,29 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
if (err != 0)
brcmf_sdio_clkctl(bus, CLK_NONE, false);
- sdio_release_host(sdiodev->func1);
+ sdio_release_host(sdiod->func1);
- err = brcmf_bus_started(dev);
+ /* Assign bus interface call back */
+ sdiod->bus_if->dev = sdiod->dev;
+ sdiod->bus_if->ops = &brcmf_sdio_bus_ops;
+ sdiod->bus_if->chip = bus->ci->chip;
+ sdiod->bus_if->chiprev = bus->ci->chiprev;
+
+ /* Attach to the common layer, reserve hdr space */
+ err = brcmf_attach(sdiod->dev, sdiod->settings);
if (err != 0) {
- brcmf_err("dongle is not responding\n");
+ brcmf_err("brcmf_attach failed\n");
goto fail;
}
+
+ /* ready */
return;
release:
- sdio_release_host(sdiodev->func1);
+ sdio_release_host(sdiod->func1);
fail:
brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
- device_release_driver(&sdiodev->func2->dev);
+ device_release_driver(&sdiod->func2->dev);
device_release_driver(dev);
}
@@ -4188,39 +4206,13 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
bus->dpc_triggered = false;
bus->dpc_running = false;
- /* Assign bus interface call back */
- bus->sdiodev->bus_if->dev = bus->sdiodev->dev;
- bus->sdiodev->bus_if->ops = &brcmf_sdio_bus_ops;
- bus->sdiodev->bus_if->chip = bus->ci->chip;
- bus->sdiodev->bus_if->chiprev = bus->ci->chiprev;
-
/* default sdio bus header length for tx packet */
bus->tx_hdrlen = SDPCM_HWHDR_LEN + SDPCM_SWHDR_LEN;
- /* Attach to the common layer, reserve hdr space */
- ret = brcmf_attach(bus->sdiodev->dev, bus->sdiodev->settings);
- if (ret != 0) {
- brcmf_err("brcmf_attach failed\n");
- goto fail;
- }
-
/* Query the F2 block size, set roundup accordingly */
bus->blocksize = bus->sdiodev->func2->cur_blksize;
bus->roundup = min(max_roundup, bus->blocksize);
- /* Allocate buffers */
- if (bus->sdiodev->bus_if->maxctl) {
- bus->sdiodev->bus_if->maxctl += bus->roundup;
- bus->rxblen =
- roundup((bus->sdiodev->bus_if->maxctl + SDPCM_HDRLEN),
- ALIGNMENT) + bus->head_align;
- bus->rxbuf = kmalloc(bus->rxblen, GFP_ATOMIC);
- if (!(bus->rxbuf)) {
- brcmf_err("rxbuf allocation failed\n");
- goto fail;
- }
- }
-
sdio_claim_host(bus->sdiodev->func1);
/* Disable F2 to clear any intermediate frame state on the dongle */
@@ -4241,7 +4233,6 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
/* SR state */
bus->sr_enabled = false;
- brcmf_sdio_debugfs_create(bus);
brcmf_dbg(INFO, "completed!!\n");
ret = brcmf_fw_map_chip_to_name(bus->ci->chip, bus->ci->chiprev,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index b27170c12482..41642dda40fd 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1146,39 +1146,15 @@ static int brcmf_usb_get_fwname(struct device *dev, u32 chip, u32 chiprev,
}
static const struct brcmf_bus_ops brcmf_usb_bus_ops = {
- .txdata = brcmf_usb_tx,
+ .preinit = brcmf_usb_up,
.stop = brcmf_usb_down,
+ .txdata = brcmf_usb_tx,
.txctl = brcmf_usb_tx_ctlpkt,
.rxctl = brcmf_usb_rx_ctlpkt,
.wowl_config = brcmf_usb_wowl_config,
.get_fwname = brcmf_usb_get_fwname,
};
-static int brcmf_usb_bus_setup(struct brcmf_usbdev_info *devinfo)
-{
- int ret;
-
- /* Attach to the common driver interface */
- ret = brcmf_attach(devinfo->dev, devinfo->settings);
- if (ret) {
- brcmf_err("brcmf_attach failed\n");
- return ret;
- }
-
- ret = brcmf_usb_up(devinfo->dev);
- if (ret)
- goto fail;
-
- ret = brcmf_bus_started(devinfo->dev);
- if (ret)
- goto fail;
-
- return 0;
-fail:
- brcmf_detach(devinfo->dev);
- return ret;
-}
-
static void brcmf_usb_probe_phase2(struct device *dev, int ret,
const struct firmware *fw,
void *nvram, u32 nvlen)
@@ -1206,7 +1182,8 @@ static void brcmf_usb_probe_phase2(struct device *dev, int ret,
if (ret)
goto error;
- ret = brcmf_usb_bus_setup(devinfo);
+ /* Attach to the common driver interface */
+ ret = brcmf_attach(devinfo->dev, devinfo->settings);
if (ret)
goto error;
@@ -1256,7 +1233,7 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
}
if (!brcmf_usb_dlneeded(devinfo)) {
- ret = brcmf_usb_bus_setup(devinfo);
+ ret = brcmf_attach(devinfo->dev, devinfo->settings);
if (ret)
goto fail;
/* we are done */
@@ -1459,7 +1436,7 @@ static int brcmf_usb_resume(struct usb_interface *intf)
brcmf_dbg(USB, "Enter\n");
if (!devinfo->wowl_enabled)
- return brcmf_usb_bus_setup(devinfo);
+ return brcmf_attach(devinfo->dev, devinfo->settings);
devinfo->bus_pub.state = BRCMFMAC_USB_STATE_UP;
brcmf_usb_rx_fill_all(devinfo);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c
index 3a03287fa912..db783e94f929 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c
@@ -652,7 +652,6 @@ static void brcms_reg_apply_radar_flags(struct wiphy *wiphy)
*/
if (!(ch->flags & IEEE80211_CHAN_DISABLED))
ch->flags |= IEEE80211_CHAN_RADAR |
- IEEE80211_CHAN_NO_IR |
IEEE80211_CHAN_NO_IR;
}
}
diff --git a/drivers/net/wireless/cisco/Kconfig b/drivers/net/wireless/cisco/Kconfig
index b22567dff893..26eb8b0c2104 100644
--- a/drivers/net/wireless/cisco/Kconfig
+++ b/drivers/net/wireless/cisco/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_CISCO
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_CISCO
diff --git a/drivers/net/wireless/intel/Kconfig b/drivers/net/wireless/intel/Kconfig
index 5b14f2f64a8a..6fdc14b08b8e 100644
--- a/drivers/net/wireless/intel/Kconfig
+++ b/drivers/net/wireless/intel/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_INTEL
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_INTEL
diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig
index c5f2ddf9b0fe..e5a2fc738ac3 100644
--- a/drivers/net/wireless/intel/iwlwifi/Kconfig
+++ b/drivers/net/wireless/intel/iwlwifi/Kconfig
@@ -91,7 +91,6 @@ config IWLWIFI_BCAST_FILTERING
config IWLWIFI_PCIE_RTPM
bool "Enable runtime power management mode for PCIe devices"
depends on IWLMVM && PM && EXPERT
- default false
help
Say Y here to enable runtime power management for PCIe
devices. If enabled, the device will go into low power mode
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h
index 3721a3ed358b..f824bebceb06 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h
@@ -211,7 +211,7 @@ enum {
* @TE_V2_NOTIF_HOST_FRAG_END:request/receive notification on frag end
* @TE_V2_NOTIF_INTERNAL_FRAG_START: internal FW use.
* @TE_V2_NOTIF_INTERNAL_FRAG_END: internal FW use.
- * @T2_V2_START_IMMEDIATELY: start time event immediately
+ * @TE_V2_START_IMMEDIATELY: start time event immediately
* @TE_V2_DEP_OTHER: depends on another time event
* @TE_V2_DEP_TSF: depends on a specific time
* @TE_V2_EVENT_SOCIOPATHIC: can't co-exist with other events of the same MAC
@@ -230,7 +230,7 @@ enum iwl_time_event_policy {
TE_V2_NOTIF_HOST_FRAG_END = BIT(5),
TE_V2_NOTIF_INTERNAL_FRAG_START = BIT(6),
TE_V2_NOTIF_INTERNAL_FRAG_END = BIT(7),
- T2_V2_START_IMMEDIATELY = BIT(11),
+ TE_V2_START_IMMEDIATELY = BIT(11),
/* placement characteristics */
TE_V2_DEP_OTHER = BIT(TE_V2_PLACEMENT_POS),
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 67aefc8fc9ac..7bd704a3e640 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -8,6 +8,7 @@
* Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -33,6 +34,7 @@
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -942,7 +944,6 @@ dump_trans_data:
out:
iwl_fw_free_dump_desc(fwrt);
- fwrt->dump.trig = NULL;
clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status);
IWL_DEBUG_INFO(fwrt, "WRT dump done\n");
}
@@ -1112,6 +1113,14 @@ void iwl_fw_error_dump_wk(struct work_struct *work)
fwrt->ops->dump_start(fwrt->ops_ctx))
return;
+ if (fwrt->ops && fwrt->ops->fw_running &&
+ !fwrt->ops->fw_running(fwrt->ops_ctx)) {
+ IWL_ERR(fwrt, "Firmware not running - cannot dump error\n");
+ iwl_fw_free_dump_desc(fwrt);
+ clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status);
+ goto out;
+ }
+
if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
/* stop recording */
iwl_fw_dbg_stop_recording(fwrt);
@@ -1145,7 +1154,7 @@ void iwl_fw_error_dump_wk(struct work_struct *work)
iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, out_ctrl);
}
}
-
+out:
if (fwrt->ops && fwrt->ops->dump_end)
fwrt->ops->dump_end(fwrt->ops_ctx);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index 223fb77a3aa9..72259bff9922 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -8,6 +8,7 @@
* Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -33,6 +34,7 @@
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -91,6 +93,7 @@ static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt)
if (fwrt->dump.desc != &iwl_dump_desc_assert)
kfree(fwrt->dump.desc);
fwrt->dump.desc = NULL;
+ fwrt->dump.trig = NULL;
}
void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h
index e57ff92a68ae..3da468d2cc92 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h
@@ -75,6 +75,20 @@ static inline void iwl_fw_cancel_timestamp(struct iwl_fw_runtime *fwrt)
cancel_delayed_work_sync(&fwrt->timestamp.wk);
}
+static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt)
+{
+ cancel_delayed_work_sync(&fwrt->timestamp.wk);
+}
+
+static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt)
+{
+ if (!fwrt->timestamp.delay)
+ return;
+
+ schedule_delayed_work(&fwrt->timestamp.wk,
+ round_jiffies_relative(fwrt->timestamp.delay));
+}
+
#else
static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt,
struct dentry *dbgfs_dir)
@@ -84,4 +98,8 @@ static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt,
static inline void iwl_fw_cancel_timestamp(struct iwl_fw_runtime *fwrt) {}
+static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt) {}
+
+static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt) {}
+
#endif /* CONFIG_IWLWIFI_DEBUGFS */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c
index c39fe84bb4c4..2efac307909e 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/init.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c
@@ -77,8 +77,14 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
}
IWL_EXPORT_SYMBOL(iwl_fw_runtime_init);
-void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt)
+void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt)
{
- iwl_fw_cancel_timestamp(fwrt);
+ iwl_fw_suspend_timestamp(fwrt);
}
-IWL_EXPORT_SYMBOL(iwl_fw_runtime_exit);
+IWL_EXPORT_SYMBOL(iwl_fw_runtime_suspend);
+
+void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt)
+{
+ iwl_fw_resume_timestamp(fwrt);
+}
+IWL_EXPORT_SYMBOL(iwl_fw_runtime_resume);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
index e25c049f980f..3fb940ebd74a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
@@ -6,6 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -26,6 +27,7 @@
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -68,6 +70,7 @@
struct iwl_fw_runtime_ops {
int (*dump_start)(void *ctx);
void (*dump_end)(void *ctx);
+ bool (*fw_running)(void *ctx);
};
#define MAX_NUM_LMAC 2
@@ -150,6 +153,10 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt);
+void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt);
+
+void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt);
+
static inline void iwl_fw_set_current_image(struct iwl_fw_runtime *fwrt,
enum iwl_ucode_type cur_fw_img)
{
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 0e6cf39285f4..2efe9b099556 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1098,6 +1098,8 @@ int iwl_mvm_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
/* make sure the d0i3 exit work is not pending */
flush_work(&mvm->d0i3_exit_work);
+ iwl_fw_runtime_suspend(&mvm->fwrt);
+
ret = iwl_trans_suspend(trans);
if (ret)
return ret;
@@ -2012,6 +2014,8 @@ int iwl_mvm_resume(struct ieee80211_hw *hw)
mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
+ iwl_fw_runtime_resume(&mvm->fwrt);
+
return ret;
}
@@ -2038,6 +2042,8 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file)
mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_D3;
+ iwl_fw_runtime_suspend(&mvm->fwrt);
+
/* start pseudo D3 */
rtnl_lock();
err = __iwl_mvm_suspend(mvm->hw, mvm->hw->wiphy->wowlan_config, true);
@@ -2098,6 +2104,8 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
__iwl_mvm_resume(mvm, true);
rtnl_unlock();
+ iwl_fw_runtime_resume(&mvm->fwrt);
+
mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
iwl_abort_notification_waits(&mvm->notif_wait);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index a7892c1254a2..9c436d8d001d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -8,6 +8,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -35,6 +36,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1281,9 +1283,6 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm,
{
int ret;
- if (!iwl_mvm_firmware_running(mvm))
- return -EIO;
-
ret = iwl_mvm_ref_sync(mvm, IWL_MVM_REF_PRPH_WRITE);
if (ret)
return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index 2f22e14e00fe..8ba16fc24e3a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -438,7 +438,8 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
}
/* Allocate the CAB queue for softAP and GO interfaces */
- if (vif->type == NL80211_IFTYPE_AP) {
+ if (vif->type == NL80211_IFTYPE_AP ||
+ vif->type == NL80211_IFTYPE_ADHOC) {
/*
* For TVQM this will be overwritten later with the FW assigned
* queue value (when queue is enabled).
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 8aed40a8bc38..ebf511150f4d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -8,6 +8,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -2106,15 +2107,40 @@ static int iwl_mvm_start_ap_ibss(struct ieee80211_hw *hw,
if (ret)
goto out_remove;
- ret = iwl_mvm_add_mcast_sta(mvm, vif);
- if (ret)
- goto out_unbind;
-
- /* Send the bcast station. At this stage the TBTT and DTIM time events
- * are added and applied to the scheduler */
- ret = iwl_mvm_send_add_bcast_sta(mvm, vif);
- if (ret)
- goto out_rm_mcast;
+ /*
+ * This is not very nice, but the simplest:
+ * For older FWs adding the mcast sta before the bcast station may
+ * cause assert 0x2b00.
+ * This is fixed in later FW so make the order of removal depend on
+ * the TLV
+ */
+ if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE)) {
+ ret = iwl_mvm_add_mcast_sta(mvm, vif);
+ if (ret)
+ goto out_unbind;
+ /*
+ * Send the bcast station. At this stage the TBTT and DTIM time
+ * events are added and applied to the scheduler
+ */
+ ret = iwl_mvm_send_add_bcast_sta(mvm, vif);
+ if (ret) {
+ iwl_mvm_rm_mcast_sta(mvm, vif);
+ goto out_unbind;
+ }
+ } else {
+ /*
+ * Send the bcast station. At this stage the TBTT and DTIM time
+ * events are added and applied to the scheduler
+ */
+ ret = iwl_mvm_send_add_bcast_sta(mvm, vif);
+ if (ret)
+ goto out_unbind;
+ ret = iwl_mvm_add_mcast_sta(mvm, vif);
+ if (ret) {
+ iwl_mvm_send_rm_bcast_sta(mvm, vif);
+ goto out_unbind;
+ }
+ }
/* must be set before quota calculations */
mvmvif->ap_ibss_active = true;
@@ -2144,7 +2170,6 @@ out_quota_failed:
iwl_mvm_power_update_mac(mvm);
mvmvif->ap_ibss_active = false;
iwl_mvm_send_rm_bcast_sta(mvm, vif);
-out_rm_mcast:
iwl_mvm_rm_mcast_sta(mvm, vif);
out_unbind:
iwl_mvm_binding_remove_vif(mvm, vif);
@@ -2682,6 +2707,10 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
/* enable beacon filtering */
WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0));
+
+ iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band,
+ false);
+
ret = 0;
} else if (old_state == IEEE80211_STA_AUTHORIZED &&
new_state == IEEE80211_STA_ASSOC) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 2d28e0804218..89ff02d7c876 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -90,6 +90,7 @@
#include "fw/runtime.h"
#include "fw/dbg.h"
#include "fw/acpi.h"
+#include "fw/debugfs.h"
#define IWL_MVM_MAX_ADDRESSES 5
/* RSSI offset for WkP */
@@ -1783,6 +1784,7 @@ static inline u32 iwl_mvm_flushable_queues(struct iwl_mvm *mvm)
static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm)
{
+ iwl_fw_cancel_timestamp(&mvm->fwrt);
iwl_free_fw_paging(&mvm->fwrt);
clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status);
iwl_fw_dump_conf_clear(&mvm->fwrt);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 5d525a0023dc..ab7fb5aad984 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -8,6 +8,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -35,6 +36,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -552,9 +554,15 @@ static void iwl_mvm_fwrt_dump_end(void *ctx)
iwl_mvm_unref(mvm, IWL_MVM_REF_FW_DBG_COLLECT);
}
+static bool iwl_mvm_fwrt_fw_running(void *ctx)
+{
+ return iwl_mvm_firmware_running(ctx);
+}
+
static const struct iwl_fw_runtime_ops iwl_mvm_fwrt_ops = {
.dump_start = iwl_mvm_fwrt_dump_start,
.dump_end = iwl_mvm_fwrt_dump_end,
+ .fw_running = iwl_mvm_fwrt_fw_running,
};
static struct iwl_op_mode *
@@ -802,7 +810,6 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
iwl_mvm_leds_exit(mvm);
iwl_mvm_thermal_exit(mvm);
out_free:
- iwl_fw_runtime_exit(&mvm->fwrt);
iwl_fw_flush_dump(&mvm->fwrt);
if (iwlmvm_mod_params.init_dbg)
@@ -843,7 +850,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode)
#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_IWLWIFI_DEBUGFS)
kfree(mvm->d3_resume_sram);
#endif
- iwl_fw_runtime_exit(&mvm->fwrt);
iwl_trans_op_mode_leave(mvm->trans);
iwl_phy_db_free(mvm->phy_db);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 60abb0084ee5..47f4c7a1d80d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -2684,7 +2684,8 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
struct ieee80211_sta *sta,
struct iwl_lq_sta *lq_sta,
enum nl80211_band band,
- struct rs_rate *rate)
+ struct rs_rate *rate,
+ bool init)
{
int i, nentries;
unsigned long active_rate;
@@ -2738,14 +2739,25 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
*/
if (sta->vht_cap.vht_supported &&
best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) {
- switch (sta->bandwidth) {
- case IEEE80211_STA_RX_BW_160:
- case IEEE80211_STA_RX_BW_80:
- case IEEE80211_STA_RX_BW_40:
+ /*
+ * In AP mode, when a new station associates, rs is initialized
+ * immediately upon association completion, before the phy
+ * context is updated with the association parameters, so the
+ * sta bandwidth might be wider than the phy context allows.
+ * To avoid this issue, always initialize rs with 20 MHz
+ * bandwidth rate, and after authorization, when the phy context
+ * is already up-to-date, re-init rs with the correct bw.
+ */
+ u32 bw = init ? RATE_MCS_CHAN_WIDTH_20 : rs_bw_from_sta_bw(sta);
+
+ switch (bw) {
+ case RATE_MCS_CHAN_WIDTH_40:
+ case RATE_MCS_CHAN_WIDTH_80:
+ case RATE_MCS_CHAN_WIDTH_160:
initial_rates = rs_optimal_rates_vht;
nentries = ARRAY_SIZE(rs_optimal_rates_vht);
break;
- case IEEE80211_STA_RX_BW_20:
+ case RATE_MCS_CHAN_WIDTH_20:
initial_rates = rs_optimal_rates_vht_20mhz;
nentries = ARRAY_SIZE(rs_optimal_rates_vht_20mhz);
break;
@@ -2756,7 +2768,7 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
active_rate = lq_sta->active_siso_rate;
rate->type = LQ_VHT_SISO;
- rate->bw = rs_bw_from_sta_bw(sta);
+ rate->bw = bw;
} else if (sta->ht_cap.ht_supported &&
best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) {
initial_rates = rs_optimal_rates_ht;
@@ -2839,7 +2851,7 @@ static void rs_initialize_lq(struct iwl_mvm *mvm,
tbl = &(lq_sta->lq_info[active_tbl]);
rate = &tbl->rate;
- rs_get_initial_rate(mvm, sta, lq_sta, band, rate);
+ rs_get_initial_rate(mvm, sta, lq_sta, band, rate, init);
rs_init_optimal_rate(mvm, sta, lq_sta);
WARN_ONCE(rate->ant != ANT_A && rate->ant != ANT_B,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index a3f7c1bf3cc8..580de5851fc7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -71,6 +71,7 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
struct ieee80211_rx_status *stats = IEEE80211_SKB_RXCB(skb);
struct iwl_mvm_key_pn *ptk_pn;
+ int res;
u8 tid, keyidx;
u8 pn[IEEE80211_CCMP_PN_LEN];
u8 *extiv;
@@ -127,12 +128,13 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
pn[4] = extiv[1];
pn[5] = extiv[0];
- if (memcmp(pn, ptk_pn->q[queue].pn[tid],
- IEEE80211_CCMP_PN_LEN) <= 0)
+ res = memcmp(pn, ptk_pn->q[queue].pn[tid], IEEE80211_CCMP_PN_LEN);
+ if (res < 0)
+ return -1;
+ if (!res && !(stats->flag & RX_FLAG_ALLOW_SAME_PN))
return -1;
- if (!(stats->flag & RX_FLAG_AMSDU_MORE))
- memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN);
+ memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN);
stats->flag |= RX_FLAG_PN_VALIDATED;
return 0;
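The reworked check above compares the full 6-byte CCMP packet number and accepts an equal PN only when the receive path has explicitly allowed it (the A-MSDU case handled later in this patch). A self-contained sketch, assuming the PN is stored big-endian so memcmp() gives numeric order:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define PN_LEN 6	/* CCMP packet number length */

/* returns true if the received PN passes replay detection */
static bool pn_ok(const uint8_t pn[PN_LEN], uint8_t last_pn[PN_LEN],
		  bool allow_same_pn)
{
	int res = memcmp(pn, last_pn, PN_LEN);	/* big-endian: numeric order */

	if (res < 0)				/* older than last seen: replay */
		return false;
	if (res == 0 && !allow_same_pn)		/* equal PN only for A-MSDU */
		return false;

	memcpy(last_pn, pn, PN_LEN);		/* advance the replay state */
	return true;
}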
@@ -314,28 +316,21 @@ static void iwl_mvm_rx_csum(struct ieee80211_sta *sta,
}
/*
- * returns true if a packet outside BA session is a duplicate and
- * should be dropped
+ * returns true if a packet is a duplicate and should be dropped.
+ * Updates AMSDU PN tracking info
*/
-static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue,
- struct ieee80211_rx_status *rx_status,
- struct ieee80211_hdr *hdr,
- struct iwl_rx_mpdu_desc *desc)
+static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue,
+ struct ieee80211_rx_status *rx_status,
+ struct ieee80211_hdr *hdr,
+ struct iwl_rx_mpdu_desc *desc)
{
struct iwl_mvm_sta *mvm_sta;
struct iwl_mvm_rxq_dup_data *dup_data;
- u8 baid, tid, sub_frame_idx;
+ u8 tid, sub_frame_idx;
if (WARN_ON(IS_ERR_OR_NULL(sta)))
return false;
- baid = (le32_to_cpu(desc->reorder_data) &
- IWL_RX_MPDU_REORDER_BAID_MASK) >>
- IWL_RX_MPDU_REORDER_BAID_SHIFT;
-
- if (baid != IWL_RX_REORDER_DATA_INVALID_BAID)
- return false;
-
mvm_sta = iwl_mvm_sta_from_mac80211(sta);
dup_data = &mvm_sta->dup_data[queue];
@@ -365,6 +360,12 @@ static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue,
dup_data->last_sub_frame[tid] >= sub_frame_idx))
return true;
+ /* Allow same PN as the first subframe for following sub frames */
+ if (dup_data->last_seq[tid] == hdr->seq_ctrl &&
+ sub_frame_idx > dup_data->last_sub_frame[tid] &&
+ desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU)
+ rx_status->flag |= RX_FLAG_ALLOW_SAME_PN;
+
dup_data->last_seq[tid] = hdr->seq_ctrl;
dup_data->last_sub_frame[tid] = sub_frame_idx;
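The renamed iwl_mvm_is_dup() above now also covers aggregated traffic and flags later subframes of the same A-MSDU so the PN check tolerates the shared packet number. A compact sketch of the per-TID bookkeeping (state reduced to one slot; names hypothetical):

#include <stdbool.h>
#include <stdint.h>

struct dup_state {
	uint16_t last_seq;	/* last 802.11 sequence control seen */
	uint8_t  last_sub;	/* last A-MSDU subframe index seen */
};

/* returns true if the frame is a duplicate; may set *same_pn_ok */
static bool is_dup(struct dup_state *s, uint16_t seq, uint8_t sub,
		   bool amsdu, bool *same_pn_ok)
{
	if (s->last_seq == seq && s->last_sub >= sub)
		return true;		/* retransmission: drop it */

	/* a later subframe of the same MSDU legitimately reuses the PN */
	if (s->last_seq == seq && sub > s->last_sub && amsdu)
		*same_pn_ok = true;

	s->last_seq = seq;
	s->last_sub = sub;
	return false;
}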
@@ -971,7 +972,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
if (ieee80211_is_data(hdr->frame_control))
iwl_mvm_rx_csum(sta, skb, desc);
- if (iwl_mvm_is_nonagg_dup(sta, queue, rx_status, hdr, desc)) {
+ if (iwl_mvm_is_dup(sta, queue, rx_status, hdr, desc)) {
kfree_skb(skb);
goto out;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 6b2674e02606..630e23cb0ffb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2039,7 +2039,7 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
struct iwl_trans_txq_scd_cfg cfg = {
.fifo = IWL_MVM_TX_FIFO_MCAST,
.sta_id = msta->sta_id,
- .tid = IWL_MAX_TID_COUNT,
+ .tid = 0,
.aggregate = false,
.frame_limit = IWL_FRAME_LIMIT,
};
@@ -2053,6 +2053,17 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
return -ENOTSUPP;
/*
+ * In IBSS, ieee80211_check_queues() sets the cab_queue to be
+ * invalid, so make sure we use the queue we want.
+ * Note that this is done here as we want to avoid making DQA
+ * changes in mac80211 layer.
+ */
+ if (vif->type == NL80211_IFTYPE_ADHOC) {
+ vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE;
+ mvmvif->cab_queue = vif->cab_queue;
+ }
+
+ /*
* While in previous FWs we had to exclude cab queue from TFD queue
* mask, now it is needed as any other queue.
*/
@@ -2079,24 +2090,13 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
if (iwl_mvm_has_new_tx_api(mvm)) {
int queue = iwl_mvm_tvqm_enable_txq(mvm, vif->cab_queue,
msta->sta_id,
- IWL_MAX_TID_COUNT,
+ 0,
timeout);
mvmvif->cab_queue = queue;
} else if (!fw_has_api(&mvm->fw->ucode_capa,
- IWL_UCODE_TLV_API_STA_TYPE)) {
- /*
- * In IBSS, ieee80211_check_queues() sets the cab_queue to be
- * invalid, so make sure we use the queue we want.
- * Note that this is done here as we want to avoid making DQA
- * changes in mac80211 layer.
- */
- if (vif->type == NL80211_IFTYPE_ADHOC) {
- vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE;
- mvmvif->cab_queue = vif->cab_queue;
- }
+ IWL_UCODE_TLV_API_STA_TYPE))
iwl_mvm_enable_txq(mvm, vif->cab_queue, vif->cab_queue, 0,
&cfg, timeout);
- }
return 0;
}
@@ -2115,7 +2115,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true, 0);
iwl_mvm_disable_txq(mvm, mvmvif->cab_queue, vif->cab_queue,
- IWL_MAX_TID_COUNT, 0);
+ 0, 0);
ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id);
if (ret)
@@ -3170,8 +3170,9 @@ static int __iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, u8 sta_id,
int ret, size;
u32 status;
+ /* This is a valid situation for GTK removal */
if (sta_id == IWL_MVM_INVALID_STA)
- return -EINVAL;
+ return 0;
key_flags = cpu_to_le16((keyconf->keyidx << STA_KEY_FLG_KEYID_POS) &
STA_KEY_FLG_KEYID_MSK);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 200ab50ec86b..acb217e666db 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -616,7 +616,7 @@ void iwl_mvm_protect_session(struct iwl_mvm *mvm,
time_cmd.repeat = 1;
time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START |
TE_V2_NOTIF_HOST_EVENT_END |
- T2_V2_START_IMMEDIATELY);
+ TE_V2_START_IMMEDIATELY);
if (!wait_for_notif) {
iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd);
@@ -803,7 +803,7 @@ int iwl_mvm_start_p2p_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
time_cmd.repeat = 1;
time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START |
TE_V2_NOTIF_HOST_EVENT_END |
- T2_V2_START_IMMEDIATELY);
+ TE_V2_START_IMMEDIATELY);
return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd);
}
@@ -913,6 +913,8 @@ int iwl_mvm_schedule_csa_period(struct iwl_mvm *mvm,
time_cmd.interval = cpu_to_le32(1);
time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START |
TE_V2_ABSENCE);
+ if (!apply_time)
+ time_cmd.policy |= cpu_to_le16(TE_V2_START_IMMEDIATELY);
return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index dda77b327c98..af6dfceab6b8 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -419,11 +419,11 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
{
struct ieee80211_key_conf *keyconf = info->control.hw_key;
u8 *crypto_hdr = skb_frag->data + hdrlen;
+ enum iwl_tx_cmd_sec_ctrl type = TX_CMD_SEC_CCM;
u64 pn;
switch (keyconf->cipher) {
case WLAN_CIPHER_SUITE_CCMP:
- case WLAN_CIPHER_SUITE_CCMP_256:
iwl_mvm_set_tx_cmd_ccmp(info, tx_cmd);
iwl_mvm_set_tx_cmd_pn(info, crypto_hdr);
break;
@@ -447,13 +447,16 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
break;
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
+ type = TX_CMD_SEC_GCMP;
+ /* Fall through */
+ case WLAN_CIPHER_SUITE_CCMP_256:
/* TODO: Taking the key from the table might introduce a race
* when PTK rekeying is done, having an old packets with a PN
* based on the old key but the message encrypted with a new
* one.
* Need to handle this.
*/
- tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE;
+ tx_cmd->sec_ctl |= type | TX_CMD_SEC_KEY_FROM_TABLE;
tx_cmd->key[0] = keyconf->hw_key_idx;
iwl_mvm_set_tx_cmd_pn(info, crypto_hdr);
break;
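The switch above defaults the security-control value to CCM and overrides it only for the GCMP ciphers before falling through, so CCMP-256 shares the key-from-table path without duplicating it. A standalone sketch of the idiom (the flag values are invented for illustration):

#include <stdint.h>

enum sec_ctl { SEC_CCM = 1, SEC_GCMP = 2, SEC_KEY_FROM_TABLE = 8 };
enum cipher  { CIPHER_CCMP_256, CIPHER_GCMP, CIPHER_GCMP_256 };

static uint8_t sec_flags(enum cipher c)
{
	uint8_t type = SEC_CCM;		/* default also covers CCMP-256 */

	switch (c) {
	case CIPHER_GCMP:
	case CIPHER_GCMP_256:
		type = SEC_GCMP;
		/* fall through: all three take the key from the table */
	case CIPHER_CCMP_256:
		return type | SEC_KEY_FROM_TABLE;
	}
	return type;
}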
@@ -645,7 +648,11 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE ||
info.control.vif->type == NL80211_IFTYPE_AP ||
info.control.vif->type == NL80211_IFTYPE_ADHOC) {
- sta_id = mvmvif->bcast_sta.sta_id;
+ if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE)
+ sta_id = mvmvif->bcast_sta.sta_id;
+ else
+ sta_id = mvmvif->mcast_sta.sta_id;
+
queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info,
hdr->frame_control);
if (queue < 0)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 6d0a907d5ba5..fabae0f60683 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -147,7 +147,7 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
/* Sanity check on number of chunks */
num_tbs = iwl_pcie_gen2_get_num_tbs(trans, tfd);
- if (num_tbs >= trans_pcie->max_tbs) {
+ if (num_tbs > trans_pcie->max_tbs) {
IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
return;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 3f85713c41dc..1a566287993d 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -378,7 +378,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
/* Sanity check on number of chunks */
num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
- if (num_tbs >= trans_pcie->max_tbs) {
+ if (num_tbs > trans_pcie->max_tbs) {
IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
/* @todo issue fatal error, it is quite serious situation */
return;
diff --git a/drivers/net/wireless/intersil/Kconfig b/drivers/net/wireless/intersil/Kconfig
index 9da136049955..e89fce1d4f27 100644
--- a/drivers/net/wireless/intersil/Kconfig
+++ b/drivers/net/wireless/intersil/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_INTERSIL
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_INTERSIL
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 1cf22e62e3dd..a37f4b1d9d30 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
static unsigned int hwsim_net_id;
-static int hwsim_netgroup;
+static struct ida hwsim_netgroup_ida = IDA_INIT;
struct hwsim_net {
int netgroup;
@@ -267,11 +267,13 @@ static inline int hwsim_net_get_netgroup(struct net *net)
return hwsim_net->netgroup;
}
-static inline void hwsim_net_set_netgroup(struct net *net)
+static inline int hwsim_net_set_netgroup(struct net *net)
{
struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
- hwsim_net->netgroup = hwsim_netgroup++;
+ hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida,
+ 0, 0, GFP_KERNEL);
+ return hwsim_net->netgroup >= 0 ? 0 : -ENOMEM;
}
static inline u32 hwsim_net_get_wmediumd(struct net *net)
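Replacing the bare hwsim_netgroup++ counter with an IDA means a namespace's ID is returned to the pool on exit instead of leaking forever. The contract that ida_simple_get()/ida_simple_remove() provide can be modelled in a few lines (toy 64-entry bitmap; the kernel structure is unbounded):

#include <stdint.h>

static uint64_t id_bitmap;	/* bit i set = ID i in use */

/* smallest free ID, like ida_simple_get(&ida, 0, 0, GFP_KERNEL) */
static int id_get(void)
{
	for (int i = 0; i < 64; i++) {
		if (!(id_bitmap & (1ULL << i))) {
			id_bitmap |= 1ULL << i;
			return i;
		}
	}
	return -1;	/* the kernel call returns a negative errno */
}

/* return an ID to the pool, like ida_simple_remove() */
static void id_remove(int id)
{
	if (id >= 0 && id < 64)
		id_bitmap &= ~(1ULL << id);
}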
@@ -493,6 +495,7 @@ static LIST_HEAD(hwsim_radios);
static struct workqueue_struct *hwsim_wq;
static struct rhashtable hwsim_radios_rht;
static int hwsim_radio_idx;
+static int hwsim_radios_generation = 1;
static struct platform_driver mac80211_hwsim_driver = {
.driver = {
@@ -637,6 +640,7 @@ static const struct nla_policy hwsim_genl_policy[HWSIM_ATTR_MAX + 1] = {
[HWSIM_ATTR_RADIO_NAME] = { .type = NLA_STRING },
[HWSIM_ATTR_NO_VIF] = { .type = NLA_FLAG },
[HWSIM_ATTR_FREQ] = { .type = NLA_U32 },
+ [HWSIM_ATTR_PERM_ADDR] = { .type = NLA_UNSPEC, .len = ETH_ALEN },
};
static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
@@ -2408,6 +2412,7 @@ struct hwsim_new_radio_params {
bool destroy_on_close;
const char *hwname;
bool no_vif;
+ const u8 *perm_addr;
};
static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb,
@@ -2572,15 +2577,25 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
skb_queue_head_init(&data->pending);
SET_IEEE80211_DEV(hw, data->dev);
- eth_zero_addr(addr);
- addr[0] = 0x02;
- addr[3] = idx >> 8;
- addr[4] = idx;
- memcpy(data->addresses[0].addr, addr, ETH_ALEN);
- memcpy(data->addresses[1].addr, addr, ETH_ALEN);
- data->addresses[1].addr[0] |= 0x40;
- hw->wiphy->n_addresses = 2;
- hw->wiphy->addresses = data->addresses;
+ if (!param->perm_addr) {
+ eth_zero_addr(addr);
+ addr[0] = 0x02;
+ addr[3] = idx >> 8;
+ addr[4] = idx;
+ memcpy(data->addresses[0].addr, addr, ETH_ALEN);
+ /* second address: same as the first, with the local bit set */
+ memcpy(data->addresses[1].addr, addr, ETH_ALEN);
+ data->addresses[1].addr[0] |= 0x40;
+ hw->wiphy->n_addresses = 2;
+ hw->wiphy->addresses = data->addresses;
+ /* possible address clash is checked at hash table insertion */
+ } else {
+ memcpy(data->addresses[0].addr, param->perm_addr, ETH_ALEN);
+ /* compatibility with automatically generated mac addr */
+ memcpy(data->addresses[1].addr, param->perm_addr, ETH_ALEN);
+ hw->wiphy->n_addresses = 2;
+ hw->wiphy->addresses = data->addresses;
+ }
data->channels = param->channels;
data->use_chanctx = param->use_chanctx;
@@ -2727,6 +2742,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
mutex_init(&data->mutex);
data->netgroup = hwsim_net_get_netgroup(net);
+ data->wmediumd = hwsim_net_get_wmediumd(net);
/* Enable frame retransmissions for lossy channels */
hw->max_rates = 4;
@@ -2785,13 +2801,17 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht,
hwsim_rht_params);
if (err < 0) {
- pr_debug("mac80211_hwsim: radio index %d already present\n",
- idx);
+ if (info) {
+ GENL_SET_ERR_MSG(info, "perm addr already present");
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ }
spin_unlock_bh(&hwsim_radio_lock);
goto failed_final_insert;
}
list_add_tail(&data->list, &hwsim_radios);
+ hwsim_radios_generation++;
spin_unlock_bh(&hwsim_radio_lock);
if (idx > 0)
@@ -3210,6 +3230,19 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
param.regd = hwsim_world_regdom_custom[idx];
}
+ if (info->attrs[HWSIM_ATTR_PERM_ADDR]) {
+ if (!is_valid_ether_addr(
+ nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]))) {
+ GENL_SET_ERR_MSG(info, "MAC is not a valid source address");
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ return -EINVAL;
+ }
+
+ param.perm_addr = nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ }
+
ret = mac80211_hwsim_new_radio(info, &param);
kfree(hwname);
return ret;
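The handler above rejects a user-supplied permanent address unless is_valid_ether_addr() accepts it, i.e. the address is neither all-zero nor multicast. That test reduces to two checks, sketched standalone:

#include <stdbool.h>
#include <stdint.h>

/* valid unicast source address: not multicast, not all-zero */
static bool valid_ether_addr(const uint8_t a[6])
{
	uint8_t acc = 0;

	if (a[0] & 0x01)	/* I/G bit set: group address */
		return false;
	for (int i = 0; i < 6; i++)
		acc |= a[i];
	return acc != 0;	/* all-zero is not a real address */
}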
@@ -3249,6 +3282,7 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info)
list_del(&data->list);
rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
hwsim_rht_params);
+ hwsim_radios_generation++;
spin_unlock_bh(&hwsim_radio_lock);
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy),
info);
@@ -3305,17 +3339,19 @@ out_err:
static int hwsim_dump_radio_nl(struct sk_buff *skb,
struct netlink_callback *cb)
{
- int idx = cb->args[0];
+ int last_idx = cb->args[0];
struct mac80211_hwsim_data *data = NULL;
- int res;
+ int res = 0;
+ void *hdr;
spin_lock_bh(&hwsim_radio_lock);
+ cb->seq = hwsim_radios_generation;
- if (idx == hwsim_radio_idx)
+ if (last_idx >= hwsim_radio_idx - 1)
goto done;
list_for_each_entry(data, &hwsim_radios, list) {
- if (data->idx < idx)
+ if (data->idx <= last_idx)
continue;
if (!net_eq(wiphy_net(data->hw->wiphy), sock_net(skb->sk)))
@@ -3328,14 +3364,25 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb,
if (res < 0)
break;
- idx = data->idx + 1;
+ last_idx = data->idx;
}
- cb->args[0] = idx;
+ cb->args[0] = last_idx;
+
+ /* list changed, but no new element sent, set interrupted flag */
+ if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) {
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &hwsim_genl_family,
+ NLM_F_MULTI, HWSIM_CMD_GET_RADIO);
+ if (!hdr)
+ res = -EMSGSIZE;
+ genl_dump_check_consistent(cb, hdr);
+ genlmsg_end(skb, hdr);
+ }
done:
spin_unlock_bh(&hwsim_radio_lock);
- return skb->len;
+ return res ?: skb->len;
}
/* Generic Netlink operations array */
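The dump rewrite above switches from a positional cursor to the last radio index actually emitted, which stays correct when entries are deleted between dump passes; the generation counter additionally lets userspace detect such churn. The resumption logic, sketched over a plain array (netlink plumbing elided, names hypothetical):

#include <stddef.h>

struct radio { int idx; };

/* emit radios with idx > last_idx until emit() fails; return new cursor */
static int dump_radios(const struct radio *r, size_t n, int last_idx,
		       int (*emit)(const struct radio *))
{
	for (size_t i = 0; i < n; i++) {
		if (r[i].idx <= last_idx)
			continue;	/* sent in an earlier pass */
		if (emit(&r[i]) < 0)
			break;		/* buffer full: resume here later */
		last_idx = r[i].idx;
	}
	return last_idx;
}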
@@ -3393,6 +3440,7 @@ static void destroy_radio(struct work_struct *work)
struct mac80211_hwsim_data *data =
container_of(work, struct mac80211_hwsim_data, destroy_work);
+ hwsim_radios_generation++;
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL);
}
@@ -3462,9 +3510,7 @@ failure:
static __net_init int hwsim_init_net(struct net *net)
{
- hwsim_net_set_netgroup(net);
-
- return 0;
+ return hwsim_net_set_netgroup(net);
}
static void __net_exit hwsim_exit_net(struct net *net)
@@ -3487,6 +3533,8 @@ static void __net_exit hwsim_exit_net(struct net *net)
queue_work(hwsim_wq, &data->destroy_work);
}
spin_unlock_bh(&hwsim_radio_lock);
+
+ ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net));
}
static struct pernet_operations hwsim_net_ops = {
@@ -3516,7 +3564,7 @@ static int __init init_mac80211_hwsim(void)
spin_lock_init(&hwsim_radio_lock);
- hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0);
+ hwsim_wq = alloc_workqueue("hwsim_wq", 0, 0);
if (!hwsim_wq)
return -ENOMEM;
rhashtable_init(&hwsim_radios_rht, &hwsim_rht_params);
diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/mac80211_hwsim.h
index a96a79c1eff5..0fe3199f8c72 100644
--- a/drivers/net/wireless/mac80211_hwsim.h
+++ b/drivers/net/wireless/mac80211_hwsim.h
@@ -68,7 +68,12 @@ enum hwsim_tx_control_flags {
* %HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
* @HWSIM_CMD_NEW_RADIO: create a new radio with the given parameters,
* returns the radio ID (>= 0) or negative on errors, if successful
- * then multicast the result
+ * then multicast the result, uses optional parameter:
+ * %HWSIM_ATTR_REG_STRICT_REG, %HWSIM_ATTR_SUPPORT_P2P_DEVICE,
+ * %HWSIM_ATTR_DESTROY_RADIO_ON_CLOSE, %HWSIM_ATTR_CHANNELS,
+ * %HWSIM_ATTR_NO_VIF, %HWSIM_ATTR_RADIO_NAME, %HWSIM_ATTR_USE_CHANCTX,
+ * %HWSIM_ATTR_REG_HINT_ALPHA2, %HWSIM_ATTR_REG_CUSTOM_REG,
+ * %HWSIM_ATTR_PERM_ADDR
* @HWSIM_CMD_DEL_RADIO: destroy a radio, reply is multicasted
* @HWSIM_CMD_GET_RADIO: fetch information about existing radios, uses:
* %HWSIM_ATTR_RADIO_ID
@@ -126,6 +131,7 @@ enum {
* @HWSIM_ATTR_FREQ: Frequency at which packet is transmitted or received.
* @HWSIM_ATTR_TX_INFO_FLAGS: additional flags for corresponding
* rates of %HWSIM_ATTR_TX_INFO
+ * @HWSIM_ATTR_PERM_ADDR: permanent mac address of new radio
* @__HWSIM_ATTR_MAX: enum limit
*/
@@ -153,6 +159,7 @@ enum {
HWSIM_ATTR_FREQ,
HWSIM_ATTR_PAD,
HWSIM_ATTR_TX_INFO_FLAGS,
+ HWSIM_ATTR_PERM_ADDR,
__HWSIM_ATTR_MAX,
};
#define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1)
diff --git a/drivers/net/wireless/marvell/Kconfig b/drivers/net/wireless/marvell/Kconfig
index 4938c7ec0009..27038901d3ee 100644
--- a/drivers/net/wireless/marvell/Kconfig
+++ b/drivers/net/wireless/marvell/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_MARVELL
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_MARVELL
diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
index 8772e3949327..feebfdcf025a 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n.c
@@ -341,6 +341,36 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv,
le16_to_cpu(ht_cap->header.len));
mwifiex_fill_cap_info(priv, radio_type, &ht_cap->ht_cap);
+ /* Update HT40 capability from current channel information */
+ if (bss_desc->bcn_ht_oper) {
+ u8 ht_param = bss_desc->bcn_ht_oper->ht_param;
+ u8 radio =
+ mwifiex_band_to_radio_type(bss_desc->bss_band);
+ int freq =
+ ieee80211_channel_to_frequency(bss_desc->channel,
+ radio);
+ struct ieee80211_channel *chan =
+ ieee80211_get_channel(priv->adapter->wiphy, freq);
+
+ switch (ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
+ case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+ if (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) {
+ ht_cap->ht_cap.cap_info &=
+ ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+ ht_cap->ht_cap.cap_info &=
+ ~IEEE80211_HT_CAP_SGI_40;
+ }
+ break;
+ case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+ if (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) {
+ ht_cap->ht_cap.cap_info &=
+ ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+ ht_cap->ht_cap.cap_info &=
+ ~IEEE80211_HT_CAP_SGI_40;
+ }
+ break;
+ }
+ }
*buffer += sizeof(struct mwifiex_ie_types_htcap);
ret_len += sizeof(struct mwifiex_ie_types_htcap);
diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index ce4432c535f0..7f7e9de2db1c 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -95,18 +95,32 @@ u8 mwifiex_chan_type_to_sec_chan_offset(enum nl80211_channel_type chan_type)
/* This function maps IEEE HT secondary channel type to NL80211 channel type
*/
-u8 mwifiex_sec_chan_offset_to_chan_type(u8 second_chan_offset)
+u8 mwifiex_get_chan_type(struct mwifiex_private *priv)
{
- switch (second_chan_offset) {
- case IEEE80211_HT_PARAM_CHA_SEC_NONE:
- return NL80211_CHAN_HT20;
- case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
- return NL80211_CHAN_HT40PLUS;
- case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
- return NL80211_CHAN_HT40MINUS;
- default:
- return NL80211_CHAN_HT20;
+ struct mwifiex_channel_band channel_band;
+ int ret;
+
+ ret = mwifiex_get_chan_info(priv, &channel_band);
+
+ if (!ret) {
+ switch (channel_band.band_config.chan_width) {
+ case CHAN_BW_20MHZ:
+ if (IS_11N_ENABLED(priv))
+ return NL80211_CHAN_HT20;
+ else
+ return NL80211_CHAN_NO_HT;
+ case CHAN_BW_40MHZ:
+ if (channel_band.band_config.chan2_offset ==
+ SEC_CHAN_ABOVE)
+ return NL80211_CHAN_HT40PLUS;
+ else
+ return NL80211_CHAN_HT40MINUS;
+ default:
+ return NL80211_CHAN_HT20;
+ }
}
+
+ return NL80211_CHAN_HT20;
}
/*
@@ -3937,7 +3951,6 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy,
struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev);
struct mwifiex_bssdescriptor *curr_bss;
struct ieee80211_channel *chan;
- u8 second_chan_offset;
enum nl80211_channel_type chan_type;
enum nl80211_band band;
int freq;
@@ -3954,10 +3967,7 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy,
chan = ieee80211_get_channel(wiphy, freq);
if (priv->ht_param_present) {
- second_chan_offset = priv->assoc_resp_ht_param &
- IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
- chan_type = mwifiex_sec_chan_offset_to_chan_type
- (second_chan_offset);
+ chan_type = mwifiex_get_chan_type(priv);
cfg80211_chandef_create(chandef, chan, chan_type);
} else {
cfg80211_chandef_create(chandef, chan,
diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index 874660052055..7014f440e6f8 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -1529,7 +1529,8 @@ int mwifiex_ret_get_hw_spec(struct mwifiex_private *priv,
adapter->fw_release_number = le32_to_cpu(hw_spec->fw_release_number);
adapter->fw_api_ver = (adapter->fw_release_number >> 16) & 0xff;
- adapter->number_of_antenna = le16_to_cpu(hw_spec->number_of_antenna);
+ adapter->number_of_antenna =
+ le16_to_cpu(hw_spec->number_of_antenna) & 0xf;
if (le32_to_cpu(hw_spec->dot_11ac_dev_cap)) {
adapter->is_hw_11ac_capable = true;
diff --git a/drivers/net/wireless/marvell/mwifiex/decl.h b/drivers/net/wireless/marvell/mwifiex/decl.h
index 188e4c370836..46696ea0b23e 100644
--- a/drivers/net/wireless/marvell/mwifiex/decl.h
+++ b/drivers/net/wireless/marvell/mwifiex/decl.h
@@ -294,4 +294,21 @@ enum rdwr_status {
RDWR_STATUS_DONE = 2
};
+enum mwifiex_chan_width {
+ CHAN_BW_20MHZ = 0,
+ CHAN_BW_10MHZ,
+ CHAN_BW_40MHZ,
+ CHAN_BW_80MHZ,
+ CHAN_BW_8080MHZ,
+ CHAN_BW_160MHZ,
+ CHAN_BW_5MHZ,
+};
+
+enum mwifiex_chan_offset {
+ SEC_CHAN_NONE = 0,
+ SEC_CHAN_ABOVE = 1,
+ SEC_CHAN_5MHZ = 2,
+ SEC_CHAN_BELOW = 3
+};
+
#endif /* !_MWIFIEX_DECL_H_ */
diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index 9c2cdef54074..c5dc518f768b 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -411,6 +411,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER {
#define HostCmd_CMD_TDLS_OPER 0x0122
#define HostCmd_CMD_FW_DUMP_EVENT 0x0125
#define HostCmd_CMD_SDIO_SP_RX_AGGR_CFG 0x0223
+#define HostCmd_CMD_STA_CONFIGURE 0x023f
#define HostCmd_CMD_CHAN_REGION_CFG 0x0242
#define HostCmd_CMD_PACKET_AGGR_CTRL 0x0251
@@ -2285,6 +2286,11 @@ struct host_cmd_ds_pkt_aggr_ctrl {
__le16 tx_aggr_align;
} __packed;
+struct host_cmd_ds_sta_configure {
+ __le16 action;
+ u8 tlv_buffer[0];
+} __packed;
+
struct host_cmd_ds_command {
__le16 command;
__le16 size;
@@ -2361,6 +2367,7 @@ struct host_cmd_ds_command {
struct host_cmd_ds_gtk_rekey_params rekey;
struct host_cmd_ds_chan_region_cfg reg_cfg;
struct host_cmd_ds_pkt_aggr_ctrl pkt_aggr_ctrl;
+ struct host_cmd_ds_sta_configure sta_cfg;
} params;
} __packed;
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 12e739950332..b6484582845a 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -943,13 +943,26 @@ int mwifiex_set_mac_address(struct mwifiex_private *priv,
struct net_device *dev)
{
int ret;
- u64 mac_addr;
+ u64 mac_addr, old_mac_addr;
- if (priv->bss_type != MWIFIEX_BSS_TYPE_P2P)
- goto done;
+ if (priv->bss_type == MWIFIEX_BSS_TYPE_ANY)
+ return -ENOTSUPP;
mac_addr = ether_addr_to_u64(priv->curr_addr);
- mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
+ old_mac_addr = mac_addr;
+
+ if (priv->bss_type == MWIFIEX_BSS_TYPE_P2P)
+ mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
+
+ if (mwifiex_get_intf_num(priv->adapter, priv->bss_type) > 1) {
+ /* Set mac address based on bss_type/bss_num */
+ mac_addr ^= BIT_ULL(priv->bss_type + 8);
+ mac_addr += priv->bss_num;
+ }
+
+ if (mac_addr == old_mac_addr)
+ goto done;
+
u64_to_ether_addr(mac_addr, priv->curr_addr);
/* Send request to firmware */
@@ -957,13 +970,14 @@ int mwifiex_set_mac_address(struct mwifiex_private *priv,
HostCmd_ACT_GEN_SET, 0, NULL, true);
if (ret) {
+ u64_to_ether_addr(old_mac_addr, priv->curr_addr);
mwifiex_dbg(priv->adapter, ERROR,
"set mac address failed: ret=%d\n", ret);
return ret;
}
done:
- memcpy(dev->dev_addr, priv->curr_addr, ETH_ALEN);
+ ether_addr_copy(dev->dev_addr, priv->curr_addr);
return 0;
}
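Holding the address in a u64 keeps the per-interface tweaks above to two integer operations: flip one bit per bss_type and add bss_num, keeping the old value around so a firmware rejection can be rolled back. A runnable sketch of the arithmetic (the bit position is this driver's convention, assumed here):

#include <stdint.h>
#include <stdio.h>

/* derive a per-interface MAC from a base address packed into a u64 */
static uint64_t derive_mac(uint64_t base, unsigned bss_type,
			   unsigned bss_num, int multiple_intfs)
{
	uint64_t mac = base;

	if (multiple_intfs) {
		mac ^= 1ULL << (bss_type + 8);	/* one bit per bss_type */
		mac += bss_num;			/* separate same-type intfs */
	}
	return mac;
}

int main(void)
{
	uint64_t base = 0x001122334455ULL;

	printf("%012llx\n", (unsigned long long)derive_mac(base, 2, 1, 1));
	return 0;
}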
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index 6b5539b1f4d8..9bde181700dc 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -517,6 +517,18 @@ enum mwifiex_iface_work_flags {
MWIFIEX_IFACE_WORK_CARD_RESET,
};
+struct mwifiex_band_config {
+ u8 chan_band:2;
+ u8 chan_width:2;
+ u8 chan2_offset:2;
+ u8 scan_mode:2;
+} __packed;
+
+struct mwifiex_channel_band {
+ struct mwifiex_band_config band_config;
+ u8 channel;
+};
+
struct mwifiex_private {
struct mwifiex_adapter *adapter;
u8 bss_type;
@@ -1280,6 +1292,19 @@ mwifiex_copy_rates(u8 *dest, u32 pos, u8 *src, int len)
return pos;
}
+/* This function returns the number of interfaces of the same bss_type.
+ */
+static inline u8
+mwifiex_get_intf_num(struct mwifiex_adapter *adapter, u8 bss_type)
+{
+ u8 i, num = 0;
+
+ for (i = 0; i < adapter->priv_num; i++)
+ if (adapter->priv[i] && adapter->priv[i]->bss_type == bss_type)
+ num++;
+ return num;
+}
+
/*
* This function returns the correct private structure pointer based
* upon the BSS type and BSS number.
@@ -1544,7 +1569,7 @@ int mwifiex_check_network_compatibility(struct mwifiex_private *priv,
struct mwifiex_bssdescriptor *bss_desc);
u8 mwifiex_chan_type_to_sec_chan_offset(enum nl80211_channel_type chan_type);
-u8 mwifiex_sec_chan_offset_to_chan_type(u8 second_chan_offset);
+u8 mwifiex_get_chan_type(struct mwifiex_private *priv);
struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
const char *name,
@@ -1670,6 +1695,8 @@ void mwifiex_queue_main_work(struct mwifiex_adapter *adapter);
int mwifiex_get_wakeup_reason(struct mwifiex_private *priv, u16 action,
int cmd_type,
struct mwifiex_ds_wakeup_reason *wakeup_reason);
+int mwifiex_get_chan_info(struct mwifiex_private *priv,
+ struct mwifiex_channel_band *channel_band);
int mwifiex_ret_wakeup_reason(struct mwifiex_private *priv,
struct host_cmd_ds_command *resp,
struct host_cmd_ds_wakeup_reason *wakeup_reason);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
index 211e47d8b318..4ed10cf82f9a 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
@@ -1898,6 +1898,25 @@ static int mwifiex_cmd_get_wakeup_reason(struct mwifiex_private *priv,
return 0;
}
+static int mwifiex_cmd_get_chan_info(struct host_cmd_ds_command *cmd,
+ u16 cmd_action)
+{
+ struct host_cmd_ds_sta_configure *sta_cfg_cmd = &cmd->params.sta_cfg;
+ struct host_cmd_tlv_channel_band *tlv_band_channel =
+ (struct host_cmd_tlv_channel_band *)sta_cfg_cmd->tlv_buffer;
+
+ cmd->command = cpu_to_le16(HostCmd_CMD_STA_CONFIGURE);
+ cmd->size = cpu_to_le16(sizeof(*sta_cfg_cmd) +
+ sizeof(*tlv_band_channel) + S_DS_GEN);
+ sta_cfg_cmd->action = cpu_to_le16(cmd_action);
+ memset(tlv_band_channel, 0, sizeof(*tlv_band_channel));
+ tlv_band_channel->header.type = cpu_to_le16(TLV_TYPE_CHANNELBANDLIST);
+ tlv_band_channel->header.len = cpu_to_le16(sizeof(*tlv_band_channel) -
+ sizeof(struct mwifiex_ie_types_header));
+
+ return 0;
+}
+
/* This function check if the command is supported by firmware */
static int mwifiex_is_cmd_supported(struct mwifiex_private *priv, u16 cmd_no)
{
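The command builder above emits a fixed action header followed by one TLV whose length field excludes the TLV header itself; the firmware fills in the body in its GET response. A standalone sketch of that layout (host byte order for brevity; the driver uses cpu_to_le16()):

#include <stdint.h>
#include <string.h>

struct tlv_hdr {
	uint16_t type;
	uint16_t len;		/* payload bytes, header excluded */
} __attribute__((packed));

struct chan_band_tlv {
	struct tlv_hdr header;
	uint8_t band_config;
	uint8_t channel;
} __attribute__((packed));

/* append one zeroed channel/band TLV; returns bytes written */
static size_t put_chan_band_tlv(uint8_t *buf, uint16_t type)
{
	struct chan_band_tlv tlv;

	memset(&tlv, 0, sizeof(tlv));
	tlv.header.type = type;
	tlv.header.len  = sizeof(tlv) - sizeof(tlv.header);
	memcpy(buf, &tlv, sizeof(tlv));
	return sizeof(tlv);
}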
@@ -2210,6 +2229,9 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no,
cmd_ptr->command = cpu_to_le16(cmd_no);
cmd_ptr->size = cpu_to_le16(S_DS_GEN);
break;
+ case HostCmd_CMD_STA_CONFIGURE:
+ ret = mwifiex_cmd_get_chan_info(cmd_ptr, cmd_action);
+ break;
default:
mwifiex_dbg(priv->adapter, ERROR,
"PREP_CMD: unknown cmd- %#x\n", cmd_no);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
index 1bd4e13b8449..69e3b624adbb 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
@@ -1170,6 +1170,22 @@ static int mwifiex_ret_pkt_aggr_ctrl(struct mwifiex_private *priv,
return 0;
}
+static int mwifiex_ret_get_chan_info(struct mwifiex_private *priv,
+ struct host_cmd_ds_command *resp,
+ struct mwifiex_channel_band *channel_band)
+{
+ struct host_cmd_ds_sta_configure *sta_cfg_cmd = &resp->params.sta_cfg;
+ struct host_cmd_tlv_channel_band *tlv_band_channel;
+
+ tlv_band_channel =
+ (struct host_cmd_tlv_channel_band *)sta_cfg_cmd->tlv_buffer;
+ memcpy(&channel_band->band_config, &tlv_band_channel->band_config,
+ sizeof(struct mwifiex_band_config));
+ channel_band->channel = tlv_band_channel->channel;
+
+ return 0;
+}
+
/*
* This function handles the command responses.
*
@@ -1393,6 +1409,9 @@ int mwifiex_process_sta_cmdresp(struct mwifiex_private *priv, u16 cmdresp_no,
case HostCmd_CMD_CHAN_REGION_CFG:
ret = mwifiex_ret_chan_region_cfg(priv, resp);
break;
+ case HostCmd_CMD_STA_CONFIGURE:
+ ret = mwifiex_ret_get_chan_info(priv, resp, data_buf);
+ break;
default:
mwifiex_dbg(adapter, ERROR,
"CMD_RESP: unknown cmd response %#x\n",
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
index a6077ab3efc3..5414b755cf82 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
@@ -146,7 +146,6 @@ int mwifiex_fill_new_bss_desc(struct mwifiex_private *priv,
size_t beacon_ie_len;
struct mwifiex_bss_priv *bss_priv = (void *)bss->priv;
const struct cfg80211_bss_ies *ies;
- int ret;
rcu_read_lock();
ies = rcu_dereference(bss->ies);
@@ -190,48 +189,7 @@ int mwifiex_fill_new_bss_desc(struct mwifiex_private *priv,
if (bss_desc->cap_info_bitmap & WLAN_CAPABILITY_SPECTRUM_MGMT)
bss_desc->sensed_11h = true;
- ret = mwifiex_update_bss_desc_with_ie(priv->adapter, bss_desc);
- if (ret)
- return ret;
-
- /* Update HT40 capability based on current channel information */
- if (bss_desc->bcn_ht_oper && bss_desc->bcn_ht_cap) {
- u8 ht_param = bss_desc->bcn_ht_oper->ht_param;
- u8 radio = mwifiex_band_to_radio_type(bss_desc->bss_band);
- struct ieee80211_supported_band *sband =
- priv->wdev.wiphy->bands[radio];
- int freq = ieee80211_channel_to_frequency(bss_desc->channel,
- radio);
- struct ieee80211_channel *chan =
- ieee80211_get_channel(priv->adapter->wiphy, freq);
-
- switch (ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
- case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
- if (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) {
- sband->ht_cap.cap &=
- ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
- sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
- } else {
- sband->ht_cap.cap |=
- IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
- IEEE80211_HT_CAP_SGI_40;
- }
- break;
- case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
- if (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) {
- sband->ht_cap.cap &=
- ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
- sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
- } else {
- sband->ht_cap.cap |=
- IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
- IEEE80211_HT_CAP_SGI_40;
- }
- break;
- }
- }
-
- return 0;
+ return mwifiex_update_bss_desc_with_ie(priv->adapter, bss_desc);
}
void mwifiex_dnld_txpwr_table(struct mwifiex_private *priv)
@@ -1523,3 +1481,15 @@ int mwifiex_get_wakeup_reason(struct mwifiex_private *priv, u16 action,
return status;
}
+
+int mwifiex_get_chan_info(struct mwifiex_private *priv,
+ struct mwifiex_channel_band *channel_band)
+{
+ int status = 0;
+
+ status = mwifiex_send_cmd(priv, HostCmd_CMD_STA_CONFIGURE,
+ HostCmd_ACT_GEN_GET, 0, channel_band,
+ MWIFIEX_SYNC_CMD);
+
+ return status;
+}
diff --git a/drivers/net/wireless/mediatek/Kconfig b/drivers/net/wireless/mediatek/Kconfig
index 92ce4062f307..ff5fc8987b0a 100644
--- a/drivers/net/wireless/mediatek/Kconfig
+++ b/drivers/net/wireless/mediatek/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_MEDIATEK
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_MEDIATEK
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 85f8d324ebf8..4f30cdcd2b53 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -119,6 +119,52 @@ static int mt76_led_init(struct mt76_dev *dev)
return devm_led_classdev_register(dev->dev, &dev->led_cdev);
}
+static void mt76_init_stream_cap(struct mt76_dev *dev,
+ struct ieee80211_supported_band *sband,
+ bool vht)
+{
+ struct ieee80211_sta_ht_cap *ht_cap = &sband->ht_cap;
+ int i, nstream = __sw_hweight8(dev->antenna_mask);
+ struct ieee80211_sta_vht_cap *vht_cap;
+ u16 mcs_map = 0;
+
+ if (nstream > 1)
+ ht_cap->cap |= IEEE80211_HT_CAP_TX_STBC;
+ else
+ ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC;
+
+ for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
+ ht_cap->mcs.rx_mask[i] = i < nstream ? 0xff : 0;
+
+ if (!vht)
+ return;
+
+ vht_cap = &sband->vht_cap;
+ if (nstream > 1)
+ vht_cap->cap |= IEEE80211_VHT_CAP_TXSTBC;
+ else
+ vht_cap->cap &= ~IEEE80211_VHT_CAP_TXSTBC;
+
+ for (i = 0; i < 8; i++) {
+ if (i < nstream)
+ mcs_map |= (IEEE80211_VHT_MCS_SUPPORT_0_9 << (i * 2));
+ else
+ mcs_map |=
+ (IEEE80211_VHT_MCS_NOT_SUPPORTED << (i * 2));
+ }
+ vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map);
+ vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
+}
+
+void mt76_set_stream_caps(struct mt76_dev *dev, bool vht)
+{
+ if (dev->cap.has_2ghz)
+ mt76_init_stream_cap(dev, &dev->sband_2g.sband, false);
+ if (dev->cap.has_5ghz)
+ mt76_init_stream_cap(dev, &dev->sband_5g.sband, vht);
+}
+EXPORT_SYMBOL_GPL(mt76_set_stream_caps);
+
static int
mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
const struct ieee80211_channel *chan, int n_chan,
@@ -128,7 +174,6 @@ mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
struct ieee80211_sta_ht_cap *ht_cap;
struct ieee80211_sta_vht_cap *vht_cap;
void *chanlist;
- u16 mcs_map;
int size;
size = n_chan * sizeof(*chan);
@@ -153,34 +198,20 @@ mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
IEEE80211_HT_CAP_GRN_FLD |
IEEE80211_HT_CAP_SGI_20 |
IEEE80211_HT_CAP_SGI_40 |
- IEEE80211_HT_CAP_TX_STBC |
(1 << IEEE80211_HT_CAP_RX_STBC_SHIFT);
- ht_cap->mcs.rx_mask[0] = 0xff;
- ht_cap->mcs.rx_mask[1] = 0xff;
ht_cap->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
ht_cap->ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K;
ht_cap->ampdu_density = IEEE80211_HT_MPDU_DENSITY_4;
+ mt76_init_stream_cap(dev, sband, vht);
+
if (!vht)
return 0;
vht_cap = &sband->vht_cap;
vht_cap->vht_supported = true;
-
- mcs_map = (IEEE80211_VHT_MCS_SUPPORT_0_9 << (0 * 2)) |
- (IEEE80211_VHT_MCS_SUPPORT_0_9 << (1 * 2)) |
- (IEEE80211_VHT_MCS_NOT_SUPPORTED << (2 * 2)) |
- (IEEE80211_VHT_MCS_NOT_SUPPORTED << (3 * 2)) |
- (IEEE80211_VHT_MCS_NOT_SUPPORTED << (4 * 2)) |
- (IEEE80211_VHT_MCS_NOT_SUPPORTED << (5 * 2)) |
- (IEEE80211_VHT_MCS_NOT_SUPPORTED << (6 * 2)) |
- (IEEE80211_VHT_MCS_NOT_SUPPORTED << (7 * 2));
-
- vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map);
- vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
vht_cap->cap |= IEEE80211_VHT_CAP_RXLDPC |
- IEEE80211_VHT_CAP_TXSTBC |
IEEE80211_VHT_CAP_RXSTBC_1 |
IEEE80211_VHT_CAP_SHORT_GI_80;
@@ -262,6 +293,9 @@ int mt76_register_device(struct mt76_dev *dev, bool vht,
wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR;
+ wiphy->available_antennas_tx = dev->antenna_mask;
+ wiphy->available_antennas_rx = dev->antenna_mask;
+
hw->txq_data_size = sizeof(struct mt76_txq);
hw->max_tx_fragments = 16;
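The VHT MCS map packs one 2-bit support code per spatial stream, so mt76_init_stream_cap() above marks the first nstream streams as MCS 0-9 and the rest as unsupported. The packing, runnable standalone (codes as defined by 802.11ac):

#include <stdint.h>
#include <stdio.h>

#define MCS_SUPPORT_0_9		2	/* 2-bit per-stream codes */
#define MCS_NOT_SUPPORTED	3

static uint16_t build_mcs_map(int nstream)
{
	uint16_t map = 0;

	for (int i = 0; i < 8; i++) {	/* 8 streams x 2 bits = 16 bits */
		int code = i < nstream ? MCS_SUPPORT_0_9 : MCS_NOT_SUPPORTED;

		map |= (uint16_t)(code << (i * 2));
	}
	return map;
}

int main(void)
{
	printf("%04x\n", build_mcs_map(2));	/* prints fffa: two streams */
	return 0;
}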
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index d2ce15093edd..065ff78059c3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -253,6 +253,8 @@ struct mt76_dev {
u32 rev;
unsigned long state;
+ u8 antenna_mask;
+
struct mt76_sband sband_2g;
struct mt76_sband sband_5g;
struct debugfs_blob_wrapper eeprom;
@@ -423,6 +425,7 @@ void mt76_release_buffered_frames(struct ieee80211_hw *hw,
void mt76_set_channel(struct mt76_dev *dev);
int mt76_get_survey(struct ieee80211_hw *hw, int idx,
struct survey_info *survey);
+void mt76_set_stream_caps(struct mt76_dev *dev, bool vht);
int mt76_rx_aggr_start(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tid,
u16 ssn, u8 size);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2.h b/drivers/net/wireless/mediatek/mt76/mt76x2.h
index e62131b88102..783b8122ec3c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2.h
@@ -180,6 +180,7 @@ int mt76x2_eeprom_init(struct mt76x2_dev *dev);
int mt76x2_apply_calibration_data(struct mt76x2_dev *dev, int channel);
void mt76x2_set_tx_ackto(struct mt76x2_dev *dev);
+void mt76x2_phy_set_antenna(struct mt76x2_dev *dev);
int mt76x2_phy_start(struct mt76x2_dev *dev);
int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
struct cfg80211_chan_def *chandef);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c
index 9c9bf3e785ba..5bb50027c1e8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c
@@ -222,11 +222,10 @@ static int
mt76x2_eeprom_load(struct mt76x2_dev *dev)
{
void *efuse;
- int len = MT7662_EEPROM_SIZE;
bool found;
int ret;
- ret = mt76_eeprom_init(&dev->mt76, len);
+ ret = mt76_eeprom_init(&dev->mt76, MT7662_EEPROM_SIZE);
if (ret < 0)
return ret;
@@ -234,14 +233,15 @@ mt76x2_eeprom_load(struct mt76x2_dev *dev)
if (found)
found = !mt76x2_check_eeprom(dev);
- dev->mt76.otp.data = devm_kzalloc(dev->mt76.dev, len, GFP_KERNEL);
- dev->mt76.otp.size = len;
+ dev->mt76.otp.data = devm_kzalloc(dev->mt76.dev, MT7662_EEPROM_SIZE,
+ GFP_KERNEL);
+ dev->mt76.otp.size = MT7662_EEPROM_SIZE;
if (!dev->mt76.otp.data)
return -ENOMEM;
efuse = dev->mt76.otp.data;
- if (mt76x2_get_efuse_data(dev, efuse, len))
+ if (mt76x2_get_efuse_data(dev, efuse, MT7662_EEPROM_SIZE))
goto out;
if (found) {
@@ -249,7 +249,7 @@ mt76x2_eeprom_load(struct mt76x2_dev *dev)
} else {
/* FIXME: check if efuse data is complete */
found = true;
- memcpy(dev->mt76.eeprom.data, efuse, len);
+ memcpy(dev->mt76.eeprom.data, efuse, MT7662_EEPROM_SIZE);
}
out:
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2_init.c
index 9dbf94947324..934c331d995e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_init.c
@@ -857,6 +857,9 @@ int mt76x2_register_device(struct mt76x2_dev *dev)
dev->mt76.led_cdev.brightness_set = mt76x2_led_set_brightness;
dev->mt76.led_cdev.blink_set = mt76x2_led_set_blink;
+ /* init antenna configuration */
+ dev->mt76.antenna_mask = 3;
+
ret = mt76_register_device(&dev->mt76, true, mt76x2_rates,
ARRAY_SIZE(mt76x2_rates));
if (ret)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
index 7ea3d841918e..d18315652583 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
@@ -198,8 +198,8 @@ void mt76x2_mac_write_txwi(struct mt76x2_dev *dev, struct mt76x2_txwi *txwi,
ccmp_pn[5] = pn >> 24;
ccmp_pn[6] = pn >> 32;
ccmp_pn[7] = pn >> 40;
- txwi->iv = *((u32 *) &ccmp_pn[0]);
- txwi->eiv = *((u32 *) &ccmp_pn[1]);
+ txwi->iv = *((__le32 *)&ccmp_pn[0]);
+ txwi->eiv = *((__le32 *)&ccmp_pn[1]);
}
spin_lock_bh(&dev->mt76.lock);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
index 205043b470b2..25f4cebef26d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
@@ -549,6 +549,40 @@ mt76x2_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set)
return 0;
}
+static int mt76x2_set_antenna(struct ieee80211_hw *hw, u32 tx_ant,
+ u32 rx_ant)
+{
+ struct mt76x2_dev *dev = hw->priv;
+
+ if (!tx_ant || tx_ant > 3 || tx_ant != rx_ant)
+ return -EINVAL;
+
+ mutex_lock(&dev->mutex);
+
+ dev->chainmask = (tx_ant == 3) ? 0x202 : 0x101;
+ dev->mt76.antenna_mask = tx_ant;
+
+ mt76_set_stream_caps(&dev->mt76, true);
+ mt76x2_phy_set_antenna(dev);
+
+ mutex_unlock(&dev->mutex);
+
+ return 0;
+}
+
+static int mt76x2_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant,
+ u32 *rx_ant)
+{
+ struct mt76x2_dev *dev = hw->priv;
+
+ mutex_lock(&dev->mutex);
+ *tx_ant = dev->mt76.antenna_mask;
+ *rx_ant = dev->mt76.antenna_mask;
+ mutex_unlock(&dev->mutex);
+
+ return 0;
+}
+
const struct ieee80211_ops mt76x2_ops = {
.tx = mt76x2_tx,
.start = mt76x2_start,
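mt76x2_set_antenna() above treats the antenna argument as a two-chain bitmap and derives the chainmask from it; only symmetric TX/RX selections are accepted. The validation and mapping, sketched standalone (register values as in the hunk, semantics assumed):

#include <stdint.h>

/* tx_ant bitmap: 1 = chain 0 only, 2 = chain 1 only, 3 = both chains */
static int set_antenna(uint32_t tx_ant, uint32_t rx_ant,
		       uint16_t *chainmask, uint8_t *antenna_mask)
{
	if (!tx_ant || tx_ant > 3 || tx_ant != rx_ant)
		return -1;	/* -EINVAL in the driver */

	/* 0x202 selects the two-chain configuration, 0x101 a single chain */
	*chainmask = (tx_ant == 3) ? 0x202 : 0x101;
	*antenna_mask = (uint8_t)tx_ant;
	return 0;
}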
@@ -573,5 +607,7 @@ const struct ieee80211_ops mt76x2_ops = {
.set_coverage_class = mt76x2_set_coverage_class,
.get_survey = mt76_get_survey,
.set_tim = mt76x2_set_tim,
+ .set_antenna = mt76x2_set_antenna,
+ .get_antenna = mt76x2_get_antenna,
};
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
index 5b742749d5de..fcc37eb7ce0b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
@@ -361,30 +361,53 @@ mt76x2_phy_set_band(struct mt76x2_dev *dev, int band, bool primary_upper)
primary_upper);
}
-static void
-mt76x2_set_rx_chains(struct mt76x2_dev *dev)
+void mt76x2_phy_set_antenna(struct mt76x2_dev *dev)
{
u32 val;
val = mt76_rr(dev, MT_BBP(AGC, 0));
- val &= ~(BIT(3) | BIT(4));
+ val &= ~(BIT(4) | BIT(1));
+ switch (dev->mt76.antenna_mask) {
+ case 1:
+ /* disable mac DAC control */
+ mt76_clear(dev, MT_BBP(IBI, 9), BIT(11));
+ mt76_clear(dev, MT_BBP(TXBE, 5), 3);
+ mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0x3);
+ mt76_rmw_field(dev, MT_BBP(CORE, 32), GENMASK(21, 20), 2);
+ /* disable DAC 1 */
+ mt76_rmw_field(dev, MT_BBP(CORE, 33), GENMASK(12, 9), 4);
- if (dev->chainmask & BIT(1))
- val |= BIT(3);
+ val &= ~(BIT(3) | BIT(0));
+ break;
+ case 2:
+ /* disable mac DAC control */
+ mt76_clear(dev, MT_BBP(IBI, 9), BIT(11));
+ mt76_rmw_field(dev, MT_BBP(TXBE, 5), 3, 1);
+ mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0xc);
+ mt76_rmw_field(dev, MT_BBP(CORE, 32), GENMASK(21, 20), 1);
+ /* disable DAC 0 */
+ mt76_rmw_field(dev, MT_BBP(CORE, 33), GENMASK(12, 9), 1);
+
+ val &= ~BIT(3);
+ val |= BIT(0);
+ break;
+ case 3:
+ default:
+ /* enable mac DAC control */
+ mt76_set(dev, MT_BBP(IBI, 9), BIT(11));
+ mt76_set(dev, MT_BBP(TXBE, 5), 3);
+ mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0xf);
+ mt76_clear(dev, MT_BBP(CORE, 32), GENMASK(21, 20));
+ mt76_clear(dev, MT_BBP(CORE, 33), GENMASK(12, 9));
+ val &= ~BIT(0);
+ val |= BIT(3);
+ break;
+ }
mt76_wr(dev, MT_BBP(AGC, 0), val);
}
static void
-mt76x2_set_tx_dac(struct mt76x2_dev *dev)
-{
- if (dev->chainmask & BIT(1))
- mt76_set(dev, MT_BBP(TXBE, 5), 3);
- else
- mt76_clear(dev, MT_BBP(TXBE, 5), 3);
-}
-
-static void
mt76x2_get_agc_gain(struct mt76x2_dev *dev, u8 *dest)
{
dest[0] = mt76_get_field(dev, MT_BBP(AGC, 8), MT_BBP_AGC_GAIN);
@@ -585,10 +608,8 @@ int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
mt76x2_configure_tx_delay(dev, band, bw);
mt76x2_phy_set_txpower(dev);
- mt76x2_set_rx_chains(dev);
mt76x2_phy_set_band(dev, chan->band, ch_group_index & 1);
mt76x2_phy_set_bw(dev, chandef->width, ch_group_index);
- mt76x2_set_tx_dac(dev);
mt76_rmw(dev, MT_EXT_CCA_CFG,
(MT_EXT_CCA_CFG_CCA0 |
@@ -604,6 +625,8 @@ int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
mt76x2_mcu_init_gain(dev, channel, dev->cal.rx.mcu_gain, true);
+ mt76x2_phy_set_antenna(dev);
+
/* Enable LDPC Rx */
if (mt76xx_rev(dev) >= MT76XX_REV_E3)
mt76_set(dev, MT_BBP(RXO, 13), BIT(10));
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_regs.h b/drivers/net/wireless/mediatek/mt76/mt76x2_regs.h
index ce3ab85c8b0f..b9c334d9e5b8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_regs.h
@@ -321,6 +321,8 @@
#define MT_TX_PWR_CFG_2 0x131c
#define MT_TX_PWR_CFG_3 0x1320
#define MT_TX_PWR_CFG_4 0x1324
+#define MT_TX_PIN_CFG 0x1328
+#define MT_TX_PIN_CFG_TXANT GENMASK(3, 0)
#define MT_TX_BAND_CFG 0x132c
#define MT_TX_BAND_CFG_UPPER_40M BIT(0)
diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
index da6faea092d6..76117b402880 100644
--- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
@@ -19,6 +19,7 @@
#include <asm/unaligned.h>
#include "mt7601u.h"
#include "eeprom.h"
+#include "mac.h"
static bool
field_valid(u8 val)
@@ -74,7 +75,7 @@ static int
mt7601u_efuse_physical_size_check(struct mt7601u_dev *dev)
{
const int map_reads = DIV_ROUND_UP(MT_EFUSE_USAGE_MAP_SIZE, 16);
- u8 data[map_reads * 16];
+ u8 data[round_up(MT_EFUSE_USAGE_MAP_SIZE, 16)];
int ret, i;
u32 start = 0, end = 0, cnt_free;
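Replacing the variable-length array with round_up() gives the on-stack buffer a size known at compile time. A minimal sketch of the helper for power-of-two alignment (equivalent to the kernel macro for these inputs):

#include <stdio.h>

#define round_up(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))

int main(void)
{
	char data[round_up(70, 16)];	/* fixed 80 bytes, no VLA */

	printf("%zu\n", sizeof(data));	/* prints 80 */
	return 0;
}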
@@ -134,27 +135,6 @@ mt7601u_set_chip_cap(struct mt7601u_dev *dev, u8 *eeprom)
"Error: device has more than 1 RX/TX stream!\n");
}
-static int
-mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *eeprom)
-{
- const void *src = eeprom + MT_EE_MAC_ADDR;
-
- ether_addr_copy(dev->macaddr, src);
-
- if (!is_valid_ether_addr(dev->macaddr)) {
- eth_random_addr(dev->macaddr);
- dev_info(dev->dev,
- "Invalid MAC address, using random address %pM\n",
- dev->macaddr);
- }
-
- mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr));
- mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) |
- FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff));
-
- return 0;
-}
-
static void mt7601u_set_channel_target_power(struct mt7601u_dev *dev,
u8 *eeprom, u8 max_pwr)
{
@@ -400,7 +380,7 @@ mt7601u_eeprom_init(struct mt7601u_dev *dev)
dev_info(dev->dev, "EEPROM ver:%02hhx fae:%02hhx\n",
eeprom[MT_EE_VERSION_EE], eeprom[MT_EE_VERSION_FAE]);
- mt7601u_set_macaddr(dev, eeprom);
+ mt7601u_set_macaddr(dev, eeprom + MT_EE_MAC_ADDR);
mt7601u_set_chip_cap(dev, eeprom);
mt7601u_set_channel_power(dev, eeprom);
mt7601u_set_country_reg(dev, eeprom);
diff --git a/drivers/net/wireless/mediatek/mt7601u/initvals.h b/drivers/net/wireless/mediatek/mt7601u/initvals.h
index ec11ff66969d..2dc6b68e7fb9 100644
--- a/drivers/net/wireless/mediatek/mt7601u/initvals.h
+++ b/drivers/net/wireless/mediatek/mt7601u/initvals.h
@@ -139,6 +139,7 @@ static const struct mt76_reg_pair mac_common_vals[] = {
{ MT_TXOP_HLDR_ET, 0x00000002 },
{ MT_XIFS_TIME_CFG, 0x33a41010 },
{ MT_PWR_PIN_CFG, 0x00000000 },
+ { MT_PN_PAD_MODE, 0x00000001 },
};
static const struct mt76_reg_pair mac_chip_vals[] = {
diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.c b/drivers/net/wireless/mediatek/mt7601u/mac.c
index d6dc59bb00df..d55d7040a56d 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mac.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mac.c
@@ -16,6 +16,22 @@
#include "trace.h"
#include <linux/etherdevice.h>
+void mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *addr)
+{
+ ether_addr_copy(dev->macaddr, addr);
+
+ if (!is_valid_ether_addr(dev->macaddr)) {
+ eth_random_addr(dev->macaddr);
+ dev_info(dev->dev,
+ "Invalid MAC address, using random address %pM\n",
+ dev->macaddr);
+ }
+
+ mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr));
+ mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) |
+ FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff));
+}
+
static void
mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate)
{
@@ -464,8 +480,16 @@ u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_DECRYPT)) {
status->flag |= RX_FLAG_DECRYPTED;
- status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED;
+ status->flag |= RX_FLAG_MMIC_STRIPPED;
+ status->flag |= RX_FLAG_MIC_STRIPPED;
+ status->flag |= RX_FLAG_ICV_STRIPPED;
+ status->flag |= RX_FLAG_IV_STRIPPED;
}
+ /* let mac80211 take care of PN validation since apparently
+ * the hardware does not support it
+ */
+ if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_PN_LEN))
+ status->flag &= ~RX_FLAG_IV_STRIPPED;
status->chains = BIT(0);
rssi = mt7601u_phy_get_rssi(dev, rxwi, rate);
diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.h b/drivers/net/wireless/mediatek/mt7601u/mac.h
index 2c22d63c63a2..b7aa24656d0e 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mac.h
+++ b/drivers/net/wireless/mediatek/mt7601u/mac.h
@@ -174,5 +174,6 @@ u16 mt76_mac_tx_rate_val(struct mt7601u_dev *dev,
struct mt76_tx_status
mt7601u_mac_fetch_tx_status(struct mt7601u_dev *dev);
void mt76_send_tx_status(struct mt7601u_dev *dev, struct mt76_tx_status *stat);
+void mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *addr);
#endif
diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c
index 43ebd460ba86..3c9ea40d9584 100644
--- a/drivers/net/wireless/mediatek/mt7601u/main.c
+++ b/drivers/net/wireless/mediatek/mt7601u/main.c
@@ -64,6 +64,9 @@ static int mt7601u_add_interface(struct ieee80211_hw *hw,
*/
mvif->idx = idx;
+ if (!ether_addr_equal(dev->macaddr, vif->addr))
+ mt7601u_set_macaddr(dev, vif->addr);
+
if (dev->wcid_mask[wcid / BITS_PER_LONG] & BIT(wcid % BITS_PER_LONG))
return -ENOSPC;
dev->wcid_mask[wcid / BITS_PER_LONG] |= BIT(wcid % BITS_PER_LONG);
diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c
index 65a8004418ea..d9d6fd7eff5e 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mcu.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c
@@ -58,8 +58,7 @@ static inline void trace_mt_mcu_msg_send_cs(struct mt7601u_dev *dev,
trace_mt_mcu_msg_send(dev, skb, csum, need_resp);
}
-static struct sk_buff *
-mt7601u_mcu_msg_alloc(struct mt7601u_dev *dev, const void *data, int len)
+static struct sk_buff *mt7601u_mcu_msg_alloc(const void *data, int len)
{
struct sk_buff *skb;
@@ -171,7 +170,7 @@ static int mt7601u_mcu_function_select(struct mt7601u_dev *dev,
.value = cpu_to_le32(val),
};
- skb = mt7601u_mcu_msg_alloc(dev, &msg, sizeof(msg));
+ skb = mt7601u_mcu_msg_alloc(&msg, sizeof(msg));
if (!skb)
return -ENOMEM;
return mt7601u_mcu_msg_send(dev, skb, CMD_FUN_SET_OP, func == 5);
@@ -208,7 +207,7 @@ mt7601u_mcu_calibrate(struct mt7601u_dev *dev, enum mcu_calibrate cal, u32 val)
.value = cpu_to_le32(val),
};
- skb = mt7601u_mcu_msg_alloc(dev, &msg, sizeof(msg));
+ skb = mt7601u_mcu_msg_alloc(&msg, sizeof(msg));
if (!skb)
return -ENOMEM;
return mt7601u_mcu_msg_send(dev, skb, CMD_CALIBRATION_OP, true);
diff --git a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
index c7ec40475a5f..9233744451a9 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
+++ b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
@@ -147,7 +147,8 @@ enum {
* @rx_lock: protects @rx_q.
* @con_mon_lock: protects @ap_bssid, @bcn_*, @avg_rssi.
* @mutex: ensures exclusive access from mac80211 callbacks.
- * @vendor_req_mutex: protects @vend_buf, ensures atomicity of split writes.
+ * @vendor_req_mutex: protects @vend_buf, ensures atomicity of read/write
+ * accesses
* @reg_atomic_mutex: ensures atomicity of indirect register accesses
* (accesses to RF and BBP).
* @hw_atomic_mutex: ensures exclusive access to HW during critical
diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
index b9e4f6793138..d8b7863f7926 100644
--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
+++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
@@ -129,15 +129,14 @@ void mt7601u_vendor_reset(struct mt7601u_dev *dev)
MT_VEND_DEV_MODE_RESET, 0, NULL, 0);
}
-u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
+/* should be called with vendor_req_mutex held */
+static u32 __mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
{
int ret;
u32 val = ~0;
WARN_ONCE(offset > USHRT_MAX, "read high off:%08x", offset);
- mutex_lock(&dev->vendor_req_mutex);
-
ret = mt7601u_vendor_request(dev, MT_VEND_MULTI_READ, USB_DIR_IN,
0, offset, dev->vend_buf, MT_VEND_BUF);
if (ret == MT_VEND_BUF)
@@ -146,25 +145,41 @@ u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
dev_err(dev->dev, "Error: wrong size read:%d off:%08x\n",
ret, offset);
- mutex_unlock(&dev->vendor_req_mutex);
-
trace_reg_read(dev, offset, val);
return val;
}
-int mt7601u_vendor_single_wr(struct mt7601u_dev *dev, const u8 req,
- const u16 offset, const u32 val)
+u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
{
- int ret;
+ u32 ret;
mutex_lock(&dev->vendor_req_mutex);
+ ret = __mt7601u_rr(dev, offset);
+ mutex_unlock(&dev->vendor_req_mutex);
- ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
- val & 0xffff, offset, NULL, 0);
+ return ret;
+}
+
+/* should be called with vendor_req_mutex held */
+static int __mt7601u_vendor_single_wr(struct mt7601u_dev *dev, const u8 req,
+ const u16 offset, const u32 val)
+{
+ int ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
+ val & 0xffff, offset, NULL, 0);
if (!ret)
ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
val >> 16, offset + 2, NULL, 0);
+ trace_reg_write(dev, offset, val);
+ return ret;
+}
+
+int mt7601u_vendor_single_wr(struct mt7601u_dev *dev, const u8 req,
+ const u16 offset, const u32 val)
+{
+ int ret;
+ mutex_lock(&dev->vendor_req_mutex);
+ ret = __mt7601u_vendor_single_wr(dev, req, offset, val);
mutex_unlock(&dev->vendor_req_mutex);
return ret;
@@ -175,23 +190,30 @@ void mt7601u_wr(struct mt7601u_dev *dev, u32 offset, u32 val)
WARN_ONCE(offset > USHRT_MAX, "write high off:%08x", offset);
mt7601u_vendor_single_wr(dev, MT_VEND_WRITE, offset, val);
- trace_reg_write(dev, offset, val);
}
u32 mt7601u_rmw(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val)
{
- val |= mt7601u_rr(dev, offset) & ~mask;
- mt7601u_wr(dev, offset, val);
+ mutex_lock(&dev->vendor_req_mutex);
+ val |= __mt7601u_rr(dev, offset) & ~mask;
+ __mt7601u_vendor_single_wr(dev, MT_VEND_WRITE, offset, val);
+ mutex_unlock(&dev->vendor_req_mutex);
+
return val;
}
u32 mt7601u_rmc(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val)
{
- u32 reg = mt7601u_rr(dev, offset);
+ u32 reg;
+ mutex_lock(&dev->vendor_req_mutex);
+ reg = __mt7601u_rr(dev, offset);
val |= reg & ~mask;
if (reg != val)
- mt7601u_wr(dev, offset, val);
+ __mt7601u_vendor_single_wr(dev, MT_VEND_WRITE,
+ offset, val);
+ mutex_unlock(&dev->vendor_req_mutex);
+
return val;
}
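[Review note] The usb.c refactor above follows the usual __unlocked-helper idiom: double-underscore variants assume vendor_req_mutex is held, public entry points take it, and rmw/rmc now hold it across the whole read-modify-write instead of locking twice (which left a window between the read and the write). A generic sketch of the idiom; all names here are placeholders, not driver symbols:

#include <linux/mutex.h>
#include <linux/types.h>

struct foo_dev {
	struct mutex lock;
	u32 regs[64];
};

/* double-underscore helpers assume dev->lock is held by the caller */
static u32 __foo_rr(struct foo_dev *dev, u32 off)
{
	return dev->regs[off];
}

static void __foo_wr(struct foo_dev *dev, u32 off, u32 val)
{
	dev->regs[off] = val;
}

/* public entry point: one critical section covers the whole RMW */
static u32 foo_rmw(struct foo_dev *dev, u32 off, u32 mask, u32 val)
{
	mutex_lock(&dev->lock);
	val |= __foo_rr(dev, off) & ~mask;
	__foo_wr(dev, off, val);
	mutex_unlock(&dev->lock);
	return val;
}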
diff --git a/drivers/net/wireless/quantenna/Kconfig b/drivers/net/wireless/quantenna/Kconfig
index 30943656e989..de84ce125c26 100644
--- a/drivers/net/wireless/quantenna/Kconfig
+++ b/drivers/net/wireless/quantenna/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_QUANTENNA
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_QUANTENNA
diff --git a/drivers/net/wireless/quantenna/qtnfmac/bus.h b/drivers/net/wireless/quantenna/qtnfmac/bus.h
index 56e5fed92a2a..0a1604683bab 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/bus.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/bus.h
@@ -59,8 +59,9 @@ struct qtnf_bus {
char fwname[32];
struct napi_struct mux_napi;
struct net_device mux_dev;
- struct completion request_firmware_complete;
+ struct completion firmware_init_complete;
struct workqueue_struct *workqueue;
+ struct work_struct fw_work;
struct work_struct event_work;
struct mutex bus_lock; /* lock during command/event processing */
struct dentry *dbg_dir;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
index 6f6190964320..f117904d9120 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
@@ -127,7 +127,7 @@ static inline void qtnf_dis_txdone_irq(struct qtnf_pcie_bus_priv *priv)
spin_unlock_irqrestore(&priv->irq_lock, flags);
}
-static int qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
+static void qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
{
struct pci_dev *pdev = priv->pdev;
@@ -148,8 +148,6 @@ static int qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
pr_warn("legacy PCIE interrupts enabled\n");
pci_intx(pdev, 1);
}
-
- return 0;
}
static void qtnf_deassert_intx(struct qtnf_pcie_bus_priv *priv)
@@ -162,6 +160,17 @@ static void qtnf_deassert_intx(struct qtnf_pcie_bus_priv *priv)
qtnf_non_posted_write(cfg, reg);
}
+static void qtnf_reset_card(struct qtnf_pcie_bus_priv *priv)
+{
+ const u32 data = QTN_PEARL_IPC_IRQ_WORD(QTN_PEARL_LHOST_EP_RESET);
+ void __iomem *reg = priv->sysctl_bar +
+ QTN_PEARL_SYSCTL_LHOST_IRQ_OFFSET;
+
+ qtnf_non_posted_write(data, reg);
+ msleep(QTN_EP_RESET_WAIT_MS);
+ pci_restore_state(priv->pdev);
+}
+
static void qtnf_ipc_gen_ep_int(void *arg)
{
const struct qtnf_pcie_bus_priv *priv = arg;
@@ -478,10 +487,11 @@ static int alloc_rx_buffers(struct qtnf_pcie_bus_priv *priv)
}
/* all rx/tx activity should have ceased before calling this function */
-static void free_xfer_buffers(void *data)
+static void qtnf_free_xfer_buffers(struct qtnf_pcie_bus_priv *priv)
{
- struct qtnf_pcie_bus_priv *priv = (struct qtnf_pcie_bus_priv *)data;
+ struct qtnf_tx_bd *txbd;
struct qtnf_rx_bd *rxbd;
+ struct sk_buff *skb;
dma_addr_t paddr;
int i;
@@ -489,19 +499,26 @@ static void free_xfer_buffers(void *data)
for (i = 0; i < priv->rx_bd_num; i++) {
if (priv->rx_skb && priv->rx_skb[i]) {
rxbd = &priv->rx_bd_vbase[i];
+ skb = priv->rx_skb[i];
paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
le32_to_cpu(rxbd->addr));
pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
PCI_DMA_FROMDEVICE);
-
- dev_kfree_skb_any(priv->rx_skb[i]);
+ dev_kfree_skb_any(skb);
+ priv->rx_skb[i] = NULL;
}
}
/* free tx buffers */
for (i = 0; i < priv->tx_bd_num; i++) {
if (priv->tx_skb && priv->tx_skb[i]) {
- dev_kfree_skb_any(priv->tx_skb[i]);
+ txbd = &priv->tx_bd_vbase[i];
+ skb = priv->tx_skb[i];
+ paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
+ le32_to_cpu(txbd->addr));
+ pci_unmap_single(priv->pdev, paddr, skb->len,
+ PCI_DMA_TODEVICE);
+ dev_kfree_skb_any(skb);
priv->tx_skb[i] = NULL;
}
}
@@ -937,6 +954,98 @@ static const struct qtnf_bus_ops qtnf_pcie_bus_ops = {
.data_rx_stop = qtnf_pcie_data_rx_stop,
};
+static int qtnf_dbg_mps_show(struct seq_file *s, void *data)
+{
+ struct qtnf_bus *bus = dev_get_drvdata(s->private);
+ struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+ seq_printf(s, "%d\n", priv->mps);
+
+ return 0;
+}
+
+static int qtnf_dbg_msi_show(struct seq_file *s, void *data)
+{
+ struct qtnf_bus *bus = dev_get_drvdata(s->private);
+ struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+ seq_printf(s, "%u\n", priv->msi_enabled);
+
+ return 0;
+}
+
+static int qtnf_dbg_irq_stats(struct seq_file *s, void *data)
+{
+ struct qtnf_bus *bus = dev_get_drvdata(s->private);
+ struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+ u32 reg = readl(PCIE_HDP_INT_EN(priv->pcie_reg_base));
+ u32 status;
+
+ seq_printf(s, "pcie_irq_count(%u)\n", priv->pcie_irq_count);
+ seq_printf(s, "pcie_irq_tx_count(%u)\n", priv->pcie_irq_tx_count);
+ status = reg & PCIE_HDP_INT_TX_BITS;
+ seq_printf(s, "pcie_irq_tx_status(%s)\n",
+ (status == PCIE_HDP_INT_TX_BITS) ? "EN" : "DIS");
+ seq_printf(s, "pcie_irq_rx_count(%u)\n", priv->pcie_irq_rx_count);
+ status = reg & PCIE_HDP_INT_RX_BITS;
+ seq_printf(s, "pcie_irq_rx_status(%s)\n",
+ (status == PCIE_HDP_INT_RX_BITS) ? "EN" : "DIS");
+ seq_printf(s, "pcie_irq_uf_count(%u)\n", priv->pcie_irq_uf_count);
+ status = reg & PCIE_HDP_INT_HHBM_UF;
+ seq_printf(s, "pcie_irq_hhbm_uf_status(%s)\n",
+ (status == PCIE_HDP_INT_HHBM_UF) ? "EN" : "DIS");
+
+ return 0;
+}
+
+static int qtnf_dbg_hdp_stats(struct seq_file *s, void *data)
+{
+ struct qtnf_bus *bus = dev_get_drvdata(s->private);
+ struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+ seq_printf(s, "tx_full_count(%u)\n", priv->tx_full_count);
+ seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
+ seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
+ seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);
+
+ seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
+ seq_printf(s, "tx_bd_p_index(%u)\n",
+ readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
+ & (priv->tx_bd_num - 1));
+ seq_printf(s, "tx_bd_w_index(%u)\n", priv->tx_bd_w_index);
+ seq_printf(s, "tx queue len(%u)\n",
+ CIRC_CNT(priv->tx_bd_w_index, priv->tx_bd_r_index,
+ priv->tx_bd_num));
+
+ seq_printf(s, "rx_bd_r_index(%u)\n", priv->rx_bd_r_index);
+ seq_printf(s, "rx_bd_p_index(%u)\n",
+ readl(PCIE_HDP_TX0DMA_CNT(priv->pcie_reg_base))
+ & (priv->rx_bd_num - 1));
+ seq_printf(s, "rx_bd_w_index(%u)\n", priv->rx_bd_w_index);
+ seq_printf(s, "rx alloc queue len(%u)\n",
+ CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
+ priv->rx_bd_num));
+
+ return 0;
+}
+
+static int qtnf_dbg_shm_stats(struct seq_file *s, void *data)
+{
+ struct qtnf_bus *bus = dev_get_drvdata(s->private);
+ struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+ seq_printf(s, "shm_ipc_ep_in.tx_packet_count(%zu)\n",
+ priv->shm_ipc_ep_in.tx_packet_count);
+ seq_printf(s, "shm_ipc_ep_in.rx_packet_count(%zu)\n",
+ priv->shm_ipc_ep_in.rx_packet_count);
+ seq_printf(s, "shm_ipc_ep_out.tx_packet_count(%zu)\n",
+ priv->shm_ipc_ep_out.tx_timeout_count);
+ seq_printf(s, "shm_ipc_ep_out.rx_packet_count(%zu)\n",
+ priv->shm_ipc_ep_out.rx_packet_count);
+
+ return 0;
+}
+
static int qtnf_ep_fw_send(struct qtnf_pcie_bus_priv *priv, uint32_t size,
int blk, const u8 *pblk, const u8 *fw)
{
@@ -1052,181 +1161,102 @@ qtnf_ep_fw_load(struct qtnf_pcie_bus_priv *priv, const u8 *fw, u32 fw_size)
return 0;
}
-static void qtnf_firmware_load(const struct firmware *fw, void *context)
-{
- struct qtnf_pcie_bus_priv *priv = (void *)context;
- struct pci_dev *pdev = priv->pdev;
- struct qtnf_bus *bus = pci_get_drvdata(pdev);
- int ret;
-
- if (!fw) {
- pr_err("failed to get firmware %s\n", bus->fwname);
- goto fw_load_err;
- }
-
- ret = qtnf_ep_fw_load(priv, fw->data, fw->size);
- if (ret) {
- pr_err("FW upload error\n");
- goto fw_load_err;
- }
-
- if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
- QTN_FW_DL_TIMEOUT_MS)) {
- pr_err("FW bringup timed out\n");
- goto fw_load_err;
- }
-
- bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
- pr_info("firmware is up and running\n");
-
-fw_load_err:
-
- if (fw)
- release_firmware(fw);
-
- complete(&bus->request_firmware_complete);
-}
-
-static int qtnf_bringup_fw(struct qtnf_bus *bus)
+static void qtnf_fw_work_handler(struct work_struct *work)
{
+ struct qtnf_bus *bus = container_of(work, struct qtnf_bus, fw_work);
struct qtnf_pcie_bus_priv *priv = (void *)get_bus_priv(bus);
struct pci_dev *pdev = priv->pdev;
+ const struct firmware *fw;
int ret;
u32 state = QTN_RC_FW_LOADRDY | QTN_RC_FW_QLINK;
- if (flashboot)
+ if (flashboot) {
state |= QTN_RC_FW_FLASHBOOT;
+ } else {
+ ret = request_firmware(&fw, bus->fwname, &pdev->dev);
+ if (ret < 0) {
+ pr_err("failed to get firmware %s\n", bus->fwname);
+ goto fw_load_fail;
+ }
+ }
qtnf_set_state(&priv->bda->bda_rc_state, state);
if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_LOADRDY,
QTN_FW_DL_TIMEOUT_MS)) {
pr_err("card is not ready\n");
- return -ETIMEDOUT;
+ goto fw_load_fail;
}
qtnf_clear_state(&priv->bda->bda_ep_state, QTN_EP_FW_LOADRDY);
if (flashboot) {
- pr_info("Booting FW from flash\n");
-
- if (!qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
- QTN_FW_DL_TIMEOUT_MS))
- bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
+ pr_info("booting firmware from flash\n");
+ } else {
+ pr_info("starting firmware upload: %s\n", bus->fwname);
- return 0;
+ ret = qtnf_ep_fw_load(priv, fw->data, fw->size);
+ release_firmware(fw);
+ if (ret) {
+ pr_err("firmware upload error\n");
+ goto fw_load_fail;
+ }
}
- pr_info("starting firmware upload: %s\n", bus->fwname);
-
- ret = request_firmware_nowait(THIS_MODULE, 1, bus->fwname, &pdev->dev,
- GFP_KERNEL, priv, qtnf_firmware_load);
- if (ret < 0)
- pr_err("request_firmware_nowait error %d\n", ret);
- else
- ret = 1;
-
- return ret;
-}
-
-static void qtnf_reclaim_tasklet_fn(unsigned long data)
-{
- struct qtnf_pcie_bus_priv *priv = (void *)data;
-
- qtnf_pcie_data_tx_reclaim(priv);
- qtnf_en_txdone_irq(priv);
-}
-
-static int qtnf_dbg_mps_show(struct seq_file *s, void *data)
-{
- struct qtnf_bus *bus = dev_get_drvdata(s->private);
- struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+ if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
+ QTN_FW_DL_TIMEOUT_MS)) {
+ pr_err("firmware bringup timed out\n");
+ goto fw_load_fail;
+ }
- seq_printf(s, "%d\n", priv->mps);
+ bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
+ pr_info("firmware is up and running\n");
- return 0;
-}
+ if (qtnf_poll_state(&priv->bda->bda_ep_state,
+ QTN_EP_FW_QLINK_DONE, QTN_FW_QLINK_TIMEOUT_MS)) {
+ pr_err("firmware runtime failure\n");
+ goto fw_load_fail;
+ }
-static int qtnf_dbg_msi_show(struct seq_file *s, void *data)
-{
- struct qtnf_bus *bus = dev_get_drvdata(s->private);
- struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+ ret = qtnf_core_attach(bus);
+ if (ret) {
+ pr_err("failed to attach core\n");
+ goto fw_load_fail;
+ }
- seq_printf(s, "%u\n", priv->msi_enabled);
+ qtnf_debugfs_init(bus, DRV_NAME);
+ qtnf_debugfs_add_entry(bus, "mps", qtnf_dbg_mps_show);
+ qtnf_debugfs_add_entry(bus, "msi_enabled", qtnf_dbg_msi_show);
+ qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
+ qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
+ qtnf_debugfs_add_entry(bus, "shm_stats", qtnf_dbg_shm_stats);
- return 0;
-}
+ goto fw_load_exit;
-static int qtnf_dbg_irq_stats(struct seq_file *s, void *data)
-{
- struct qtnf_bus *bus = dev_get_drvdata(s->private);
- struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
- u32 reg = readl(PCIE_HDP_INT_EN(priv->pcie_reg_base));
- u32 status;
+fw_load_fail:
+ bus->fw_state = QTNF_FW_STATE_DEAD;
- seq_printf(s, "pcie_irq_count(%u)\n", priv->pcie_irq_count);
- seq_printf(s, "pcie_irq_tx_count(%u)\n", priv->pcie_irq_tx_count);
- status = reg & PCIE_HDP_INT_TX_BITS;
- seq_printf(s, "pcie_irq_tx_status(%s)\n",
- (status == PCIE_HDP_INT_TX_BITS) ? "EN" : "DIS");
- seq_printf(s, "pcie_irq_rx_count(%u)\n", priv->pcie_irq_rx_count);
- status = reg & PCIE_HDP_INT_RX_BITS;
- seq_printf(s, "pcie_irq_rx_status(%s)\n",
- (status == PCIE_HDP_INT_RX_BITS) ? "EN" : "DIS");
- seq_printf(s, "pcie_irq_uf_count(%u)\n", priv->pcie_irq_uf_count);
- status = reg & PCIE_HDP_INT_HHBM_UF;
- seq_printf(s, "pcie_irq_hhbm_uf_status(%s)\n",
- (status == PCIE_HDP_INT_HHBM_UF) ? "EN" : "DIS");
-
- return 0;
+fw_load_exit:
+ complete(&bus->firmware_init_complete);
+ put_device(&pdev->dev);
}
-static int qtnf_dbg_hdp_stats(struct seq_file *s, void *data)
+static void qtnf_bringup_fw_async(struct qtnf_bus *bus)
{
- struct qtnf_bus *bus = dev_get_drvdata(s->private);
- struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-
- seq_printf(s, "tx_full_count(%u)\n", priv->tx_full_count);
- seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
- seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
- seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);
-
- seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
- seq_printf(s, "tx_bd_p_index(%u)\n",
- readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
- & (priv->tx_bd_num - 1));
- seq_printf(s, "tx_bd_w_index(%u)\n", priv->tx_bd_w_index);
- seq_printf(s, "tx queue len(%u)\n",
- CIRC_CNT(priv->tx_bd_w_index, priv->tx_bd_r_index,
- priv->tx_bd_num));
-
- seq_printf(s, "rx_bd_r_index(%u)\n", priv->rx_bd_r_index);
- seq_printf(s, "rx_bd_p_index(%u)\n",
- readl(PCIE_HDP_TX0DMA_CNT(priv->pcie_reg_base))
- & (priv->rx_bd_num - 1));
- seq_printf(s, "rx_bd_w_index(%u)\n", priv->rx_bd_w_index);
- seq_printf(s, "rx alloc queue len(%u)\n",
- CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
- priv->rx_bd_num));
+ struct qtnf_pcie_bus_priv *priv = (void *)get_bus_priv(bus);
+ struct pci_dev *pdev = priv->pdev;
- return 0;
+ get_device(&pdev->dev);
+ INIT_WORK(&bus->fw_work, qtnf_fw_work_handler);
+ schedule_work(&bus->fw_work);
}
-static int qtnf_dbg_shm_stats(struct seq_file *s, void *data)
+static void qtnf_reclaim_tasklet_fn(unsigned long data)
{
- struct qtnf_bus *bus = dev_get_drvdata(s->private);
- struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-
- seq_printf(s, "shm_ipc_ep_in.tx_packet_count(%zu)\n",
- priv->shm_ipc_ep_in.tx_packet_count);
- seq_printf(s, "shm_ipc_ep_in.rx_packet_count(%zu)\n",
- priv->shm_ipc_ep_in.rx_packet_count);
- seq_printf(s, "shm_ipc_ep_out.tx_packet_count(%zu)\n",
- priv->shm_ipc_ep_out.tx_timeout_count);
- seq_printf(s, "shm_ipc_ep_out.rx_packet_count(%zu)\n",
- priv->shm_ipc_ep_out.rx_packet_count);
+ struct qtnf_pcie_bus_priv *priv = (void *)data;
- return 0;
+ qtnf_pcie_data_tx_reclaim(priv);
+ qtnf_en_txdone_irq(priv);
}
static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -1237,10 +1267,8 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
bus = devm_kzalloc(&pdev->dev,
sizeof(*bus) + sizeof(*pcie_priv), GFP_KERNEL);
- if (!bus) {
- ret = -ENOMEM;
- goto err_init;
- }
+ if (!bus)
+ return -ENOMEM;
pcie_priv = get_bus_priv(bus);
@@ -1251,7 +1279,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pcie_priv->pdev = pdev;
strcpy(bus->fwname, QTN_PCI_PEARL_FW_NAME);
- init_completion(&bus->request_firmware_complete);
+ init_completion(&bus->firmware_init_complete);
mutex_init(&bus->bus_lock);
spin_lock_init(&pcie_priv->tx0_lock);
spin_lock_init(&pcie_priv->irq_lock);
@@ -1267,11 +1295,18 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pcie_priv->tx_reclaim_done = 0;
pcie_priv->tx_reclaim_req = 0;
+ tasklet_init(&pcie_priv->reclaim_tq, qtnf_reclaim_tasklet_fn,
+ (unsigned long)pcie_priv);
+
+ init_dummy_netdev(&bus->mux_dev);
+ netif_napi_add(&bus->mux_dev, &bus->mux_napi,
+ qtnf_rx_poll, 10);
+
pcie_priv->workqueue = create_singlethread_workqueue("QTNF_PEARL_PCIE");
if (!pcie_priv->workqueue) {
pr_err("failed to alloc bus workqueue\n");
ret = -ENODEV;
- goto err_priv;
+ goto err_init;
}
if (!pci_is_pcie(pdev)) {
@@ -1300,14 +1335,8 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_base;
}
- pcim_pin_device(pdev);
pci_set_master(pdev);
-
- ret = qtnf_pcie_init_irq(pcie_priv);
- if (ret < 0) {
- pr_err("irq init failed\n");
- goto err_base;
- }
+ qtnf_pcie_init_irq(pcie_priv);
ret = qtnf_pcie_init_memory(pcie_priv);
if (ret < 0) {
@@ -1315,22 +1344,18 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_base;
}
+ pci_save_state(pdev);
+
ret = qtnf_pcie_init_shm_ipc(pcie_priv);
if (ret < 0) {
pr_err("PCIE SHM IPC init failed\n");
goto err_base;
}
- ret = devm_add_action(&pdev->dev, free_xfer_buffers, (void *)pcie_priv);
- if (ret) {
- pr_err("custom release callback init failed\n");
- goto err_base;
- }
-
ret = qtnf_pcie_init_xfer(pcie_priv);
if (ret) {
pr_err("PCIE xfer init failed\n");
- goto err_base;
+ goto err_ipc;
}
/* init default irq settings */
@@ -1343,58 +1368,28 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
"qtnf_pcie_irq", (void *)bus);
if (ret) {
pr_err("failed to request pcie irq %d\n", pdev->irq);
- goto err_base;
- }
-
- tasklet_init(&pcie_priv->reclaim_tq, qtnf_reclaim_tasklet_fn,
- (unsigned long)pcie_priv);
- init_dummy_netdev(&bus->mux_dev);
- netif_napi_add(&bus->mux_dev, &bus->mux_napi,
- qtnf_rx_poll, 10);
-
- ret = qtnf_bringup_fw(bus);
- if (ret < 0)
- goto err_bringup_fw;
- else if (ret)
- wait_for_completion(&bus->request_firmware_complete);
-
- if (bus->fw_state != QTNF_FW_STATE_FW_DNLD_DONE) {
- pr_err("failed to start FW\n");
- goto err_bringup_fw;
- }
-
- if (qtnf_poll_state(&pcie_priv->bda->bda_ep_state, QTN_EP_FW_QLINK_DONE,
- QTN_FW_QLINK_TIMEOUT_MS)) {
- pr_err("FW runtime failure\n");
- goto err_bringup_fw;
+ goto err_xfer;
}
- ret = qtnf_core_attach(bus);
- if (ret) {
- pr_err("failed to attach core\n");
- goto err_bringup_fw;
- }
-
- qtnf_debugfs_init(bus, DRV_NAME);
- qtnf_debugfs_add_entry(bus, "mps", qtnf_dbg_mps_show);
- qtnf_debugfs_add_entry(bus, "msi_enabled", qtnf_dbg_msi_show);
- qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
- qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
- qtnf_debugfs_add_entry(bus, "shm_stats", qtnf_dbg_shm_stats);
+ qtnf_bringup_fw_async(bus);
return 0;
-err_bringup_fw:
- netif_napi_del(&bus->mux_napi);
+err_xfer:
+ qtnf_free_xfer_buffers(pcie_priv);
+
+err_ipc:
+ qtnf_pcie_free_shm_ipc(pcie_priv);
err_base:
flush_workqueue(pcie_priv->workqueue);
destroy_workqueue(pcie_priv->workqueue);
+ netif_napi_del(&bus->mux_napi);
-err_priv:
+err_init:
+ tasklet_kill(&pcie_priv->reclaim_tq);
pci_set_drvdata(pdev, NULL);
-err_init:
return ret;
}
@@ -1407,18 +1402,23 @@ static void qtnf_pcie_remove(struct pci_dev *pdev)
if (!bus)
return;
+ wait_for_completion(&bus->firmware_init_complete);
+
+ if (bus->fw_state == QTNF_FW_STATE_ACTIVE)
+ qtnf_core_detach(bus);
+
priv = get_bus_priv(bus);
- qtnf_core_detach(bus);
netif_napi_del(&bus->mux_napi);
-
flush_workqueue(priv->workqueue);
destroy_workqueue(priv->workqueue);
tasklet_kill(&priv->reclaim_tq);
+ qtnf_free_xfer_buffers(priv);
qtnf_debugfs_remove(bus);
qtnf_pcie_free_shm_ipc(priv);
+ qtnf_reset_card(priv);
}
#ifdef CONFIG_PM_SLEEP
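[Review note] The pcie.c rework above moves firmware bringup out of probe into a work item, with firmware_init_complete signalled on every exit path; qtnf_pcie_remove() waits on that completion before tearing anything down, so remove can never race the worker. A minimal sketch of the async-probe pattern (struct and function names are generic placeholders, not qtnfmac symbols):

#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct my_bus {
	struct completion init_done;
	struct work_struct fw_work;
};

static void my_fw_worker(struct work_struct *work)
{
	struct my_bus *bus = container_of(work, struct my_bus, fw_work);

	/* ...request firmware, boot the card, attach upper layers... */

	complete(&bus->init_done);	/* unblocks a concurrent remove() */
}

static int my_probe(struct my_bus *bus)
{
	init_completion(&bus->init_done);
	INIT_WORK(&bus->fw_work, my_fw_worker);
	schedule_work(&bus->fw_work);
	return 0;			/* probe returns before FW is up */
}

static void my_remove(struct my_bus *bus)
{
	wait_for_completion(&bus->init_done);	/* never race the worker */
	/* safe to tear down from here */
}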
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
index c5a4e46d26ef..00bb21a1c47a 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
@@ -46,6 +46,7 @@
/* state transition timeouts */
#define QTN_FW_DL_TIMEOUT_MS 3000
#define QTN_FW_QLINK_TIMEOUT_MS 30000
+#define QTN_EP_RESET_WAIT_MS 1000
#define PCIE_HDP_INT_RX_BITS (0 \
| PCIE_HDP_INT_EP_TXDMA \
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
index 5b48b425fa7f..0bfe285b6b48 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
@@ -351,5 +351,6 @@
#define QTN_PEARL_IPC_IRQ_WORD(irq) (BIT(irq) | BIT(irq + 16))
#define QTN_PEARL_LHOST_IPC_IRQ (6)
+#define QTN_PEARL_LHOST_EP_RESET (7)
#endif /* __PEARL_PCIE_H */
diff --git a/drivers/net/wireless/ralink/Kconfig b/drivers/net/wireless/ralink/Kconfig
index 41dbf3130e2b..9b79e59ee97b 100644
--- a/drivers/net/wireless/ralink/Kconfig
+++ b/drivers/net/wireless/ralink/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_RALINK
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_RALINK
diff --git a/drivers/net/wireless/realtek/Kconfig b/drivers/net/wireless/realtek/Kconfig
index 8a8ba2003964..3db988e689d7 100644
--- a/drivers/net/wireless/realtek/Kconfig
+++ b/drivers/net/wireless/realtek/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_REALTEK
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_REALTEK
diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
index 121b94f09714..9a1d15b3ce45 100644
--- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
+++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
@@ -1450,6 +1450,7 @@ static int rtl8187_probe(struct usb_interface *intf,
goto err_free_dev;
}
mutex_init(&priv->io_mutex);
+ mutex_init(&priv->conf_mutex);
SET_IEEE80211_DEV(dev, &intf->dev);
usb_set_intfdata(intf, dev);
@@ -1625,7 +1626,6 @@ static int rtl8187_probe(struct usb_interface *intf,
printk(KERN_ERR "rtl8187: Cannot register device\n");
goto err_free_dmabuf;
}
- mutex_init(&priv->conf_mutex);
skb_queue_head_init(&priv->b_tx_status.queue);
wiphy_info(dev->wiphy, "hwaddr %pM, %s V%d + %s, rfkill mask %d\n",
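[Review note] Moving mutex_init(&priv->conf_mutex) ahead of registration matters because mac80211 may invoke driver callbacks from inside ieee80211_register_hw(); with the old ordering a callback could take an uninitialized mutex. Generic shape of the rule (placeholder names, illustrative only):

#include <linux/mutex.h>
#include <net/mac80211.h>

struct my_priv {
	struct mutex io_mutex;
	struct mutex conf_mutex;
};

static int my_register(struct my_priv *priv, struct ieee80211_hw *hw)
{
	/* init everything a callback may touch *before* registering */
	mutex_init(&priv->io_mutex);
	mutex_init(&priv->conf_mutex);

	return ieee80211_register_hw(hw);	/* may fire callbacks */
}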
diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index d6c03bd5cc65..6db3389e2ced 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -244,6 +244,9 @@ static void _rtl_init_hw_vht_capab(struct ieee80211_hw *hw,
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
+ if (!(rtlpriv->cfg->spec_ver & RTL_SPEC_SUPPORT_VHT))
+ return;
+
if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE ||
rtlhal->hw_type == HARDWARE_TYPE_RTL8822BE) {
u16 mcs_map;
@@ -397,6 +400,7 @@ static void _rtl_init_mac80211(struct ieee80211_hw *hw)
ieee80211_hw_set(hw, MFP_CAPABLE);
ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS);
ieee80211_hw_set(hw, SUPPORTS_AMSDU_IN_AMPDU);
+ ieee80211_hw_set(hw, SUPPORT_FAST_XMIT);
/* swlps or hwlps has been set in diff chip in init_sw_vars */
if (rtlpriv->psc.swctrl_lps) {
@@ -886,8 +890,7 @@ static void _rtl_query_bandwidth_mode(struct ieee80211_hw *hw,
tcb_desc->packet_bw = HT_CHANNEL_WIDTH_20_40;
- if (rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8812AE ||
- rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8821AE) {
+ if (rtlpriv->cfg->spec_ver & RTL_SPEC_SUPPORT_VHT) {
if (mac->opmode == NL80211_IFTYPE_AP ||
mac->opmode == NL80211_IFTYPE_ADHOC ||
mac->opmode == NL80211_IFTYPE_MESH_POINT) {
@@ -1594,7 +1597,11 @@ static u16 rtl_get_tx_report_sn(struct ieee80211_hw *hw)
struct rtl_tx_report *tx_report = &rtlpriv->tx_report;
u16 sn;
- sn = atomic_inc_return(&tx_report->sn) & 0x0FFF;
+ /* SW_DEFINE[11:8] are reserved (driver fills zeros)
+ * SW_DEFINE[7:2] are used by driver
+ * SW_DEFINE[1:0] are reserved for firmware (driver fills zeros)
+ */
+ sn = (atomic_inc_return(&tx_report->sn) & 0x003F) << 2;
tx_report->last_sent_sn = sn;
tx_report->last_sent_time = jiffies;
@@ -1622,14 +1629,23 @@ void rtl_tx_report_handler(struct ieee80211_hw *hw, u8 *tmp_buf, u8 c2h_cmd_len)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_tx_report *tx_report = &rtlpriv->tx_report;
u16 sn;
+ u8 st, retry;
- sn = ((tmp_buf[7] & 0x0F) << 8) | tmp_buf[6];
+ if (rtlpriv->cfg->spec_ver & RTL_SPEC_EXT_C2H) {
+ sn = GET_TX_REPORT_SN_V2(tmp_buf);
+ st = GET_TX_REPORT_ST_V2(tmp_buf);
+ retry = GET_TX_REPORT_RETRY_V2(tmp_buf);
+ } else {
+ sn = GET_TX_REPORT_SN_V1(tmp_buf);
+ st = GET_TX_REPORT_ST_V1(tmp_buf);
+ retry = GET_TX_REPORT_RETRY_V1(tmp_buf);
+ }
tx_report->last_recv_sn = sn;
RT_TRACE(rtlpriv, COMP_TX_REPORT, DBG_DMESG,
"Recv TX-Report st=0x%02X sn=0x%X retry=0x%X\n",
- tmp_buf[0], sn, tmp_buf[2]);
+ st, sn, retry);
}
EXPORT_SYMBOL_GPL(rtl_tx_report_handler);
@@ -1643,7 +1659,8 @@ bool rtl_check_tx_report_acked(struct ieee80211_hw *hw)
if (time_before(tx_report->last_sent_time + 3 * HZ, jiffies)) {
RT_TRACE(rtlpriv, COMP_TX_REPORT, DBG_WARNING,
- "Check TX-Report timeout!!\n");
+ "Check TX-Report timeout!! s_sn=0x%X r_sn=0x%X\n",
+ tx_report->last_sent_sn, tx_report->last_recv_sn);
return true; /* 3 sec. (timeout) seen as acked */
}
@@ -2629,6 +2646,11 @@ EXPORT_SYMBOL_GPL(rtl_global_var);
static int __init rtl_core_module_init(void)
{
+ BUILD_BUG_ON(TX_PWR_BY_RATE_NUM_RATE < TX_PWR_BY_RATE_NUM_SECTION);
+ BUILD_BUG_ON(MAX_RATE_SECTION_NUM != MAX_RATE_SECTION);
+ BUILD_BUG_ON(MAX_BASE_NUM_IN_PHY_REG_PG_24G != MAX_RATE_SECTION);
+ BUILD_BUG_ON(MAX_BASE_NUM_IN_PHY_REG_PG_5G != (MAX_RATE_SECTION - 1));
+
if (rtl_rate_control_register())
pr_err("rtl: Unable to register rtl_rc, use default RC !!\n");
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
index fd3b1fb35dff..05beb16f0a0a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
@@ -1104,7 +1104,7 @@ static void halbtc8723b1ant_ps_tdma(struct btc_coexist *btcoexist,
}
if ((type == 1) || (type == 2) || (type == 9) || (type == 11) ||
- (type == 101) || (type == 102) || (type == 109) || (type == 101)) {
+ (type == 101) || (type == 102) || (type == 109) || (type == 111)) {
if (!coex_sta->force_lps_on) {
/* Native power save TDMA, only for A2DP-only case
* 1/2/9/11 while wifi noisy threshold > 30
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c
new file mode 100644
index 000000000000..951b8c1e0153
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c
@@ -0,0 +1,55 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2016-2017 Realtek Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ *****************************************************************************/
+#include "halbt_precomp.h"
+
+void ex_hal8822b_wifi_only_hw_config(struct wifi_only_cfg *wifionlycfg)
+{
+ /*BB control*/
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0x4c, 0x01800000, 0x2);
+ /*SW control*/
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0xcb4, 0xff, 0x77);
+ /*antenna mux switch */
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0x974, 0x300, 0x3);
+
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0x1990, 0x300, 0x0);
+
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0xcbc, 0x80000, 0x0);
+ /*switch to WL side controller and gnt_wl gnt_bt debug signal */
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0x70, 0xff000000, 0x0e);
+ /*gnt_wl=1 , gnt_bt=0*/
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0x1704, 0xffffffff, 0x7700);
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0x1700, 0xffffffff, 0xc00f0038);
+}
+
+void ex_hal8822b_wifi_only_scannotify(struct wifi_only_cfg *wifionlycfg,
+ u8 is_5g)
+{
+ hal8822b_wifi_only_switch_antenna(wifionlycfg, is_5g);
+}
+
+void ex_hal8822b_wifi_only_switchbandnotify(struct wifi_only_cfg *wifionlycfg,
+ u8 is_5g)
+{
+ hal8822b_wifi_only_switch_antenna(wifionlycfg, is_5g);
+}
+
+void hal8822b_wifi_only_switch_antenna(struct wifi_only_cfg *wifionlycfg,
+ u8 is_5g)
+{
+ if (is_5g)
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0xcbc, 0x300, 0x1);
+ else
+ halwifionly_phy_set_bb_reg(wifionlycfg, 0xcbc, 0x300, 0x2);
+}
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h
new file mode 100644
index 000000000000..6ec356542eea
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h
@@ -0,0 +1,25 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2016-2017 Realtek Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ *****************************************************************************/
+#ifndef __INC_HAL8822BWIFIONLYHWCFG_H
+#define __INC_HAL8822BWIFIONLYHWCFG_H
+
+void ex_hal8822b_wifi_only_hw_config(struct wifi_only_cfg *wifionlycfg);
+void ex_hal8822b_wifi_only_scannotify(struct wifi_only_cfg *wifionlycfg,
+ u8 is_5g);
+void ex_hal8822b_wifi_only_switchbandnotify(struct wifi_only_cfg *wifionlycfg,
+ u8 is_5g);
+void hal8822b_wifi_only_switch_antenna(struct wifi_only_cfg *wifionlycfg,
+ u8 is_5g);
+#endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index 1404729441a2..823694cb4fdb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -1039,6 +1039,28 @@ static void halbtc_fill_h2c_cmd(void *bt_context, u8 element_id,
cmd_len, cmd_buf);
}
+void halbtc_send_wifi_port_id_cmd(void *bt_context)
+{
+ struct btc_coexist *btcoexist = (struct btc_coexist *)bt_context;
+ struct rtl_priv *rtlpriv = btcoexist->adapter;
+ u8 cmd_buf[1] = {0}; /* port id [2:0] = 0 */
+
+ rtlpriv->cfg->ops->fill_h2c_cmd(rtlpriv->mac80211.hw, H2C_BT_PORT_ID,
+ 1, cmd_buf);
+}
+
+void halbtc_set_default_port_id_cmd(void *bt_context)
+{
+ struct btc_coexist *btcoexist = (struct btc_coexist *)bt_context;
+ struct rtl_priv *rtlpriv = btcoexist->adapter;
+ struct ieee80211_hw *hw = rtlpriv->mac80211.hw;
+
+ if (!rtlpriv->cfg->ops->set_default_port_id_cmd)
+ return;
+
+ rtlpriv->cfg->ops->set_default_port_id_cmd(hw);
+}
+
static
void halbtc_set_bt_reg(void *btc_context, u8 reg_type, u32 offset, u32 set_val)
{
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index 8ed217656539..f5d8159a88eb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -691,6 +691,8 @@ void exhalbtc_lps_leave(struct btc_coexist *btcoexist);
void exhalbtc_low_wifi_traffic_notify(struct btc_coexist *btcoexist);
void exhalbtc_set_single_ant_path(struct btc_coexist *btcoexist,
u8 single_ant_path);
+void halbtc_send_wifi_port_id_cmd(void *bt_context);
+void halbtc_set_default_port_id_cmd(void *bt_context);
/* The following are used by wifi_only case */
enum wifionly_chip_interface {
diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c b/drivers/net/wireless/realtek/rtlwifi/efuse.c
index 35b50be633f1..fd13d4ef53b8 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.c
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c
@@ -50,6 +50,11 @@ static const struct efuse_map RTL8712_SDIO_EFUSE_TABLE[] = {
{11, 0, 0, 28}
};
+static const struct rtl_efuse_ops efuse_ops = {
+ .efuse_onebyte_read = efuse_one_byte_read,
+ .efuse_logical_map_read = efuse_shadow_read,
+};
+
static void efuse_shadow_read_1byte(struct ieee80211_hw *hw, u16 offset,
u8 *value);
static void efuse_shadow_read_2byte(struct ieee80211_hw *hw, u16 offset,
@@ -1364,3 +1369,11 @@ void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen)
*pfwlen = fwlen;
}
EXPORT_SYMBOL_GPL(rtl_fill_dummy);
+
+void rtl_efuse_ops_init(struct ieee80211_hw *hw)
+{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+
+ rtlpriv->efuse.efuse_ops = &efuse_ops;
+}
+EXPORT_SYMBOL_GPL(rtl_efuse_ops_init);
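[Review note] rtl_efuse_ops_init() wires a small const ops table into rtl_priv so upper layers can read the efuse without knowing the backend. The idiom in isolation, with placeholder types (only rtl_efuse_ops itself is real):

#include <linux/types.h>

struct nvm_ops {
	int (*read_byte)(void *dev, u16 addr, u8 *out);
};

static int my_nvm_read_byte(void *dev, u16 addr, u8 *out)
{
	*out = 0;			/* backend-specific access here */
	return 0;
}

static const struct nvm_ops my_nvm_ops = {
	.read_byte = my_nvm_read_byte,	/* bound once, shared read-only */
};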
diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.h b/drivers/net/wireless/realtek/rtlwifi/efuse.h
index 952fdc288f0e..dfa31c13fc7a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.h
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.h
@@ -116,5 +116,5 @@ void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen);
void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
u32 size);
void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size);
-
+void rtl_efuse_ops_init(struct ieee80211_hw *hw);
#endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 01ccf8884831..2437422625bf 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -2238,6 +2238,7 @@ int rtl_pci_probe(struct pci_dev *pdev,
rtlpriv->cfg = (struct rtl_hal_cfg *)(id->driver_data);
rtlpriv->intf_ops = &rtl_pci_ops;
rtlpriv->glb_var = &rtl_global_var;
+ rtl_efuse_ops_init(hw);
/* MEM map */
err = pci_request_regions(pdev, KBUILD_MODNAME);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rc.c b/drivers/net/wireless/realtek/rtlwifi/rc.c
index d1cb7d405618..6c78c6dabbdf 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rc.c
@@ -42,6 +42,23 @@ static u8 _rtl_rc_get_highest_rix(struct rtl_priv *rtlpriv,
struct rtl_phy *rtlphy = &(rtlpriv->phy);
struct rtl_sta_info *sta_entry = NULL;
u16 wireless_mode = 0;
+ u8 nss;
+ struct ieee80211_tx_rate rate;
+
+ switch (get_rf_type(rtlphy)) {
+ case RF_4T4R:
+ nss = 4;
+ break;
+ case RF_3T3R:
+ nss = 3;
+ break;
+ case RF_2T2R:
+ nss = 2;
+ break;
+ default:
+ nss = 1;
+ break;
+ }
/*
*this rate is no use for true rate, firmware
@@ -66,28 +83,51 @@ static u8 _rtl_rc_get_highest_rix(struct rtl_priv *rtlpriv,
} else if (wireless_mode == WIRELESS_MODE_G) {
return G_MODE_MAX_RIX;
} else if (wireless_mode == WIRELESS_MODE_N_24G) {
- if (get_rf_type(rtlphy) != RF_2T2R)
+ if (nss == 1)
return N_MODE_MCS7_RIX;
else
return N_MODE_MCS15_RIX;
} else if (wireless_mode == WIRELESS_MODE_AC_24G) {
- return AC_MODE_MCS9_RIX;
+ if (sta->bandwidth == IEEE80211_STA_RX_BW_20) {
+ ieee80211_rate_set_vht(&rate,
+ AC_MODE_MCS8_RIX,
+ nss);
+ goto out;
+ } else {
+ ieee80211_rate_set_vht(&rate,
+ AC_MODE_MCS9_RIX,
+ nss);
+ goto out;
+ }
}
return 0;
} else {
if (wireless_mode == WIRELESS_MODE_A) {
return A_MODE_MAX_RIX;
} else if (wireless_mode == WIRELESS_MODE_N_5G) {
- if (get_rf_type(rtlphy) != RF_2T2R)
+ if (nss == 1)
return N_MODE_MCS7_RIX;
else
return N_MODE_MCS15_RIX;
} else if (wireless_mode == WIRELESS_MODE_AC_5G) {
- return AC_MODE_MCS9_RIX;
+ if (sta->bandwidth == IEEE80211_STA_RX_BW_20) {
+ ieee80211_rate_set_vht(&rate,
+ AC_MODE_MCS8_RIX,
+ nss);
+ goto out;
+ } else {
+ ieee80211_rate_set_vht(&rate,
+ AC_MODE_MCS9_RIX,
+ nss);
+ goto out;
+ }
}
return 0;
}
}
+
+out:
+ return rate.idx;
}
static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
@@ -111,9 +151,6 @@ static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
}
rate->count = tries;
rate->idx = rix >= 0x00 ? rix : 0x00;
- if (rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8812AE &&
- wireless_mode == WIRELESS_MODE_AC_5G)
- rate->idx += 0x10;/*2NSS for 8812AE*/
if (!not_data) {
if (txrc->short_preamble)
@@ -126,10 +163,10 @@ static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
if (sta && sta->vht_cap.vht_supported)
rate->flags |= IEEE80211_TX_RC_80_MHZ_WIDTH;
} else {
- if (mac->bw_40)
- rate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
if (mac->bw_80)
rate->flags |= IEEE80211_TX_RC_80_MHZ_WIDTH;
+ else if (mac->bw_40)
+ rate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
}
if (sgi_20 || sgi_40 || sgi_80)
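[Review note] The rc.c change leans on how mac80211 encodes VHT rates: as I read ieee80211_rate_set_vht(), rate->idx packs ((nss - 1) << 4) | mcs, which is exactly why the removed "+= 0x10 for 2NSS on 8812AE" special case becomes redundant. Worked example (illustrative helper, not a driver symbol):

#include <linux/types.h>

static u8 vht_rate_idx(u8 mcs, u8 nss)
{
	return ((nss - 1) << 4) | mcs;	/* MCS9 x 2SS -> 0x19 = 9 + 0x10 */
}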
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
index 9cff6bc4049c..cf551785eb08 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
@@ -299,9 +299,6 @@ static void _rtl92c_get_txpower_writeval_by_regulatory(struct ieee80211_hw *hw,
writeVal = 0x00000000;
if (rtlpriv->dm.dynamic_txhighpower_lvl == TXHIGHPWRLEVEL_BT1)
writeVal = writeVal - 0x06060606;
- else if (rtlpriv->dm.dynamic_txhighpower_lvl ==
- TXHIGHPWRLEVEL_BT2)
- writeVal = writeVal;
*(p_outwriteval + rf) = writeVal;
}
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
index ac4a82de40c7..9ab56827124e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
@@ -427,7 +427,6 @@ static void _rtl_rx_process(struct ieee80211_hw *hw, struct sk_buff *skb)
(u32)hdr->addr1[0], (u32)hdr->addr1[1],
(u32)hdr->addr1[2], (u32)hdr->addr1[3],
(u32)hdr->addr1[4], (u32)hdr->addr1[5]);
- memcpy(IEEE80211_SKB_RXCB(skb), rx_status, sizeof(*rx_status));
ieee80211_rx(hw, skb);
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
index f9ccd13c79f9..e7bbbc95cdb1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
@@ -1125,7 +1125,8 @@ static void _rtl8723be_enable_aspm_back_door(struct ieee80211_hw *hw)
/* Configuration Space offset 0x70f BIT7 is used to control L0S */
tmp8 = _rtl8723be_dbi_read(rtlpriv, 0x70f);
- _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7));
+ _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7) |
+ ASPM_L1_LATENCY << 3);
/* Configuration Space offset 0x719 Bit3 is for L1
* BIT4 is for clock request
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
index ab5d462b1a3a..9bb3d9dfce79 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
@@ -328,6 +328,7 @@ static const struct rtl_hal_cfg rtl8821ae_hal_cfg = {
.alt_fw_name = "rtlwifi/rtl8821aefw.bin",
.ops = &rtl8821ae_hal_ops,
.mod_params = &rtl8821ae_mod_params,
+ .spec_ver = RTL_SPEC_SUPPORT_VHT,
.maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL,
.maps[SYS_FUNC_EN] = REG_SYS_FUNC_EN,
.maps[SYS_CLK] = REG_SYS_CLKR,
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 46dcb7fef195..4f48b934ec01 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -154,10 +154,21 @@ enum rtl8192c_h2c_cmd {
MAX_H2CCMD
};
+enum {
+ H2C_BT_PORT_ID = 0x71,
+};
+
+#define GET_TX_REPORT_SN_V1(c2h) (c2h[6])
+#define GET_TX_REPORT_ST_V1(c2h) (c2h[0] & 0xC0)
+#define GET_TX_REPORT_RETRY_V1(c2h) (c2h[2] & 0x3F)
+#define GET_TX_REPORT_SN_V2(c2h) (c2h[6])
+#define GET_TX_REPORT_ST_V2(c2h) (c2h[7] & 0xC0)
+#define GET_TX_REPORT_RETRY_V2(c2h) (c2h[8] & 0x3F)
+
#define MAX_TX_COUNT 4
#define MAX_REGULATION_NUM 4
#define MAX_RF_PATH_NUM 4
-#define MAX_RATE_SECTION_NUM 6
+#define MAX_RATE_SECTION_NUM 6 /* = MAX_RATE_SECTION */
#define MAX_2_4G_BANDWIDTH_NUM 4
#define MAX_5G_BANDWIDTH_NUM 4
#define MAX_RF_PATH 4
@@ -167,8 +178,9 @@ enum rtl8192c_h2c_cmd {
#define TX_PWR_BY_RATE_NUM_BAND 2
#define TX_PWR_BY_RATE_NUM_RF 4
#define TX_PWR_BY_RATE_NUM_SECTION 12
-#define MAX_BASE_NUM_IN_PHY_REG_PG_24G 6
-#define MAX_BASE_NUM_IN_PHY_REG_PG_5G 5
+#define TX_PWR_BY_RATE_NUM_RATE 84 /* >= TX_PWR_BY_RATE_NUM_SECTION */
+#define MAX_BASE_NUM_IN_PHY_REG_PG_24G 6 /* MAX_RATE_SECTION */
+#define MAX_BASE_NUM_IN_PHY_REG_PG_5G 5 /* MAX_RATE_SECTION -1 */
#define BUFDESC_SEG_NUM 1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */
@@ -264,6 +276,7 @@ enum rate_section {
HT_MCS8_MCS15,
VHT_1SSMCS0_1SSMCS9,
VHT_2SSMCS0_2SSMCS9,
+ MAX_RATE_SECTION,
};
enum intf_type {
@@ -278,6 +291,13 @@ enum radio_path {
RF90_PATH_D = 3,
};
+enum radio_mask {
+ RF_MASK_A = BIT(0),
+ RF_MASK_B = BIT(1),
+ RF_MASK_C = BIT(2),
+ RF_MASK_D = BIT(3),
+};
+
enum regulation_txpwr_lmt {
TXPWR_LMT_FCC = 0,
TXPWR_LMT_MKK = 1,
@@ -571,6 +591,7 @@ enum ht_channel_width {
HT_CHANNEL_WIDTH_20 = 0,
HT_CHANNEL_WIDTH_20_40 = 1,
HT_CHANNEL_WIDTH_80 = 2,
+ HT_CHANNEL_WIDTH_MAX,
};
/* Ref: 802.11i sepc D10.0 7.3.2.25.1
@@ -952,6 +973,8 @@ enum package_type {
enum rtl_spec_ver {
RTL_SPEC_NEW_RATEID = BIT(0), /* use ratr_table_mode_new */
+ RTL_SPEC_SUPPORT_VHT = BIT(1), /* support VHT */
+ RTL_SPEC_EXT_C2H = BIT(2), /* extend FW C2H (e.g. TX REPORT) */
};
struct octet_string {
@@ -1277,7 +1300,7 @@ struct rtl_phy {
u32 tx_power_by_rate_offset[TX_PWR_BY_RATE_NUM_BAND]
[TX_PWR_BY_RATE_NUM_RF]
[TX_PWR_BY_RATE_NUM_RF]
- [TX_PWR_BY_RATE_NUM_SECTION];
+ [TX_PWR_BY_RATE_NUM_RATE];
u8 txpwr_by_rate_base_24g[TX_PWR_BY_RATE_NUM_RF]
[TX_PWR_BY_RATE_NUM_RF]
[MAX_BASE_NUM_IN_PHY_REG_PG_24G];
@@ -1794,6 +1817,7 @@ struct rtl_dm {
#define EFUSE_MAX_LOGICAL_SIZE 512
struct rtl_efuse {
+ const struct rtl_efuse_ops *efuse_ops;
bool autoLoad_ok;
bool bootfromefuse;
u16 max_physical_size;
@@ -1899,6 +1923,12 @@ struct rtl_efuse {
u8 channel_plan;
};
+struct rtl_efuse_ops {
+ int (*efuse_onebyte_read)(struct ieee80211_hw *hw, u16 addr, u8 *data);
+ void (*efuse_logical_map_read)(struct ieee80211_hw *hw, u8 type,
+ u16 offset, u32 *value);
+};
+
struct rtl_tx_report {
atomic_t sn;
u16 last_sent_sn;
@@ -2231,6 +2261,7 @@ struct rtl_hal_ops {
void (*bt_coex_off_before_lps) (struct ieee80211_hw *hw);
void (*fill_h2c_cmd) (struct ieee80211_hw *hw, u8 element_id,
u32 cmd_len, u8 *p_cmdbuffer);
+ void (*set_default_port_id_cmd)(struct ieee80211_hw *hw);
bool (*get_btc_status) (void);
bool (*is_fw_header)(struct rtlwifi_firmware_header *hdr);
u32 (*rx_command_packet)(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/rsi/Kconfig b/drivers/net/wireless/rsi/Kconfig
index 7c5e4ca4e3d0..f004be33fcfa 100644
--- a/drivers/net/wireless/rsi/Kconfig
+++ b/drivers/net/wireless/rsi/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_RSI
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_RSI
@@ -42,4 +42,13 @@ config RSI_USB
This option enables the USB bus support in rsi drivers.
Select M (recommended), if you have a RSI 1x1 wireless module.
+config RSI_COEX
+ bool "Redpine Signals WLAN BT Coexistence support"
+ depends on BT_HCIRSI && RSI_91X
+ default y
+ ---help---
+ This option enables the WLAN BT coex support in rsi drivers.
+	  Select Y (recommended) if you want to use this feature
+	  and you have an RS9113 module.
+
endif # WLAN_VENDOR_RSI
diff --git a/drivers/net/wireless/rsi/Makefile b/drivers/net/wireless/rsi/Makefile
index 47c45908d894..ff87121a5928 100644
--- a/drivers/net/wireless/rsi/Makefile
+++ b/drivers/net/wireless/rsi/Makefile
@@ -5,6 +5,7 @@ rsi_91x-y += rsi_91x_mac80211.o
rsi_91x-y += rsi_91x_mgmt.o
rsi_91x-y += rsi_91x_hal.o
rsi_91x-y += rsi_91x_ps.o
+rsi_91x-$(CONFIG_RSI_COEX) += rsi_91x_coex.o
rsi_91x-$(CONFIG_RSI_DEBUGFS) += rsi_91x_debugfs.o
rsi_usb-y += rsi_91x_usb.o rsi_91x_usb_ops.o
diff --git a/drivers/net/wireless/rsi/rsi_91x_coex.c b/drivers/net/wireless/rsi/rsi_91x_coex.c
new file mode 100644
index 000000000000..d055099dadf1
--- /dev/null
+++ b/drivers/net/wireless/rsi/rsi_91x_coex.c
@@ -0,0 +1,179 @@
+/**
+ * Copyright (c) 2018 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "rsi_main.h"
+#include "rsi_coex.h"
+#include "rsi_mgmt.h"
+#include "rsi_hal.h"
+
+static enum rsi_coex_queues rsi_coex_determine_coex_q
+ (struct rsi_coex_ctrl_block *coex_cb)
+{
+ enum rsi_coex_queues q_num = RSI_COEX_Q_INVALID;
+
+ if (skb_queue_len(&coex_cb->coex_tx_qs[RSI_COEX_Q_COMMON]) > 0)
+ q_num = RSI_COEX_Q_COMMON;
+ if (skb_queue_len(&coex_cb->coex_tx_qs[RSI_COEX_Q_BT]) > 0)
+ q_num = RSI_COEX_Q_BT;
+ if (skb_queue_len(&coex_cb->coex_tx_qs[RSI_COEX_Q_WLAN]) > 0)
+ q_num = RSI_COEX_Q_WLAN;
+
+ return q_num;
+}
+
+static void rsi_coex_sched_tx_pkts(struct rsi_coex_ctrl_block *coex_cb)
+{
+ enum rsi_coex_queues coex_q = RSI_COEX_Q_INVALID;
+ struct sk_buff *skb;
+
+ do {
+ coex_q = rsi_coex_determine_coex_q(coex_cb);
+ rsi_dbg(INFO_ZONE, "queue = %d\n", coex_q);
+
+ if (coex_q == RSI_COEX_Q_BT) {
+ skb = skb_dequeue(&coex_cb->coex_tx_qs[RSI_COEX_Q_BT]);
+ rsi_send_bt_pkt(coex_cb->priv, skb);
+ }
+ } while (coex_q != RSI_COEX_Q_INVALID);
+}
+
+static void rsi_coex_scheduler_thread(struct rsi_common *common)
+{
+ struct rsi_coex_ctrl_block *coex_cb =
+ (struct rsi_coex_ctrl_block *)common->coex_cb;
+ u32 timeout = EVENT_WAIT_FOREVER;
+
+ do {
+ rsi_wait_event(&coex_cb->coex_tx_thread.event, timeout);
+ rsi_reset_event(&coex_cb->coex_tx_thread.event);
+
+ rsi_coex_sched_tx_pkts(coex_cb);
+ } while (atomic_read(&coex_cb->coex_tx_thread.thread_done) == 0);
+
+ complete_and_exit(&coex_cb->coex_tx_thread.completion, 0);
+}
+
+int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg)
+{
+ u8 msg_type = msg[RSI_RX_DESC_MSG_TYPE_OFFSET];
+
+ switch (msg_type) {
+ case COMMON_CARD_READY_IND:
+ rsi_dbg(INFO_ZONE, "common card ready received\n");
+ rsi_handle_card_ready(common, msg);
+ break;
+ case SLEEP_NOTIFY_IND:
+ rsi_dbg(INFO_ZONE, "sleep notify received\n");
+ rsi_mgmt_pkt_recv(common, msg);
+ break;
+ }
+
+ return 0;
+}
+
+static inline int rsi_map_coex_q(u8 hal_queue)
+{
+ switch (hal_queue) {
+ case RSI_COEX_Q:
+ return RSI_COEX_Q_COMMON;
+ case RSI_WLAN_Q:
+ return RSI_COEX_Q_WLAN;
+ case RSI_BT_Q:
+ return RSI_COEX_Q_BT;
+ }
+ return RSI_COEX_Q_INVALID;
+}
+
+int rsi_coex_send_pkt(void *priv, struct sk_buff *skb, u8 hal_queue)
+{
+ struct rsi_common *common = (struct rsi_common *)priv;
+ struct rsi_coex_ctrl_block *coex_cb =
+ (struct rsi_coex_ctrl_block *)common->coex_cb;
+ struct skb_info *tx_params = NULL;
+ enum rsi_coex_queues coex_q;
+ int status;
+
+ coex_q = rsi_map_coex_q(hal_queue);
+ if (coex_q == RSI_COEX_Q_INVALID) {
+ rsi_dbg(ERR_ZONE, "Invalid coex queue\n");
+ return -EINVAL;
+ }
+ if (coex_q != RSI_COEX_Q_COMMON &&
+ coex_q != RSI_COEX_Q_WLAN) {
+ skb_queue_tail(&coex_cb->coex_tx_qs[coex_q], skb);
+ rsi_set_event(&coex_cb->coex_tx_thread.event);
+ return 0;
+ }
+ if (common->iface_down) {
+ tx_params =
+ (struct skb_info *)&IEEE80211_SKB_CB(skb)->driver_data;
+
+ if (!(tx_params->flags & INTERNAL_MGMT_PKT)) {
+ rsi_indicate_tx_status(common->priv, skb, -EINVAL);
+ return 0;
+ }
+ }
+
+ /* Send packet to hal */
+ if (skb->priority == MGMT_SOFT_Q)
+ status = rsi_send_mgmt_pkt(common, skb);
+ else
+ status = rsi_send_data_pkt(common, skb);
+
+ return status;
+}
+
+int rsi_coex_attach(struct rsi_common *common)
+{
+ struct rsi_coex_ctrl_block *coex_cb;
+ int cnt;
+
+ coex_cb = kzalloc(sizeof(*coex_cb), GFP_KERNEL);
+ if (!coex_cb)
+ return -ENOMEM;
+
+ common->coex_cb = (void *)coex_cb;
+ coex_cb->priv = common;
+
+ /* Initialize co-ex queues */
+ for (cnt = 0; cnt < NUM_COEX_TX_QUEUES; cnt++)
+ skb_queue_head_init(&coex_cb->coex_tx_qs[cnt]);
+ rsi_init_event(&coex_cb->coex_tx_thread.event);
+
+ /* Initialize co-ex thread */
+ if (rsi_create_kthread(common,
+ &coex_cb->coex_tx_thread,
+ rsi_coex_scheduler_thread,
+ "Coex-Tx-Thread")) {
+ rsi_dbg(ERR_ZONE, "%s: Unable to init tx thrd\n", __func__);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+void rsi_coex_detach(struct rsi_common *common)
+{
+ struct rsi_coex_ctrl_block *coex_cb =
+ (struct rsi_coex_ctrl_block *)common->coex_cb;
+ int cnt;
+
+ rsi_kill_thread(&coex_cb->coex_tx_thread);
+
+ for (cnt = 0; cnt < NUM_COEX_TX_QUEUES; cnt++)
+ skb_queue_purge(&coex_cb->coex_tx_qs[cnt]);
+
+ kfree(coex_cb);
+}
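[Review note] In rsi_coex_determine_coex_q() above, each later non-empty check overwrites q_num, so the effective priority is WLAN over BT over COMMON. The early-return form below is behaviorally equivalent and makes that ordering explicit (illustrative rewrite, not a proposed change):

static enum rsi_coex_queues rsi_coex_pick_q(struct rsi_coex_ctrl_block *cb)
{
	if (skb_queue_len(&cb->coex_tx_qs[RSI_COEX_Q_WLAN]) > 0)
		return RSI_COEX_Q_WLAN;
	if (skb_queue_len(&cb->coex_tx_qs[RSI_COEX_Q_BT]) > 0)
		return RSI_COEX_Q_BT;
	if (skb_queue_len(&cb->coex_tx_qs[RSI_COEX_Q_COMMON]) > 0)
		return RSI_COEX_Q_COMMON;
	return RSI_COEX_Q_INVALID;
}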
diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c
index d0d2201830e8..5dafd2e1306c 100644
--- a/drivers/net/wireless/rsi/rsi_91x_core.c
+++ b/drivers/net/wireless/rsi/rsi_91x_core.c
@@ -17,6 +17,7 @@
#include "rsi_mgmt.h"
#include "rsi_common.h"
#include "rsi_hal.h"
+#include "rsi_coex.h"
/**
* rsi_determine_min_weight_queue() - This function determines the queue with
@@ -301,14 +302,23 @@ void rsi_core_qos_processor(struct rsi_common *common)
mutex_unlock(&common->tx_lock);
break;
}
-
- if (q_num == MGMT_SOFT_Q) {
- status = rsi_send_mgmt_pkt(common, skb);
- } else if (q_num == MGMT_BEACON_Q) {
+ if (q_num == MGMT_BEACON_Q) {
status = rsi_send_pkt_to_bus(common, skb);
dev_kfree_skb(skb);
} else {
- status = rsi_send_data_pkt(common, skb);
+#ifdef CONFIG_RSI_COEX
+ if (common->coex_mode > 1) {
+ status = rsi_coex_send_pkt(common, skb,
+ RSI_WLAN_Q);
+ } else {
+#endif
+ if (q_num == MGMT_SOFT_Q)
+ status = rsi_send_mgmt_pkt(common, skb);
+ else
+ status = rsi_send_data_pkt(common, skb);
+#ifdef CONFIG_RSI_COEX
+ }
+#endif
}
if (status) {
diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index 1176de646942..de608ae365a4 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -15,6 +15,7 @@
*/
#include <linux/firmware.h>
+#include <net/bluetooth/bluetooth.h>
#include "rsi_mgmt.h"
#include "rsi_hal.h"
#include "rsi_sdio.h"
@@ -24,6 +25,7 @@
static struct ta_metadata metadata_flash_content[] = {
{"flash_content", 0x00010000},
{"rsi/rs9113_wlan_qspi.rps", 0x00010000},
+ {"rsi/rs9113_wlan_bt_dual_mode.rps", 0x00010000},
};
int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb)
@@ -31,8 +33,15 @@ int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb)
struct rsi_hw *adapter = common->priv;
int status;
+ if (common->coex_mode > 1)
+ mutex_lock(&common->tx_bus_mutex);
+
status = adapter->host_intf_ops->write_pkt(common->priv,
skb->data, skb->len);
+
+ if (common->coex_mode > 1)
+ mutex_unlock(&common->tx_bus_mutex);
+
return status;
}
@@ -296,8 +305,7 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
if (status)
goto err;
- status = adapter->host_intf_ops->write_pkt(common->priv, skb->data,
- skb->len);
+ status = rsi_send_pkt_to_bus(common, skb);
if (status)
rsi_dbg(ERR_ZONE, "%s: Failed to write pkt\n", __func__);
@@ -342,8 +350,7 @@ int rsi_send_mgmt_pkt(struct rsi_common *common,
goto err;
rsi_prepare_mgmt_desc(common, skb);
- status = adapter->host_intf_ops->write_pkt(common->priv,
- (u8 *)skb->data, skb->len);
+ status = rsi_send_pkt_to_bus(common, skb);
if (status)
rsi_dbg(ERR_ZONE, "%s: Failed to write the packet\n", __func__);
@@ -352,6 +359,43 @@ err:
return status;
}
+int rsi_send_bt_pkt(struct rsi_common *common, struct sk_buff *skb)
+{
+ int status = -EINVAL;
+ u8 header_size = 0;
+ struct rsi_bt_desc *bt_desc;
+ u8 queueno = ((skb->data[1] >> 4) & 0xf);
+
+ if (queueno == RSI_BT_MGMT_Q) {
+ status = rsi_send_pkt_to_bus(common, skb);
+ if (status)
+ rsi_dbg(ERR_ZONE, "%s: Failed to write bt mgmt pkt\n",
+ __func__);
+ goto out;
+ }
+ header_size = FRAME_DESC_SZ;
+ if (header_size > skb_headroom(skb)) {
+ rsi_dbg(ERR_ZONE, "%s: Not enough headroom\n", __func__);
+ status = -ENOSPC;
+ goto out;
+ }
+ skb_push(skb, header_size);
+ memset(skb->data, 0, header_size);
+ bt_desc = (struct rsi_bt_desc *)skb->data;
+
+ rsi_set_len_qno(&bt_desc->len_qno, (skb->len - FRAME_DESC_SZ),
+ RSI_BT_DATA_Q);
+ bt_desc->bt_pkt_type = cpu_to_le16(bt_cb(skb)->pkt_type);
+
+ status = rsi_send_pkt_to_bus(common, skb);
+ if (status)
+ rsi_dbg(ERR_ZONE, "%s: Failed to write bt pkt\n", __func__);
+
+out:
+ dev_kfree_skb(skb);
+ return status;
+}
+
int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb)
{
struct rsi_hw *adapter = (struct rsi_hw *)common->priv;
@@ -926,10 +970,6 @@ int rsi_hal_device_init(struct rsi_hw *adapter)
{
struct rsi_common *common = adapter->priv;
- common->coex_mode = RSI_DEV_COEX_MODE_WIFI_ALONE;
- common->oper_mode = RSI_DEV_OPMODE_WIFI_ALONE;
- adapter->device_model = RSI_DEV_9113;
-
switch (adapter->device_model) {
case RSI_DEV_9113:
if (rsi_load_firmware(adapter)) {
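For reference, rsi_send_bt_pkt() above packs the payload length and HAL queue number into a single little-endian word; a standalone sketch of that packing, mirroring the rsi_set_len_qno() inline from rsi_mgmt.h (the function name here is illustrative):

static __le16 example_pack_len_qno(u16 len, u8 qno)
{
	/* length in bits 0-11, queue number in bits 12-14 */
	return cpu_to_le16(len | ((qno & 7) << 12));
}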
diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index 0cb8e68bab58..1485a0c89df2 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -18,8 +18,10 @@
#include <linux/module.h>
#include <linux/firmware.h>
+#include <net/rsi_91x.h>
#include "rsi_mgmt.h"
#include "rsi_common.h"
+#include "rsi_coex.h"
#include "rsi_hal.h"
u32 rsi_zone_enabled = /* INFO_ZONE |
@@ -34,6 +36,14 @@ u32 rsi_zone_enabled = /* INFO_ZONE |
0;
EXPORT_SYMBOL_GPL(rsi_zone_enabled);
+#ifdef CONFIG_RSI_COEX
+static struct rsi_proto_ops g_proto_ops = {
+ .coex_send_pkt = rsi_coex_send_pkt,
+ .get_host_intf = rsi_get_host_intf,
+ .set_bt_context = rsi_set_bt_context,
+};
+#endif
+
/**
* rsi_dbg() - This function outputs informational messages.
* @zone: Zone of interest for output message.
@@ -60,8 +70,24 @@ EXPORT_SYMBOL_GPL(rsi_dbg);
static char *opmode_str(int oper_mode)
{
switch (oper_mode) {
- case RSI_DEV_OPMODE_WIFI_ALONE:
+ case DEV_OPMODE_WIFI_ALONE:
return "Wi-Fi alone";
+ case DEV_OPMODE_BT_ALONE:
+ return "BT EDR alone";
+ case DEV_OPMODE_BT_LE_ALONE:
+ return "BT LE alone";
+ case DEV_OPMODE_BT_DUAL:
+ return "BT Dual";
+ case DEV_OPMODE_STA_BT:
+ return "Wi-Fi STA + BT EDR";
+ case DEV_OPMODE_STA_BT_LE:
+ return "Wi-Fi STA + BT LE";
+ case DEV_OPMODE_STA_BT_DUAL:
+ return "Wi-Fi STA + BT DUAL";
+ case DEV_OPMODE_AP_BT:
+ return "Wi-Fi AP + BT EDR";
+ case DEV_OPMODE_AP_BT_DUAL:
+ return "Wi-Fi AP + BT DUAL";
}
return "Unknown";
@@ -137,16 +163,19 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common,
*
* Return: 0 on success, -1 on failure.
*/
-int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len)
+int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len)
{
u8 *frame_desc = NULL, extended_desc = 0;
u32 index, length = 0, queueno = 0;
u16 actual_length = 0, offset;
struct sk_buff *skb = NULL;
+#ifdef CONFIG_RSI_COEX
+ u8 bt_pkt_type;
+#endif
index = 0;
do {
- frame_desc = &common->rx_data_pkt[index];
+ frame_desc = &rx_pkt[index];
actual_length = *(u16 *)&frame_desc[0];
offset = *(u16 *)&frame_desc[2];
@@ -160,8 +189,15 @@ int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len)
switch (queueno) {
case RSI_COEX_Q:
- rsi_mgmt_pkt_recv(common, (frame_desc + offset));
+#ifdef CONFIG_RSI_COEX
+ if (common->coex_mode > 1)
+ rsi_coex_recv_pkt(common, frame_desc + offset);
+ else
+#endif
+ rsi_mgmt_pkt_recv(common,
+ (frame_desc + offset));
break;
+
case RSI_WIFI_DATA_Q:
skb = rsi_prepare_skb(common,
(frame_desc + offset),
@@ -177,6 +213,25 @@ int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len)
rsi_mgmt_pkt_recv(common, (frame_desc + offset));
break;
+#ifdef CONFIG_RSI_COEX
+ case RSI_BT_MGMT_Q:
+ case RSI_BT_DATA_Q:
+#define BT_RX_PKT_TYPE_OFST 14
+#define BT_CARD_READY_IND 0x89
+ bt_pkt_type = frame_desc[offset + BT_RX_PKT_TYPE_OFST];
+ if (bt_pkt_type == BT_CARD_READY_IND) {
+ rsi_dbg(INFO_ZONE, "BT Card ready recvd\n");
+ if (rsi_bt_ops.attach(common, &g_proto_ops))
+ rsi_dbg(ERR_ZONE,
+ "Failed to attach BT module\n");
+ } else {
+ if (common->bt_adapter)
+ rsi_bt_ops.recv_pkt(common->bt_adapter,
+ frame_desc + offset);
+ }
+ break;
+#endif
+
default:
rsi_dbg(ERR_ZONE, "%s: pkt from invalid queue: %d\n",
__func__, queueno);
@@ -217,13 +272,29 @@ static void rsi_tx_scheduler_thread(struct rsi_common *common)
complete_and_exit(&common->tx_thread.completion, 0);
}
+#ifdef CONFIG_RSI_COEX
+enum rsi_host_intf rsi_get_host_intf(void *priv)
+{
+ struct rsi_common *common = (struct rsi_common *)priv;
+
+ return common->priv->rsi_host_intf;
+}
+
+void rsi_set_bt_context(void *priv, void *bt_context)
+{
+ struct rsi_common *common = (struct rsi_common *)priv;
+
+ common->bt_adapter = bt_context;
+}
+#endif
+
/**
* rsi_91x_init() - This function initializes os interface operations.
* @void: Void.
*
 * Return: Pointer to the adapter structure on success, NULL on failure.
*/
-struct rsi_hw *rsi_91x_init(void)
+struct rsi_hw *rsi_91x_init(u16 oper_mode)
{
struct rsi_hw *adapter = NULL;
struct rsi_common *common = NULL;
@@ -251,6 +322,7 @@ struct rsi_hw *rsi_91x_init(void)
mutex_init(&common->mutex);
mutex_init(&common->tx_lock);
mutex_init(&common->rx_lock);
+ mutex_init(&common->tx_bus_mutex);
if (rsi_create_kthread(common,
&common->tx_thread,
@@ -265,6 +337,43 @@ struct rsi_hw *rsi_91x_init(void)
timer_setup(&common->roc_timer, rsi_roc_timeout, 0);
init_completion(&common->wlan_init_completion);
common->init_done = true;
+ adapter->device_model = RSI_DEV_9113;
+ common->oper_mode = oper_mode;
+
+ /* Determine coex mode */
+ switch (common->oper_mode) {
+ case DEV_OPMODE_STA_BT_DUAL:
+ case DEV_OPMODE_STA_BT:
+ case DEV_OPMODE_STA_BT_LE:
+ case DEV_OPMODE_BT_ALONE:
+ case DEV_OPMODE_BT_LE_ALONE:
+ case DEV_OPMODE_BT_DUAL:
+ common->coex_mode = 2;
+ break;
+ case DEV_OPMODE_AP_BT_DUAL:
+ case DEV_OPMODE_AP_BT:
+ common->coex_mode = 4;
+ break;
+ case DEV_OPMODE_WIFI_ALONE:
+ common->coex_mode = 1;
+ break;
+ default:
+ common->oper_mode = DEV_OPMODE_WIFI_ALONE;
+ common->coex_mode = 1;
+ }
+ rsi_dbg(INFO_ZONE, "%s: oper_mode = %d, coex_mode = %d\n",
+ __func__, common->oper_mode, common->coex_mode);
+
+#ifdef CONFIG_RSI_COEX
+ if (common->coex_mode > 1) {
+ if (rsi_coex_attach(common)) {
+ rsi_dbg(ERR_ZONE, "Failed to init coex module\n");
+ goto err;
+ }
+ }
+#endif
+
return adapter;
err:
@@ -292,6 +401,16 @@ void rsi_91x_deinit(struct rsi_hw *adapter)
for (ii = 0; ii < NUM_SOFT_QUEUES; ii++)
skb_queue_purge(&common->tx_queue[ii]);
+#ifdef CONFIG_RSI_COEX
+ if (common->coex_mode > 1) {
+ if (common->bt_adapter) {
+ rsi_bt_ops.detach(common->bt_adapter);
+ common->bt_adapter = NULL;
+ }
+ rsi_coex_detach(common);
+ }
+#endif
+
common->init_done = false;
kfree(common);
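Condensed, the operating-mode to coex-mode mapping established in rsi_91x_init() above works out to:

/* dev_oper_mode -> coex_mode
 *   1 (Wi-Fi alone)                          -> 1 (no coex)
 *   4, 5, 8, 9, 12, 13 (BT and STA+BT mixes) -> 2
 *   6, 14 (AP + BT mixes)                    -> 4
 *   anything else falls back to Wi-Fi alone  -> 1
 */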
diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
index 46c9d5470dfb..c21fca750fd4 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
@@ -1791,7 +1791,7 @@ out:
return -EINVAL;
}
-static int rsi_handle_card_ready(struct rsi_common *common, u8 *msg)
+int rsi_handle_card_ready(struct rsi_common *common, u8 *msg)
{
switch (common->fsm_state) {
case FSM_CARD_NOT_READY:
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index b0cf41195051..98c7d1dae18e 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -18,8 +18,17 @@
#include <linux/module.h>
#include "rsi_sdio.h"
#include "rsi_common.h"
+#include "rsi_coex.h"
#include "rsi_hal.h"
+/* Default operating mode is Wi-Fi STA + BT classic + BT LE */
+static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL;
+module_param(dev_oper_mode, ushort, 0444);
+MODULE_PARM_DESC(dev_oper_mode,
+ "1[Wi-Fi], 4[BT], 8[BT LE], 12[BT classic + BT LE]\n"
+ "5[Wi-Fi STA + BT classic], 9[Wi-Fi STA + BT LE]\n"
+ "13[Wi-Fi STA + BT classic + BT LE], 6[AP + BT classic]\n"
+ "14[AP + BT classic + BT LE]");
+
/**
* rsi_sdio_set_cmd52_arg() - This function prepares cmd 52 read/write arg.
* @rw: Read/write
@@ -754,6 +763,8 @@ static int rsi_sdio_host_intf_write_pkt(struct rsi_hw *adapter,
int status;
queueno = ((pkt[1] >> 4) & 0xf);
+ if (queueno == RSI_BT_MGMT_Q || queueno == RSI_BT_DATA_Q)
+ queueno = RSI_BT_Q;
num_blocks = len / block_size;
@@ -922,14 +933,16 @@ static int rsi_probe(struct sdio_func *pfunction,
const struct sdio_device_id *id)
{
struct rsi_hw *adapter;
+ struct rsi_91x_sdiodev *sdev;
+ int status;
rsi_dbg(INIT_ZONE, "%s: Init function called\n", __func__);
- adapter = rsi_91x_init();
+ adapter = rsi_91x_init(dev_oper_mode);
if (!adapter) {
rsi_dbg(ERR_ZONE, "%s: Failed to init os intf ops\n",
__func__);
- return 1;
+ return -EINVAL;
}
adapter->rsi_host_intf = RSI_HOST_INTF_SDIO;
adapter->host_intf_ops = &sdio_host_intf_ops;
@@ -937,39 +950,58 @@ static int rsi_probe(struct sdio_func *pfunction,
if (rsi_init_sdio_interface(adapter, pfunction)) {
rsi_dbg(ERR_ZONE, "%s: Failed to init sdio interface\n",
__func__);
- goto fail;
+ status = -EIO;
+ goto fail_free_adapter;
+ }
+ sdev = (struct rsi_91x_sdiodev *)adapter->rsi_dev;
+ rsi_init_event(&sdev->rx_thread.event);
+ status = rsi_create_kthread(adapter->priv, &sdev->rx_thread,
+ rsi_sdio_rx_thread, "SDIO-RX-Thread");
+ if (status) {
+ rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
+ goto fail_free_adapter;
}
+ skb_queue_head_init(&sdev->rx_q.head);
+ sdev->rx_q.num_rx_pkts = 0;
+
sdio_claim_host(pfunction);
if (sdio_claim_irq(pfunction, rsi_handle_interrupt)) {
rsi_dbg(ERR_ZONE, "%s: Failed to request IRQ\n", __func__);
sdio_release_host(pfunction);
- goto fail;
+ status = -EIO;
+ goto fail_kill_thread;
}
sdio_release_host(pfunction);
rsi_dbg(INIT_ZONE, "%s: Registered Interrupt handler\n", __func__);
if (rsi_hal_device_init(adapter)) {
rsi_dbg(ERR_ZONE, "%s: Failed in device init\n", __func__);
- sdio_claim_host(pfunction);
- sdio_release_irq(pfunction);
- sdio_disable_func(pfunction);
- sdio_release_host(pfunction);
- goto fail;
+ status = -EINVAL;
+ goto fail_kill_thread;
}
rsi_dbg(INFO_ZONE, "===> RSI Device Init Done <===\n");
if (rsi_sdio_master_access_msword(adapter, MISC_CFG_BASE_ADDR)) {
rsi_dbg(ERR_ZONE, "%s: Unable to set ms word reg\n", __func__);
- return -EIO;
+ status = -EIO;
+ goto fail_dev_init;
}
adapter->priv->hibernate_resume = false;
adapter->priv->reinit_hw = false;
return 0;
-fail:
+
+fail_dev_init:
+ sdio_claim_host(pfunction);
+ sdio_release_irq(pfunction);
+ sdio_disable_func(pfunction);
+ sdio_release_host(pfunction);
+fail_kill_thread:
+ rsi_kill_thread(&sdev->rx_thread);
+fail_free_adapter:
rsi_91x_deinit(adapter);
rsi_dbg(ERR_ZONE, "%s: Failed in probe...Exiting\n", __func__);
- return 1;
+ return status;
}
static void ulp_read_write(struct rsi_hw *adapter, u16 addr, u32 data,
@@ -1065,6 +1097,8 @@ static void rsi_disconnect(struct sdio_func *pfunction)
return;
dev = (struct rsi_91x_sdiodev *)adapter->rsi_dev;
+
+ rsi_kill_thread(&dev->rx_thread);
sdio_claim_host(pfunction);
sdio_release_irq(pfunction);
sdio_release_host(pfunction);
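As a usage note (assuming the stock rsi_sdio module name), selecting Wi-Fi STA plus BT classic coexistence at load time would look like:

# modprobe rsi_sdio dev_oper_mode=5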
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
index 8e2a95c486b0..612c211e21a1 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
@@ -16,6 +16,7 @@
*/
#include <linux/firmware.h>
+#include <net/rsi_91x.h>
#include "rsi_sdio.h"
#include "rsi_common.h"
@@ -59,6 +60,43 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word)
return status;
}
+void rsi_sdio_rx_thread(struct rsi_common *common)
+{
+ struct rsi_hw *adapter = common->priv;
+ struct rsi_91x_sdiodev *sdev = adapter->rsi_dev;
+ struct sk_buff *skb;
+ int status;
+
+ do {
+ rsi_wait_event(&sdev->rx_thread.event, EVENT_WAIT_FOREVER);
+ rsi_reset_event(&sdev->rx_thread.event);
+
+ while (true) {
+ if (atomic_read(&sdev->rx_thread.thread_done))
+ goto out;
+
+ skb = skb_dequeue(&sdev->rx_q.head);
+ if (!skb)
+ break;
+ if (sdev->rx_q.num_rx_pkts > 0)
+ sdev->rx_q.num_rx_pkts--;
+ status = rsi_read_pkt(common, skb->data, skb->len);
+ if (status) {
+ rsi_dbg(ERR_ZONE, "Failed to read the packet\n");
+ dev_kfree_skb(skb);
+ break;
+ }
+ dev_kfree_skb(skb);
+ }
+ } while (1);
+
+out:
+ rsi_dbg(INFO_ZONE, "%s: Terminated SDIO RX thread\n", __func__);
+ skb_queue_purge(&sdev->rx_q.head);
+ atomic_inc(&sdev->rx_thread.thread_done);
+ complete_and_exit(&sdev->rx_thread.completion, 0);
+}
+
/**
* rsi_process_pkt() - This Function reads rx_blocks register and figures out
* the size of the rx pkt.
@@ -75,6 +113,10 @@ static int rsi_process_pkt(struct rsi_common *common)
u32 rcv_pkt_len = 0;
int status = 0;
u8 value = 0;
+ struct sk_buff *skb;
+
+ if (dev->rx_q.num_rx_pkts >= RSI_MAX_RX_PKTS)
+ return 0;
num_blks = ((adapter->interrupt_status & 1) |
((adapter->interrupt_status >> RECV_NUM_BLOCKS) << 1));
@@ -102,27 +144,24 @@ static int rsi_process_pkt(struct rsi_common *common)
rcv_pkt_len = (num_blks * 256);
- common->rx_data_pkt = kmalloc(rcv_pkt_len, GFP_KERNEL);
- if (!common->rx_data_pkt) {
- rsi_dbg(ERR_ZONE, "%s: Failed in memory allocation\n",
- __func__);
+ skb = dev_alloc_skb(rcv_pkt_len);
+ if (!skb)
return -ENOMEM;
- }
- status = rsi_sdio_host_intf_read_pkt(adapter,
- common->rx_data_pkt,
- rcv_pkt_len);
+ status = rsi_sdio_host_intf_read_pkt(adapter, skb->data, rcv_pkt_len);
if (status) {
rsi_dbg(ERR_ZONE, "%s: Failed to read packet from card\n",
__func__);
- goto fail;
+ dev_kfree_skb(skb);
+ return status;
}
+ skb_put(skb, rcv_pkt_len);
+ skb_queue_tail(&dev->rx_q.head, skb);
+ dev->rx_q.num_rx_pkts++;
- status = rsi_read_pkt(common, rcv_pkt_len);
+ rsi_set_event(&dev->rx_thread.event);
-fail:
- kfree(common->rx_data_pkt);
- return status;
+ return 0;
}
/**
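The net effect of the rework above is a bounded producer/consumer split, roughly (all names from the diff; the IRQ path no longer parses frames in place):

/*  IRQ path: rsi_process_pkt()          SDIO-RX-Thread: rsi_sdio_rx_thread()
 *    skb = dev_alloc_skb(rcv_pkt_len)     rsi_wait_event(&rx_thread.event, ...)
 *    read blocks from the card into skb   skb = skb_dequeue(&rx_q.head)
 *    skb_queue_tail(&rx_q.head, skb)      rsi_read_pkt(common, skb->data, ...)
 *    rsi_set_event(&rx_thread.event) -->  dev_kfree_skb(skb)
 *
 * The queue is bounded by RSI_MAX_RX_PKTS; once full, rsi_process_pkt()
 * returns early and pending frames stay on the card until the thread
 * catches up.
 */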
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 8f8443833348..be8236f404b5 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -16,8 +16,20 @@
*/
#include <linux/module.h>
+#include <net/rsi_91x.h>
#include "rsi_usb.h"
#include "rsi_hal.h"
+#include "rsi_coex.h"
+
+/* Default operating mode is Wi-Fi STA + BT classic + BT LE */
+static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL;
+module_param(dev_oper_mode, ushort, 0444);
+MODULE_PARM_DESC(dev_oper_mode,
+ "1[Wi-Fi], 4[BT], 8[BT LE], 12[BT classic + BT LE]\n"
+ "5[Wi-Fi STA + BT classic], 9[Wi-Fi STA + BT LE]\n"
+ "13[Wi-Fi STA + BT classic + BT LE], 6[AP + BT classic]\n"
+ "14[AP + BT classic + BT LE]");
+
+static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num);
/**
* rsi_usb_card_write() - This function writes to the USB Card.
@@ -103,41 +115,42 @@ static int rsi_find_bulk_in_and_out_endpoints(struct usb_interface *interface,
struct usb_host_interface *iface_desc;
struct usb_endpoint_descriptor *endpoint;
__le16 buffer_size;
- int ii, bep_found = 0;
+ int ii, bin_found = 0, bout_found = 0;
iface_desc = &(interface->altsetting[0]);
for (ii = 0; ii < iface_desc->desc.bNumEndpoints; ++ii) {
endpoint = &(iface_desc->endpoint[ii].desc);
- if ((!(dev->bulkin_endpoint_addr)) &&
+ if (!dev->bulkin_endpoint_addr[bin_found] &&
(endpoint->bEndpointAddress & USB_DIR_IN) &&
- ((endpoint->bmAttributes &
- USB_ENDPOINT_XFERTYPE_MASK) ==
+ ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
USB_ENDPOINT_XFER_BULK)) {
buffer_size = endpoint->wMaxPacketSize;
- dev->bulkin_size = buffer_size;
- dev->bulkin_endpoint_addr =
+ dev->bulkin_size[bin_found] = buffer_size;
+ dev->bulkin_endpoint_addr[bin_found] =
endpoint->bEndpointAddress;
+ bin_found++;
}
- if (!dev->bulkout_endpoint_addr[bep_found] &&
+ if (!dev->bulkout_endpoint_addr[bout_found] &&
!(endpoint->bEndpointAddress & USB_DIR_IN) &&
((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
- USB_ENDPOINT_XFER_BULK)) {
- dev->bulkout_endpoint_addr[bep_found] =
+ USB_ENDPOINT_XFER_BULK)) {
+ dev->bulkout_endpoint_addr[bout_found] =
endpoint->bEndpointAddress;
buffer_size = endpoint->wMaxPacketSize;
- dev->bulkout_size[bep_found] = buffer_size;
- bep_found++;
+ dev->bulkout_size[bout_found] = buffer_size;
+ bout_found++;
}
- if (bep_found >= MAX_BULK_EP)
+ if (bin_found >= MAX_BULK_EP || bout_found >= MAX_BULK_EP)
break;
}
- if (!(dev->bulkin_endpoint_addr) &&
- (dev->bulkout_endpoint_addr[0]))
+ if (!dev->bulkin_endpoint_addr[0] ||
+ !dev->bulkout_endpoint_addr[0])
return -EINVAL;
return 0;
@@ -247,13 +260,33 @@ static int rsi_usb_reg_write(struct usb_device *usbdev,
*/
static void rsi_rx_done_handler(struct urb *urb)
{
- struct rsi_hw *adapter = urb->context;
- struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
+ struct rx_usb_ctrl_block *rx_cb = urb->context;
+ struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)rx_cb->data;
+ int status = -EINVAL;
if (urb->status)
- return;
+ goto out;
+
+ if (urb->actual_length <= 0) {
+ rsi_dbg(INFO_ZONE, "%s: Zero length packet\n", __func__);
+ goto out;
+ }
+ if (skb_queue_len(&dev->rx_q) >= RSI_MAX_RX_PKTS) {
+ rsi_dbg(INFO_ZONE, "Max RX packets reached\n");
+ goto out;
+ }
+ skb_put(rx_cb->rx_skb, urb->actual_length);
+ skb_queue_tail(&dev->rx_q, rx_cb->rx_skb);
rsi_set_event(&dev->rx_thread.event);
+ status = 0;
+
+out:
+ /* Free any failed skb before resubmitting: rsi_rx_urb_submit()
+ * attaches a fresh skb to rx_cb, so freeing afterwards would hit the
+ * buffer now owned by the in-flight URB.
+ */
+ if (status)
+ dev_kfree_skb(rx_cb->rx_skb);
+
+ if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num))
+ rsi_dbg(ERR_ZONE, "%s: Failed in urb submission\n", __func__);
}
/**
@@ -262,20 +295,34 @@ static void rsi_rx_done_handler(struct urb *urb)
*
* Return: 0 on success, a negative error code on failure.
*/
-static int rsi_rx_urb_submit(struct rsi_hw *adapter)
+static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num)
{
struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
- struct urb *urb = dev->rx_usb_urb[0];
+ struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1];
+ struct urb *urb = rx_cb->rx_urb;
int status;
+ struct sk_buff *skb;
+ u8 dword_align_bytes = 0;
+
+#define RSI_MAX_RX_USB_PKT_SIZE 3000
+ /* Over-allocate by the alignment headroom so a full-sized URB
+ * transfer still fits after the skb_reserve()/skb_push() below.
+ */
+ skb = dev_alloc_skb(RSI_MAX_RX_USB_PKT_SIZE + MAX_DWORD_ALIGN_BYTES);
+ if (!skb)
+ return -ENOMEM;
+ skb_reserve(skb, MAX_DWORD_ALIGN_BYTES);
+ dword_align_bytes = (unsigned long)skb->data & 0x3f;
+ if (dword_align_bytes > 0)
+ skb_push(skb, dword_align_bytes);
+ urb->transfer_buffer = skb->data;
+ rx_cb->rx_skb = skb;
usb_fill_bulk_urb(urb,
dev->usbdev,
usb_rcvbulkpipe(dev->usbdev,
- dev->bulkin_endpoint_addr),
+ dev->bulkin_endpoint_addr[ep_num - 1]),
urb->transfer_buffer,
- 3000,
+ RSI_MAX_RX_USB_PKT_SIZE,
rsi_rx_done_handler,
- adapter);
+ rx_cb);
status = usb_submit_urb(urb, GFP_KERNEL);
if (status)
@@ -487,11 +534,51 @@ static void rsi_deinit_usb_interface(struct rsi_hw *adapter)
struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
rsi_kill_thread(&dev->rx_thread);
- usb_free_urb(dev->rx_usb_urb[0]);
- kfree(adapter->priv->rx_data_pkt);
+
+ usb_free_urb(dev->rx_cb[0].rx_urb);
+ if (adapter->priv->coex_mode > 1)
+ usb_free_urb(dev->rx_cb[1].rx_urb);
+
kfree(dev->tx_buffer);
}
+static int rsi_usb_init_rx(struct rsi_hw *adapter)
+{
+ struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
+ struct rx_usb_ctrl_block *rx_cb;
+ u8 idx, num_rx_cb;
+
+ num_rx_cb = (adapter->priv->coex_mode > 1 ? 2 : 1);
+
+ for (idx = 0; idx < num_rx_cb; idx++) {
+ rx_cb = &dev->rx_cb[idx];
+
+ rx_cb->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!rx_cb->rx_urb) {
+ rsi_dbg(ERR_ZONE, "Failed alloc rx urb[%d]\n", idx);
+ goto err;
+ }
+ rx_cb->ep_num = idx + 1;
+ rx_cb->data = (void *)dev;
+ }
+ skb_queue_head_init(&dev->rx_q);
+ rsi_init_event(&dev->rx_thread.event);
+ if (rsi_create_kthread(adapter->priv, &dev->rx_thread,
+ rsi_usb_rx_thread, "RX-Thread")) {
+ rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ usb_free_urb(dev->rx_cb[0].rx_urb);
+ if (adapter->priv->coex_mode > 1)
+ usb_free_urb(dev->rx_cb[1].rx_urb);
+
+ return -ENOMEM;
+}
+
/**
* rsi_init_usb_interface() - This function initializes the usb interface.
* @adapter: Pointer to the adapter structure.
@@ -503,7 +590,6 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
struct usb_interface *pfunction)
{
struct rsi_91x_usbdev *rsi_dev;
- struct rsi_common *common = adapter->priv;
int status;
rsi_dev = kzalloc(sizeof(*rsi_dev), GFP_KERNEL);
@@ -512,49 +598,37 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
adapter->rsi_dev = rsi_dev;
rsi_dev->usbdev = interface_to_usbdev(pfunction);
+ rsi_dev->priv = (void *)adapter;
- if (rsi_find_bulk_in_and_out_endpoints(pfunction, adapter))
- return -EINVAL;
+ if (rsi_find_bulk_in_and_out_endpoints(pfunction, adapter)) {
+ status = -EINVAL;
+ goto fail_eps;
+ }
adapter->device = &pfunction->dev;
usb_set_intfdata(pfunction, adapter);
- common->rx_data_pkt = kmalloc(2048, GFP_KERNEL);
- if (!common->rx_data_pkt) {
- rsi_dbg(ERR_ZONE, "%s: Failed to allocate memory\n",
- __func__);
- return -ENOMEM;
- }
-
rsi_dev->tx_buffer = kmalloc(2048, GFP_KERNEL);
if (!rsi_dev->tx_buffer) {
status = -ENOMEM;
- goto fail_tx;
+ goto fail_eps;
}
- rsi_dev->rx_usb_urb[0] = usb_alloc_urb(0, GFP_KERNEL);
- if (!rsi_dev->rx_usb_urb[0]) {
+
+ if (rsi_usb_init_rx(adapter)) {
+ rsi_dbg(ERR_ZONE, "Failed to init RX handle\n");
status = -ENOMEM;
goto fail_rx;
}
- rsi_dev->rx_usb_urb[0]->transfer_buffer = adapter->priv->rx_data_pkt;
+
rsi_dev->tx_blk_size = 252;
adapter->block_size = rsi_dev->tx_blk_size;
/* Initializing function callbacks */
- adapter->rx_urb_submit = rsi_rx_urb_submit;
adapter->check_hw_queue_status = rsi_usb_check_queue_status;
adapter->determine_event_timeout = rsi_usb_event_timeout;
adapter->rsi_host_intf = RSI_HOST_INTF_USB;
adapter->host_intf_ops = &usb_host_intf_ops;
- rsi_init_event(&rsi_dev->rx_thread.event);
- status = rsi_create_kthread(common, &rsi_dev->rx_thread,
- rsi_usb_rx_thread, "RX-Thread");
- if (status) {
- rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
- goto fail_thread;
- }
-
#ifdef CONFIG_RSI_DEBUGFS
/* In USB, one less than the MAX_DEBUGFS_ENTRIES entries is required */
adapter->num_debugfs_entries = (MAX_DEBUGFS_ENTRIES - 1);
@@ -563,12 +637,12 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
rsi_dbg(INIT_ZONE, "%s: Enabled the interface\n", __func__);
return 0;
-fail_thread:
- usb_free_urb(rsi_dev->rx_usb_urb[0]);
fail_rx:
kfree(rsi_dev->tx_buffer);
-fail_tx:
- kfree(common->rx_data_pkt);
+
+fail_eps:
+ kfree(rsi_dev);
+
return status;
}
@@ -662,7 +736,7 @@ static int rsi_probe(struct usb_interface *pfunction,
rsi_dbg(INIT_ZONE, "%s: Init function called\n", __func__);
- adapter = rsi_91x_init();
+ adapter = rsi_91x_init(dev_oper_mode);
if (!adapter) {
rsi_dbg(ERR_ZONE, "%s: Failed to init os intf ops\n",
__func__);
@@ -698,10 +772,16 @@ static int rsi_probe(struct usb_interface *pfunction,
rsi_dbg(INIT_ZONE, "%s: Device Init Done\n", __func__);
}
- status = rsi_rx_urb_submit(adapter);
+ status = rsi_rx_urb_submit(adapter, WLAN_EP);
if (status)
goto err1;
+ if (adapter->priv->coex_mode > 1) {
+ status = rsi_rx_urb_submit(adapter, BT_EP);
+ if (status)
+ goto err1;
+ }
+
return 0;
err1:
rsi_deinit_usb_interface(adapter);
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
index 465692b3c351..b1687d22f73f 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
@@ -30,31 +30,32 @@ void rsi_usb_rx_thread(struct rsi_common *common)
struct rsi_hw *adapter = common->priv;
struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
int status;
+ struct sk_buff *skb;
do {
rsi_wait_event(&dev->rx_thread.event, EVENT_WAIT_FOREVER);
+ rsi_reset_event(&dev->rx_thread.event);
- if (atomic_read(&dev->rx_thread.thread_done))
- goto out;
+ while (true) {
+ if (atomic_read(&dev->rx_thread.thread_done))
+ goto out;
- mutex_lock(&common->rx_lock);
- status = rsi_read_pkt(common, 0);
- if (status) {
- rsi_dbg(ERR_ZONE, "%s: Failed To read data", __func__);
- mutex_unlock(&common->rx_lock);
- return;
- }
- mutex_unlock(&common->rx_lock);
- rsi_reset_event(&dev->rx_thread.event);
- if (adapter->rx_urb_submit(adapter)) {
- rsi_dbg(ERR_ZONE,
- "%s: Failed in urb submission", __func__);
- return;
+ skb = skb_dequeue(&dev->rx_q);
+ if (!skb)
+ break;
+ status = rsi_read_pkt(common, skb->data, 0);
+ dev_kfree_skb(skb);
+ if (status) {
+ rsi_dbg(ERR_ZONE, "%s: Failed to read data\n",
+ __func__);
+ break;
+ }
}
} while (1);
out:
rsi_dbg(INFO_ZONE, "%s: Terminated thread\n", __func__);
+ skb_queue_purge(&dev->rx_q);
complete_and_exit(&dev->rx_thread.completion, 0);
}
diff --git a/drivers/net/wireless/rsi/rsi_coex.h b/drivers/net/wireless/rsi/rsi_coex.h
new file mode 100644
index 000000000000..0fdc67f37a56
--- /dev/null
+++ b/drivers/net/wireless/rsi/rsi_coex.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright (c) 2018 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __RSI_COEX_H__
+#define __RSI_COEX_H__
+
+#include "rsi_common.h"
+
+#ifdef CONFIG_RSI_COEX
+#define COMMON_CARD_READY_IND 0
+#define NUM_COEX_TX_QUEUES 5
+
+struct rsi_coex_ctrl_block {
+ struct rsi_common *priv;
+ struct sk_buff_head coex_tx_qs[NUM_COEX_TX_QUEUES];
+ struct rsi_thread coex_tx_thread;
+};
+
+int rsi_coex_attach(struct rsi_common *common);
+void rsi_coex_detach(struct rsi_common *common);
+int rsi_coex_send_pkt(void *priv, struct sk_buff *skb, u8 hal_queue);
+int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg);
+#endif /* CONFIG_RSI_COEX */
+#endif /* __RSI_COEX_H__ */
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index d07dbba61727..d9ff3b8be86e 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -62,6 +62,7 @@ static inline int rsi_create_kthread(struct rsi_common *common,
u8 *name)
{
init_completion(&thread->completion);
+ atomic_set(&thread->thread_done, 0);
thread->task = kthread_run(func_ptr, common, "%s", name);
if (IS_ERR(thread->task))
return (int)PTR_ERR(thread->task);
@@ -80,9 +81,9 @@ static inline int rsi_kill_thread(struct rsi_thread *handle)
void rsi_mac80211_detach(struct rsi_hw *hw);
u16 rsi_get_connected_channel(struct ieee80211_vif *vif);
-struct rsi_hw *rsi_91x_init(void);
+struct rsi_hw *rsi_91x_init(u16 oper_mode);
void rsi_91x_deinit(struct rsi_hw *adapter);
-int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len);
+int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len);
#ifdef CONFIG_PM
int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan);
#endif
diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h
index a09d36b6b765..786dccd0b732 100644
--- a/drivers/net/wireless/rsi/rsi_hal.h
+++ b/drivers/net/wireless/rsi/rsi_hal.h
@@ -17,6 +17,17 @@
#ifndef __RSI_HAL_H__
#define __RSI_HAL_H__
+/* Device Operating modes */
+#define DEV_OPMODE_WIFI_ALONE 1
+#define DEV_OPMODE_BT_ALONE 4
+#define DEV_OPMODE_BT_LE_ALONE 8
+#define DEV_OPMODE_BT_DUAL 12
+#define DEV_OPMODE_STA_BT 5
+#define DEV_OPMODE_STA_BT_LE 9
+#define DEV_OPMODE_STA_BT_DUAL 13
+#define DEV_OPMODE_AP_BT 6
+#define DEV_OPMODE_AP_BT_DUAL 14
+
#define FLASH_WRITE_CHUNK_SIZE (4 * 1024)
#define FLASH_SECTOR_SIZE (4 * 1024)
@@ -103,6 +114,7 @@
#define FW_FLASH_OFFSET 0x820
#define LMAC_VER_OFFSET (FW_FLASH_OFFSET + 0x200)
+#define MAX_DWORD_ALIGN_BYTES 64
struct bl_header {
__le32 flags;
@@ -145,8 +157,18 @@ struct rsi_data_desc {
u8 sta_id;
} __packed;
+struct rsi_bt_desc {
+ __le16 len_qno;
+ __le16 reserved1;
+ __le32 reserved2;
+ __le32 reserved3;
+ __le16 reserved4;
+ __le16 bt_pkt_type;
+} __packed;
+
int rsi_hal_device_init(struct rsi_hw *adapter);
int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb);
int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb);
+int rsi_send_bt_pkt(struct rsi_common *common, struct sk_buff *skb);
#endif
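Byte layout of the 16-byte struct rsi_bt_desc above, which is also why BT_RX_PKT_TYPE_OFST is 14 in rsi_91x_main.c:

/*  0-1    len_qno      length in bits 0-11, queue number in bits 12-14
 *  2-13   reserved
 *  14-15  bt_pkt_type
 */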
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index 8cab630af4a5..ef4fa323694b 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -20,6 +20,7 @@
#include <linux/string.h>
#include <linux/skbuff.h>
#include <net/mac80211.h>
+#include <net/rsi_91x.h>
struct rsi_sta {
struct ieee80211_sta *sta;
@@ -85,10 +86,6 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
#define MGMT_HW_Q 10
#define BEACON_HW_Q 11
-/* Queue information */
-#define RSI_COEX_Q 0x0
-#define RSI_WIFI_MGMT_Q 0x4
-#define RSI_WIFI_DATA_Q 0x5
#define IEEE80211_MGMT_FRAME 0x00
#define IEEE80211_CTL_FRAME 0x04
@@ -115,6 +112,7 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
#define RSI_WOW_NO_CONNECTION BIT(1)
#define RSI_DEV_9113 1
+#define RSI_MAX_RX_PKTS 64
struct version_info {
u16 major;
@@ -209,6 +207,7 @@ struct rsi_common {
struct rsi_hw *priv;
struct vif_priv vif_info[RSI_MAX_VIFS];
+ void *coex_cb;
bool mgmt_q_block;
struct version_info lmac_ver;
@@ -273,6 +272,8 @@ struct rsi_common {
u8 obm_ant_sel_val;
int tx_power;
u8 ant_in_use;
+ /* Mutex used for writing packet to bus */
+ struct mutex tx_bus_mutex;
bool hibernate_resume;
bool reinit_hw;
u8 wow_flags;
@@ -291,11 +292,8 @@ struct rsi_common {
bool p2p_enabled;
struct timer_list roc_timer;
struct ieee80211_vif *roc_vif;
-};
-enum host_intf {
- RSI_HOST_INTF_SDIO = 0,
- RSI_HOST_INTF_USB
+ void *bt_adapter;
};
struct eepromrw_info {
@@ -322,7 +320,7 @@ struct rsi_hw {
struct device *device;
u8 sc_nvifs;
- enum host_intf rsi_host_intf;
+ enum rsi_host_intf rsi_host_intf;
u16 block_size;
enum ps_state ps_state;
struct rsi_ps_info ps_info;
@@ -343,7 +341,6 @@ struct rsi_hw {
void *rsi_dev;
struct rsi_host_intf_ops *host_intf_ops;
int (*check_hw_queue_status)(struct rsi_hw *adapter, u8 q_num);
- int (*rx_urb_submit)(struct rsi_hw *adapter);
int (*determine_event_timeout)(struct rsi_hw *adapter);
};
@@ -367,4 +364,8 @@ struct rsi_host_intf_ops {
u8 *fw);
int (*reinit_device)(struct rsi_hw *adapter);
};
+
+enum rsi_host_intf rsi_get_host_intf(void *priv);
+void rsi_set_bt_context(void *priv, void *bt_context);
+
#endif
diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h
index 389094a3f91c..cf6567ae5bbe 100644
--- a/drivers/net/wireless/rsi/rsi_mgmt.h
+++ b/drivers/net/wireless/rsi/rsi_mgmt.h
@@ -57,12 +57,14 @@
#define WOW_PATTERN_SIZE 256
/* Receive Frame Types */
+#define RSI_RX_DESC_MSG_TYPE_OFFSET 2
#define TA_CONFIRM_TYPE 0x01
#define RX_DOT11_MGMT 0x02
#define TX_STATUS_IND 0x04
#define BEACON_EVENT_IND 0x08
#define PROBEREQ_CONFIRM 2
#define CARD_READY_IND 0x00
+#define SLEEP_NOTIFY_IND 0x06
#define RSI_DELETE_PEER 0x0
#define RSI_ADD_PEER 0x1
@@ -638,6 +640,7 @@ static inline void rsi_set_len_qno(__le16 *addr, u16 len, u8 qno)
*addr = cpu_to_le16(len | ((qno & 7) << 12));
}
+int rsi_handle_card_ready(struct rsi_common *common, u8 *msg);
int rsi_mgmt_pkt_recv(struct rsi_common *common, u8 *msg);
int rsi_set_vap_capabilities(struct rsi_common *common, enum opmode mode,
u8 *mac_addr, u8 vap_id, u8 vap_status);
diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h
index 49c549ba6682..ba649be284af 100644
--- a/drivers/net/wireless/rsi/rsi_sdio.h
+++ b/drivers/net/wireless/rsi/rsi_sdio.h
@@ -105,6 +105,11 @@ struct receive_info {
u32 buf_available_counter;
};
+struct rsi_sdio_rx_q {
+ u8 num_rx_pkts;
+ struct sk_buff_head head;
+};
+
struct rsi_91x_sdiodev {
struct sdio_func *pfunction;
struct task_struct *sdio_irq_task;
@@ -117,6 +122,8 @@ struct rsi_91x_sdiodev {
u16 tx_blk_size;
u8 write_fail;
bool buff_status_updated;
+ struct rsi_sdio_rx_q rx_q;
+ struct rsi_thread rx_thread;
};
void rsi_interrupt_handler(struct rsi_hw *adapter);
@@ -131,4 +138,5 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word);
void rsi_sdio_ack_intr(struct rsi_hw *adapter, u8 int_bit);
int rsi_sdio_determine_event_timeout(struct rsi_hw *adapter);
int rsi_sdio_check_buffer_status(struct rsi_hw *adapter, u8 q_num);
+void rsi_sdio_rx_thread(struct rsi_common *common);
#endif
diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h
index 891daea2d932..a88d59295a98 100644
--- a/drivers/net/wireless/rsi/rsi_usb.h
+++ b/drivers/net/wireless/rsi/rsi_usb.h
@@ -31,7 +31,7 @@
#define USB_VENDOR_REGISTER_WRITE 0x16
#define RSI_USB_TX_HEAD_ROOM 128
-#define MAX_RX_URBS 1
+#define MAX_RX_URBS 2
#define MAX_BULK_EP 8
#define WLAN_EP 1
#define BT_EP 2
@@ -39,19 +39,28 @@
#define RSI_USB_BUF_SIZE 4096
#define RSI_USB_CTRL_BUF_SIZE 0x04
+struct rx_usb_ctrl_block {
+ u8 *data;
+ struct urb *rx_urb;
+ struct sk_buff *rx_skb;
+ u8 ep_num;
+};
+
struct rsi_91x_usbdev {
+ void *priv;
struct rsi_thread rx_thread;
u8 endpoint;
struct usb_device *usbdev;
struct usb_interface *pfunction;
- struct urb *rx_usb_urb[MAX_RX_URBS];
+ struct rx_usb_ctrl_block rx_cb[MAX_RX_URBS];
u8 *tx_buffer;
- __le16 bulkin_size;
- u8 bulkin_endpoint_addr;
+ __le16 bulkin_size[MAX_BULK_EP];
+ u8 bulkin_endpoint_addr[MAX_BULK_EP];
__le16 bulkout_size[MAX_BULK_EP];
u8 bulkout_endpoint_addr[MAX_BULK_EP];
u32 tx_blk_size;
u8 write_fail;
+ struct sk_buff_head rx_q;
};
static inline int rsi_usb_check_queue_status(struct rsi_hw *adapter, u8 q_num)
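A small sketch of how the per-endpoint control blocks are indexed (the helper name is illustrative; WLAN_EP, BT_EP and rx_cb come from this header):

static inline struct rx_usb_ctrl_block *
example_get_rx_cb(struct rsi_91x_usbdev *dev, u8 ep_num)
{
	/* WLAN_EP (1) -> rx_cb[0], BT_EP (2) -> rx_cb[1], matching
	 * rsi_rx_urb_submit() in rsi_91x_usb.c */
	return &dev->rx_cb[ep_num - 1];
}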
diff --git a/drivers/net/wireless/st/Kconfig b/drivers/net/wireless/st/Kconfig
index 969b4f6e53b5..ff69a80a9633 100644
--- a/drivers/net/wireless/st/Kconfig
+++ b/drivers/net/wireless/st/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ST
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_ST
diff --git a/drivers/net/wireless/ti/Kconfig b/drivers/net/wireless/ti/Kconfig
index 92fbd6597e34..366c687445ad 100644
--- a/drivers/net/wireless/ti/Kconfig
+++ b/drivers/net/wireless/ti/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_TI
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_TI
diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index 037defd10b91..bd8641ad953b 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -122,8 +122,7 @@ static int wl1251_fetch_nvs(struct wl1251 *wl)
goto out;
}
- wl->nvs_len = fw->size;
- wl->nvs = kmemdup(fw->data, wl->nvs_len, GFP_KERNEL);
+ wl->nvs = kmemdup(fw->data, fw->size, GFP_KERNEL);
if (!wl->nvs) {
wl1251_error("could not allocate memory for the nvs file");
@@ -131,6 +130,8 @@ static int wl1251_fetch_nvs(struct wl1251 *wl)
goto out;
}
+ wl->nvs_len = fw->size;
+
ret = 0;
out:
@@ -202,13 +203,6 @@ static int wl1251_chip_wakeup(struct wl1251 *wl)
goto out;
}
- if (wl->nvs == NULL && !wl->use_eeprom) {
- /* No NVS from netlink, try to get it from the filesystem */
- ret = wl1251_fetch_nvs(wl);
- if (ret < 0)
- goto out;
- }
-
out:
return ret;
}
@@ -1446,6 +1440,61 @@ static int wl1251_read_eeprom_mac(struct wl1251 *wl)
return 0;
}
+#define NVS_OFF_MAC_LEN 0x19
+#define NVS_OFF_MAC_ADDR_LO 0x1a
+#define NVS_OFF_MAC_ADDR_HI 0x1b
+#define NVS_OFF_MAC_DATA 0x1c
+
+static int wl1251_check_nvs_mac(struct wl1251 *wl)
+{
+ if (wl->nvs_len < 0x24)
+ return -ENODATA;
+
+ /* length is 2 and data address is 0x546c (ANDed with 0xfffe) */
+ if (wl->nvs[NVS_OFF_MAC_LEN] != 2 ||
+ wl->nvs[NVS_OFF_MAC_ADDR_LO] != 0x6d ||
+ wl->nvs[NVS_OFF_MAC_ADDR_HI] != 0x54)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int wl1251_read_nvs_mac(struct wl1251 *wl)
+{
+ u8 mac[ETH_ALEN];
+ int i, ret;
+
+ ret = wl1251_check_nvs_mac(wl);
+ if (ret)
+ return ret;
+
+ /* MAC is stored in reverse order */
+ for (i = 0; i < ETH_ALEN; i++)
+ mac[i] = wl->nvs[NVS_OFF_MAC_DATA + ETH_ALEN - i - 1];
+
+ /* 00:00:20:07:03:09 is in example file wl1251-nvs.bin, so invalid */
+ if (ether_addr_equal_unaligned(mac, "\x00\x00\x20\x07\x03\x09"))
+ return -EINVAL;
+
+ memcpy(wl->mac_addr, mac, ETH_ALEN);
+ return 0;
+}
+
+static int wl1251_write_nvs_mac(struct wl1251 *wl)
+{
+ int i, ret;
+
+ ret = wl1251_check_nvs_mac(wl);
+ if (ret)
+ return ret;
+
+ /* MAC is stored in reverse order */
+ for (i = 0; i < ETH_ALEN; i++)
+ wl->nvs[NVS_OFF_MAC_DATA + i] = wl->mac_addr[ETH_ALEN - i - 1];
+
+ return 0;
+}
+
static int wl1251_register_hw(struct wl1251 *wl)
{
int ret;
@@ -1489,8 +1538,33 @@ int wl1251_init_ieee80211(struct wl1251 *wl)
wl->hw->queues = 4;
+ if (wl->nvs == NULL && !wl->use_eeprom) {
+ ret = wl1251_fetch_nvs(wl);
+ if (ret < 0)
+ goto out;
+ }
+
if (wl->use_eeprom)
- wl1251_read_eeprom_mac(wl);
+ ret = wl1251_read_eeprom_mac(wl);
+ else
+ ret = wl1251_read_nvs_mac(wl);
+
+ if (ret == 0 && !is_valid_ether_addr(wl->mac_addr))
+ ret = -EINVAL;
+
+ if (ret < 0) {
+ /*
+ * In case our MAC address is not correctly set,
+ * we use a random but Nokia MAC.
+ */
+ static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf};
+ memcpy(wl->mac_addr, nokia_oui, 3);
+ get_random_bytes(wl->mac_addr + 3, 3);
+ if (!wl->use_eeprom)
+ wl1251_write_nvs_mac(wl);
+ wl1251_warning("MAC address in eeprom or nvs data is not valid");
+ wl1251_warning("Setting random MAC address: %pM", wl->mac_addr);
+ }
ret = wl1251_register_hw(wl);
if (ret)
@@ -1511,7 +1585,6 @@ struct ieee80211_hw *wl1251_alloc_hw(void)
struct ieee80211_hw *hw;
struct wl1251 *wl;
int i;
- static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf};
hw = ieee80211_alloc_hw(sizeof(*wl), &wl1251_ops);
if (!hw) {
@@ -1561,13 +1634,6 @@ struct ieee80211_hw *wl1251_alloc_hw(void)
INIT_WORK(&wl->irq_work, wl1251_irq_work);
INIT_WORK(&wl->tx_work, wl1251_tx_work);
- /*
- * In case our MAC address is not correctly set,
- * we use a random but Nokia MAC.
- */
- memcpy(wl->mac_addr, nokia_oui, 3);
- get_random_bytes(wl->mac_addr + 3, 3);
-
wl->state = WL1251_STATE_OFF;
mutex_init(&wl->mutex);
spin_lock_init(&wl->wl_lock);
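For orientation, the NVS offsets used by the new wl1251 MAC helpers map onto the image like this (constants and byte values as checked in wl1251_check_nvs_mac()):

/* nvs[0x19] = 0x02          entry length        (NVS_OFF_MAC_LEN)
 * nvs[0x1a] = 0x6d          data address, low   \ 0x546d; ANDed with
 * nvs[0x1b] = 0x54          data address, high  / 0xfffe gives 0x546c
 * nvs[0x1c]..nvs[0x21]      MAC address, stored in reverse byte order
 */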
diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c
index de2fa6705574..12ed14ebc307 100644
--- a/drivers/net/wireless/ti/wl1251/tx.c
+++ b/drivers/net/wireless/ti/wl1251/tx.c
@@ -221,10 +221,8 @@ static int wl1251_tx_send_packet(struct wl1251 *wl, struct sk_buff *skb,
struct sk_buff *newskb = skb_copy_expand(skb, 0, 3,
GFP_KERNEL);
- if (unlikely(newskb == NULL)) {
- wl1251_error("Can't allocate skb!");
+ if (unlikely(newskb == NULL))
return -EINVAL;
- }
tx_hdr = (struct tx_double_buffer_desc *) newskb->data;
diff --git a/drivers/net/wireless/zydas/Kconfig b/drivers/net/wireless/zydas/Kconfig
index a58c0f65e376..b327f86f05be 100644
--- a/drivers/net/wireless/zydas/Kconfig
+++ b/drivers/net/wireless/zydas/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ZYDAS
If you have a wireless card belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
- kernel: saying N will just cause the configurator to skip all
- the questions about cards. If you say Y, you will be asked for
+ kernel: saying N will just cause the configurator to skip all the
+ questions about these cards. If you say Y, you will be asked for
your specific card in the following questions.
if WLAN_VENDOR_ZYDAS
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
index b785742bfd9e..b01b44a5d16e 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
@@ -509,7 +509,6 @@ void zd_mac_tx_failed(struct urb *urb)
int found = 0;
int i, position = 0;
- q = &mac->ack_wait_queue;
spin_lock_irqsave(&q->lock, flags);
skb_queue_walk(q, skb) {
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index b1cf7c6f407a..ef5887037b22 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -419,7 +419,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
BUG();
}
-void xenvif_rx_skb(struct xenvif_queue *queue)
+static void xenvif_rx_skb(struct xenvif_queue *queue)
{
struct xenvif_pkt_state pkt;
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index a56d3eab35dd..e1aef253601e 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -224,7 +224,7 @@ static void xenvif_debugfs_addif(struct xenvif *vif)
snprintf(filename, sizeof(filename), "io_ring_q%d", i);
pfile = debugfs_create_file(filename,
- S_IRUSR | S_IWUSR,
+ 0600,
vif->xenvif_dbg_root,
&vif->queues[i],
&xenvif_dbg_io_ring_ops_fops);
@@ -235,7 +235,7 @@ static void xenvif_debugfs_addif(struct xenvif *vif)
if (vif->ctrl_irq) {
pfile = debugfs_create_file("ctrl",
- S_IRUSR,
+ 0400,
vif->xenvif_dbg_root,
vif,
&xenvif_dbg_ctrl_ops_fops);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 8328d395e332..4dd0668003e7 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2005,7 +2005,10 @@ static void netback_changed(struct xenbus_device *dev,
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
+ break;
+
case XenbusStateUnknown:
+ wake_up_all(&module_unload_q);
break;
case XenbusStateInitWait:
@@ -2110,9 +2113,9 @@ static ssize_t store_rxbuf(struct device *dev,
return len;
}
-static DEVICE_ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
-static DEVICE_ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
-static DEVICE_ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL);
+static DEVICE_ATTR(rxbuf_min, 0644, show_rxbuf, store_rxbuf);
+static DEVICE_ATTR(rxbuf_max, 0644, show_rxbuf, store_rxbuf);
+static DEVICE_ATTR(rxbuf_cur, 0444, show_rxbuf, NULL);
static struct attribute *xennet_dev_attrs[] = {
&dev_attr_rxbuf_min.attr,
@@ -2136,7 +2139,9 @@ static int xennet_remove(struct xenbus_device *dev)
xenbus_switch_state(dev, XenbusStateClosing);
wait_event(module_unload_q,
xenbus_read_driver_state(dev->otherend) ==
- XenbusStateClosing);
+ XenbusStateClosing ||
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateUnknown);
xenbus_switch_state(dev, XenbusStateClosed);
wait_event(module_unload_q,
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 345acca576b3..1bd7b3734751 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -278,8 +278,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
disk->queue = q;
disk->flags = GENHD_FL_EXT_DEVT;
nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
- set_capacity(disk, 0);
- device_add_disk(dev, disk);
if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
return -ENOMEM;
@@ -292,6 +290,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
}
set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
+ device_add_disk(dev, disk);
revalidate_disk(disk);
return 0;
}
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 2ef544f10ec8..4b95ac513de2 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1545,8 +1545,6 @@ static int btt_blk_init(struct btt *btt)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
btt->btt_queue->queuedata = btt;
- set_capacity(btt->btt_disk, 0);
- device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
if (btt_meta_size(btt)) {
int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
@@ -1558,6 +1556,7 @@ static int btt_blk_init(struct btt *btt)
}
}
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+ device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
revalidate_disk(btt->btt_disk);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index f5c4e8c6e29d..2f4d18752c97 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -304,7 +304,7 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = {
struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
struct nd_namespace_common *ndns)
{
- struct device *dev = &nd_pfn->dev;
+ struct device *dev;
if (!nd_pfn)
return NULL;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 10041ac4032c..06f8dcc52ca6 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -335,8 +335,7 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "unable to guarantee persistence of writes\n");
fua = 0;
}
- wbc = nvdimm_has_cache(nd_region) &&
- !test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
+ wbc = nvdimm_has_cache(nd_region);
if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(&ndns->dev))) {
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e6d01911e092..1593e1806b16 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -532,11 +532,13 @@ static ssize_t persistence_domain_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nd_region *nd_region = to_nd_region(dev);
- unsigned long flags = nd_region->flags;
- return sprintf(buf, "%s%s\n",
- flags & BIT(ND_REGION_PERSIST_CACHE) ? "cpu_cache " : "",
- flags & BIT(ND_REGION_PERSIST_MEMCTRL) ? "memory_controller " : "");
+ if (test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags))
+ return sprintf(buf, "cpu_cache\n");
+ else if (test_bit(ND_REGION_PERSIST_MEMCTRL, &nd_region->flags))
+ return sprintf(buf, "memory_controller\n");
+ else
+ return sprintf(buf, "\n");
}
static DEVICE_ATTR_RO(persistence_domain);
@@ -593,6 +595,13 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
return 0;
}
+ if (a == &dev_attr_persistence_domain.attr) {
+ if ((nd_region->flags & (BIT(ND_REGION_PERSIST_CACHE)
+ | BIT(ND_REGION_PERSIST_MEMCTRL))) == 0)
+ return 0;
+ return a->mode;
+ }
+
if (a != &dev_attr_set_cookie.attr
&& a != &dev_attr_available_size.attr)
return a->mode;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f431c32774f3..7aeca5db7916 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -120,8 +120,12 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
int ret;
ret = nvme_reset_ctrl(ctrl);
- if (!ret)
+ if (!ret) {
flush_work(&ctrl->reset_work);
+ if (ctrl->state != NVME_CTRL_LIVE)
+ ret = -ENETRESET;
+ }
+
return ret;
}
EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
@@ -265,7 +269,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (new_state) {
case NVME_CTRL_ADMIN_ONLY:
switch (old_state) {
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -276,7 +280,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -294,9 +298,9 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
break;
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
switch (old_state) {
- case NVME_CTRL_LIVE:
+ case NVME_CTRL_NEW:
case NVME_CTRL_RESETTING:
changed = true;
/* FALLTHRU */
@@ -309,7 +313,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_LIVE:
case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -518,9 +522,11 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
- range[n].cattr = cpu_to_le32(0);
- range[n].nlb = cpu_to_le32(nlb);
- range[n].slba = cpu_to_le64(slba);
+ if (n < segments) {
+ range[n].cattr = cpu_to_le32(0);
+ range[n].nlb = cpu_to_le32(nlb);
+ range[n].slba = cpu_to_le64(slba);
+ }
n++;
}
@@ -794,13 +800,9 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
{
- struct nvme_command c;
struct request *rq;
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_keep_alive;
-
- rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
+ rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
NVME_QID_ANY);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -832,6 +834,8 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
return;
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+ memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
+ ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
@@ -1117,14 +1121,19 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
static void nvme_update_formats(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
+ struct nvme_ns *ns, *next;
+ LIST_HEAD(rm_list);
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- if (ns->disk && nvme_revalidate_disk(ns->disk))
- nvme_ns_remove(ns);
+ if (ns->disk && nvme_revalidate_disk(ns->disk))
+ list_move_tail(&ns->list, &rm_list);
}
mutex_unlock(&ctrl->namespaces_mutex);
+
+ list_for_each_entry_safe(ns, next, &rm_list, list)
+ nvme_ns_remove(ns);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -2687,7 +2696,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
[NVME_CTRL_LIVE] = "live",
[NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
- [NVME_CTRL_RECONNECTING]= "reconnecting",
+ [NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
[NVME_CTRL_DEAD] = "dead",
};
@@ -2835,7 +2844,7 @@ out:
}
static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
- struct nvme_id_ns *id, bool *new)
+ struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
bool is_shared = id->nmic & (1 << 0);
@@ -2851,8 +2860,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
ret = PTR_ERR(head);
goto out_unlock;
}
-
- *new = true;
} else {
struct nvme_ns_ids ids;
@@ -2864,8 +2871,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
ret = -EINVAL;
goto out_unlock;
}
-
- *new = false;
}
list_add_tail(&ns->siblings, &head->list);
@@ -2936,7 +2941,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
struct nvme_id_ns *id;
char disk_name[DISK_NAME_LEN];
int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
- bool new = true;
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
@@ -2962,7 +2966,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (id->ncap == 0)
goto out_free_id;
- if (nvme_init_ns_head(ns, nsid, id, &new))
+ if (nvme_init_ns_head(ns, nsid, id))
goto out_free_id;
nvme_setup_streams_ns(ctrl, ns);
@@ -3028,9 +3032,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
ns->disk->disk_name);
- if (new)
- nvme_mpath_add_disk(ns->head);
- nvme_mpath_add_disk_links(ns);
+ nvme_mpath_add_disk(ns->head);
return;
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
@@ -3050,7 +3052,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
return;
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
- nvme_mpath_remove_disk_links(ns);
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_id_attr_group);
if (ns->ndev)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5dd4ceefed8f..8f0f34d06d46 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
*/
int nvmf_register_transport(struct nvmf_transport_ops *ops)
{
- if (!ops->create_ctrl || !ops->module)
+ if (!ops->create_ctrl)
return -EINVAL;
down_write(&nvmf_transports_rwsem);
@@ -650,6 +650,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -EINVAL;
goto out;
}
+ if (opts->discovery_nqn) {
+ pr_debug("Ignoring nr_io_queues value for discovery controller\n");
+ break;
+ }
+
opts->nr_io_queues = min_t(unsigned int,
num_online_cpus(), token);
break;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 25b19f722f5b..a3145d90c1d2 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -171,13 +171,14 @@ static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
cmd->common.opcode != nvme_fabrics_command ||
cmd->fabrics.fctype != nvme_fabrics_type_connect) {
/*
- * Reconnecting state means transport disruption, which can take
- * a long time and even might fail permanently, fail fast to
- * give upper layers a chance to failover.
+ * Connecting state means transport disruption or initial
+ * establishment, which can take a long time and even might
+ * fail permanently, fail fast to give upper layers a chance
+ * to failover.
* Deleting state means that the ctrl will never accept commands
* again, fail it permanently.
*/
- if (ctrl->state == NVME_CTRL_RECONNECTING ||
+ if (ctrl->state == NVME_CTRL_CONNECTING ||
ctrl->state == NVME_CTRL_DELETING) {
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b856d7c919d2..1dc1387b7134 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -55,9 +55,7 @@ struct nvme_fc_queue {
enum nvme_fcop_flags {
FCOP_FLAGS_TERMIO = (1 << 0),
- FCOP_FLAGS_RELEASED = (1 << 1),
- FCOP_FLAGS_COMPLETE = (1 << 2),
- FCOP_FLAGS_AEN = (1 << 3),
+ FCOP_FLAGS_AEN = (1 << 1),
};
struct nvmefc_ls_req_op {
@@ -532,7 +530,7 @@ nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
switch (ctrl->ctrl.state) {
case NVME_CTRL_NEW:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
/*
* As all reconnects were suppressed, schedule a
* connect.
@@ -777,7 +775,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
}
break;
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
/*
* The association has already been terminated and the
* controller is attempting reconnects. No need to do anything
@@ -1208,7 +1206,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
- assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
+ assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
/* Linux supports only Dynamic controllers */
assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
@@ -1323,7 +1321,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum);
- conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize);
+ conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);
lsop->queue = queue;
lsreq->rqstaddr = conn_rqst;
@@ -1470,7 +1468,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
/* *********************** NVME Ctrl Routines **************************** */
-static void __nvme_fc_final_op_cleanup(struct request *rq);
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
static int
@@ -1512,13 +1509,19 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
static int
__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
{
- int state;
+ unsigned long flags;
+ int opstate;
+
+ spin_lock_irqsave(&ctrl->lock, flags);
+ opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+ if (opstate != FCPOP_STATE_ACTIVE)
+ atomic_set(&op->state, opstate);
+ else if (ctrl->flags & FCCTRL_TERMIO)
+ ctrl->iocnt++;
+ spin_unlock_irqrestore(&ctrl->lock, flags);
- state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
- if (state != FCPOP_STATE_ACTIVE) {
- atomic_set(&op->state, state);
+ if (opstate != FCPOP_STATE_ACTIVE)
return -ECANCELED;
- }
ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
&ctrl->rport->remoteport,
@@ -1532,60 +1535,26 @@ static void
nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
{
struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
- unsigned long flags;
- int i, ret;
-
- for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
- if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
- continue;
-
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO) {
- ctrl->iocnt++;
- aen_op->flags |= FCOP_FLAGS_TERMIO;
- }
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- ret = __nvme_fc_abort_op(ctrl, aen_op);
- if (ret) {
- /*
- * if __nvme_fc_abort_op failed the io wasn't
- * active. Thus this call path is running in
- * parallel to the io complete. Treat as non-error.
- */
+ int i;
- /* back out the flags/counters */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
- aen_op->flags &= ~FCOP_FLAGS_TERMIO;
- spin_unlock_irqrestore(&ctrl->lock, flags);
- return;
- }
- }
+ for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
+ __nvme_fc_abort_op(ctrl, aen_op);
}
-static inline int
+static inline void
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
- struct nvme_fc_fcp_op *op)
+ struct nvme_fc_fcp_op *op, int opstate)
{
unsigned long flags;
- bool complete_rq = false;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+ if (opstate == FCPOP_STATE_ABORTED) {
+ spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->flags & FCCTRL_TERMIO) {
if (!--ctrl->iocnt)
wake_up(&ctrl->ioabort_wait);
}
+ spin_unlock_irqrestore(&ctrl->lock, flags);
}
- if (op->flags & FCOP_FLAGS_RELEASED)
- complete_rq = true;
- else
- op->flags |= FCOP_FLAGS_COMPLETE;
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- return complete_rq;
}
static void
@@ -1601,6 +1570,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
union nvme_result result;
bool terminate_assoc = true;
+ int opstate;
/*
* WARNING:
@@ -1639,11 +1609,12 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
* association to be terminated.
*/
+ opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+
fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
sizeof(op->rsp_iu), DMA_FROM_DEVICE);
- if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
- op->flags & FCOP_FLAGS_TERMIO)
+ if (opstate == FCPOP_STATE_ABORTED)
status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
else if (freq->status)
status = cpu_to_le16(NVME_SC_INTERNAL << 1);
@@ -1708,7 +1679,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
done:
if (op->flags & FCOP_FLAGS_AEN) {
nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
- __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+ __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
atomic_set(&op->state, FCPOP_STATE_IDLE);
op->flags = FCOP_FLAGS_AEN; /* clear other flags */
nvme_fc_ctrl_put(ctrl);
@@ -1722,13 +1693,11 @@ done:
if (status &&
(blk_queue_dying(rq->q) ||
ctrl->ctrl.state == NVME_CTRL_NEW ||
- ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
+ ctrl->ctrl.state == NVME_CTRL_CONNECTING))
status |= cpu_to_le16(NVME_SC_DNR << 1);
- if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
- __nvme_fc_final_op_cleanup(rq);
- else
- nvme_end_request(rq, status, result);
+ __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+ nvme_end_request(rq, status, result);
check_error:
if (terminate_assoc)
@@ -2415,46 +2384,16 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
}
static void
-__nvme_fc_final_op_cleanup(struct request *rq)
+nvme_fc_complete_rq(struct request *rq)
{
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
struct nvme_fc_ctrl *ctrl = op->ctrl;
atomic_set(&op->state, FCPOP_STATE_IDLE);
- op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
- FCOP_FLAGS_COMPLETE);
nvme_fc_unmap_data(ctrl, rq, op);
nvme_complete_rq(rq);
nvme_fc_ctrl_put(ctrl);
-
-}
-
-static void
-nvme_fc_complete_rq(struct request *rq)
-{
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_ctrl *ctrl = op->ctrl;
- unsigned long flags;
- bool completed = false;
-
- /*
- * the core layer, on controller resets after calling
- * nvme_shutdown_ctrl(), calls complete_rq without our
- * calling blk_mq_complete_request(), thus there may still
- * be live i/o outstanding with the LLDD. Means transport has
- * to track complete calls vs fcpio_done calls to know what
- * path to take on completes and dones.
- */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (op->flags & FCOP_FLAGS_COMPLETE)
- completed = true;
- else
- op->flags |= FCOP_FLAGS_RELEASED;
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- if (completed)
- __nvme_fc_final_op_cleanup(rq);
}
/*
@@ -2476,35 +2415,11 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
struct nvme_ctrl *nctrl = data;
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
- unsigned long flags;
- int status;
if (!blk_mq_request_started(req))
return;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO) {
- ctrl->iocnt++;
- op->flags |= FCOP_FLAGS_TERMIO;
- }
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- status = __nvme_fc_abort_op(ctrl, op);
- if (status) {
- /*
- * if __nvme_fc_abort_op failed the io wasn't
- * active. Thus this call path is running in
- * parallel to the io complete. Treat as non-error.
- */
-
- /* back out the flags/counters */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
- op->flags &= ~FCOP_FLAGS_TERMIO;
- spin_unlock_irqrestore(&ctrl->lock, flags);
- return;
- }
+ __nvme_fc_abort_op(ctrl, op);
}
@@ -2566,11 +2481,11 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
goto out_free_tag_set;
}
- ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+ ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_cleanup_blk_queue;
- ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+ ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_delete_hw_queues;
@@ -2617,11 +2532,11 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_free_io_queues;
- ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+ ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_free_io_queues;
- ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+ ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_delete_hw_queues;
@@ -2717,13 +2632,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
nvme_fc_init_queue(ctrl, 0);
ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
- NVME_AQ_BLK_MQ_DEPTH);
+ NVME_AQ_DEPTH);
if (ret)
goto out_free_queue;
ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
- NVME_AQ_BLK_MQ_DEPTH,
- (NVME_AQ_BLK_MQ_DEPTH / 4));
+ NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
if (ret)
goto out_delete_hw_queue;
@@ -2751,7 +2665,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
}
ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
+ min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (ret)
@@ -2784,6 +2698,14 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
opts->queue_size = ctrl->ctrl.maxcmd;
}
+ if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
+ /* warn if sqsize is lower than queue_size */
+ dev_warn(ctrl->ctrl.device,
+ "queue_size %zu > ctrl sqsize %u, clamping down\n",
+ opts->queue_size, ctrl->ctrl.sqsize + 1);
+ opts->queue_size = ctrl->ctrl.sqsize + 1;
+ }
+
ret = nvme_fc_init_aen_ops(ctrl);
if (ret)
goto out_term_aen_ops;
@@ -2943,7 +2865,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
bool recon = true;
- if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
return;
if (portptr->port_state == FC_OBJSTATE_ONLINE)
@@ -2991,10 +2913,10 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
/* will block while waiting for io to terminate */
nvme_fc_delete_association(ctrl);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
- "to RECONNECTING\n", ctrl->cnum);
+ "to CONNECTING\n", ctrl->cnum);
return;
}
@@ -3195,7 +3117,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
* transport errors (frame drop, LS failure) inherently must kill
* the association. The transport is coded so that any command used
* to create the association (prior to a LIVE state transition
- * while NEW or RECONNECTING) will fail if it completes in error or
+ * while NEW or CONNECTING) will fail if it completes in error or
* times out.
*
* As such: as the connect request was most likely due to a
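A minimal sketch of the 0's-based convention the sqsize hunks above enforce, with made-up values: SQSIZE on the wire and CAP.MQES both encode "entries - 1", while opts->queue_size counts entries, hence the "qsize - 1" on connect and the clamp without "+ 1".

#include <stdio.h>

int main(void)
{
        unsigned int queue_size = 128;          /* 1-based entry count   */
        unsigned int sqsize = queue_size - 1;   /* 0-based wire encoding */
        unsigned int mqes = 63;                 /* CAP.MQES, 0-based too */

        if (sqsize > mqes)
                sqsize = mqes;                  /* clamp to controller max */
        printf("sqsize=%u -> %u usable entries\n", sqsize, sqsize + 1);
        return 0;
}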
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3b211d9e58b8..060f69e03427 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -198,30 +198,16 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- device_add_disk(&head->subsys->dev, head->disk);
- if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
- &nvme_ns_id_attr_group))
- pr_warn("%s: failed to create sysfs group for identification\n",
- head->disk->disk_name);
-}
-
-void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
- struct kobject *slave_disk_kobj, *holder_disk_kobj;
-
- if (!ns->head->disk)
- return;
-
- slave_disk_kobj = &disk_to_dev(ns->disk)->kobj;
- if (sysfs_create_link(ns->head->disk->slave_dir, slave_disk_kobj,
- kobject_name(slave_disk_kobj)))
- return;
- holder_disk_kobj = &disk_to_dev(ns->head->disk)->kobj;
- if (sysfs_create_link(ns->disk->part0.holder_dir, holder_disk_kobj,
- kobject_name(holder_disk_kobj)))
- sysfs_remove_link(ns->head->disk->slave_dir,
- kobject_name(slave_disk_kobj));
+ mutex_lock(&head->subsys->lock);
+ if (!(head->disk->flags & GENHD_FL_UP)) {
+ device_add_disk(&head->subsys->dev, head->disk);
+ if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
+ &nvme_ns_id_attr_group))
+ pr_warn("%s: failed to create sysfs group for identification\n",
+ head->disk->disk_name);
+ }
+ mutex_unlock(&head->subsys->lock);
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -238,14 +224,3 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
blk_cleanup_queue(head->disk->queue);
put_disk(head->disk);
}
-
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
- if (!ns->head->disk)
- return;
-
- sysfs_remove_link(ns->disk->part0.holder_dir,
- kobject_name(&disk_to_dev(ns->head->disk)->kobj));
- sysfs_remove_link(ns->head->disk->slave_dir,
- kobject_name(&disk_to_dev(ns->disk)->kobj));
-}
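A minimal sketch of the register-once pattern the reworked nvme_mpath_add_disk() relies on, with hypothetical types standing in for nvme_ns_head and device_add_disk():

#include <pthread.h>
#include <stdbool.h>

struct shared_head {
        pthread_mutex_t lock;
        bool up;                        /* plays the role of GENHD_FL_UP */
};

/* safe to call from every namespace that attaches to the head */
static void register_disk_once(struct shared_head *h,
                               void (*add_disk)(struct shared_head *))
{
        pthread_mutex_lock(&h->lock);
        if (!h->up) {
                add_disk(h);            /* device_add_disk() equivalent */
                h->up = true;
        }
        pthread_mutex_unlock(&h->lock);
}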
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8e4550fa08f8..d733b14ede9d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -123,7 +123,7 @@ enum nvme_ctrl_state {
NVME_CTRL_LIVE,
NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING,
- NVME_CTRL_RECONNECTING,
+ NVME_CTRL_CONNECTING,
NVME_CTRL_DELETING,
NVME_CTRL_DEAD,
};
@@ -183,6 +183,7 @@ struct nvme_ctrl {
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
+ struct nvme_command ka_cmd;
struct work_struct fw_act_work;
/* Power saving configuration */
@@ -409,9 +410,7 @@ bool nvme_req_needs_failover(struct request *req, blk_status_t error);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns_head *head);
-void nvme_mpath_add_disk_links(struct nvme_ns *ns);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns);
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
@@ -453,12 +452,6 @@ static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
-static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
-}
-static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
-}
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6fe7af00a1f4..b6f43b738f03 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1141,7 +1141,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
/* If there is a reset/reinit ongoing, we shouldn't reset again. */
switch (dev->ctrl.state) {
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
return false;
default:
break;
@@ -1153,12 +1153,6 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
if (!(csts & NVME_CSTS_CFS) && !nssro)
return false;
- /* If PCI error recovery process is happening, we cannot reset or
- * the recovery mechanism will surely fail.
- */
- if (pci_channel_offline(to_pci_dev(dev->dev)))
- return false;
-
return true;
}
@@ -1189,6 +1183,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_command cmd;
u32 csts = readl(dev->bar + NVME_REG_CSTS);
+ /* If PCI error recovery process is happening, we cannot reset or
+ * the recovery mechanism will surely fail.
+ */
+ mb();
+ if (pci_channel_offline(to_pci_dev(dev->dev)))
+ return BLK_EH_RESET_TIMER;
+
/*
* Reset immediately if the controller is failed
*/
@@ -1215,13 +1216,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
* cancellation error. All outstanding requests are completed on
* shutdown, so we return BLK_EH_HANDLED.
*/
- if (dev->ctrl.state == NVME_CTRL_RESETTING) {
+ switch (dev->ctrl.state) {
+ case NVME_CTRL_CONNECTING:
+ case NVME_CTRL_RESETTING:
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
+ default:
+ break;
}
/*
@@ -1364,18 +1369,14 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
- if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
- dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
- nvmeq->sq_cmds_io = dev->cmb + offset;
- } else {
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
- if (!nvmeq->sq_cmds)
- return -ENOMEM;
- }
+ /* CMB SQEs will be mapped before creation */
+ if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
+ return 0;
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ if (!nvmeq->sq_cmds)
+ return -ENOMEM;
return 0;
}
@@ -1449,10 +1450,17 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
struct nvme_dev *dev = nvmeq->dev;
int result;
+ if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
+ dev->ctrl.page_size);
+ nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
+ nvmeq->sq_cmds_io = dev->cmb + offset;
+ }
+
nvmeq->cq_vector = qid - 1;
result = adapter_alloc_cq(dev, qid, nvmeq);
if (result < 0)
- return result;
+ goto release_vector;
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
@@ -1466,9 +1474,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
return result;
release_sq:
+ dev->online_queues--;
adapter_delete_sq(dev, qid);
release_cq:
adapter_delete_cq(dev, qid);
+ release_vector:
+ nvmeq->cq_vector = -1;
return result;
}
@@ -1903,7 +1914,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
int result, nr_io_queues;
unsigned long size;
- nr_io_queues = num_present_cpus();
+ nr_io_queues = num_possible_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
return result;
@@ -2288,12 +2299,12 @@ static void nvme_reset_work(struct work_struct *work)
nvme_dev_disable(dev, false);
/*
- * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+ * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
* initializing procedure here.
*/
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
dev_warn(dev->ctrl.device,
- "failed to mark controller RECONNECTING\n");
+ "failed to mark controller CONNECTING\n");
goto out;
}
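A minimal sketch of the timeout policy above, with hypothetical names: while the controller is initializing (RESETTING, and now CONNECTING as well), a timed-out command disables the controller and is reported handled instead of escalating.

enum ctrl_state_sketch { S_LIVE, S_RESETTING, S_CONNECTING };
enum eh_ret { EH_HANDLED, EH_CONTINUE };

static enum eh_ret on_timeout_sketch(enum ctrl_state_sketch s)
{
        switch (s) {
        case S_CONNECTING:
        case S_RESETTING:
                /* disable the controller, mark the request cancelled */
                return EH_HANDLED;
        default:
                return EH_CONTINUE;     /* fall through to the abort path */
        }
}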
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2bc059f7d73c..4d84a73ee12d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -887,7 +887,7 @@ free_ctrl:
static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
{
/* If we are resetting/deleting then do nothing */
- if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
return;
@@ -973,7 +973,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_start_queues(&ctrl->ctrl);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure should never happen */
WARN_ON_ONCE(1);
return;
@@ -1051,7 +1051,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
- if (!blk_rq_bytes(rq))
+ if (!blk_rq_payload_bytes(rq))
return;
if (req->mr) {
@@ -1166,7 +1166,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
c->common.flags |= NVME_CMD_SGL_METABUF;
- if (!blk_rq_bytes(rq))
+ if (!blk_rq_payload_bytes(rq))
return nvme_rdma_set_sg_null(c);
req->sg_table.sgl = req->first_sgl;
@@ -1756,7 +1756,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure should never happen */
WARN_ON_ONCE(1);
return;
@@ -1784,11 +1784,8 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
out_fail:
- dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl, true);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ ++ctrl->ctrl.nr_reconnects;
+ nvme_rdma_reconnect_or_remove(ctrl);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1942,6 +1939,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (!ctrl->queues)
goto out_uninit_ctrl;
+ changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
+ WARN_ON_ONCE(!changed);
+
ret = nvme_rdma_configure_admin_queue(ctrl, true);
if (ret)
goto out_kfree_queues;
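A minimal sketch of why payload bytes replace total bytes in the hunks above, with hypothetical fields: a Write Zeroes covering a large LBA range has a big range but no data payload, and only a real payload needs an SGL mapping.

struct rq_sketch {
        unsigned long range_bytes;      /* what blk_rq_bytes() reports         */
        unsigned long payload_bytes;    /* what blk_rq_payload_bytes() reports */
};

static int needs_data_sgl(const struct rq_sketch *rq)
{
        return rq->payload_bytes != 0;  /* map an SGL only for real data */
}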
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 0bd737117a80..a78029e4e5f4 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -520,9 +520,12 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
goto fail;
}
- /* either variant of SGLs is fine, as we don't support metadata */
- if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
- (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
+ /*
+	 * For fabrics, the PSDT field shall describe a metadata pointer (MPTR)
+	 * that contains an address of a single contiguous physical buffer that
+	 * is byte aligned.
+ */
+ if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto fail;
}
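A minimal sketch of the tightened PSDT check; the two-bit mask values below mirror the usual encoding but are stated here as an assumption, not quoted from the spec.

#include <stdint.h>

#define SGL_METABUF     0x40    /* MPTR = contiguous physical buffer */
#define SGL_METASEG     0x80    /* MPTR = SGL segment                */
#define SGL_ALL         (SGL_METABUF | SGL_METASEG)

static int psdt_ok_for_fabrics(uint8_t flags)
{
        /* only the contiguous-buffer variant is accepted now */
        return (flags & SGL_ALL) == SGL_METABUF;
}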
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 0a4372a016f2..28bbdff4a88b 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -105,10 +105,13 @@ static void nvmet_execute_flush(struct nvmet_req *req)
static u16 nvmet_discard_range(struct nvmet_ns *ns,
struct nvme_dsm_range *range, struct bio **bio)
{
- if (__blkdev_issue_discard(ns->bdev,
+ int ret;
+
+ ret = __blkdev_issue_discard(ns->bdev,
le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
- GFP_KERNEL, 0, bio))
+ GFP_KERNEL, 0, bio);
+ if (ret && ret != -EOPNOTSUPP)
return NVME_SC_INTERNAL | NVME_SC_DNR;
return 0;
}
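A minimal sketch of the error mapping added above, return values hypothetical: only a real failure becomes an NVMe internal error, while "not supported" is treated as a successful no-op.

#include <errno.h>

static int discard_status_sketch(int ret)
{
        if (ret && ret != -EOPNOTSUPP)
                return -EIO;    /* maps to NVME_SC_INTERNAL | NVME_SC_DNR */
        return 0;
}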
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 7991ec3a17db..861d1509b22b 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -184,7 +184,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
- if (blk_rq_bytes(req)) {
+ if (blk_rq_payload_bytes(req)) {
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
@@ -193,7 +193,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
- iod->req.transfer_len = blk_rq_bytes(req);
+ iod->req.transfer_len = blk_rq_payload_bytes(req);
}
blk_mq_start_request(req);
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 36ed84e26d9c..f46828e3b082 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -977,11 +977,11 @@ static int of_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
return 0;
}
-static void *
+static const void *
of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
const struct device *dev)
{
- return (void *)of_device_get_match_data(dev);
+ return of_device_get_match_data(dev);
}
const struct fwnode_operations of_fwnode_ops = {
diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c
index 2d87bc1adf38..0c0910709435 100644
--- a/drivers/opp/cpu.c
+++ b/drivers/opp/cpu.c
@@ -55,7 +55,7 @@ int dev_pm_opp_init_cpufreq_table(struct device *dev,
if (max_opps <= 0)
return max_opps ? max_opps : -ENODATA;
- freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_ATOMIC);
+ freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_KERNEL);
if (!freq_table)
return -ENOMEM;
diff --git a/drivers/pci/dwc/pcie-designware-host.c b/drivers/pci/dwc/pcie-designware-host.c
index 8de2d5c69b1d..dc9303abda42 100644
--- a/drivers/pci/dwc/pcie-designware-host.c
+++ b/drivers/pci/dwc/pcie-designware-host.c
@@ -613,7 +613,7 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
/* setup bus numbers */
val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS);
val &= 0xff000000;
- val |= 0x00010100;
+ val |= 0x00ff0100;
dw_pcie_writel_dbi(pci, PCI_PRIMARY_BUS, val);
/* setup command register */
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index fc734014206f..46d47bd6ca1f 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3419,22 +3419,29 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE,
static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
{
- pci_set_vpd_size(dev, 8192);
-}
-
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x20, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x21, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x22, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x23, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x24, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x25, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x26, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x30, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x31, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x32, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x35, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x36, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x37, quirk_chelsio_extend_vpd);
+ int chip = (dev->device & 0xf000) >> 12;
+ int func = (dev->device & 0x0f00) >> 8;
+ int prod = (dev->device & 0x00ff) >> 0;
+
+ /*
+ * If this is a T3-based adapter, there's a 1KB VPD area at offset
+ * 0xc00 which contains the preferred VPD values. If this is a T4 or
+ * later based adapter, the special VPD is at offset 0x400 for the
+ * Physical Functions (the SR-IOV Virtual Functions have no VPD
+ * Capabilities). The PCI VPD Access core routines will normally
+ * compute the size of the VPD by parsing the VPD Data Structure at
+ * offset 0x000. This will result in silent failures when attempting
+	 * to access these other VPD areas which are beyond those computed
+ * limits.
+ */
+ if (chip == 0x0 && prod >= 0x20)
+ pci_set_vpd_size(dev, 8192);
+ else if (chip >= 0x4 && func < 0x8)
+ pci_set_vpd_size(dev, 2048);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+ quirk_chelsio_extend_vpd);
#ifdef CONFIG_ACPI
/*
@@ -3901,6 +3908,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230,
quirk_dma_func1_alias);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642,
quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645,
+ quirk_dma_func1_alias);
/* https://bugs.gentoo.org/show_bug.cgi?id=497630 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JMICRON,
PCI_DEVICE_ID_JMICRON_JMB388_ESD,
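A runnable sketch of the device-ID decode quirk_chelsio_extend_vpd() now performs, using a made-up ID:

#include <stdio.h>

int main(void)
{
        unsigned int device = 0x4401;           /* hypothetical T4 device ID */
        int chip = (device & 0xf000) >> 12;
        int func = (device & 0x0f00) >> 8;
        int prod = (device & 0x00ff) >> 0;

        /* T3 parts: chip 0 with prod >= 0x20; T4 and later: chip >= 4 */
        printf("chip=%d func=%d prod=0x%02x\n", chip, func, prod);
        return 0;
}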
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 369d48d6c6f1..365447240d95 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -401,6 +401,10 @@ void pci_release_resource(struct pci_dev *dev, int resno)
struct resource *res = dev->resource + resno;
pci_info(dev, "BAR %d: releasing %pR\n", resno, res);
+
+ if (!res->parent)
+ return;
+
release_resource(res);
res->end = resource_size(res) - 1;
res->start = 0;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 7bc5eee96b31..f63db346c219 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -17,7 +17,6 @@
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/perf/arm_pmu.h>
-#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/sched/clock.h>
#include <linux/spinlock.h>
@@ -26,6 +25,9 @@
#include <asm/irq_regs.h>
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
+static DEFINE_PER_CPU(int, cpu_irq);
+
static int
armpmu_map_cache_event(const unsigned (*cache_map)
[PERF_COUNT_HW_CACHE_MAX]
@@ -320,17 +322,9 @@ validate_group(struct perf_event *event)
return 0;
}
-static struct arm_pmu_platdata *armpmu_get_platdata(struct arm_pmu *armpmu)
-{
- struct platform_device *pdev = armpmu->plat_device;
-
- return pdev ? dev_get_platdata(&pdev->dev) : NULL;
-}
-
static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
struct arm_pmu *armpmu;
- struct arm_pmu_platdata *plat;
int ret;
u64 start_clock, finish_clock;
@@ -341,14 +335,11 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
* dereference.
*/
armpmu = *(void **)dev;
-
- plat = armpmu_get_platdata(armpmu);
+ if (WARN_ON_ONCE(!armpmu))
+ return IRQ_NONE;
start_clock = sched_clock();
- if (plat && plat->handle_irq)
- ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
- else
- ret = armpmu->handle_irq(irq, armpmu);
+ ret = armpmu->handle_irq(irq, armpmu);
finish_clock = sched_clock();
perf_sample_event_took(finish_clock - start_clock);
@@ -531,54 +522,41 @@ int perf_num_counters(void)
}
EXPORT_SYMBOL_GPL(perf_num_counters);
-void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
+static int armpmu_count_irq_users(const int irq)
{
- struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
- int irq = per_cpu(hw_events->irq, cpu);
+ int cpu, count = 0;
- if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
- return;
-
- if (irq_is_percpu_devid(irq)) {
- free_percpu_irq(irq, &hw_events->percpu_pmu);
- cpumask_clear(&armpmu->active_irqs);
- return;
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(cpu_irq, cpu) == irq)
+ count++;
}
- free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+ return count;
}
-void armpmu_free_irqs(struct arm_pmu *armpmu)
+void armpmu_free_irq(int irq, int cpu)
{
- int cpu;
+ if (per_cpu(cpu_irq, cpu) == 0)
+ return;
+ if (WARN_ON(irq != per_cpu(cpu_irq, cpu)))
+ return;
+
+ if (!irq_is_percpu_devid(irq))
+ free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu));
+ else if (armpmu_count_irq_users(irq) == 1)
+ free_percpu_irq(irq, &cpu_armpmu);
- for_each_cpu(cpu, &armpmu->supported_cpus)
- armpmu_free_irq(armpmu, cpu);
+ per_cpu(cpu_irq, cpu) = 0;
}
-int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
+int armpmu_request_irq(int irq, int cpu)
{
int err = 0;
- struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
const irq_handler_t handler = armpmu_dispatch_irq;
- int irq = per_cpu(hw_events->irq, cpu);
if (!irq)
return 0;
- if (irq_is_percpu_devid(irq) && cpumask_empty(&armpmu->active_irqs)) {
- err = request_percpu_irq(irq, handler, "arm-pmu",
- &hw_events->percpu_pmu);
- } else if (irq_is_percpu_devid(irq)) {
- int other_cpu = cpumask_first(&armpmu->active_irqs);
- int other_irq = per_cpu(hw_events->irq, other_cpu);
-
- if (irq != other_irq) {
- pr_warn("mismatched PPIs detected.\n");
- err = -EINVAL;
- goto err_out;
- }
- } else {
- struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu);
+ if (!irq_is_percpu_devid(irq)) {
unsigned long irq_flags;
err = irq_force_affinity(irq, cpumask_of(cpu));
@@ -589,22 +567,22 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
goto err_out;
}
- if (platdata && platdata->irq_flags) {
- irq_flags = platdata->irq_flags;
- } else {
- irq_flags = IRQF_PERCPU |
- IRQF_NOBALANCING |
- IRQF_NO_THREAD;
- }
+ irq_flags = IRQF_PERCPU |
+ IRQF_NOBALANCING |
+ IRQF_NO_THREAD;
+ irq_set_status_flags(irq, IRQ_NOAUTOEN);
err = request_irq(irq, handler, irq_flags, "arm-pmu",
- per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+ per_cpu_ptr(&cpu_armpmu, cpu));
+ } else if (armpmu_count_irq_users(irq) == 0) {
+ err = request_percpu_irq(irq, handler, "arm-pmu",
+ &cpu_armpmu);
}
if (err)
goto err_out;
- cpumask_set_cpu(cpu, &armpmu->active_irqs);
+ per_cpu(cpu_irq, cpu) = irq;
return 0;
err_out:
@@ -612,19 +590,6 @@ err_out:
return err;
}
-int armpmu_request_irqs(struct arm_pmu *armpmu)
-{
- int cpu, err;
-
- for_each_cpu(cpu, &armpmu->supported_cpus) {
- err = armpmu_request_irq(armpmu, cpu);
- if (err)
- break;
- }
-
- return err;
-}
-
static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
{
struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
@@ -647,12 +612,14 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
if (pmu->reset)
pmu->reset(pmu);
+ per_cpu(cpu_armpmu, cpu) = pmu;
+
irq = armpmu_get_cpu_irq(pmu, cpu);
if (irq) {
- if (irq_is_percpu_devid(irq)) {
+ if (irq_is_percpu_devid(irq))
enable_percpu_irq(irq, IRQ_TYPE_NONE);
- return 0;
- }
+ else
+ enable_irq(irq);
}
return 0;
@@ -667,8 +634,14 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
irq = armpmu_get_cpu_irq(pmu, cpu);
- if (irq && irq_is_percpu_devid(irq))
- disable_percpu_irq(irq);
+ if (irq) {
+ if (irq_is_percpu_devid(irq))
+ disable_percpu_irq(irq);
+ else
+ disable_irq_nosync(irq);
+ }
+
+ per_cpu(cpu_armpmu, cpu) = NULL;
return 0;
}
@@ -800,18 +773,18 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
&cpu_pmu->node);
}
-struct arm_pmu *armpmu_alloc(void)
+static struct arm_pmu *__armpmu_alloc(gfp_t flags)
{
struct arm_pmu *pmu;
int cpu;
- pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ pmu = kzalloc(sizeof(*pmu), flags);
if (!pmu) {
pr_info("failed to allocate PMU device!\n");
goto out;
}
- pmu->hw_events = alloc_percpu(struct pmu_hw_events);
+ pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
if (!pmu->hw_events) {
pr_info("failed to allocate per-cpu PMU data.\n");
goto out_free_pmu;
@@ -857,6 +830,17 @@ out:
return NULL;
}
+struct arm_pmu *armpmu_alloc(void)
+{
+ return __armpmu_alloc(GFP_KERNEL);
+}
+
+struct arm_pmu *armpmu_alloc_atomic(void)
+{
+ return __armpmu_alloc(GFP_ATOMIC);
+}
+
+
void armpmu_free(struct arm_pmu *pmu)
{
free_percpu(pmu->hw_events);
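A minimal sketch of the per-cpu IRQ bookkeeping introduced above, shrunk to a plain array so it runs anywhere; NR_CPUS and the free hook are stand-ins for the per-cpu machinery.

#define NR_CPUS 4

static int cpu_irq_tbl[NR_CPUS];

static int irq_users(int irq)
{
        int cpu, n = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (cpu_irq_tbl[cpu] == irq)
                        n++;
        return n;
}

static void free_irq_sketch(int irq, int cpu, void (*free_percpu)(int))
{
        if (cpu_irq_tbl[cpu] != irq)
                return;
        if (irq_users(irq) == 1)
                free_percpu(irq);       /* last user tears it down */
        cpu_irq_tbl[cpu] = 0;
}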
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 705f1a390e31..0f197516d708 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -11,6 +11,8 @@
#include <linux/acpi.h>
#include <linux/cpumask.h>
#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
#include <linux/percpu.h>
#include <linux/perf/arm_pmu.h>
@@ -87,7 +89,13 @@ static int arm_pmu_acpi_parse_irqs(void)
pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu);
}
+ /*
+ * Log and request the IRQ so the core arm_pmu code can manage
+ * it. We'll have to sanity-check IRQs later when we associate
+ * them with their PMUs.
+ */
per_cpu(pmu_irqs, cpu) = irq;
+ armpmu_request_irq(irq, cpu);
}
return 0;
@@ -127,7 +135,7 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
return pmu;
}
- pmu = armpmu_alloc();
+ pmu = armpmu_alloc_atomic();
if (!pmu) {
pr_warn("Unable to allocate PMU for CPU%d\n",
smp_processor_id());
@@ -140,6 +148,35 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
}
/*
+ * Check whether the new IRQ is compatible with those already associated with
+ * the PMU (e.g. we don't have mismatched PPIs).
+ */
+static bool pmu_irq_matches(struct arm_pmu *pmu, int irq)
+{
+ struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+ int cpu;
+
+ if (!irq)
+ return true;
+
+ for_each_cpu(cpu, &pmu->supported_cpus) {
+ int other_irq = per_cpu(hw_events->irq, cpu);
+ if (!other_irq)
+ continue;
+
+ if (irq == other_irq)
+ continue;
+ if (!irq_is_percpu_devid(irq) && !irq_is_percpu_devid(other_irq))
+ continue;
+
+ pr_warn("mismatched PPIs detected\n");
+ return false;
+ }
+
+ return true;
+}
+
+/*
* This must run before the common arm_pmu hotplug logic, so that we can
* associate a CPU and its interrupt before the common code tries to manage the
* affinity and so on.
@@ -164,19 +201,14 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
if (!pmu)
return -ENOMEM;
- cpumask_set_cpu(cpu, &pmu->supported_cpus);
-
per_cpu(probed_pmus, cpu) = pmu;
- /*
- * Log and request the IRQ so the core arm_pmu code can manage it. In
- * some situations (e.g. mismatched PPIs), we may fail to request the
- * IRQ. However, it may be too late for us to do anything about it.
- * The common ARM PMU code will log a warning in this case.
- */
- hw_events = pmu->hw_events;
- per_cpu(hw_events->irq, cpu) = irq;
- armpmu_request_irq(pmu, cpu);
+ if (pmu_irq_matches(pmu, irq)) {
+ hw_events = pmu->hw_events;
+ per_cpu(hw_events->irq, cpu) = irq;
+ }
+
+ cpumask_set_cpu(cpu, &pmu->supported_cpus);
/*
* Ideally, we'd probe the PMU here when we find the first matching
@@ -247,11 +279,6 @@ static int arm_pmu_acpi_init(void)
if (acpi_disabled)
return 0;
- /*
- * We can't request IRQs yet, since we don't know the cookie value
- * until we know which CPUs share the same logical PMU. We'll handle
- * that in arm_pmu_acpi_cpu_starting().
- */
ret = arm_pmu_acpi_parse_irqs();
if (ret)
return ret;
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 46501cc79fd7..7729eda5909d 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -127,13 +127,6 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
pdev->dev.of_node);
}
- /*
- * Some platforms have all PMU IRQs OR'd into a single IRQ, with a
- * special platdata function that attempts to demux them.
- */
- if (dev_get_platdata(&pdev->dev))
- cpumask_setall(&pmu->supported_cpus);
-
for (i = 0; i < num_irqs; i++) {
int cpu, irq;
@@ -164,6 +157,36 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
return 0;
}
+static int armpmu_request_irqs(struct arm_pmu *armpmu)
+{
+ struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+ int cpu, err;
+
+ for_each_cpu(cpu, &armpmu->supported_cpus) {
+ int irq = per_cpu(hw_events->irq, cpu);
+ if (!irq)
+ continue;
+
+ err = armpmu_request_irq(irq, cpu);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static void armpmu_free_irqs(struct arm_pmu *armpmu)
+{
+ int cpu;
+ struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+
+ for_each_cpu(cpu, &armpmu->supported_cpus) {
+ int irq = per_cpu(hw_events->irq, cpu);
+
+ armpmu_free_irq(irq, cpu);
+ }
+}
+
int arm_pmu_device_probe(struct platform_device *pdev,
const struct of_device_id *of_table,
const struct pmu_probe_info *probe_table)
diff --git a/drivers/phy/qualcomm/phy-qcom-ufs.c b/drivers/phy/qualcomm/phy-qcom-ufs.c
index c5ff4525edef..c5493ea51282 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-ufs.c
@@ -675,3 +675,8 @@ int ufs_qcom_phy_power_off(struct phy *generic_phy)
return 0;
}
EXPORT_SYMBOL_GPL(ufs_qcom_phy_power_off);
+
+MODULE_AUTHOR("Yaniv Gardi <[email protected]>");
+MODULE_AUTHOR("Vivek Gautam <[email protected]>");
+MODULE_DESCRIPTION("Universal Flash Storage (UFS) QCOM PHY");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pinctrl/meson/pinctrl-meson-axg.c b/drivers/pinctrl/meson/pinctrl-meson-axg.c
index 1fda9d6c7ea3..4b91ff74779b 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-axg.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-axg.c
@@ -716,7 +716,7 @@ static const char * const uart_b_groups[] = {
"uart_tx_b_x", "uart_rx_b_x", "uart_cts_b_x", "uart_rts_b_x",
};
-static const char * const uart_ao_b_gpioz_groups[] = {
+static const char * const uart_ao_b_z_groups[] = {
"uart_ao_tx_b_z", "uart_ao_rx_b_z",
"uart_ao_cts_b_z", "uart_ao_rts_b_z",
};
@@ -855,7 +855,7 @@ static struct meson_pmx_func meson_axg_periphs_functions[] = {
FUNCTION(nand),
FUNCTION(uart_a),
FUNCTION(uart_b),
- FUNCTION(uart_ao_b_gpioz),
+ FUNCTION(uart_ao_b_z),
FUNCTION(i2c0),
FUNCTION(i2c1),
FUNCTION(i2c2),
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 6dec6ab13300..d8599736a41a 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -423,7 +423,7 @@ static int chromeos_laptop_probe(struct platform_device *pdev)
return ret;
}
-static const struct chromeos_laptop samsung_series_5_550 = {
+static struct chromeos_laptop samsung_series_5_550 = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
@@ -432,14 +432,14 @@ static const struct chromeos_laptop samsung_series_5_550 = {
},
};
-static const struct chromeos_laptop samsung_series_5 = {
+static struct chromeos_laptop samsung_series_5 = {
.i2c_peripherals = {
/* Light Sensor. */
{ .add = setup_tsl2583_als, I2C_ADAPTER_SMBUS },
},
};
-static const struct chromeos_laptop chromebook_pixel = {
+static struct chromeos_laptop chromebook_pixel = {
.i2c_peripherals = {
/* Touch Screen. */
{ .add = setup_atmel_1664s_ts, I2C_ADAPTER_PANEL },
@@ -450,14 +450,14 @@ static const struct chromeos_laptop chromebook_pixel = {
},
};
-static const struct chromeos_laptop hp_chromebook_14 = {
+static struct chromeos_laptop hp_chromebook_14 = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
},
};
-static const struct chromeos_laptop dell_chromebook_11 = {
+static struct chromeos_laptop dell_chromebook_11 = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
@@ -466,28 +466,28 @@ static const struct chromeos_laptop dell_chromebook_11 = {
},
};
-static const struct chromeos_laptop toshiba_cb35 = {
+static struct chromeos_laptop toshiba_cb35 = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
},
};
-static const struct chromeos_laptop acer_c7_chromebook = {
+static struct chromeos_laptop acer_c7_chromebook = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
},
};
-static const struct chromeos_laptop acer_ac700 = {
+static struct chromeos_laptop acer_ac700 = {
.i2c_peripherals = {
/* Light Sensor. */
{ .add = setup_tsl2563_als, I2C_ADAPTER_SMBUS },
},
};
-static const struct chromeos_laptop acer_c720 = {
+static struct chromeos_laptop acer_c720 = {
.i2c_peripherals = {
/* Touchscreen. */
{ .add = setup_atmel_1664s_ts, I2C_ADAPTER_DESIGNWARE_1 },
@@ -500,14 +500,14 @@ static const struct chromeos_laptop acer_c720 = {
},
};
-static const struct chromeos_laptop hp_pavilion_14_chromebook = {
+static struct chromeos_laptop hp_pavilion_14_chromebook = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
},
};
-static const struct chromeos_laptop cr48 = {
+static struct chromeos_laptop cr48 = {
.i2c_peripherals = {
/* Light Sensor. */
{ .add = setup_tsl2563_als, I2C_ADAPTER_SMBUS },
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 9a8f96465cdc..51ebc5a6053f 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -105,31 +105,45 @@ config ASUS_LAPTOP
If you have an ACPI-compatible ASUS laptop, say Y or M here.
+#
+# The DELL_SMBIOS driver depends on ACPI_WMI and/or DCDBAS if those
+# backends are selected. The "depends" line prevents a configuration
+# where DELL_SMBIOS=y while either of those dependencies =m.
+#
config DELL_SMBIOS
- tristate
+ tristate "Dell SMBIOS driver"
+ depends on DCDBAS || DCDBAS=n
+ depends on ACPI_WMI || ACPI_WMI=n
+ ---help---
+ This provides support for the Dell SMBIOS calling interface.
+ If you have a Dell computer you should enable this option.
+
+ Be sure to select at least one backend for it to work properly.
config DELL_SMBIOS_WMI
- tristate "Dell SMBIOS calling interface (WMI implementation)"
+ bool "Dell SMBIOS driver WMI backend"
+ default y
depends on ACPI_WMI
select DELL_WMI_DESCRIPTOR
- select DELL_SMBIOS
+ depends on DELL_SMBIOS
---help---
This provides an implementation for the Dell SMBIOS calling interface
communicated over ACPI-WMI.
- If you have a Dell computer from >2007 you should say Y or M here.
+ If you have a Dell computer from >2007 you should say Y here.
If you aren't sure and this module doesn't work for your computer
it just won't load.
config DELL_SMBIOS_SMM
- tristate "Dell SMBIOS calling interface (SMM implementation)"
+ bool "Dell SMBIOS driver SMM backend"
+ default y
depends on DCDBAS
- select DELL_SMBIOS
+ depends on DELL_SMBIOS
---help---
This provides an implementation for the Dell SMBIOS calling interface
communicated over SMI/SMM.
- If you have a Dell computer from <=2017 you should say Y or M here.
+ If you have a Dell computer from <=2017 you should say Y here.
If you aren't sure and this module doesn't work for your computer
it just won't load.
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index c388608ad2a3..2ba6cb795338 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -13,8 +13,9 @@ obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o
obj-$(CONFIG_ACPI_CMPC) += classmate-laptop.o
obj-$(CONFIG_COMPAL_LAPTOP) += compal-laptop.o
obj-$(CONFIG_DELL_SMBIOS) += dell-smbios.o
-obj-$(CONFIG_DELL_SMBIOS_WMI) += dell-smbios-wmi.o
-obj-$(CONFIG_DELL_SMBIOS_SMM) += dell-smbios-smm.o
+dell-smbios-objs := dell-smbios-base.o
+dell-smbios-$(CONFIG_DELL_SMBIOS_WMI) += dell-smbios-wmi.o
+dell-smbios-$(CONFIG_DELL_SMBIOS_SMM) += dell-smbios-smm.o
obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o
obj-$(CONFIG_DELL_WMI) += dell-wmi.o
obj-$(CONFIG_DELL_WMI_DESCRIPTOR) += dell-wmi-descriptor.o
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index 2a68f59d2228..c52c6723374b 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -127,24 +127,6 @@ static const struct dmi_system_id dell_device_table[] __initconst = {
},
},
{
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_CHASSIS_TYPE, "30"), /*Tablet*/
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /*Convertible*/
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_CHASSIS_TYPE, "32"), /*Detachable*/
- },
- },
- {
.ident = "Dell Computer Corporation",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
@@ -1279,7 +1261,7 @@ static int kbd_get_state(struct kbd_state *state)
struct calling_interface_buffer buffer;
int ret;
- dell_fill_request(&buffer, 0, 0, 0, 0);
+ dell_fill_request(&buffer, 0x1, 0, 0, 0);
ret = dell_send_request(&buffer,
CLASS_KBD_BACKLIGHT, SELECT_KBD_BACKLIGHT);
if (ret)
diff --git a/drivers/platform/x86/dell-smbios.c b/drivers/platform/x86/dell-smbios-base.c
index 8541cde4cb7d..2485c80a9fdd 100644
--- a/drivers/platform/x86/dell-smbios.c
+++ b/drivers/platform/x86/dell-smbios-base.c
@@ -36,7 +36,7 @@ static DEFINE_MUTEX(smbios_mutex);
struct smbios_device {
struct list_head list;
struct device *device;
- int (*call_fn)(struct calling_interface_buffer *);
+ int (*call_fn)(struct calling_interface_buffer *arg);
};
struct smbios_call {
@@ -352,8 +352,10 @@ static void __init parse_da_table(const struct dmi_header *dm)
struct calling_interface_structure *table =
container_of(dm, struct calling_interface_structure, header);
- /* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
- 6 bytes of entry */
+ /*
+ * 4 bytes of table header, plus 7 bytes of Dell header
+ * plus at least 6 bytes of entry
+ */
if (dm->length < 17)
return;
@@ -554,7 +556,7 @@ static void free_group(struct platform_device *pdev)
static int __init dell_smbios_init(void)
{
const struct dmi_device *valid;
- int ret;
+ int ret, wmi, smm;
valid = dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Dell System", NULL);
if (!valid) {
@@ -589,8 +591,24 @@ static int __init dell_smbios_init(void)
if (ret)
goto fail_create_group;
+ /* register backends */
+ wmi = init_dell_smbios_wmi();
+ if (wmi)
+ pr_debug("Failed to initialize WMI backend: %d\n", wmi);
+ smm = init_dell_smbios_smm();
+ if (smm)
+ pr_debug("Failed to initialize SMM backend: %d\n", smm);
+ if (wmi && smm) {
+ pr_err("No SMBIOS backends available (wmi: %d, smm: %d)\n",
+ wmi, smm);
+ goto fail_sysfs;
+ }
+
return 0;
+fail_sysfs:
+ free_group(platform_device);
+
fail_create_group:
platform_device_del(platform_device);
@@ -607,6 +625,8 @@ fail_platform_driver:
static void __exit dell_smbios_exit(void)
{
+ exit_dell_smbios_wmi();
+ exit_dell_smbios_smm();
mutex_lock(&smbios_mutex);
if (platform_device) {
free_group(platform_device);
@@ -617,11 +637,12 @@ static void __exit dell_smbios_exit(void)
mutex_unlock(&smbios_mutex);
}
-subsys_initcall(dell_smbios_init);
+module_init(dell_smbios_init);
module_exit(dell_smbios_exit);
MODULE_AUTHOR("Matthew Garrett <[email protected]>");
MODULE_AUTHOR("Gabriele Mazzotta <[email protected]>");
MODULE_AUTHOR("Pali Rohár <[email protected]>");
+MODULE_AUTHOR("Mario Limonciello <[email protected]>");
MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS");
MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/dell-smbios-smm.c b/drivers/platform/x86/dell-smbios-smm.c
index 89f65c4651a0..e9e9da556318 100644
--- a/drivers/platform/x86/dell-smbios-smm.c
+++ b/drivers/platform/x86/dell-smbios-smm.c
@@ -58,7 +58,7 @@ static const struct dmi_system_id dell_device_table[] __initconst = {
};
MODULE_DEVICE_TABLE(dmi, dell_device_table);
-static void __init parse_da_table(const struct dmi_header *dm)
+static void parse_da_table(const struct dmi_header *dm)
{
struct calling_interface_structure *table =
container_of(dm, struct calling_interface_structure, header);
@@ -73,7 +73,7 @@ static void __init parse_da_table(const struct dmi_header *dm)
da_command_code = table->cmdIOCode;
}
-static void __init find_cmd_address(const struct dmi_header *dm, void *dummy)
+static void find_cmd_address(const struct dmi_header *dm, void *dummy)
{
switch (dm->type) {
case 0xda: /* Calling interface */
@@ -128,7 +128,7 @@ static bool test_wsmt_enabled(void)
return false;
}
-static int __init dell_smbios_smm_init(void)
+int init_dell_smbios_smm(void)
{
int ret;
/*
@@ -176,7 +176,7 @@ fail_platform_device_alloc:
return ret;
}
-static void __exit dell_smbios_smm_exit(void)
+void exit_dell_smbios_smm(void)
{
if (platform_device) {
dell_smbios_unregister_device(&platform_device->dev);
@@ -184,13 +184,3 @@ static void __exit dell_smbios_smm_exit(void)
free_page((unsigned long)buffer);
}
}
-
-subsys_initcall(dell_smbios_smm_init);
-module_exit(dell_smbios_smm_exit);
-
-MODULE_AUTHOR("Matthew Garrett <[email protected]>");
-MODULE_AUTHOR("Gabriele Mazzotta <[email protected]>");
-MODULE_AUTHOR("Pali Rohár <[email protected]>");
-MODULE_AUTHOR("Mario Limonciello <[email protected]>");
-MODULE_DESCRIPTION("Dell SMBIOS communications over SMI");
-MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/dell-smbios-wmi.c b/drivers/platform/x86/dell-smbios-wmi.c
index 609557aa5868..fbefedb1c172 100644
--- a/drivers/platform/x86/dell-smbios-wmi.c
+++ b/drivers/platform/x86/dell-smbios-wmi.c
@@ -228,7 +228,7 @@ static const struct wmi_device_id dell_smbios_wmi_id_table[] = {
{ },
};
-static void __init parse_b1_table(const struct dmi_header *dm)
+static void parse_b1_table(const struct dmi_header *dm)
{
struct misc_bios_flags_structure *flags =
container_of(dm, struct misc_bios_flags_structure, header);
@@ -242,7 +242,7 @@ static void __init parse_b1_table(const struct dmi_header *dm)
wmi_supported = 1;
}
-static void __init find_b1(const struct dmi_header *dm, void *dummy)
+static void find_b1(const struct dmi_header *dm, void *dummy)
{
switch (dm->type) {
case 0xb1: /* misc bios flags */
@@ -261,7 +261,7 @@ static struct wmi_driver dell_smbios_wmi_driver = {
.filter_callback = dell_smbios_wmi_filter,
};
-static int __init init_dell_smbios_wmi(void)
+int init_dell_smbios_wmi(void)
{
dmi_walk(find_b1, NULL);
@@ -271,15 +271,9 @@ static int __init init_dell_smbios_wmi(void)
return wmi_driver_register(&dell_smbios_wmi_driver);
}
-static void __exit exit_dell_smbios_wmi(void)
+void exit_dell_smbios_wmi(void)
{
wmi_driver_unregister(&dell_smbios_wmi_driver);
}
-module_init(init_dell_smbios_wmi);
-module_exit(exit_dell_smbios_wmi);
-
MODULE_ALIAS("wmi:" DELL_WMI_SMBIOS_GUID);
-MODULE_AUTHOR("Mario Limonciello <[email protected]>");
-MODULE_DESCRIPTION("Dell SMBIOS communications over WMI");
-MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/dell-smbios.h b/drivers/platform/x86/dell-smbios.h
index 138d478d9adc..d8adaf959740 100644
--- a/drivers/platform/x86/dell-smbios.h
+++ b/drivers/platform/x86/dell-smbios.h
@@ -75,4 +75,29 @@ int dell_laptop_register_notifier(struct notifier_block *nb);
int dell_laptop_unregister_notifier(struct notifier_block *nb);
void dell_laptop_call_notifier(unsigned long action, void *data);
-#endif
+/* for the supported backends */
+#ifdef CONFIG_DELL_SMBIOS_WMI
+int init_dell_smbios_wmi(void);
+void exit_dell_smbios_wmi(void);
+#else /* CONFIG_DELL_SMBIOS_WMI */
+static inline int init_dell_smbios_wmi(void)
+{
+ return -ENODEV;
+}
+static inline void exit_dell_smbios_wmi(void)
+{}
+#endif /* CONFIG_DELL_SMBIOS_WMI */
+
+#ifdef CONFIG_DELL_SMBIOS_SMM
+int init_dell_smbios_smm(void);
+void exit_dell_smbios_smm(void);
+#else /* CONFIG_DELL_SMBIOS_SMM */
+static inline int init_dell_smbios_smm(void)
+{
+ return -ENODEV;
+}
+static inline void exit_dell_smbios_smm(void)
+{}
+#endif /* CONFIG_DELL_SMBIOS_SMM */
+
+#endif /* _DELL_SMBIOS_H_ */
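A minimal sketch of the header's stub pattern with hypothetical backends: a compiled-out backend collapses to an inline -ENODEV stub, so the core can call every init hook unconditionally and only require that one of them succeed.

#include <errno.h>

#ifdef CONFIG_BACKEND_A
int init_backend_a(void);
#else
static inline int init_backend_a(void) { return -ENODEV; }
#endif

#ifdef CONFIG_BACKEND_B
int init_backend_b(void);
#else
static inline int init_backend_b(void) { return -ENODEV; }
#endif

static int core_init_sketch(void)
{
        int a = init_backend_a();
        int b = init_backend_b();

        return (a && b) ? -ENODEV : 0;  /* fail only if all backends fail */
}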
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 2c9927430d85..8d102195a392 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -714,7 +714,7 @@ static int __init dell_wmi_init(void)
return wmi_driver_register(&dell_wmi_driver);
}
-module_init(dell_wmi_init);
+late_initcall(dell_wmi_init);
static void __exit dell_wmi_exit(void)
{
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 5b6f18b18801..535199c9e6bc 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -113,7 +113,7 @@ MODULE_PARM_DESC(no_bt_rfkill, "No rfkill for bluetooth.");
/*
* ACPI Helpers
*/
-#define IDEAPAD_EC_TIMEOUT (100) /* in ms */
+#define IDEAPAD_EC_TIMEOUT (200) /* in ms */
static int read_method_int(acpi_handle handle, const char *method, int *val)
{
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index d1a01311c1a2..5e3df194723e 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -376,6 +376,7 @@ static int intel_hid_remove(struct platform_device *device)
{
acpi_handle handle = ACPI_HANDLE(&device->dev);
+ device_init_wakeup(&device->dev, false);
acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler);
intel_hid_set_enable(&device->dev, false);
intel_button_array_enable(&device->dev, false);
diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index b703d6f5b099..c13780b8dabb 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -7,6 +7,7 @@
*/
#include <linux/acpi.h>
+#include <linux/dmi.h>
#include <linux/input.h>
#include <linux/input/sparse-keymap.h>
#include <linux/kernel.h>
@@ -97,9 +98,35 @@ out_unknown:
dev_dbg(&device->dev, "unknown event index 0x%x\n", event);
}
-static int intel_vbtn_probe(struct platform_device *device)
+static void detect_tablet_mode(struct platform_device *device)
{
+ const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);
+ struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev);
+ acpi_handle handle = ACPI_HANDLE(&device->dev);
struct acpi_buffer vgbs_output = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *obj;
+ acpi_status status;
+ int m;
+
+ if (!(chassis_type && strcmp(chassis_type, "31") == 0))
+ goto out;
+
+ status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output);
+ if (ACPI_FAILURE(status))
+ goto out;
+
+ obj = vgbs_output.pointer;
+ if (!(obj && obj->type == ACPI_TYPE_INTEGER))
+ goto out;
+
+ m = !(obj->integer.value & TABLET_MODE_FLAG);
+ input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
+out:
+ kfree(vgbs_output.pointer);
+}
+
+static int intel_vbtn_probe(struct platform_device *device)
+{
acpi_handle handle = ACPI_HANDLE(&device->dev);
struct intel_vbtn_priv *priv;
acpi_status status;
@@ -122,22 +149,7 @@ static int intel_vbtn_probe(struct platform_device *device)
return err;
}
- /*
- * VGBS being present and returning something means we have
- * a tablet mode switch.
- */
- status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output);
- if (ACPI_SUCCESS(status)) {
- union acpi_object *obj = vgbs_output.pointer;
-
- if (obj && obj->type == ACPI_TYPE_INTEGER) {
- int m = !(obj->integer.value & TABLET_MODE_FLAG);
-
- input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
- }
- }
-
- kfree(vgbs_output.pointer);
+ detect_tablet_mode(device);
status = acpi_install_notify_handler(handle,
ACPI_DEVICE_NOTIFY,
@@ -154,6 +166,7 @@ static int intel_vbtn_remove(struct platform_device *device)
{
acpi_handle handle = ACPI_HANDLE(&device->dev);
+ device_init_wakeup(&device->dev, false);
acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler);
/*
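The chassis-type gate above keys off SMBIOS chassis type 31, which the SMBIOS specification defines as "Convertible"; only such systems get an initial SW_TABLET_MODE report. The check in isolation:

#include <linux/dmi.h>
#include <linux/string.h>
#include <linux/types.h>

/* Mirrors the gate in detect_tablet_mode(): true only on systems whose
 * SMBIOS chassis type is 31 ("Convertible"). */
static bool example_is_convertible(void)
{
        const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);

        return chassis_type && strcmp(chassis_type, "31") == 0;
}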
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index daa68acbc900..8796211ef24a 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -933,7 +933,7 @@ static int wmi_dev_probe(struct device *dev)
goto probe_failure;
}
- buf = kmalloc(strlen(wdriver->driver.name) + 4, GFP_KERNEL);
+ buf = kmalloc(strlen(wdriver->driver.name) + 5, GFP_KERNEL);
if (!buf) {
ret = -ENOMEM;
goto probe_string_failure;
@@ -945,7 +945,7 @@ static int wmi_dev_probe(struct device *dev)
wblock->char_dev.mode = 0444;
ret = misc_register(&wblock->char_dev);
if (ret) {
- dev_warn(dev, "failed to register char dev: %d", ret);
+ dev_warn(dev, "failed to register char dev: %d\n", ret);
ret = -ENOMEM;
goto probe_misc_failure;
}
@@ -1048,7 +1048,7 @@ static int wmi_create_device(struct device *wmi_bus_dev,
if (result) {
dev_warn(wmi_bus_dev,
- "%s data block query control method not found",
+ "%s data block query control method not found\n",
method);
return result;
}
@@ -1198,7 +1198,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device)
retval = device_add(&wblock->dev.dev);
if (retval) {
- dev_err(wmi_bus_dev, "failed to register %pULL\n",
+ dev_err(wmi_bus_dev, "failed to register %pUL\n",
wblock->gblock.guid);
if (debug_event)
wmi_method_enable(wblock, 0);
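The strlen() + 5 fix presumably makes room for a "wmi/" prefix (4 characters) plus the terminating NUL that the old + 4 missed. kasprintf() sidesteps this class of off-by-one by sizing the allocation from the format itself; a sketch, with an illustrative helper name:

#include <linux/slab.h>
#include <linux/string.h>

/* Returns a freshly allocated "wmi/<name>" string, or NULL on OOM;
 * the caller kfree()s it. No manual size arithmetic involved. */
static char *example_make_devnode_name(const char *drv_name)
{
        return kasprintf(GFP_KERNEL, "wmi/%s", drv_name);
}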
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index dd4708c58480..1fc0c0811da4 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -4310,7 +4310,7 @@ static int _regulator_resume_early(struct device *dev, void *data)
rstate = regulator_get_suspend_state(rdev, *state);
if (rstate == NULL)
- return -EINVAL;
+ return 0;
mutex_lock(&rdev->mutex);
diff --git a/drivers/regulator/stm32-vrefbuf.c b/drivers/regulator/stm32-vrefbuf.c
index 72c8b3e1022b..e0a9c445ed67 100644
--- a/drivers/regulator/stm32-vrefbuf.c
+++ b/drivers/regulator/stm32-vrefbuf.c
@@ -51,7 +51,7 @@ static int stm32_vrefbuf_enable(struct regulator_dev *rdev)
* arbitrary timeout.
*/
ret = readl_poll_timeout(priv->base + STM32_VREFBUF_CSR, val,
- !(val & STM32_VRR), 650, 10000);
+ val & STM32_VRR, 650, 10000);
if (ret) {
dev_err(&rdev->dev, "stm32 vrefbuf timed out!\n");
val = readl_relaxed(priv->base + STM32_VREFBUF_CSR);
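For reference, readl_poll_timeout() keeps re-reading the register into val until the condition argument evaluates true or the timeout elapses, returning 0 on success and -ETIMEDOUT otherwise; the fix flips the condition so the poll completes once the ready bit (VRR) is set rather than cleared. A self-contained sketch with an illustrative mask:

#include <linux/bitops.h>
#include <linux/iopoll.h>

#define EXAMPLE_READY   BIT(3)  /* illustrative ready flag */

static int example_wait_ready(void __iomem *csr)
{
        u32 val;

        /* poll every ~650us, give up after 10ms, as in the hunk above */
        return readl_poll_timeout(csr, val, val & EXAMPLE_READY, 650, 10000);
}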
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index a7c15f0085e2..ecef8e73d40b 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2581,8 +2581,6 @@ int dasd_cancel_req(struct dasd_ccw_req *cqr)
case DASD_CQR_QUEUED:
/* request was not started - just set to cleared */
cqr->status = DASD_CQR_CLEARED;
- if (cqr->callback_data == DASD_SLEEPON_START_TAG)
- cqr->callback_data = DASD_SLEEPON_END_TAG;
break;
case DASD_CQR_IN_IO:
/* request in IO - terminate IO and release again */
@@ -3902,9 +3900,12 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device)
wait_event(dasd_flush_wq,
(cqr->status != DASD_CQR_CLEAR_PENDING));
- /* mark sleepon requests as ended */
- if (cqr->callback_data == DASD_SLEEPON_START_TAG)
- cqr->callback_data = DASD_SLEEPON_END_TAG;
+ /*
+ * requeue requests to blocklayer will only work
+ * for block device requests
+ */
+ if (_dasd_requeue_request(cqr))
+ continue;
/* remove requests from device and block queue */
list_del_init(&cqr->devlist);
@@ -3917,13 +3918,6 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device)
cqr = refers;
}
- /*
- * requeue requests to blocklayer will only work
- * for block device requests
- */
- if (_dasd_requeue_request(cqr))
- continue;
-
if (cqr->block)
list_del_init(&cqr->blocklist);
cqr->block->base->discipline->free_cp(
@@ -3940,8 +3934,7 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device)
list_splice_tail(&requeue_queue, &device->ccw_queue);
spin_unlock_irq(get_ccwdev_lock(device->cdev));
}
- /* wake up generic waitqueue for eventually ended sleepon requests */
- wake_up(&generic_waitq);
+ dasd_schedule_device_bh(device);
return rc;
}
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 1319122e9d12..9169af7dbb43 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -795,6 +795,7 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event)
ccw_device_set_timeout(cdev, 0);
cdev->private->iretry = 255;
+ cdev->private->async_kill_io_rc = -ETIMEDOUT;
ret = ccw_device_cancel_halt_clear(cdev);
if (ret == -EBUSY) {
ccw_device_set_timeout(cdev, 3*HZ);
@@ -871,7 +872,7 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event)
/* OK, i/o is dead now. Call interrupt handler. */
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
- ERR_PTR(-EIO));
+ ERR_PTR(cdev->private->async_kill_io_rc));
}
static void
@@ -888,14 +889,16 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event)
ccw_device_online_verify(cdev, 0);
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
- ERR_PTR(-EIO));
+ ERR_PTR(cdev->private->async_kill_io_rc));
}
void ccw_device_kill_io(struct ccw_device *cdev)
{
int ret;
+ ccw_device_set_timeout(cdev, 0);
cdev->private->iretry = 255;
+ cdev->private->async_kill_io_rc = -EIO;
ret = ccw_device_cancel_halt_clear(cdev);
if (ret == -EBUSY) {
ccw_device_set_timeout(cdev, 3*HZ);
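The new async_kill_io_rc field lets the initiator record why an I/O is being killed (-ETIMEDOUT from the online-timeout path, -EIO from ccw_device_kill_io()), so the interrupt handler later reports the true reason instead of a hard-coded -EIO. The shape of that deferred-rc pattern, with illustrative names:

#include <linux/err.h>
#include <linux/errno.h>

struct example_priv {
        int async_kill_rc;              /* reason recorded at kill time */
        void (*handler)(void *irb);     /* driver callback */
};

static void example_kill_io(struct example_priv *p, int reason)
{
        p->async_kill_rc = reason;      /* -EIO or -ETIMEDOUT */
        /* ... then issue the asynchronous cancel/halt/clear ... */
}

/* runs later, from the completion path */
static void example_killed_irq(struct example_priv *p)
{
        if (p->handler)
                p->handler(ERR_PTR(p->async_kill_rc));
}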
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 1caf6a398760..75ce12a24dc2 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -159,7 +159,7 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
}
/**
- * ccw_device_start_key() - start a s390 channel program with key
+ * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key
* @cdev: target ccw device
* @cpa: logical start address of channel program
* @intparm: user specific interruption parameter; will be presented back to
@@ -170,10 +170,15 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
* @key: storage key to be used for the I/O
* @flags: additional flags; defines the action to be performed for I/O
* processing.
+ * @expires: timeout value in jiffies
*
* Start a S/390 channel program. When the interrupt arrives, the
* IRQ handler is called, either immediately, delayed (dev-end missing,
* or sense required) or never (no IRQ handler registered).
+ * This function notifies the device driver if the channel program has not
+ * completed during the time specified by @expires. If a timeout occurs, the
+ * channel program is terminated via xsch, hsch or csch, and the device's
+ * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
* Returns:
* %0, if the operation was successful;
* -%EBUSY, if the device is busy, or status pending;
@@ -182,9 +187,9 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
* Context:
* Interrupts disabled, ccw device lock held
*/
-int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
- unsigned long intparm, __u8 lpm, __u8 key,
- unsigned long flags)
+int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
+ unsigned long intparm, __u8 lpm, __u8 key,
+ unsigned long flags, int expires)
{
struct subchannel *sch;
int ret;
@@ -224,6 +229,8 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
switch (ret) {
case 0:
cdev->private->intparm = intparm;
+ if (expires)
+ ccw_device_set_timeout(cdev, expires);
break;
case -EACCES:
case -ENODEV:
@@ -234,7 +241,7 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
}
/**
- * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key
+ * ccw_device_start_key() - start a s390 channel program with key
* @cdev: target ccw device
* @cpa: logical start address of channel program
* @intparm: user specific interruption parameter; will be presented back to
@@ -245,15 +252,10 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
* @key: storage key to be used for the I/O
* @flags: additional flags; defines the action to be performed for I/O
* processing.
- * @expires: timeout value in jiffies
*
* Start a S/390 channel program. When the interrupt arrives, the
* IRQ handler is called, either immediately, delayed (dev-end missing,
* or sense required) or never (no IRQ handler registered).
- * This function notifies the device driver if the channel program has not
- * completed during the time specified by @expires. If a timeout occurs, the
- * channel program is terminated via xsch, hsch or csch, and the device's
- * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
* Returns:
* %0, if the operation was successful;
* -%EBUSY, if the device is busy, or status pending;
@@ -262,19 +264,12 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
* Context:
* Interrupts disabled, ccw device lock held
*/
-int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
- unsigned long intparm, __u8 lpm, __u8 key,
- unsigned long flags, int expires)
+int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
+ unsigned long intparm, __u8 lpm, __u8 key,
+ unsigned long flags)
{
- int ret;
-
- if (!cdev)
- return -ENODEV;
- ccw_device_set_timeout(cdev, expires);
- ret = ccw_device_start_key(cdev, cpa, intparm, lpm, key, flags);
- if (ret != 0)
- ccw_device_set_timeout(cdev, 0);
- return ret;
+ return ccw_device_start_timeout_key(cdev, cpa, intparm, lpm, key,
+ flags, 0);
}
/**
@@ -489,18 +484,20 @@ void ccw_device_get_id(struct ccw_device *cdev, struct ccw_dev_id *dev_id)
EXPORT_SYMBOL(ccw_device_get_id);
/**
- * ccw_device_tm_start_key() - perform start function
+ * ccw_device_tm_start_timeout_key() - perform start function
* @cdev: ccw device on which to perform the start function
* @tcw: transport-command word to be started
* @intparm: user defined parameter to be passed to the interrupt handler
* @lpm: mask of paths to use
* @key: storage key to use for storage access
+ * @expires: time span in jiffies after which to abort request
*
* Start the tcw on the given ccw device. Return zero on success, non-zero
* otherwise.
*/
-int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
- unsigned long intparm, u8 lpm, u8 key)
+int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
+ unsigned long intparm, u8 lpm, u8 key,
+ int expires)
{
struct subchannel *sch;
int rc;
@@ -527,37 +524,32 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
return -EACCES;
}
rc = cio_tm_start_key(sch, tcw, lpm, key);
- if (rc == 0)
+ if (rc == 0) {
cdev->private->intparm = intparm;
+ if (expires)
+ ccw_device_set_timeout(cdev, expires);
+ }
return rc;
}
-EXPORT_SYMBOL(ccw_device_tm_start_key);
+EXPORT_SYMBOL(ccw_device_tm_start_timeout_key);
/**
- * ccw_device_tm_start_timeout_key() - perform start function
+ * ccw_device_tm_start_key() - perform start function
* @cdev: ccw device on which to perform the start function
* @tcw: transport-command word to be started
* @intparm: user defined parameter to be passed to the interrupt handler
* @lpm: mask of paths to use
* @key: storage key to use for storage access
- * @expires: time span in jiffies after which to abort request
*
* Start the tcw on the given ccw device. Return zero on success, non-zero
* otherwise.
*/
-int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
- unsigned long intparm, u8 lpm, u8 key,
- int expires)
+int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
+ unsigned long intparm, u8 lpm, u8 key)
{
- int ret;
-
- ccw_device_set_timeout(cdev, expires);
- ret = ccw_device_tm_start_key(cdev, tcw, intparm, lpm, key);
- if (ret != 0)
- ccw_device_set_timeout(cdev, 0);
- return ret;
+ return ccw_device_tm_start_timeout_key(cdev, tcw, intparm, lpm, key, 0);
}
-EXPORT_SYMBOL(ccw_device_tm_start_timeout_key);
+EXPORT_SYMBOL(ccw_device_tm_start_key);
/**
* ccw_device_tm_start() - perform start function
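The swaps above follow a standard consolidation: the timeout-aware variant becomes the one real implementation, and the plain variant turns into a thin wrapper passing expires == 0, meaning "never arm the timer". Arming the timer only after a successful start also removes the old set_timeout(0) cleanup on failure. A generic sketch, all names illustrative:

struct example_dev {
        unsigned long timer_expires;
};

static int example_hw_start(struct example_dev *dev)
{
        return 0;       /* stand-in for the real start operation */
}

static void example_set_timeout(struct example_dev *dev, int expires)
{
        dev->timer_expires = expires;
}

static int example_start_timeout(struct example_dev *dev, int expires)
{
        int ret = example_hw_start(dev);

        /* arm the timer only once the start actually succeeded */
        if (ret == 0 && expires)
                example_set_timeout(dev, expires);
        return ret;
}

static int example_start(struct example_dev *dev)
{
        return example_start_timeout(dev, 0);
}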
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index af571d8d6925..90e4e3a7841b 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -157,6 +157,7 @@ struct ccw_device_private {
unsigned long intparm; /* user interruption parameter */
struct qdio_irq *qdio_data;
struct irb irb; /* device status */
+ int async_kill_io_rc;
struct senseid senseid; /* SenseID info */
struct pgid pgid[8]; /* path group IDs per chpid*/
struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 959c65cf75d9..4326715dc13e 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -233,8 +233,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa,
#define QETH_IDX_FUNC_LEVEL_OSD 0x0101
#define QETH_IDX_FUNC_LEVEL_IQD 0x4108
-#define QETH_REAL_CARD 1
-#define QETH_VLAN_CARD 2
#define QETH_BUFSIZE 4096
/**
@@ -556,12 +554,6 @@ enum qeth_prot_versions {
QETH_PROT_IPV6 = 0x0006,
};
-enum qeth_ip_types {
- QETH_IP_TYPE_NORMAL,
- QETH_IP_TYPE_VIPA,
- QETH_IP_TYPE_RXIP,
-};
-
enum qeth_cmd_buffer_state {
BUF_STATE_FREE,
BUF_STATE_LOCKED,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index ca72f3311004..19203340f879 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -527,8 +527,7 @@ static inline int qeth_is_cq(struct qeth_card *card, unsigned int queue)
queue == card->qdio.no_in_queues - 1;
}
-
-static int qeth_issue_next_read(struct qeth_card *card)
+static int __qeth_issue_next_read(struct qeth_card *card)
{
int rc;
struct qeth_cmd_buffer *iob;
@@ -559,6 +558,17 @@ static int qeth_issue_next_read(struct qeth_card *card)
return rc;
}
+static int qeth_issue_next_read(struct qeth_card *card)
+{
+ int ret;
+
+ spin_lock_irq(get_ccwdev_lock(CARD_RDEV(card)));
+ ret = __qeth_issue_next_read(card);
+ spin_unlock_irq(get_ccwdev_lock(CARD_RDEV(card)));
+
+ return ret;
+}
+
static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card)
{
struct qeth_reply *reply;
@@ -708,11 +718,8 @@ static int qeth_check_idx_response(struct qeth_card *card,
QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
if ((buffer[2] & 0xc0) == 0xc0) {
- QETH_DBF_MESSAGE(2, "received an IDX TERMINATE "
- "with cause code 0x%02x%s\n",
- buffer[4],
- ((buffer[4] == 0x22) ?
- " -- try another portname" : ""));
+ QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n",
+ buffer[4]);
QETH_CARD_TEXT(card, 2, "ckidxres");
QETH_CARD_TEXT(card, 2, " idxterm");
QETH_CARD_TEXT_(card, 2, " rc%d", -EIO);
@@ -960,7 +967,7 @@ void qeth_clear_thread_running_bit(struct qeth_card *card, unsigned long thread)
spin_lock_irqsave(&card->thread_mask_lock, flags);
card->thread_running_mask &= ~thread;
spin_unlock_irqrestore(&card->thread_mask_lock, flags);
- wake_up(&card->wait_q);
+ wake_up_all(&card->wait_q);
}
EXPORT_SYMBOL_GPL(qeth_clear_thread_running_bit);
@@ -1164,6 +1171,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
}
rc = qeth_get_problem(cdev, irb);
if (rc) {
+ card->read_or_write_problem = 1;
qeth_clear_ipacmd_list(card);
qeth_schedule_recovery(card);
goto out;
@@ -1182,7 +1190,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
return;
if (channel == &card->read &&
channel->state == CH_STATE_UP)
- qeth_issue_next_read(card);
+ __qeth_issue_next_read(card);
iob = channel->iob;
index = channel->buf_no;
@@ -2134,24 +2142,25 @@ int qeth_send_control_data(struct qeth_card *card, int len,
}
reply->callback = reply_cb;
reply->param = reply_param;
- if (card->state == CARD_STATE_DOWN)
- reply->seqno = QETH_IDX_COMMAND_SEQNO;
- else
- reply->seqno = card->seqno.ipa++;
+
init_waitqueue_head(&reply->wait_q);
- spin_lock_irqsave(&card->lock, flags);
- list_add_tail(&reply->list, &card->cmd_waiter_list);
- spin_unlock_irqrestore(&card->lock, flags);
while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ;
- qeth_prepare_control_data(card, len, iob);
if (IS_IPA(iob->data)) {
cmd = __ipa_cmd(iob);
+ cmd->hdr.seqno = card->seqno.ipa++;
+ reply->seqno = cmd->hdr.seqno;
event_timeout = QETH_IPA_TIMEOUT;
} else {
+ reply->seqno = QETH_IDX_COMMAND_SEQNO;
event_timeout = QETH_TIMEOUT;
}
+ qeth_prepare_control_data(card, len, iob);
+
+ spin_lock_irqsave(&card->lock, flags);
+ list_add_tail(&reply->list, &card->cmd_waiter_list);
+ spin_unlock_irqrestore(&card->lock, flags);
timeout = jiffies + event_timeout;
@@ -2837,7 +2846,8 @@ static int qeth_init_input_buffer(struct qeth_card *card,
int i;
if ((card->options.cq == QETH_CQ_ENABLED) && (!buf->rx_skb)) {
- buf->rx_skb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
+ buf->rx_skb = netdev_alloc_skb(card->dev,
+ QETH_RX_PULL_LEN + ETH_HLEN);
if (!buf->rx_skb)
return 1;
}
@@ -2874,8 +2884,8 @@ int qeth_init_qdio_queues(struct qeth_card *card)
QETH_DBF_TEXT(SETUP, 2, "initqdqs");
/* inbound queue */
- qdio_reset_buffers(card->qdio.in_q->qdio_bufs,
- QDIO_MAX_BUFFERS_PER_Q);
+ qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
+ memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_initialize_working_pool_list(card);
/*give only as many buffers to hardware as we have buffer pool entries*/
for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i)
@@ -2933,7 +2943,7 @@ static void qeth_fill_ipacmd_header(struct qeth_card *card,
memset(cmd, 0, sizeof(struct qeth_ipa_cmd));
cmd->hdr.command = command;
cmd->hdr.initiator = IPA_CMD_INITIATOR_HOST;
- cmd->hdr.seqno = card->seqno.ipa;
+ /* cmd->hdr.seqno is set by qeth_send_control_data() */
cmd->hdr.adapter_type = qeth_get_ipa_adp_type(card->info.link_type);
cmd->hdr.rel_adapter_no = (__u8) card->info.portno;
if (card->options.layer2)
@@ -2950,12 +2960,10 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
enum qeth_ipa_cmds ipacmd, enum qeth_prot_versions prot)
{
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
iob = qeth_get_buffer(&card->write);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
- qeth_fill_ipacmd_header(card, cmd, ipacmd, prot);
+ qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot);
} else {
dev_warn(&card->gdev->dev,
"The qeth driver ran out of channel command buffers\n");
@@ -3066,7 +3074,7 @@ static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETADAPTERPARMS,
QETH_PROT_IPV4);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.hdr.cmdlength = cmdlen;
cmd->data.setadapterparms.hdr.command_code = command;
cmd->data.setadapterparms.hdr.used_total = 1;
@@ -3208,7 +3216,7 @@ static int qeth_query_setdiagass(struct qeth_card *card)
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 16;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_QUERY;
return qeth_send_ipa_cmd(card, iob, qeth_query_setdiagass_cb, NULL);
@@ -3261,7 +3269,7 @@ int qeth_hw_trap(struct qeth_card *card, enum qeth_diags_trap_action action)
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 80;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRAP;
cmd->data.diagass.type = 1;
@@ -3898,10 +3906,12 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags);
int qeth_get_elements_no(struct qeth_card *card,
struct sk_buff *skb, int extra_elems, int data_offset)
{
- int elements = qeth_get_elements_for_range(
- (addr_t)skb->data + data_offset,
- (addr_t)skb->data + skb_headlen(skb)) +
- qeth_get_elements_for_frags(skb);
+ addr_t end = (addr_t)skb->data + skb_headlen(skb);
+ int elements = qeth_get_elements_for_frags(skb);
+ addr_t start = (addr_t)skb->data + data_offset;
+
+ if (start != end)
+ elements += qeth_get_elements_for_range(start, end);
if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
QETH_DBF_MESSAGE(2, "Invalid size of IP packet "
@@ -4237,7 +4247,7 @@ void qeth_setadp_promisc_mode(struct qeth_card *card)
sizeof(struct qeth_ipacmd_setadpparms_hdr) + 8);
if (!iob)
return;
- cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.data.mode = mode;
qeth_send_ipa_cmd(card, iob, qeth_setadp_promisc_mode_cb, NULL);
}
@@ -4304,7 +4314,7 @@ int qeth_setadpparms_change_macaddr(struct qeth_card *card)
sizeof(struct qeth_change_addr));
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.data.change_addr.cmd = CHANGE_ADDR_READ_MAC;
cmd->data.setadapterparms.data.change_addr.addr_size = ETH_ALEN;
ether_addr_copy(cmd->data.setadapterparms.data.change_addr.addr,
@@ -4419,7 +4429,7 @@ static int qeth_setadpparms_set_access_ctrl(struct qeth_card *card,
sizeof(struct qeth_set_access_ctrl));
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl;
access_ctrl_req->subcmd_code = isolation;
@@ -4665,7 +4675,7 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
rc = -ENOMEM;
goto out;
}
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len);
rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
qeth_snmp_command_cb, (void *)&qinfo);
@@ -4750,7 +4760,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
rc = -ENOMEM;
goto out_free;
}
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
oat_req = &cmd->data.setadapterparms.data.query_oat;
oat_req->subcmd_code = oat_data.command;
@@ -5084,8 +5094,6 @@ static void qeth_core_free_card(struct qeth_card *card)
QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
qeth_clean_channel(&card->read);
qeth_clean_channel(&card->write);
- if (card->dev)
- free_netdev(card->dev);
qeth_free_qdio_buffers(card);
unregister_service_level(&card->qeth_service_level);
kfree(card);
@@ -5327,7 +5335,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
} else {
unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
- skb = dev_alloc_skb(linear + headroom);
+ skb = napi_alloc_skb(&card->napi, linear + headroom);
}
if (!skb)
goto no_mem;
@@ -5491,7 +5499,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETASSPARMS, prot);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setassparms.hdr.assist_no = ipa_func;
cmd->data.setassparms.hdr.length = 8 + len;
cmd->data.setassparms.hdr.command_code = cmd_code;
@@ -5514,7 +5522,7 @@ int qeth_send_setassparms(struct qeth_card *card,
QETH_CARD_TEXT(card, 4, "sendassp");
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
if (len <= sizeof(__u32))
cmd->data.setassparms.data.flags_32bit = (__u32) data;
else /* (len > sizeof(__u32)) */
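All the (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE) casts above are funneled through a single __ipa_cmd() accessor. Its definition is not part of these hunks; a plausible shape, assuming the qeth_cmd_buffer declarations from qeth_core.h, is simply:

/* presumed accessor; the real helper lives in qeth_core.h */
static inline struct qeth_ipa_cmd *__ipa_cmd(struct qeth_cmd_buffer *iob)
{
        return (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
}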
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 7f236440483f..50a313806dde 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -108,7 +108,7 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac,
iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setdelmac.mac_length = ETH_ALEN;
ether_addr_copy(cmd->data.setdelmac.mac, mac);
return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob,
@@ -305,7 +305,7 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setdelvlan.vlan_id = i;
return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob,
qeth_l2_send_setdelvlan_cb, NULL));
@@ -437,10 +437,8 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
*done = 1;
break;
}
- skb->dev = card->dev;
switch (hdr->hdr.l2.id) {
case QETH_HEADER_TYPE_LAYER2:
- skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
if ((card->dev->features & NETIF_F_RXCSUM)
&& ((hdr->hdr.l2.flags[1] &
@@ -915,8 +913,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
qeth_l2_set_offline(cgdev);
if (card->dev) {
- netif_napi_del(&card->napi);
unregister_netdev(card->dev);
+ free_netdev(card->dev);
card->dev = NULL;
}
return;
@@ -975,6 +973,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
return -ENODEV;
card->dev->ml_priv = card;
+ card->dev->priv_flags |= IFF_UNICAST_FLT;
card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
card->dev->mtu = card->info.initial_mtu;
card->dev->min_mtu = 64;
@@ -991,9 +990,16 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
card->dev->features |= NETIF_F_VLAN_CHALLENGED;
else
card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ if (card->info.type != QETH_CARD_TYPE_OSN &&
+ card->info.type != QETH_CARD_TYPE_IQD) {
+ card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ card->dev->needed_headroom = sizeof(struct qeth_hdr);
+ card->dev->hw_features |= NETIF_F_SG;
+ card->dev->vlan_features |= NETIF_F_SG;
+ }
+
if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
- card->dev->hw_features = NETIF_F_SG;
- card->dev->vlan_features = NETIF_F_SG;
card->dev->features |= NETIF_F_SG;
/* OSA 3S and earlier has no RX/TX support */
if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
@@ -1005,11 +1011,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
card->dev->vlan_features |= NETIF_F_RXCSUM;
}
}
- if (card->info.type != QETH_CARD_TYPE_OSN &&
- card->info.type != QETH_CARD_TYPE_IQD) {
- card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- card->dev->needed_headroom = sizeof(struct qeth_hdr);
- }
card->info.broadcast_capable = 1;
qeth_l2_request_initial_mac(card);
@@ -1086,7 +1087,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
qeth_l2_setup_bridgeport_attrs(card);
card->state = CARD_STATE_HARDSETUP;
- memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_print_status_message(card);
/* softsetup */
@@ -1374,7 +1374,6 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
{
struct qeth_cmd_buffer *iob;
struct qeth_card *card;
- int rc;
if (!dev)
return -ENODEV;
@@ -1385,9 +1384,8 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
if (!qeth_card_hw_is_reachable(card))
return -ENODEV;
iob = qeth_wait_for_buffer(&card->write);
- memcpy(iob->data+IPA_PDU_HEADER_SIZE, data, data_len);
- rc = qeth_osn_send_ipa_cmd(card, iob, data_len);
- return rc;
+ memcpy(__ipa_cmd(iob), data, data_len);
+ return qeth_osn_send_ipa_cmd(card, iob, data_len);
}
EXPORT_SYMBOL(qeth_osn_assist);
@@ -1764,7 +1762,7 @@ static struct qeth_cmd_buffer *qeth_sbp_build_cmd(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, ipa_cmd, 0);
if (!iob)
return iob;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.sbp.hdr.cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
cmd_length;
cmd->data.sbp.hdr.command_code = sbp_cmd;
@@ -2129,7 +2127,7 @@ static int qeth_l2_vnicc_request(struct qeth_card *card,
return -ENOMEM;
/* create header for request */
- cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
req = &cmd->data.vnicc;
/* create sub command header for request */
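Note the feature-flag hunks above switch from '=' to '|=' when adding NETIF_F_* bits, so earlier feature setup is extended rather than silently overwritten. The idiom in isolation:

#include <linux/netdevice.h>

static void example_setup_features(struct net_device *dev)
{
        dev->hw_features |= NETIF_F_SG;         /* extends existing bits */
        dev->vlan_features |= NETIF_F_SG;
        /* dev->hw_features = NETIF_F_SG; would discard bits set earlier */
}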
diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index bdd45f4dcace..87659cfc9066 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -15,21 +15,26 @@
#define QETH_SNIFF_AVAIL 0x0008
+enum qeth_ip_types {
+ QETH_IP_TYPE_NORMAL,
+ QETH_IP_TYPE_VIPA,
+ QETH_IP_TYPE_RXIP,
+};
+
struct qeth_ipaddr {
struct hlist_node hnode;
enum qeth_ip_types type;
- enum qeth_ipa_setdelip_flags set_flags;
- enum qeth_ipa_setdelip_flags del_flags;
+ unsigned char mac[ETH_ALEN];
u8 is_multicast:1;
u8 in_progress:1;
u8 disp_flag:2;
+ u8 ipato:1; /* ucast only */
/* is changed only for normal ip addresses
* for non-normal addresses it always is 1
*/
int ref_counter;
enum qeth_prot_versions proto;
- unsigned char mac[ETH_ALEN];
union {
struct {
unsigned int addr;
@@ -40,8 +45,50 @@ struct qeth_ipaddr {
unsigned int pfxlen;
} a6;
} u;
-
};
+
+static inline void qeth_l3_init_ipaddr(struct qeth_ipaddr *addr,
+ enum qeth_ip_types type,
+ enum qeth_prot_versions proto)
+{
+ memset(addr, 0, sizeof(*addr));
+ addr->type = type;
+ addr->proto = proto;
+ addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
+}
+
+static inline bool qeth_l3_addr_match_ip(struct qeth_ipaddr *a1,
+ struct qeth_ipaddr *a2)
+{
+ if (a1->proto != a2->proto)
+ return false;
+ if (a1->proto == QETH_PROT_IPV6)
+ return ipv6_addr_equal(&a1->u.a6.addr, &a2->u.a6.addr);
+ return a1->u.a4.addr == a2->u.a4.addr;
+}
+
+static inline bool qeth_l3_addr_match_all(struct qeth_ipaddr *a1,
+ struct qeth_ipaddr *a2)
+{
+ /* Assumes that the pair was obtained via qeth_l3_addr_find_by_ip(),
+ * so 'proto' and 'addr' match for sure.
+ *
+ * For ucast:
+ * - 'mac' is always 0.
+ * - 'mask'/'pfxlen' for RXIP/VIPA is always 0. For NORMAL, matching
+ * values are required to avoid mixups in takeover eligibility.
+ *
+ * For mcast,
+ * - 'mac' is mapped from the IP, and thus always matches.
+ * - 'mask'/'pfxlen' is always 0.
+ */
+ if (a1->type != a2->type)
+ return false;
+ if (a1->proto == QETH_PROT_IPV6)
+ return a1->u.a6.pfxlen == a2->u.a6.pfxlen;
+ return a1->u.a4.mask == a2->u.a4.mask;
+}
+
static inline u64 qeth_l3_ipaddr_hash(struct qeth_ipaddr *addr)
{
u64 ret = 0;
@@ -77,15 +124,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *, struct qeth_ipato_entry *);
int qeth_l3_del_ipato_entry(struct qeth_card *card,
enum qeth_prot_versions proto, u8 *addr,
int mask_bits);
-int qeth_l3_add_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *);
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr);
-int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr);
void qeth_l3_update_ipato(struct qeth_card *card);
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
-int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
-int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add);
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+ enum qeth_ip_types type,
+ enum qeth_prot_versions proto);
#endif /* __QETH_L3_H__ */
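Typical use of the split matchers above, mirroring how qeth_l3_delete_ip() combines them: locate a candidate purely by protocol and IP, then demand a full match before treating it as the same registration. A sketch, assuming the qeth_l3 declarations are in scope (qeth_l3_find_addr_by_ip() is the static lookup defined in qeth_l3_main.c below):

static struct qeth_ipaddr *example_lookup(struct qeth_card *card,
                                          struct qeth_ipaddr *query)
{
        struct qeth_ipaddr *addr = qeth_l3_find_addr_by_ip(card, query);

        /* same IP with a different type/mask is not the same entry */
        if (addr && qeth_l3_addr_match_all(addr, query))
                return addr;
        return NULL;
}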
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b0c888e86cd4..c1a16a74aa83 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -67,6 +67,33 @@ void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr,
qeth_l3_ipaddr6_to_string(addr, buf);
}
+static struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions prot)
+{
+ struct qeth_ipaddr *addr = kmalloc(sizeof(*addr), GFP_ATOMIC);
+
+ if (addr)
+ qeth_l3_init_ipaddr(addr, QETH_IP_TYPE_NORMAL, prot);
+ return addr;
+}
+
+static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card,
+ struct qeth_ipaddr *query)
+{
+ u64 key = qeth_l3_ipaddr_hash(query);
+ struct qeth_ipaddr *addr;
+
+ if (query->is_multicast) {
+ hash_for_each_possible(card->ip_mc_htable, addr, hnode, key)
+ if (qeth_l3_addr_match_ip(addr, query))
+ return addr;
+ } else {
+ hash_for_each_possible(card->ip_htable, addr, hnode, key)
+ if (qeth_l3_addr_match_ip(addr, query))
+ return addr;
+ }
+ return NULL;
+}
+
static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len)
{
int i, j;
@@ -120,40 +147,18 @@ static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
return rc;
}
-inline int
-qeth_l3_ipaddrs_is_equal(struct qeth_ipaddr *addr1, struct qeth_ipaddr *addr2)
-{
- return addr1->proto == addr2->proto &&
- !memcmp(&addr1->u, &addr2->u, sizeof(addr1->u)) &&
- ether_addr_equal_64bits(addr1->mac, addr2->mac);
-}
-
-static struct qeth_ipaddr *
-qeth_l3_ip_from_hash(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
-{
- struct qeth_ipaddr *addr;
-
- if (tmp_addr->is_multicast) {
- hash_for_each_possible(card->ip_mc_htable, addr,
- hnode, qeth_l3_ipaddr_hash(tmp_addr))
- if (qeth_l3_ipaddrs_is_equal(tmp_addr, addr))
- return addr;
- } else {
- hash_for_each_possible(card->ip_htable, addr,
- hnode, qeth_l3_ipaddr_hash(tmp_addr))
- if (qeth_l3_ipaddrs_is_equal(tmp_addr, addr))
- return addr;
- }
-
- return NULL;
-}
-
-int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_delete_ip(struct qeth_card *card,
+ struct qeth_ipaddr *tmp_addr)
{
int rc = 0;
struct qeth_ipaddr *addr;
- QETH_CARD_TEXT(card, 4, "delip");
+ if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+ QETH_CARD_TEXT(card, 2, "delrxip");
+ else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+ QETH_CARD_TEXT(card, 2, "delvipa");
+ else
+ QETH_CARD_TEXT(card, 2, "delip");
if (tmp_addr->proto == QETH_PROT_IPV4)
QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
@@ -162,23 +167,18 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
QETH_CARD_HEX(card, 4, ((char *)&tmp_addr->u.a6.addr) + 8, 8);
}
- addr = qeth_l3_ip_from_hash(card, tmp_addr);
- if (!addr)
+ addr = qeth_l3_find_addr_by_ip(card, tmp_addr);
+ if (!addr || !qeth_l3_addr_match_all(addr, tmp_addr))
return -ENOENT;
addr->ref_counter--;
- if (addr->ref_counter > 0 && (addr->type == QETH_IP_TYPE_NORMAL ||
- addr->type == QETH_IP_TYPE_RXIP))
+ if (addr->type == QETH_IP_TYPE_NORMAL && addr->ref_counter > 0)
return rc;
if (addr->in_progress)
return -EINPROGRESS;
- if (!qeth_card_hw_is_reachable(card)) {
- addr->disp_flag = QETH_DISP_ADDR_DELETE;
- return 0;
- }
-
- rc = qeth_l3_deregister_addr_entry(card, addr);
+ if (qeth_card_hw_is_reachable(card))
+ rc = qeth_l3_deregister_addr_entry(card, addr);
hash_del(&addr->hnode);
kfree(addr);
@@ -186,12 +186,18 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
return rc;
}
-int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
{
int rc = 0;
struct qeth_ipaddr *addr;
+ char buf[40];
- QETH_CARD_TEXT(card, 4, "addip");
+ if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+ QETH_CARD_TEXT(card, 2, "addrxip");
+ else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+ QETH_CARD_TEXT(card, 2, "addvipa");
+ else
+ QETH_CARD_TEXT(card, 2, "addip");
if (tmp_addr->proto == QETH_PROT_IPV4)
QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
@@ -200,8 +206,20 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
QETH_CARD_HEX(card, 4, ((char *)&tmp_addr->u.a6.addr) + 8, 8);
}
- addr = qeth_l3_ip_from_hash(card, tmp_addr);
- if (!addr) {
+ addr = qeth_l3_find_addr_by_ip(card, tmp_addr);
+ if (addr) {
+ if (tmp_addr->type != QETH_IP_TYPE_NORMAL)
+ return -EADDRINUSE;
+ if (qeth_l3_addr_match_all(addr, tmp_addr)) {
+ addr->ref_counter++;
+ return 0;
+ }
+ qeth_l3_ipaddr_to_string(tmp_addr->proto, (u8 *)&tmp_addr->u,
+ buf);
+ dev_warn(&card->gdev->dev,
+ "Registering IP address %s failed\n", buf);
+ return -EADDRINUSE;
+ } else {
addr = qeth_l3_get_addr_buffer(tmp_addr->proto);
if (!addr)
return -ENOMEM;
@@ -211,7 +229,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
QETH_CARD_TEXT(card, 2, "tkovaddr");
- addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+ addr->ipato = 1;
}
hash_add(card->ip_htable, &addr->hnode,
qeth_l3_ipaddr_hash(addr));
@@ -241,39 +259,18 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
(rc == IPA_RC_LAN_OFFLINE)) {
addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
if (addr->ref_counter < 1) {
- qeth_l3_delete_ip(card, addr);
+ qeth_l3_deregister_addr_entry(card, addr);
+ hash_del(&addr->hnode);
kfree(addr);
}
} else {
hash_del(&addr->hnode);
kfree(addr);
}
- } else {
- if (addr->type == QETH_IP_TYPE_NORMAL ||
- addr->type == QETH_IP_TYPE_RXIP)
- addr->ref_counter++;
}
-
return rc;
}
-
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(
- enum qeth_prot_versions prot)
-{
- struct qeth_ipaddr *addr;
-
- addr = kzalloc(sizeof(struct qeth_ipaddr), GFP_ATOMIC);
- if (!addr)
- return NULL;
-
- addr->type = QETH_IP_TYPE_NORMAL;
- addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
- addr->proto = prot;
-
- return addr;
-}
-
static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
{
struct qeth_ipaddr *addr;
@@ -321,11 +318,7 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
spin_lock_bh(&card->ip_lock);
hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
- if (addr->disp_flag == QETH_DISP_ADDR_DELETE) {
- qeth_l3_deregister_addr_entry(card, addr);
- hash_del(&addr->hnode);
- kfree(addr);
- } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
+ if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
if (addr->proto == QETH_PROT_IPV4) {
addr->in_progress = 1;
spin_unlock_bh(&card->ip_lock);
@@ -362,7 +355,7 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
ether_addr_copy(cmd->data.setdelipm.mac, addr->mac);
if (addr->proto == QETH_PROT_IPV6)
memcpy(cmd->data.setdelipm.ip6, &addr->u.a6.addr,
@@ -389,21 +382,38 @@ static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len)
}
}
+static u32 qeth_l3_get_setdelip_flags(struct qeth_ipaddr *addr, bool set)
+{
+ switch (addr->type) {
+ case QETH_IP_TYPE_RXIP:
+ return (set) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+ case QETH_IP_TYPE_VIPA:
+ return (set) ? QETH_IPA_SETIP_VIPA_FLAG :
+ QETH_IPA_DELIP_VIPA_FLAG;
+ default:
+ return (set && addr->ipato) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+ }
+}
+
static int qeth_l3_send_setdelip(struct qeth_card *card,
- struct qeth_ipaddr *addr, int ipacmd, unsigned int flags)
+ struct qeth_ipaddr *addr,
+ enum qeth_ipa_cmds ipacmd)
{
- int rc;
struct qeth_cmd_buffer *iob;
struct qeth_ipa_cmd *cmd;
__u8 netmask[16];
+ u32 flags;
QETH_CARD_TEXT(card, 4, "setdelip");
- QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
+
+ flags = qeth_l3_get_setdelip_flags(addr, ipacmd == IPA_CMD_SETIP);
+ QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
+
if (addr->proto == QETH_PROT_IPV6) {
memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr,
sizeof(struct in6_addr));
@@ -417,9 +427,7 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
cmd->data.setdelip4.flags = flags;
}
- rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
-
- return rc;
+ return qeth_send_ipa_cmd(card, iob, NULL, NULL);
}
static int qeth_l3_send_setrouting(struct qeth_card *card,
@@ -433,7 +441,7 @@ static int qeth_l3_send_setrouting(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETRTG, prot);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setrtg.type = (type);
rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
@@ -535,10 +543,7 @@ void qeth_l3_update_ipato(struct qeth_card *card)
hash_for_each(card->ip_htable, i, addr, hnode) {
if (addr->type != QETH_IP_TYPE_NORMAL)
continue;
- if (qeth_l3_is_addr_covered_by_ipato(card, addr))
- addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
- else
- addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+ addr->ipato = qeth_l3_is_addr_covered_by_ipato(card, addr);
}
}
@@ -616,142 +621,39 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card,
return rc;
}
-/*
- * VIPA related functions
- */
-int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+ enum qeth_ip_types type,
+ enum qeth_prot_versions proto)
{
- struct qeth_ipaddr *ipaddr;
+ struct qeth_ipaddr addr;
int rc;
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "addvipa4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "addvipa6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_VIPA;
- ipaddr->set_flags = QETH_IPA_SETIP_VIPA_FLAG;
- ipaddr->del_flags = QETH_IPA_DELIP_VIPA_FLAG;
- } else
- return -ENOMEM;
-
- spin_lock_bh(&card->ip_lock);
-
- if (qeth_l3_ip_from_hash(card, ipaddr))
- rc = -EEXIST;
+ qeth_l3_init_ipaddr(&addr, type, proto);
+ if (proto == QETH_PROT_IPV4)
+ memcpy(&addr.u.a4.addr, ip, 4);
else
- rc = qeth_l3_add_ip(card, ipaddr);
-
- spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
-
- return rc;
-}
-
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
-{
- struct qeth_ipaddr *ipaddr;
- int rc;
-
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "delvipa4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "delvipa6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_VIPA;
- } else
- return -ENOMEM;
+ memcpy(&addr.u.a6.addr, ip, 16);
spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_delete_ip(card, ipaddr);
+ rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
return rc;
}
-/*
- * proxy ARP related functions
- */
-int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add)
{
- struct qeth_ipaddr *ipaddr;
- int rc;
-
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "addrxip4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "addrxip6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
+ struct qeth_ipaddr addr;
+ int rc, i;
- ipaddr->type = QETH_IP_TYPE_RXIP;
- ipaddr->set_flags = QETH_IPA_SETIP_TAKEOVER_FLAG;
- ipaddr->del_flags = 0;
- } else
- return -ENOMEM;
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+ addr.u.a6.addr.s6_addr[0] = 0xfe;
+ addr.u.a6.addr.s6_addr[1] = 0x80;
+ for (i = 0; i < 8; i++)
+ addr.u.a6.addr.s6_addr[8+i] = card->options.hsuid[i];
spin_lock_bh(&card->ip_lock);
-
- if (qeth_l3_ip_from_hash(card, ipaddr))
- rc = -EEXIST;
- else
- rc = qeth_l3_add_ip(card, ipaddr);
-
+ rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
-
- return rc;
-}
-
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
-{
- struct qeth_ipaddr *ipaddr;
- int rc;
-
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "delrxip4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "delrxip6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_RXIP;
- } else
- return -ENOMEM;
-
- spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_delete_ip(card, ipaddr);
- spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
return rc;
}
@@ -778,8 +680,7 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card,
if (addr->is_multicast)
rc = qeth_l3_send_setdelmc(card, addr, IPA_CMD_SETIPM);
else
- rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP,
- addr->set_flags);
+ rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP);
if (rc)
QETH_CARD_TEXT(card, 2, "failed");
} while ((--cnt > 0) && rc);
@@ -811,8 +712,7 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card,
if (addr->is_multicast)
rc = qeth_l3_send_setdelmc(card, addr, IPA_CMD_DELIPM);
else
- rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP,
- addr->del_flags);
+ rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP);
if (rc)
QETH_CARD_TEXT(card, 2, "failed");
@@ -1092,7 +992,7 @@ static int qeth_l3_iqd_read_initial_mac(struct qeth_card *card)
QETH_PROT_IPV6);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
*((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
card->info.unique_id;
@@ -1137,7 +1037,7 @@ static int qeth_l3_get_unique_id(struct qeth_card *card)
QETH_PROT_IPV6);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
*((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
card->info.unique_id;
@@ -1213,7 +1113,7 @@ qeth_diags_trace(struct qeth_card *card, enum qeth_diags_trace_cmds diags_cmd)
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 16;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRACE;
cmd->data.diagass.type = QETH_DIAGS_TYPE_HIPERSOCKET;
@@ -1239,8 +1139,9 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev)
tmp->u.a4.addr = be32_to_cpu(im4->multiaddr);
tmp->is_multicast = 1;
- ipm = qeth_l3_ip_from_hash(card, tmp);
+ ipm = qeth_l3_find_addr_by_ip(card, tmp);
if (ipm) {
+ /* for mcast, by-IP match means full match */
ipm->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
} else {
ipm = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
@@ -1319,8 +1220,9 @@ static void qeth_l3_add_mc6_to_hash(struct qeth_card *card,
sizeof(struct in6_addr));
tmp->is_multicast = 1;
- ipm = qeth_l3_ip_from_hash(card, tmp);
+ ipm = qeth_l3_find_addr_by_ip(card, tmp);
if (ipm) {
+ /* for mcast, by-IP match means full match */
ipm->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
continue;
}
@@ -1520,30 +1422,24 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
card->stats.multicast++;
- skb->pkt_type = PACKET_MULTICAST;
break;
case QETH_CAST_BROADCAST:
ether_addr_copy(tg_addr, card->dev->broadcast);
card->stats.multicast++;
- skb->pkt_type = PACKET_BROADCAST;
break;
- case QETH_CAST_UNICAST:
- case QETH_CAST_ANYCAST:
- case QETH_CAST_NOCAST:
default:
if (card->options.sniffer)
skb->pkt_type = PACKET_OTHERHOST;
- else
- skb->pkt_type = PACKET_HOST;
ether_addr_copy(tg_addr, card->dev->dev_addr);
}
+
if (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
card->dev->header_ops->create(skb, card->dev, prot,
tg_addr, &hdr->hdr.l3.next_hop.rx.src_mac,
- card->dev->addr_len);
+ skb->len);
else
card->dev->header_ops->create(skb, card->dev, prot,
- tg_addr, "FAKELL", card->dev->addr_len);
+ tg_addr, "FAKELL", skb->len);
}
skb->protocol = eth_type_trans(skb, card->dev);
@@ -1590,20 +1486,16 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
*done = 1;
break;
}
- skb->dev = card->dev;
switch (hdr->hdr.l3.id) {
case QETH_HEADER_TYPE_LAYER3:
magic = *(__u16 *)skb->data;
if ((card->info.type == QETH_CARD_TYPE_IQD) &&
(magic == ETH_P_AF_IUCV)) {
skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
- skb->pkt_type = PACKET_HOST;
- skb->mac_header = NET_SKB_PAD;
- skb->dev = card->dev;
len = skb->len;
card->dev->header_ops->create(skb, card->dev, 0,
- card->dev->dev_addr, "FAKELL",
- card->dev->addr_len);
+ card->dev->dev_addr, "FAKELL", len);
+ skb_reset_mac_header(skb);
netif_receive_skb(skb);
} else {
qeth_l3_rebuild_skb(card, skb, hdr);
@@ -1612,7 +1504,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
}
break;
case QETH_HEADER_TYPE_LAYER2: /* for HiperSockets sniffer */
- skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
len = skb->len;
netif_receive_skb(skb);
@@ -1631,69 +1522,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
return work_done;
}
-static int qeth_l3_verify_vlan_dev(struct net_device *dev,
- struct qeth_card *card)
-{
- int rc = 0;
- u16 vid;
-
- for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
- struct net_device *netdev;
-
- rcu_read_lock();
- netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
- vid);
- rcu_read_unlock();
- if (netdev == dev) {
- rc = QETH_VLAN_CARD;
- break;
- }
- }
-
- if (rc && !(vlan_dev_real_dev(dev)->ml_priv == (void *)card))
- return 0;
-
- return rc;
-}
-
-static int qeth_l3_verify_dev(struct net_device *dev)
-{
- struct qeth_card *card;
- int rc = 0;
- unsigned long flags;
-
- read_lock_irqsave(&qeth_core_card_list.rwlock, flags);
- list_for_each_entry(card, &qeth_core_card_list.list, list) {
- if (card->dev == dev) {
- rc = QETH_REAL_CARD;
- break;
- }
- rc = qeth_l3_verify_vlan_dev(dev, card);
- if (rc)
- break;
- }
- read_unlock_irqrestore(&qeth_core_card_list.rwlock, flags);
-
- return rc;
-}
-
-static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
-{
- struct qeth_card *card = NULL;
- int rc;
-
- rc = qeth_l3_verify_dev(dev);
- if (rc == QETH_REAL_CARD)
- card = dev->ml_priv;
- else if (rc == QETH_VLAN_CARD)
- card = vlan_dev_real_dev(dev)->ml_priv;
- if (card && card->options.layer2)
- card = NULL;
- if (card)
- QETH_CARD_TEXT_(card, 4, "%d", rc);
- return card ;
-}
-
static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
{
QETH_DBF_TEXT(SETUP, 2, "stopcard");
@@ -2022,7 +1850,7 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
prot);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setassparms.data.query_arp.request_bits = 0x000F;
cmd->data.setassparms.data.query_arp.reply_bits = 0;
cmd->data.setassparms.data.query_arp.no_entries = 0;
@@ -2450,11 +2278,12 @@ static void qeth_tso_fill_header(struct qeth_card *card,
static int qeth_l3_get_elements_no_tso(struct qeth_card *card,
struct sk_buff *skb, int extra_elems)
{
- addr_t tcpdptr = (addr_t)tcp_hdr(skb) + tcp_hdrlen(skb);
- int elements = qeth_get_elements_for_range(
- tcpdptr,
- (addr_t)skb->data + skb_headlen(skb)) +
- qeth_get_elements_for_frags(skb);
+ addr_t start = (addr_t)tcp_hdr(skb) + tcp_hdrlen(skb);
+ addr_t end = (addr_t)skb->data + skb_headlen(skb);
+ int elements = qeth_get_elements_for_frags(skb);
+
+ if (start != end)
+ elements += qeth_get_elements_for_range(start, end);
if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
QETH_DBF_MESSAGE(2,
@@ -2802,14 +2631,16 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
card->dev->dev_id = card->info.unique_id &
0xffff;
+
+ card->dev->hw_features |= NETIF_F_SG;
+ card->dev->vlan_features |= NETIF_F_SG;
+
if (!card->info.guestlan) {
- card->dev->hw_features = NETIF_F_SG |
- NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
- NETIF_F_TSO;
- card->dev->vlan_features = NETIF_F_SG |
- NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
- NETIF_F_TSO;
card->dev->features |= NETIF_F_SG;
+ card->dev->hw_features |= NETIF_F_TSO |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+ card->dev->vlan_features |= NETIF_F_TSO |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
}
}
} else if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -2882,8 +2713,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
qeth_l3_set_offline(cgdev);
if (card->dev) {
- netif_napi_del(&card->napi);
unregister_netdev(card->dev);
+ free_netdev(card->dev);
card->dev = NULL;
}
@@ -2924,7 +2755,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
card->info.hwtrap = 0;
card->state = CARD_STATE_HARDSETUP;
- memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_print_status_message(card);
/* softsetup */
@@ -3147,13 +2977,43 @@ struct qeth_discipline qeth_l3_discipline = {
};
EXPORT_SYMBOL_GPL(qeth_l3_discipline);
+static int qeth_l3_handle_ip_event(struct qeth_card *card,
+ struct qeth_ipaddr *addr,
+ unsigned long event)
+{
+ switch (event) {
+ case NETDEV_UP:
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_add_ip(card, addr);
+ spin_unlock_bh(&card->ip_lock);
+ return NOTIFY_OK;
+ case NETDEV_DOWN:
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_delete_ip(card, addr);
+ spin_unlock_bh(&card->ip_lock);
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
+{
+ if (is_vlan_dev(dev))
+ dev = vlan_dev_real_dev(dev);
+ if (dev->netdev_ops == &qeth_l3_osa_netdev_ops ||
+ dev->netdev_ops == &qeth_l3_netdev_ops)
+ return (struct qeth_card *) dev->ml_priv;
+ return NULL;
+}
+
static int qeth_l3_ip_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct net_device *dev = (struct net_device *)ifa->ifa_dev->dev;
- struct qeth_ipaddr *addr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ struct qeth_ipaddr addr;
struct qeth_card *card;
if (dev_net(dev) != &init_net)
@@ -3164,29 +3024,11 @@ static int qeth_l3_ip_event(struct notifier_block *this,
return NOTIFY_DONE;
QETH_CARD_TEXT(card, 3, "ipevent");
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
- if (addr) {
- addr->u.a4.addr = be32_to_cpu(ifa->ifa_address);
- addr->u.a4.mask = be32_to_cpu(ifa->ifa_mask);
- addr->type = QETH_IP_TYPE_NORMAL;
- } else
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_add_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- case NETDEV_DOWN:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_delete_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- }
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4);
+ addr.u.a4.addr = be32_to_cpu(ifa->ifa_address);
+ addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask);
- kfree(addr);
- return NOTIFY_DONE;
+ return qeth_l3_handle_ip_event(card, &addr, event);
}
static struct notifier_block qeth_l3_ip_notifier = {
@@ -3198,8 +3040,8 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct net_device *dev = (struct net_device *)ifa->idev->dev;
- struct qeth_ipaddr *addr;
+ struct net_device *dev = ifa->idev->dev;
+ struct qeth_ipaddr addr;
struct qeth_card *card;
card = qeth_l3_get_card_from_dev(dev);
@@ -3209,29 +3051,11 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
if (!qeth_is_supported(card, IPA_IPV6))
return NOTIFY_DONE;
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
- if (addr) {
- memcpy(&addr->u.a6.addr, &ifa->addr, sizeof(struct in6_addr));
- addr->u.a6.pfxlen = ifa->prefix_len;
- addr->type = QETH_IP_TYPE_NORMAL;
- } else
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_add_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- case NETDEV_DOWN:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_delete_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- }
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+ addr.u.a6.addr = ifa->addr;
+ addr.u.a6.pfxlen = ifa->prefix_len;
- kfree(addr);
- return NOTIFY_DONE;
+ return qeth_l3_handle_ip_event(card, &addr, event);
}
static struct notifier_block qeth_l3_ip6_notifier = {
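Both notifier paths now fill a stack-allocated qeth_ipaddr and defer to the shared NETDEV_UP/NETDEV_DOWN handler, so no GFP_ATOMIC allocation is needed just to inspect an address event. A minimal sketch of that notifier shape, independent of qeth:

#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>

static int example_inet_event(struct notifier_block *nb,
                              unsigned long event, void *ptr)
{
        struct in_ifaddr *ifa = ptr;

        switch (event) {
        case NETDEV_UP:
        case NETDEV_DOWN:
                /* act on ifa->ifa_address / ifa->ifa_mask here */
                return NOTIFY_OK;
        default:
                return NOTIFY_DONE;
        }
}

static struct notifier_block example_inet_nb = {
        .notifier_call = example_inet_event,
};
/* registered via register_inetaddr_notifier(&example_inet_nb) */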
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index a645cfe66ddf..f61192a048f4 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -272,9 +272,8 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
- struct qeth_ipaddr *addr;
char *tmp;
- int rc, i;
+ int rc;
if (!card)
return -EINVAL;
@@ -293,25 +292,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
if (strlen(tmp) > 8)
return -EINVAL;
- if (card->options.hsuid[0]) {
+ if (card->options.hsuid[0])
/* delete old ip address */
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
- if (!addr)
- return -ENOMEM;
-
- addr->u.a6.addr.s6_addr32[0] = cpu_to_be32(0xfe800000);
- addr->u.a6.addr.s6_addr32[1] = 0x00000000;
- for (i = 8; i < 16; i++)
- addr->u.a6.addr.s6_addr[i] =
- card->options.hsuid[i - 8];
- addr->u.a6.pfxlen = 0;
- addr->type = QETH_IP_TYPE_NORMAL;
-
- spin_lock_bh(&card->ip_lock);
- qeth_l3_delete_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- kfree(addr);
- }
+ qeth_l3_modify_hsuid(card, false);
if (strlen(tmp) == 0) {
/* delete ip address only */
@@ -331,21 +314,7 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
if (card->dev)
memcpy(card->dev->perm_addr, card->options.hsuid, 9);
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
- if (addr != NULL) {
- addr->u.a6.addr.s6_addr32[0] = cpu_to_be32(0xfe800000);
- addr->u.a6.addr.s6_addr32[1] = 0x00000000;
- for (i = 8; i < 16; i++)
- addr->u.a6.addr.s6_addr[i] = card->options.hsuid[i - 8];
- addr->u.a6.pfxlen = 0;
- addr->type = QETH_IP_TYPE_NORMAL;
- } else
- return -ENOMEM;
-
- spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_add_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- kfree(addr);
+ rc = qeth_l3_modify_hsuid(card, true);
return rc ? rc : count;
}
@@ -767,7 +736,8 @@ static ssize_t qeth_l3_dev_vipa_add_store(const char *buf, size_t count,
mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_vipae(buf, proto, addr);
if (!rc)
- rc = qeth_l3_add_vipa(card, proto, addr);
+ rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+ QETH_IP_TYPE_VIPA, proto);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -796,7 +766,8 @@ static ssize_t qeth_l3_dev_vipa_del_store(const char *buf, size_t count,
mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_vipae(buf, proto, addr);
if (!rc)
- rc = qeth_l3_del_vipa(card, proto, addr);
+ rc = qeth_l3_modify_rxip_vipa(card, false, addr,
+ QETH_IP_TYPE_VIPA, proto);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -908,7 +879,8 @@ static ssize_t qeth_l3_dev_rxip_add_store(const char *buf, size_t count,
mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_rxipe(buf, proto, addr);
if (!rc)
- rc = qeth_l3_add_rxip(card, proto, addr);
+ rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+ QETH_IP_TYPE_RXIP, proto);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -937,7 +909,8 @@ static ssize_t qeth_l3_dev_rxip_del_store(const char *buf, size_t count,
mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_rxipe(buf, proto, addr);
if (!rc)
- rc = qeth_l3_del_rxip(card, proto, addr);
+ rc = qeth_l3_modify_rxip_vipa(card, false, addr,
+ QETH_IP_TYPE_RXIP, proto);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
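Note: the hsuid, vipa and rxip store handlers all delegate to new helpers
(qeth_l3_modify_hsuid(), qeth_l3_modify_rxip_vipa()) where a bool selects add
versus delete, so address-buffer allocation and ip_lock handling disappear
from sysfs code entirely. The underlying pattern, sketched with a hypothetical
helper name:

    /* one entry point; 'add' picks the direction and the ip_lock
     * critical section lives in exactly one place */
    static int modify_addr(struct qeth_card *card, struct qeth_ipaddr *addr,
                           bool add)
    {
            int rc;

            spin_lock_bh(&card->ip_lock);
            rc = add ? qeth_l3_add_ip(card, addr)
                     : qeth_l3_delete_ip(card, addr);
            spin_unlock_bh(&card->ip_lock);
            return rc;
    }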
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index ba2e0856d22c..8f5c1d7f751a 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -1297,6 +1297,9 @@ static int virtio_ccw_cio_notify(struct ccw_device *cdev, int event)
vcdev->device_lost = true;
rc = NOTIFY_DONE;
break;
+ case CIO_OPER:
+ rc = NOTIFY_OK;
+ break;
default:
rc = NOTIFY_DONE;
break;
@@ -1309,6 +1312,27 @@ static struct ccw_device_id virtio_ids[] = {
{},
};
+#ifdef CONFIG_PM_SLEEP
+static int virtio_ccw_freeze(struct ccw_device *cdev)
+{
+ struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+
+ return virtio_device_freeze(&vcdev->vdev);
+}
+
+static int virtio_ccw_restore(struct ccw_device *cdev)
+{
+ struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+ int ret;
+
+ ret = virtio_ccw_set_transport_rev(vcdev);
+ if (ret)
+ return ret;
+
+ return virtio_device_restore(&vcdev->vdev);
+}
+#endif
+
static struct ccw_driver virtio_ccw_driver = {
.driver = {
.owner = THIS_MODULE,
@@ -1321,6 +1345,11 @@ static struct ccw_driver virtio_ccw_driver = {
.set_online = virtio_ccw_online,
.notify = virtio_ccw_cio_notify,
.int_class = IRQIO_VIR,
+#ifdef CONFIG_PM_SLEEP
+ .freeze = virtio_ccw_freeze,
+ .thaw = virtio_ccw_restore,
+ .restore = virtio_ccw_restore,
+#endif
};
static int __init pure_hex(char **cp, unsigned int *val, int min_digit,
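Note: virtio-ccw gains guest suspend/resume support. Freeze hands off to
virtio_device_freeze(); thaw and restore both go through virtio_ccw_restore(),
which must renegotiate the transport revision before the virtio core rebuilds
device state. The ordering is the point, as annotated in this excerpt of the
restore path above:

    /* channel state was lost across the suspend, so re-run the
     * transport handshake first; virtio_device_restore() expects a
     * working transport underneath it */
    ret = virtio_ccw_set_transport_rev(vcdev);
    if (ret)
            return ret;
    return virtio_device_restore(&vcdev->vdev);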
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index fcfd28d2884c..de1b3fce936d 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -185,7 +185,6 @@ ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \
CFLAGS_ncr53c8xx.o := $(ncr53c8xx-flags-y) $(ncr53c8xx-flags-m)
zalon7xx-objs := zalon.o ncr53c8xx.o
NCR_Q720_mod-objs := NCR_Q720.o ncr53c8xx.o
-oktagon_esp_mod-objs := oktagon_esp.o oktagon_io.o
# Files generated that shall be removed upon make clean
clean-files := 53c700_d.h 53c700_u.h
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index b3b931ab77eb..2664ea0df35f 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1693,8 +1693,10 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
* Map in the registers from the adapter.
*/
aac->base_size = AAC_MIN_FOOTPRINT_SIZE;
- if ((*aac_drivers[index].init)(aac))
+ if ((*aac_drivers[index].init)(aac)) {
+ error = -ENODEV;
goto out_unmap;
+ }
if (aac->sync_mode) {
if (aac_sync_mode)
diff --git a/drivers/scsi/aic7xxx/aiclib.c b/drivers/scsi/aic7xxx/aiclib.c
deleted file mode 100644
index 828ae3d9a510..000000000000
--- a/drivers/scsi/aic7xxx/aiclib.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Implementation of Utility functions for all SCSI device types.
- *
- * Copyright (c) 1997, 1998, 1999 Justin T. Gibbs.
- * Copyright (c) 1997, 1998 Kenneth D. Merry.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification, immediately at the beginning of the file.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/cam/scsi/scsi_all.c,v 1.38 2002/09/23 04:56:35 mjacob Exp $
- * $Id$
- */
-
-#include "aiclib.h"
-
diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 8e2f767147cb..5a645b8b9af1 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -1889,6 +1889,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req,
/* we will not receive ABTS response for this IO */
BNX2FC_IO_DBG(io_req, "Timer context finished processing "
"this scsi cmd\n");
+ return;
}
/* Cancel the timeout_work, as we received IO completion */
diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c
index be5ee2d37815..7dbbbb81a1e7 100644
--- a/drivers/scsi/csiostor/csio_lnode.c
+++ b/drivers/scsi/csiostor/csio_lnode.c
@@ -114,7 +114,7 @@ static enum csio_ln_ev fwevt_to_lnevt[] = {
static struct csio_lnode *
csio_ln_lookup_by_portid(struct csio_hw *hw, uint8_t portid)
{
- struct csio_lnode *ln = hw->rln;
+ struct csio_lnode *ln;
struct list_head *tmp;
/* Match siblings lnode with portid */
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 022e421c2185..4b44325d1a82 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -876,6 +876,11 @@ static void alua_rtpg_work(struct work_struct *work)
/**
* alua_rtpg_queue() - cause RTPG to be submitted asynchronously
+ * @pg: ALUA port group associated with @sdev.
+ * @sdev: SCSI device for which to submit an RTPG.
+ * @qdata: Information about the callback to invoke after the RTPG.
+ * @force: Whether or not to submit an RTPG if a work item that will submit an
+ * RTPG has already been scheduled.
*
* Returns true if and only if alua_rtpg_work() will be called asynchronously.
* That function is responsible for calling @qdata->fn().
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 57bf43e34863..dd9464920456 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -328,8 +328,6 @@ static void scsi_host_dev_release(struct device *dev)
if (shost->work_q)
destroy_workqueue(shost->work_q);
- destroy_rcu_head(&shost->rcu);
-
if (shost->shost_state == SHOST_CREATED) {
/*
* Free the shost_dev device name here if scsi_host_alloc()
@@ -404,7 +402,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
INIT_LIST_HEAD(&shost->starved_list);
init_waitqueue_head(&shost->host_wait);
mutex_init(&shost->scan_mutex);
- init_rcu_head(&shost->rcu);
index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL);
if (index < 0)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 9a0696f68f37..b81a53c4a9a8 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -367,7 +367,7 @@ enum ibmvfc_fcp_rsp_info_codes {
};
struct ibmvfc_fcp_rsp_info {
- __be16 reserved;
+ u8 reserved[3];
u8 rsp_code;
u8 reserved2[4];
}__attribute__((packed, aligned (2)));
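Note: the FCP response-info field carries RSP_CODE in byte 3, so the old
2-byte reserved member left rsp_code at offset 2 and the driver checked the
wrong byte of task-management responses. Widening reserved to u8[3] restores
the spec layout. A standalone compile-time check of the corrected layout
(userspace sketch, u8 typedef'ed locally):

    #include <stddef.h>

    typedef unsigned char u8;

    struct fcp_rsp_info {
            u8 reserved[3];         /* bytes 0-2 */
            u8 rsp_code;            /* byte 3, where FCP puts RSP_CODE */
            u8 reserved2[4];
    } __attribute__((packed, aligned(2)));

    _Static_assert(offsetof(struct fcp_rsp_info, rsp_code) == 3,
                   "rsp_code must sit at byte 3");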
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 6de9681ace82..ceab5e5c41c2 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -223,6 +223,7 @@ out_done:
static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
{
struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
+ struct domain_device *dev = cmd_to_domain_dev(cmd);
struct sas_task *task = TO_SAS_TASK(cmd);
/* At this point, we only get called following an actual abort
@@ -231,6 +232,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
*/
sas_end_task(cmd, task);
+ if (dev_is_sata(dev)) {
+ /* defer commands to libata so that libata EH can
+ * handle ata qcs correctly
+ */
+ list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q);
+ return;
+ }
+
/* now finish the command and move it on to the error
* handler done list, this also takes it off the
* error handler pending list.
@@ -238,22 +247,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q);
}
-static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
-{
- struct domain_device *dev = cmd_to_domain_dev(cmd);
- struct sas_ha_struct *ha = dev->port->ha;
- struct sas_task *task = TO_SAS_TASK(cmd);
-
- if (!dev_is_sata(dev)) {
- sas_eh_finish_cmd(cmd);
- return;
- }
-
- /* report the timeout to libata */
- sas_end_task(cmd, task);
- list_move_tail(&cmd->eh_entry, &ha->eh_ata_q);
-}
-
static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
{
struct scsi_cmnd *cmd, *n;
@@ -261,7 +254,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd
list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
if (cmd->device->sdev_target == my_cmd->device->sdev_target &&
cmd->device->lun == my_cmd->device->lun)
- sas_eh_defer_cmd(cmd);
+ sas_eh_finish_cmd(cmd);
}
}
@@ -631,12 +624,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
case TASK_IS_DONE:
SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
task);
- sas_eh_defer_cmd(cmd);
+ sas_eh_finish_cmd(cmd);
continue;
case TASK_IS_ABORTED:
SAS_DPRINTK("%s: task 0x%p is aborted\n",
__func__, task);
- sas_eh_defer_cmd(cmd);
+ sas_eh_finish_cmd(cmd);
continue;
case TASK_IS_AT_LU:
SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
@@ -647,7 +640,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
"recovered\n",
SAS_ADDR(task->dev),
cmd->device->lun);
- sas_eh_defer_cmd(cmd);
+ sas_eh_finish_cmd(cmd);
sas_scsi_clear_queue_lu(work_q, cmd);
goto Again;
}
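Note: sas_eh_defer_cmd() is folded into sas_eh_finish_cmd(), so every EH
completion path, not just the timeout path, now defers SATA commands to
eh_ata_q where libata EH can complete the qcs; the remaining callers switch
back to the single helper. Condensed shape (sas_ha stands for the host's
sas_ha_struct):

    static void finish_cmd(struct scsi_cmnd *cmd)
    {
            sas_end_task(cmd, TO_SAS_TASK(cmd));

            if (dev_is_sata(cmd_to_domain_dev(cmd))) {
                    /* libata EH owns the qc from here */
                    list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q);
                    return;
            }
            scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q);
    }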
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 073ced07e662..dc8e850fbfd2 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -216,36 +216,30 @@ inline void megasas_return_cmd_fusion(struct megasas_instance *instance,
/**
* megasas_fire_cmd_fusion - Sends command to the FW
* @instance: Adapter soft state
- * @req_desc: 32bit or 64bit Request descriptor
+ * @req_desc: 64bit Request descriptor
*
- * Perform PCI Write. Ventura supports 32 bit Descriptor.
- * Prior to Ventura (12G) MR controller supports 64 bit Descriptor.
+ * Perform PCI Write.
*/
static void
megasas_fire_cmd_fusion(struct megasas_instance *instance,
union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc)
{
- if (instance->adapter_type == VENTURA_SERIES)
- writel(le32_to_cpu(req_desc->u.low),
- &instance->reg_set->inbound_single_queue_port);
- else {
#if defined(writeq) && defined(CONFIG_64BIT)
- u64 req_data = (((u64)le32_to_cpu(req_desc->u.high) << 32) |
- le32_to_cpu(req_desc->u.low));
+ u64 req_data = (((u64)le32_to_cpu(req_desc->u.high) << 32) |
+ le32_to_cpu(req_desc->u.low));
- writeq(req_data, &instance->reg_set->inbound_low_queue_port);
+ writeq(req_data, &instance->reg_set->inbound_low_queue_port);
#else
- unsigned long flags;
- spin_lock_irqsave(&instance->hba_lock, flags);
- writel(le32_to_cpu(req_desc->u.low),
- &instance->reg_set->inbound_low_queue_port);
- writel(le32_to_cpu(req_desc->u.high),
- &instance->reg_set->inbound_high_queue_port);
- mmiowb();
- spin_unlock_irqrestore(&instance->hba_lock, flags);
+ unsigned long flags;
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ writel(le32_to_cpu(req_desc->u.low),
+ &instance->reg_set->inbound_low_queue_port);
+ writel(le32_to_cpu(req_desc->u.high),
+ &instance->reg_set->inbound_high_queue_port);
+ mmiowb();
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
#endif
- }
}
/**
@@ -982,7 +976,6 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
const char *sys_info;
MFI_CAPABILITIES *drv_ops;
u32 scratch_pad_2;
- unsigned long flags;
ktime_t time;
bool cur_fw_64bit_dma_capable;
@@ -1121,14 +1114,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
break;
}
- /* For Ventura also IOC INIT required 64 bit Descriptor write. */
- spin_lock_irqsave(&instance->hba_lock, flags);
- writel(le32_to_cpu(req_desc.u.low),
- &instance->reg_set->inbound_low_queue_port);
- writel(le32_to_cpu(req_desc.u.high),
- &instance->reg_set->inbound_high_queue_port);
- mmiowb();
- spin_unlock_irqrestore(&instance->hba_lock, flags);
+ megasas_fire_cmd_fusion(instance, &req_desc);
wait_and_poll(instance, cmd, MFI_POLL_TIMEOUT_SECS);
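Note: with Ventura's 32-bit single-queue-port special case dropped, every
descriptor, including the hand-rolled IOC INIT write this hunk replaces, goes
through one helper. That helper is the standard 64-bit MMIO idiom: an atomic
writeq where the architecture provides one, otherwise two writel's serialized
by a lock so the halves cannot interleave with another CPU's write. Generic
sketch (ports and lock are stand-ins for the adapter state):

    static void fire_descriptor(u64 desc, void __iomem *lo_port,
                                void __iomem *hi_port, spinlock_t *lock)
    {
    #if defined(writeq) && defined(CONFIG_64BIT)
            writeq(desc, lo_port);                  /* one atomic store */
    #else
            unsigned long flags;

            spin_lock_irqsave(lock, flags);         /* keep the halves paired */
            writel(lower_32_bits(desc), lo_port);
            writel(upper_32_bits(desc), hi_port);
            mmiowb();                               /* order MMIO vs unlock */
            spin_unlock_irqrestore(lock, flags);
    #endif
    }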
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 13d6e4ec3022..0aafbfd1b746 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2410,8 +2410,11 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc)
continue;
}
- for_each_cpu(cpu, mask)
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
+ if (cpu >= ioc->cpu_msix_table_sz)
+ break;
ioc->cpu_msix_table[cpu] = reply_q->msix_index;
+ }
}
return;
}
@@ -6294,14 +6297,14 @@ _base_reset_handler(struct MPT3SAS_ADAPTER *ioc, int reset_phase)
}
/**
- * _wait_for_commands_to_complete - reset controller
+ * mpt3sas_wait_for_commands_to_complete - reset controller
* @ioc: Pointer to MPT_ADAPTER structure
*
* This function is waiting 10s for all pending commands to complete
* prior to putting controller in reset.
*/
-static void
-_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc)
+void
+mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc)
{
u32 ioc_state;
@@ -6374,7 +6377,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
is_fault = 1;
}
_base_reset_handler(ioc, MPT3_IOC_PRE_RESET);
- _wait_for_commands_to_complete(ioc);
+ mpt3sas_wait_for_commands_to_complete(ioc);
_base_mask_interrupts(ioc);
r = _base_make_ioc_ready(ioc, type);
if (r)
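Note: two independent fixes in this file. An MSI-X affinity mask can name
offline or out-of-range CPUs, so the table fill now intersects with
cpu_online_mask and bounds the index, preventing a write past
cpu_msix_table[]. And the 10-second command drain is renamed and exported so
the scsih remove/shutdown paths (next file) can call it. The bounded fill,
condensed:

    /* never trust an affinity mask blindly */
    for_each_cpu_and(cpu, mask, cpu_online_mask) {
            if (cpu >= ioc->cpu_msix_table_sz)
                    break;          /* mask names a CPU past the table */
            ioc->cpu_msix_table[cpu] = reply_q->msix_index;
    }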
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 789bc421424b..99ccf83b8c51 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -1433,6 +1433,9 @@ void mpt3sas_base_update_missing_delay(struct MPT3SAS_ADAPTER *ioc,
int mpt3sas_port_enable(struct MPT3SAS_ADAPTER *ioc);
+void
+mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc);
+
/* scsih shared API */
struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc,
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 74fca184dba9..a1cb0236c550 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2835,7 +2835,8 @@ scsih_abort(struct scsi_cmnd *scmd)
_scsih_tm_display_info(ioc, scmd);
sas_device_priv_data = scmd->device->hostdata;
- if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
+ if (!sas_device_priv_data || !sas_device_priv_data->sas_target ||
+ ioc->remove_host) {
sdev_printk(KERN_INFO, scmd->device,
"device been deleted! scmd(%p)\n", scmd);
scmd->result = DID_NO_CONNECT << 16;
@@ -2898,7 +2899,8 @@ scsih_dev_reset(struct scsi_cmnd *scmd)
_scsih_tm_display_info(ioc, scmd);
sas_device_priv_data = scmd->device->hostdata;
- if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
+ if (!sas_device_priv_data || !sas_device_priv_data->sas_target ||
+ ioc->remove_host) {
sdev_printk(KERN_INFO, scmd->device,
"device been deleted! scmd(%p)\n", scmd);
scmd->result = DID_NO_CONNECT << 16;
@@ -2961,7 +2963,8 @@ scsih_target_reset(struct scsi_cmnd *scmd)
_scsih_tm_display_info(ioc, scmd);
sas_device_priv_data = scmd->device->hostdata;
- if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
+ if (!sas_device_priv_data || !sas_device_priv_data->sas_target ||
+ ioc->remove_host) {
starget_printk(KERN_INFO, starget, "target been deleted! scmd(%p)\n",
scmd);
scmd->result = DID_NO_CONNECT << 16;
@@ -3019,7 +3022,7 @@ scsih_host_reset(struct scsi_cmnd *scmd)
ioc->name, scmd);
scsi_print_command(scmd);
- if (ioc->is_driver_loading) {
+ if (ioc->is_driver_loading || ioc->remove_host) {
pr_info(MPT3SAS_FMT "Blocking the host reset\n",
ioc->name);
r = FAILED;
@@ -4453,7 +4456,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc)
st = scsi_cmd_priv(scmd);
mpt3sas_base_clear_st(ioc, st);
scsi_dma_unmap(scmd);
- if (ioc->pci_error_recovery)
+ if (ioc->pci_error_recovery || ioc->remove_host)
scmd->result = DID_NO_CONNECT << 16;
else
scmd->result = DID_RESET << 16;
@@ -9739,6 +9742,10 @@ static void scsih_remove(struct pci_dev *pdev)
unsigned long flags;
ioc->remove_host = 1;
+
+ mpt3sas_wait_for_commands_to_complete(ioc);
+ _scsih_flush_running_cmds(ioc);
+
_scsih_fw_event_cleanup_queue(ioc);
spin_lock_irqsave(&ioc->fw_event_lock, flags);
@@ -9815,6 +9822,10 @@ scsih_shutdown(struct pci_dev *pdev)
unsigned long flags;
ioc->remove_host = 1;
+
+ mpt3sas_wait_for_commands_to_complete(ioc);
+ _scsih_flush_running_cmds(ioc);
+
_scsih_fw_event_cleanup_queue(ioc);
spin_lock_irqsave(&ioc->fw_event_lock, flags);
@@ -10547,7 +10558,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
snprintf(ioc->firmware_event_name, sizeof(ioc->firmware_event_name),
"fw_event_%s%d", ioc->driver_name, ioc->id);
ioc->firmware_event_thread = alloc_ordered_workqueue(
- ioc->firmware_event_name, WQ_MEM_RECLAIM);
+ ioc->firmware_event_name, 0);
if (!ioc->firmware_event_thread) {
pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
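Note: once remove_host is set, the abort/device-reset/target-reset/host-reset
handlers bail out instead of issuing task management to an adapter being torn
down, remove and shutdown first wait out and then flush outstanding commands,
and flushed commands complete with DID_NO_CONNECT during removal. The
firmware-event workqueue also drops WQ_MEM_RECLAIM, since it is not on a
memory-reclaim path and the flag only pins a rescuer thread. The EH guard, as
a sketch using the names from the hunks above:

    /* fail fast when the host is going away */
    if (!sas_device_priv_data || !sas_device_priv_data->sas_target ||
        ioc->remove_host) {
            scmd->result = DID_NO_CONNECT << 16;
            scmd->scsi_done(scmd);
            return SUCCESS;
    }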
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 667d7697ba01..d09afe1b567d 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -762,6 +762,11 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
iscsi_cid = cqe->conn_id;
qedi_conn = qedi->cid_que.conn_cid_tbl[iscsi_cid];
+ if (!qedi_conn) {
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "icid not found 0x%x\n", cqe->conn_id);
+ return;
+ }
/* Based on this itt get the corresponding qedi_cmd */
spin_lock_bh(&qedi_conn->tmf_work_lock);
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 029e2e69b29f..f57a94b4f0d9 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1724,7 +1724,6 @@ static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf)
{
struct qedi_ctx *qedi = data;
struct nvm_iscsi_initiator *initiator;
- char *str = buf;
int rc = 1;
u32 ipv6_en, dhcp_en, ip_len;
struct nvm_iscsi_block *block;
@@ -1758,32 +1757,32 @@ static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf)
switch (type) {
case ISCSI_BOOT_ETH_IP_ADDR:
- rc = snprintf(str, ip_len, fmt, ip);
+ rc = snprintf(buf, ip_len, fmt, ip);
break;
case ISCSI_BOOT_ETH_SUBNET_MASK:
- rc = snprintf(str, ip_len, fmt, sub);
+ rc = snprintf(buf, ip_len, fmt, sub);
break;
case ISCSI_BOOT_ETH_GATEWAY:
- rc = snprintf(str, ip_len, fmt, gw);
+ rc = snprintf(buf, ip_len, fmt, gw);
break;
case ISCSI_BOOT_ETH_FLAGS:
- rc = snprintf(str, 3, "%hhd\n",
+ rc = snprintf(buf, 3, "%hhd\n",
SYSFS_FLAG_FW_SEL_BOOT);
break;
case ISCSI_BOOT_ETH_INDEX:
- rc = snprintf(str, 3, "0\n");
+ rc = snprintf(buf, 3, "0\n");
break;
case ISCSI_BOOT_ETH_MAC:
- rc = sysfs_format_mac(str, qedi->mac, ETH_ALEN);
+ rc = sysfs_format_mac(buf, qedi->mac, ETH_ALEN);
break;
case ISCSI_BOOT_ETH_VLAN:
- rc = snprintf(str, 12, "%d\n",
+ rc = snprintf(buf, 12, "%d\n",
GET_FIELD2(initiator->generic_cont0,
NVM_ISCSI_CFG_INITIATOR_VLAN));
break;
case ISCSI_BOOT_ETH_ORIGIN:
if (dhcp_en)
- rc = snprintf(str, 3, "3\n");
+ rc = snprintf(buf, 3, "3\n");
break;
default:
rc = 0;
@@ -1819,7 +1818,6 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf)
{
struct qedi_ctx *qedi = data;
struct nvm_iscsi_initiator *initiator;
- char *str = buf;
int rc;
struct nvm_iscsi_block *block;
@@ -1831,8 +1829,8 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf)
switch (type) {
case ISCSI_BOOT_INI_INITIATOR_NAME:
- rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n",
- initiator->initiator_name.byte);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN,
+ initiator->initiator_name.byte);
break;
default:
rc = 0;
@@ -1860,7 +1858,6 @@ static ssize_t
qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type,
char *buf, enum qedi_nvm_tgts idx)
{
- char *str = buf;
int rc = 1;
u32 ctrl_flags, ipv6_en, chap_en, mchap_en, ip_len;
struct nvm_iscsi_block *block;
@@ -1899,48 +1896,48 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type,
switch (type) {
case ISCSI_BOOT_TGT_NAME:
- rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n",
- block->target[idx].target_name.byte);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN,
+ block->target[idx].target_name.byte);
break;
case ISCSI_BOOT_TGT_IP_ADDR:
if (ipv6_en)
- rc = snprintf(str, ip_len, "%pI6\n",
+ rc = snprintf(buf, ip_len, "%pI6\n",
block->target[idx].ipv6_addr.byte);
else
- rc = snprintf(str, ip_len, "%pI4\n",
+ rc = snprintf(buf, ip_len, "%pI4\n",
block->target[idx].ipv4_addr.byte);
break;
case ISCSI_BOOT_TGT_PORT:
- rc = snprintf(str, 12, "%d\n",
+ rc = snprintf(buf, 12, "%d\n",
GET_FIELD2(block->target[idx].generic_cont0,
NVM_ISCSI_CFG_TARGET_TCP_PORT));
break;
case ISCSI_BOOT_TGT_LUN:
- rc = snprintf(str, 22, "%.*d\n",
+ rc = snprintf(buf, 22, "%.*d\n",
block->target[idx].lun.value[1],
block->target[idx].lun.value[0]);
break;
case ISCSI_BOOT_TGT_CHAP_NAME:
- rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n",
- chap_name);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+ chap_name);
break;
case ISCSI_BOOT_TGT_CHAP_SECRET:
- rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n",
- chap_secret);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+ chap_secret);
break;
case ISCSI_BOOT_TGT_REV_CHAP_NAME:
- rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n",
- mchap_name);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+ mchap_name);
break;
case ISCSI_BOOT_TGT_REV_CHAP_SECRET:
- rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n",
- mchap_secret);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+ mchap_secret);
break;
case ISCSI_BOOT_TGT_FLAGS:
- rc = snprintf(str, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT);
+ rc = snprintf(buf, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT);
break;
case ISCSI_BOOT_TGT_NIC_ASSOC:
- rc = snprintf(str, 3, "0\n");
+ rc = snprintf(buf, 3, "0\n");
break;
default:
rc = 0;
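Note: the iSCSI boot fields read from NVRAM are fixed-width and not guaranteed
to be NUL-terminated, so snprintf(buf, FIELD_MAX, "%s\n", field) was wrong
twice over: FIELD_MAX bounded the output buffer (sysfs provides a full page)
while "%s" still read the source until a NUL that may not exist.
sprintf(buf, "%.*s\n", FIELD_MAX, field) bounds the read instead. Standalone
illustration:

    #include <stdio.h>

    int main(void)
    {
            /* fixed-width firmware field, deliberately without a NUL */
            char fw_field[8] = { 'i', 'q', 'n', '.', 't', 'e', 's', 't' };
            char buf[64];

            /* reads at most 8 chars from fw_field, never overruns it */
            int n = sprintf(buf, "%.*s\n", (int)sizeof(fw_field), fw_field);

            fwrite(buf, 1, n, stdout);
            return 0;
    }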
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index be7d6824581a..c9689f97c307 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -261,9 +261,9 @@
struct name_list_extended {
struct get_name_list_extended *l;
dma_addr_t ldma;
- struct list_head fcports; /* protect by sess_list */
+ struct list_head fcports;
+ spinlock_t fcports_lock;
u32 size;
- u8 sent;
};
/*
* Timeout timer counts in seconds
@@ -2217,6 +2217,7 @@ typedef struct {
/* FCP-4 types */
#define FC4_TYPE_FCP_SCSI 0x08
+#define FC4_TYPE_NVME 0x28
#define FC4_TYPE_OTHER 0x0
#define FC4_TYPE_UNKNOWN 0xff
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 5bf9a59432f6..403fa096f8c8 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -3179,6 +3179,7 @@ done_free_sp:
sp->free(sp);
fcport->flags &= ~FCF_ASYNC_SENT;
done:
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
return rval;
}
@@ -3370,6 +3371,7 @@ done_free_sp:
sp->free(sp);
fcport->flags &= ~FCF_ASYNC_SENT;
done:
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
return rval;
}
@@ -3971,6 +3973,9 @@ out:
spin_lock_irqsave(&vha->work_lock, flags);
vha->scan.scan_flags &= ~SF_SCANNING;
spin_unlock_irqrestore(&vha->work_lock, flags);
+
+ if ((fc4type == FC4_TYPE_FCP_SCSI) && vha->flags.nvme_enabled)
+ qla24xx_async_gpnft(vha, FC4_TYPE_NVME);
}
static void qla2x00_async_gpnft_gnnft_sp_done(void *s, int res)
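Note: the done: labels now also clear FCF_ASYNC_ACTIVE, so a port whose async
GS command never launched is not left permanently "active". And when the
fabric scan for FC4_TYPE_FCP_SCSI completes on an NVMe-enabled HBA, a second
GPN_FT is kicked for FC4_TYPE_NVME (0x28, newly defined in qla_def.h above) so
NVMe namespaces are discovered too:

    /* after the SCSI-type scan finishes: second pass for NVMe */
    if (fc4type == FC4_TYPE_FCP_SCSI && vha->flags.nvme_enabled)
            qla24xx_async_gpnft(vha, FC4_TYPE_NVME);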
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index aececf664654..8d7fab3cd01d 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -59,8 +59,6 @@ qla2x00_sp_timeout(struct timer_list *t)
req->outstanding_cmds[sp->handle] = NULL;
iocb = &sp->u.iocb_cmd;
iocb->timeout(sp);
- if (sp->type != SRB_ELS_DCMD)
- sp->free(sp);
spin_unlock_irqrestore(&vha->hw->hardware_lock, flags);
}
@@ -102,7 +100,6 @@ qla2x00_async_iocb_timeout(void *data)
srb_t *sp = data;
fc_port_t *fcport = sp->fcport;
struct srb_iocb *lio = &sp->u.iocb_cmd;
- struct event_arg ea;
if (fcport) {
ql_dbg(ql_dbg_disc, fcport->vha, 0x2071,
@@ -117,25 +114,13 @@ qla2x00_async_iocb_timeout(void *data)
switch (sp->type) {
case SRB_LOGIN_CMD:
- if (!fcport)
- break;
/* Retry as needed. */
lio->u.logio.data[0] = MBS_COMMAND_ERROR;
lio->u.logio.data[1] = lio->u.logio.flags & SRB_LOGIN_RETRIED ?
QLA_LOGIO_LOGIN_RETRIED : 0;
- memset(&ea, 0, sizeof(ea));
- ea.event = FCME_PLOGI_DONE;
- ea.fcport = sp->fcport;
- ea.data[0] = lio->u.logio.data[0];
- ea.data[1] = lio->u.logio.data[1];
- ea.sp = sp;
- qla24xx_handle_plogi_done_event(fcport->vha, &ea);
+ sp->done(sp, QLA_FUNCTION_TIMEOUT);
break;
case SRB_LOGOUT_CMD:
- if (!fcport)
- break;
- qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT);
- break;
case SRB_CT_PTHRU_CMD:
case SRB_MB_IOCB:
case SRB_NACK_PLOGI:
@@ -228,6 +213,7 @@ done_free_sp:
sp->free(sp);
fcport->flags &= ~FCF_ASYNC_SENT;
done:
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
return rval;
}
@@ -235,12 +221,10 @@ static void
qla2x00_async_logout_sp_done(void *ptr, int res)
{
srb_t *sp = ptr;
- struct srb_iocb *lio = &sp->u.iocb_cmd;
sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
- if (!test_bit(UNLOADING, &sp->vha->dpc_flags))
- qla2x00_post_async_logout_done_work(sp->vha, sp->fcport,
- lio->u.logio.data);
+ sp->fcport->login_gen++;
+ qlt_logo_completion_handler(sp->fcport, res);
sp->free(sp);
}
@@ -280,7 +264,7 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport)
done_free_sp:
sp->free(sp);
done:
- fcport->flags &= ~FCF_ASYNC_SENT;
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
return rval;
}
@@ -288,6 +272,7 @@ void
qla2x00_async_prlo_done(struct scsi_qla_host *vha, fc_port_t *fcport,
uint16_t *data)
{
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
/* Don't re-login in target mode */
if (!fcport->tgt_session)
qla2x00_mark_device_lost(vha, fcport, 1, 0);
@@ -301,6 +286,7 @@ qla2x00_async_prlo_sp_done(void *s, int res)
struct srb_iocb *lio = &sp->u.iocb_cmd;
struct scsi_qla_host *vha = sp->vha;
+ sp->fcport->flags &= ~FCF_ASYNC_ACTIVE;
if (!test_bit(UNLOADING, &vha->dpc_flags))
qla2x00_post_async_prlo_done_work(sp->fcport->vha, sp->fcport,
lio->u.logio.data);
@@ -339,6 +325,7 @@ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport)
done_free_sp:
sp->free(sp);
done:
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
return rval;
}
@@ -392,6 +379,8 @@ qla2x00_async_adisc_sp_done(void *ptr, int res)
"Async done-%s res %x %8phC\n",
sp->name, res, sp->fcport->port_name);
+ sp->fcport->flags &= ~FCF_ASYNC_SENT;
+
memset(&ea, 0, sizeof(ea));
ea.event = FCME_ADISC_DONE;
ea.rc = res;
@@ -442,7 +431,7 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport,
done_free_sp:
sp->free(sp);
done:
- fcport->flags &= ~FCF_ASYNC_SENT;
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
qla2x00_post_async_adisc_work(vha, fcport, data);
return rval;
}
@@ -660,8 +649,7 @@ qla24xx_async_gnl_sp_done(void *s, int res)
(loop_id & 0x7fff));
}
- spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
- vha->gnl.sent = 0;
+ spin_lock_irqsave(&vha->gnl.fcports_lock, flags);
INIT_LIST_HEAD(&h);
fcport = tf = NULL;
@@ -670,12 +658,16 @@ qla24xx_async_gnl_sp_done(void *s, int res)
list_for_each_entry_safe(fcport, tf, &h, gnl_entry) {
list_del_init(&fcport->gnl_entry);
+ spin_lock(&vha->hw->tgt.sess_lock);
fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ spin_unlock(&vha->hw->tgt.sess_lock);
ea.fcport = fcport;
qla2x00_fcport_event_handler(vha, &ea);
}
+ spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags);
+ spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
/* create new fcport if fw has knowledge of new sessions */
for (i = 0; i < n; i++) {
port_id_t id;
@@ -727,18 +719,21 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
ql_dbg(ql_dbg_disc, vha, 0x20d9,
"Async-gnlist WWPN %8phC \n", fcport->port_name);
- spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+ spin_lock_irqsave(&vha->gnl.fcports_lock, flags);
+ if (!list_empty(&fcport->gnl_entry)) {
+ spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags);
+ rval = QLA_SUCCESS;
+ goto done;
+ }
+
+ spin_lock(&vha->hw->tgt.sess_lock);
fcport->disc_state = DSC_GNL;
fcport->last_rscn_gen = fcport->rscn_gen;
fcport->last_login_gen = fcport->login_gen;
+ spin_unlock(&vha->hw->tgt.sess_lock);
list_add_tail(&fcport->gnl_entry, &vha->gnl.fcports);
- if (vha->gnl.sent) {
- spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
- return QLA_SUCCESS;
- }
- vha->gnl.sent = 1;
- spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+ spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags);
sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
if (!sp)
@@ -1066,6 +1061,7 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
fc_port_t *fcport = ea->fcport;
struct port_database_24xx *pd;
struct srb *sp = ea->sp;
+ uint8_t ls;
pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in;
@@ -1078,7 +1074,12 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
if (fcport->disc_state == DSC_DELETE_PEND)
return;
- switch (pd->current_login_state) {
+ if (fcport->fc4f_nvme)
+ ls = pd->current_login_state >> 4;
+ else
+ ls = pd->current_login_state & 0xf;
+
+ switch (ls) {
case PDS_PRLI_COMPLETE:
__qla24xx_parse_gpdb(vha, fcport, pd);
break;
@@ -1168,8 +1169,9 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
if (fcport->scan_state != QLA_FCPORT_FOUND)
return 0;
- if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
- (fcport->fw_login_state == DSC_LS_PRLI_PEND))
+ if ((fcport->loop_id != FC_NO_LOOP_ID) &&
+ ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
+ (fcport->fw_login_state == DSC_LS_PRLI_PEND)))
return 0;
if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
@@ -1544,6 +1546,7 @@ qla24xx_abort_sp_done(void *ptr, int res)
srb_t *sp = ptr;
struct srb_iocb *abt = &sp->u.iocb_cmd;
+ del_timer(&sp->u.iocb_cmd.timer);
complete(&abt->u.abt.comp);
}
@@ -1716,7 +1719,6 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
set_bit(ea->fcport->loop_id, vha->hw->loop_id_map);
spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
- ea->fcport->loop_id = FC_NO_LOOP_ID;
ea->fcport->chip_reset = vha->hw->base_qpair->chip_reset;
ea->fcport->logout_on_delete = 1;
ea->fcport->send_els_logo = 0;
@@ -1808,6 +1810,7 @@ qla2x00_async_logout_done(struct scsi_qla_host *vha, fc_port_t *fcport,
qla2x00_mark_device_lost(vha, fcport, 1, 0);
qlt_logo_completion_handler(fcport, data[0]);
fcport->login_gen++;
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
return;
}
@@ -1815,6 +1818,7 @@ void
qla2x00_async_adisc_done(struct scsi_qla_host *vha, fc_port_t *fcport,
uint16_t *data)
{
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
if (data[0] == MBS_COMMAND_COMPLETE) {
qla2x00_update_fcport(vha, fcport);
@@ -1822,7 +1826,6 @@ qla2x00_async_adisc_done(struct scsi_qla_host *vha, fc_port_t *fcport,
}
/* Retry login. */
- fcport->flags &= ~FCF_ASYNC_SENT;
if (data[1] & QLA_LOGIO_LOGIN_RETRIED)
set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
else
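Note: the get-name-list machinery stops multiplexing everything over
tgt.sess_lock. The gnl.fcports list gets its own fcports_lock, and the one-bit
`sent` flag is replaced by membership testing: if the fcport's gnl_entry is
already queued, the request coalesces. This only works because the completion
side removes entries with list_del_init(), which re-initializes the node so
list_empty() stays meaningful. Related fixes in the same file: the GPDB event
selects the login-state nibble by FCP versus NVMe, and abort completion
deletes the srb timer before signalling. The dedup idiom, sketched:

    /* dedup by list membership instead of a side flag */
    spin_lock_irqsave(&gnl->fcports_lock, flags);
    if (!list_empty(&fcport->gnl_entry)) {          /* already queued */
            spin_unlock_irqrestore(&gnl->fcports_lock, flags);
            return QLA_SUCCESS;                     /* coalesce */
    }
    list_add_tail(&fcport->gnl_entry, &gnl->fcports);
    spin_unlock_irqrestore(&gnl->fcports_lock, flags);

    /* completion side must use list_del_init(), not list_del(), so
     * the node reads as empty once removed */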
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 1b62e943ec49..8d00d559bd26 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -3275,12 +3275,11 @@ qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb)
memset(abt_iocb, 0, sizeof(struct abort_entry_24xx));
abt_iocb->entry_type = ABORT_IOCB_TYPE;
abt_iocb->entry_count = 1;
- abt_iocb->handle =
- cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no,
- aio->u.abt.cmd_hndl));
+ abt_iocb->handle = cpu_to_le32(MAKE_HANDLE(req->id, sp->handle));
abt_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id);
abt_iocb->handle_to_abort =
- cpu_to_le32(MAKE_HANDLE(req->id, aio->u.abt.cmd_hndl));
+ cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no,
+ aio->u.abt.cmd_hndl));
abt_iocb->port_id[0] = sp->fcport->d_id.b.al_pa;
abt_iocb->port_id[1] = sp->fcport->d_id.b.area;
abt_iocb->port_id[2] = sp->fcport->d_id.b.domain;
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 14109d86c3f6..89f93ebd819d 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -272,7 +272,8 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
/* Read all mbox registers? */
- mboxes = (1 << ha->mbx_count) - 1;
+ WARN_ON_ONCE(ha->mbx_count > 32);
+ mboxes = (1ULL << ha->mbx_count) - 1;
if (!ha->mcp)
ql_dbg(ql_dbg_async, vha, 0x5001, "MBX pointer ERROR.\n");
else
@@ -2880,7 +2881,8 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
/* Read all mbox registers? */
- mboxes = (1 << ha->mbx_count) - 1;
+ WARN_ON_ONCE(ha->mbx_count > 32);
+ mboxes = (1ULL << ha->mbx_count) - 1;
if (!ha->mcp)
ql_dbg(ql_dbg_async, vha, 0x504e, "MBX pointer ERROR.\n");
else
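Note: ha->mbx_count can legitimately be 32, and shifting a 32-bit 1 by 32 is
undefined behavior in C (on x86 it typically yields 1, making the mask 0, so
no mailbox registers were read back). Widening to 1ULL makes the shift defined
and the mask correct; WARN_ON_ONCE documents the new bound. Standalone
illustration:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t mbox_mask(unsigned int count)   /* 0..32 inclusive */
    {
            /* the 64-bit shift is well defined even for count == 32 */
            return (uint32_t)((1ULL << count) - 1);
    }

    int main(void)
    {
            printf("%#x\n", mbox_mask(32));         /* 0xffffffff */
            return 0;
    }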
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 12ee6e02d146..5c5dcca4d1da 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -454,7 +454,7 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, struct req_que *req,
ha->req_q_map[0] = req;
set_bit(0, ha->rsp_qid_map);
set_bit(0, ha->req_qid_map);
- return 1;
+ return 0;
fail_qpair_map:
kfree(ha->base_qpair);
@@ -471,6 +471,9 @@ fail_req_map:
static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
{
+ if (!ha->req_q_map)
+ return;
+
if (IS_QLAFX00(ha)) {
if (req && req->ring_fx00)
dma_free_coherent(&ha->pdev->dev,
@@ -481,14 +484,17 @@ static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
(req->length + 1) * sizeof(request_t),
req->ring, req->dma);
- if (req)
+ if (req) {
kfree(req->outstanding_cmds);
-
- kfree(req);
+ kfree(req);
+ }
}
static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
{
+ if (!ha->rsp_q_map)
+ return;
+
if (IS_QLAFX00(ha)) {
if (rsp && rsp->ring)
dma_free_coherent(&ha->pdev->dev,
@@ -499,7 +505,8 @@ static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
(rsp->length + 1) * sizeof(response_t),
rsp->ring, rsp->dma);
}
- kfree(rsp);
+ if (rsp)
+ kfree(rsp);
}
static void qla2x00_free_queues(struct qla_hw_data *ha)
@@ -1723,6 +1730,8 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
struct qla_tgt_cmd *cmd;
uint8_t trace = 0;
+ if (!ha->req_q_map)
+ return;
spin_lock_irqsave(qp->qp_lock_ptr, flags);
req = qp->req;
for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
@@ -3095,14 +3104,14 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
/* Set up the irqs */
ret = qla2x00_request_irqs(ha, rsp);
if (ret)
- goto probe_hw_failed;
+ goto probe_failed;
/* Alloc arrays of request and response ring ptrs */
- if (!qla2x00_alloc_queues(ha, req, rsp)) {
+ if (qla2x00_alloc_queues(ha, req, rsp)) {
ql_log(ql_log_fatal, base_vha, 0x003d,
"Failed to allocate memory for queue pointers..."
"aborting.\n");
- goto probe_init_failed;
+ goto probe_failed;
}
if (ha->mqenable && shost_use_blk_mq(host)) {
@@ -3387,15 +3396,6 @@ skip_dpc:
return 0;
-probe_init_failed:
- qla2x00_free_req_que(ha, req);
- ha->req_q_map[0] = NULL;
- clear_bit(0, ha->req_qid_map);
- qla2x00_free_rsp_que(ha, rsp);
- ha->rsp_q_map[0] = NULL;
- clear_bit(0, ha->rsp_qid_map);
- ha->max_req_queues = ha->max_rsp_queues = 0;
-
probe_failed:
if (base_vha->timer_active)
qla2x00_stop_timer(base_vha);
@@ -3625,6 +3625,8 @@ qla2x00_remove_one(struct pci_dev *pdev)
}
qla2x00_wait_for_hba_ready(base_vha);
+ qla2x00_wait_for_sess_deletion(base_vha);
+
/*
* if UNLOAD flag is already set, then continue unload,
* where it was set first.
@@ -4506,11 +4508,17 @@ qla2x00_mem_free(struct qla_hw_data *ha)
if (ha->init_cb)
dma_free_coherent(&ha->pdev->dev, ha->init_cb_size,
ha->init_cb, ha->init_cb_dma);
- vfree(ha->optrom_buffer);
- kfree(ha->nvram);
- kfree(ha->npiv_info);
- kfree(ha->swl);
- kfree(ha->loop_id_map);
+
+ if (ha->optrom_buffer)
+ vfree(ha->optrom_buffer);
+ if (ha->nvram)
+ kfree(ha->nvram);
+ if (ha->npiv_info)
+ kfree(ha->npiv_info);
+ if (ha->swl)
+ kfree(ha->swl);
+ if (ha->loop_id_map)
+ kfree(ha->loop_id_map);
ha->srb_mempool = NULL;
ha->ctx_mempool = NULL;
@@ -4526,6 +4534,15 @@ qla2x00_mem_free(struct qla_hw_data *ha)
ha->ex_init_cb_dma = 0;
ha->async_pd = NULL;
ha->async_pd_dma = 0;
+ ha->loop_id_map = NULL;
+ ha->npiv_info = NULL;
+ ha->optrom_buffer = NULL;
+ ha->swl = NULL;
+ ha->nvram = NULL;
+ ha->mctp_dump = NULL;
+ ha->dcbx_tlv = NULL;
+ ha->xgmac_data = NULL;
+ ha->sfp_data = NULL;
ha->s_dma_pool = NULL;
ha->dl_dma_pool = NULL;
@@ -4575,6 +4592,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
spin_lock_init(&vha->work_lock);
spin_lock_init(&vha->cmd_list_lock);
+ spin_lock_init(&vha->gnl.fcports_lock);
init_waitqueue_head(&vha->fcport_waitQ);
init_waitqueue_head(&vha->vref_waitq);
@@ -4804,9 +4822,12 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
fcport->d_id = e->u.new_sess.id;
fcport->flags |= FCF_FABRIC_DEVICE;
fcport->fw_login_state = DSC_LS_PLOGI_PEND;
- if (e->u.new_sess.fc4_type == FC4_TYPE_FCP_SCSI)
+ if (e->u.new_sess.fc4_type == FC4_TYPE_FCP_SCSI) {
fcport->fc4_type = FC4_TYPE_FCP_SCSI;
-
+ } else if (e->u.new_sess.fc4_type == FC4_TYPE_NVME) {
+ fcport->fc4_type = FC4_TYPE_OTHER;
+ fcport->fc4f_nvme = FC4_TYPE_NVME;
+ }
memcpy(fcport->port_name, e->u.new_sess.port_name,
WWN_SIZE);
} else {
@@ -4875,6 +4896,8 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
}
qlt_plogi_ack_unref(vha, pla);
} else {
+ fc_port_t *dfcp = NULL;
+
spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
tfcp = qla2x00_find_fcport_by_nportid(vha,
&e->u.new_sess.id, 1);
@@ -4897,11 +4920,13 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
default:
fcport->login_pause = 1;
tfcp->conflict = fcport;
- qlt_schedule_sess_for_deletion(tfcp);
+ dfcp = tfcp;
break;
}
}
spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+ if (dfcp)
+ qlt_schedule_sess_for_deletion(tfcp);
wwn = wwn_to_u64(fcport->node_name);
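Note: qla2x00_alloc_queues() now returns 0 on success, the kernel convention,
and probe checks it that way; the old `return 1` plus `if (!alloc(...))`
double negative invited exactly the inverted-check class of bug, and the
special probe_init_failed unwind label goes away with it. The free paths gain
NULL-map guards, remove_one waits for session deletion before unloading, and
mem_free() now resets each pointer after freeing so teardown is idempotent.
(kfree()/vfree() already accept NULL, so the added `if (ha->...)` guards are
belt-and-braces; the part that matters is the reset to NULL.) Sketch:

    /* idempotent teardown */
    kfree(ha->npiv_info);
    ha->npiv_info = NULL;   /* a second mem_free() is now harmless */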
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index fc89af8fe256..b49ac85f3de2 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1224,10 +1224,10 @@ static void qla24xx_chk_fcp_state(struct fc_port *sess)
}
}
-/* ha->tgt.sess_lock supposed to be held on entry */
void qlt_schedule_sess_for_deletion(struct fc_port *sess)
{
struct qla_tgt *tgt = sess->tgt;
+ struct qla_hw_data *ha = sess->vha->hw;
unsigned long flags;
if (sess->disc_state == DSC_DELETE_PEND)
@@ -1244,16 +1244,16 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess)
return;
}
+ spin_lock_irqsave(&ha->tgt.sess_lock, flags);
if (sess->deleted == QLA_SESS_DELETED)
sess->logout_on_delete = 0;
- spin_lock_irqsave(&sess->vha->work_lock, flags);
if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) {
- spin_unlock_irqrestore(&sess->vha->work_lock, flags);
+ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
return;
}
sess->deleted = QLA_SESS_DELETION_IN_PROGRESS;
- spin_unlock_irqrestore(&sess->vha->work_lock, flags);
+ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
sess->disc_state = DSC_DELETE_PEND;
@@ -1262,13 +1262,10 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess)
ql_dbg(ql_dbg_tgt, sess->vha, 0xe001,
"Scheduling sess %p for deletion\n", sess);
- /* use cancel to push work element through before re-queue */
- cancel_work_sync(&sess->del_work);
INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn);
- queue_work(sess->vha->hw->wq, &sess->del_work);
+ WARN_ON(!queue_work(sess->vha->hw->wq, &sess->del_work));
}
-/* ha->tgt.sess_lock supposed to be held on entry */
static void qlt_clear_tgt_db(struct qla_tgt *tgt)
{
struct fc_port *sess;
@@ -1451,8 +1448,8 @@ qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen)
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf008, "qla_tgt_fc_port_deleted %p", sess);
sess->local = 1;
- qlt_schedule_sess_for_deletion(sess);
spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+ qlt_schedule_sess_for_deletion(sess);
}
static inline int test_tgt_sess_count(struct qla_tgt *tgt)
@@ -1512,10 +1509,8 @@ int qlt_stop_phase1(struct qla_tgt *tgt)
* Lock is needed, because we still can get an incoming packet.
*/
mutex_lock(&vha->vha_tgt.tgt_mutex);
- spin_lock_irqsave(&ha->tgt.sess_lock, flags);
tgt->tgt_stop = 1;
qlt_clear_tgt_db(tgt);
- spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
mutex_unlock(&vha->vha_tgt.tgt_mutex);
mutex_unlock(&qla_tgt_mutex);
@@ -4871,8 +4866,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
sess);
qlt_send_term_imm_notif(vha, iocb, 1);
res = 0;
- spin_lock_irqsave(&tgt->ha->tgt.sess_lock,
- flags);
break;
}
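Note: qlt_schedule_sess_for_deletion() changes its locking contract. It now
takes ha->tgt.sess_lock itself, so the stale "supposed to be held on entry"
comments go, qlt_stop_phase1() stops wrapping qlt_clear_tgt_db() in the lock,
and callers that did hold it must drop it first. Double-scheduling a session
already being deleted now trips WARN_ON(!queue_work(...)) rather than being
papered over with cancel_work_sync(). Caller-side consequence, as
qlt_fc_port_deleted() now does:

    /* drop the lock before scheduling; the function takes sess_lock
     * internally */
    spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
    qlt_schedule_sess_for_deletion(sess);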
diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h
index fc233717355f..817f312023a9 100644
--- a/drivers/scsi/qla4xxx/ql4_def.h
+++ b/drivers/scsi/qla4xxx/ql4_def.h
@@ -168,6 +168,8 @@
#define DEV_DB_NON_PERSISTENT 0
#define DEV_DB_PERSISTENT 1
+#define QL4_ISP_REG_DISCONNECT 0xffffffffU
+
#define COPY_ISID(dst_isid, src_isid) { \
int i, j; \
for (i = 0, j = ISID_SIZE - 1; i < ISID_SIZE;) \
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 82e889bbe0ed..fc2c97d9a0d6 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -262,6 +262,24 @@ static struct iscsi_transport qla4xxx_iscsi_transport = {
static struct scsi_transport_template *qla4xxx_scsi_transport;
+static int qla4xxx_isp_check_reg(struct scsi_qla_host *ha)
+{
+ u32 reg_val = 0;
+ int rval = QLA_SUCCESS;
+
+ if (is_qla8022(ha))
+ reg_val = readl(&ha->qla4_82xx_reg->host_status);
+ else if (is_qla8032(ha) || is_qla8042(ha))
+ reg_val = qla4_8xxx_rd_direct(ha, QLA8XXX_PEG_ALIVE_COUNTER);
+ else
+ reg_val = readw(&ha->reg->ctrl_status);
+
+ if (reg_val == QL4_ISP_REG_DISCONNECT)
+ rval = QLA_ERROR;
+
+ return rval;
+}
+
static int qla4xxx_send_ping(struct Scsi_Host *shost, uint32_t iface_num,
uint32_t iface_type, uint32_t payload_size,
uint32_t pid, struct sockaddr *dst_addr)
@@ -9186,10 +9204,17 @@ static int qla4xxx_eh_abort(struct scsi_cmnd *cmd)
struct srb *srb = NULL;
int ret = SUCCESS;
int wait = 0;
+ int rval;
ql4_printk(KERN_INFO, ha, "scsi%ld:%d:%llu: Abort command issued cmd=%p, cdb=0x%x\n",
ha->host_no, id, lun, cmd, cmd->cmnd[0]);
+ rval = qla4xxx_isp_check_reg(ha);
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+ return FAILED;
+ }
+
spin_lock_irqsave(&ha->hardware_lock, flags);
srb = (struct srb *) CMD_SP(cmd);
if (!srb) {
@@ -9241,6 +9266,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
struct ddb_entry *ddb_entry = cmd->device->hostdata;
int ret = FAILED, stat;
+ int rval;
if (!ddb_entry)
return ret;
@@ -9260,6 +9286,12 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
cmd, jiffies, cmd->request->timeout / HZ,
ha->dpc_flags, cmd->result, cmd->allowed));
+ rval = qla4xxx_isp_check_reg(ha);
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+ return FAILED;
+ }
+
/* FIXME: wait for hba to go online */
stat = qla4xxx_reset_lun(ha, ddb_entry, cmd->device->lun);
if (stat != QLA_SUCCESS) {
@@ -9303,6 +9335,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
struct ddb_entry *ddb_entry = cmd->device->hostdata;
int stat, ret;
+ int rval;
if (!ddb_entry)
return FAILED;
@@ -9320,6 +9353,12 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
ha->host_no, cmd, jiffies, cmd->request->timeout / HZ,
ha->dpc_flags, cmd->result, cmd->allowed));
+ rval = qla4xxx_isp_check_reg(ha);
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+ return FAILED;
+ }
+
stat = qla4xxx_reset_target(ha, ddb_entry);
if (stat != QLA_SUCCESS) {
starget_printk(KERN_INFO, scsi_target(cmd->device),
@@ -9374,9 +9413,16 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd)
{
int return_status = FAILED;
struct scsi_qla_host *ha;
+ int rval;
ha = to_qla_host(cmd->device->host);
+ rval = qla4xxx_isp_check_reg(ha);
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+ return FAILED;
+ }
+
if ((is_qla8032(ha) || is_qla8042(ha)) && ql4xdontresethba)
qla4_83xx_set_idc_dontreset(ha);
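Note: on surprise PCI removal (or a dead register window) MMIO reads return
all-ones, so each error-handler entry point now probes a status register first
and fails the EH action immediately instead of issuing mailbox commands to
absent hardware; QL4_ISP_REG_DISCONNECT (0xffffffffU, defined in ql4_def.h
above) is the sentinel. The idiom in isolation:

    /* a register that can never legitimately read as all-ones doubles
     * as a PCI liveness check */
    static bool adapter_gone(void __iomem *status_reg)
    {
            return readl(status_reg) == 0xffffffffU;
    }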
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index d042915ce895..ca53a5f785ee 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -223,7 +223,8 @@ static void scsi_eh_reset(struct scsi_cmnd *scmd)
static void scsi_eh_inc_host_failed(struct rcu_head *head)
{
- struct Scsi_Host *shost = container_of(head, typeof(*shost), rcu);
+ struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
+ struct Scsi_Host *shost = scmd->device->host;
unsigned long flags;
spin_lock_irqsave(shost->host_lock, flags);
@@ -259,7 +260,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
* Ensure that all tasks observe the host state change before the
* host_failed change.
*/
- call_rcu(&shost->rcu, scsi_eh_inc_host_failed);
+ call_rcu(&scmd->rcu, scsi_eh_inc_host_failed);
}
/**
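Note: the RCU head moves from the Scsi_Host into each scsi_cmnd. With a single
per-host head, two commands entering error handling back-to-back could pass
the same rcu_head to call_rcu() before the first grace period elapsed, which
corrupts the RCU callback list; per-command heads make the callbacks
independent. The lifecycle pairing spans this hunk and the scsi_lib.c hunks
below:

    init_rcu_head(&cmd->rcu);                       /* scsi_initialize_rq() */
    call_rcu(&cmd->rcu, scsi_eh_inc_host_failed);   /* scsi_eh_scmd_add()   */
    destroy_rcu_head(&cmd->rcu);                    /* scsi_end_request()   */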
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a86df9ca7d1c..c84f931388f2 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -671,6 +671,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
if (!blk_rq_is_scsi(req)) {
WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
cmd->flags &= ~SCMD_INITIALIZED;
+ destroy_rcu_head(&cmd->rcu);
}
if (req->mq_ctx) {
@@ -720,6 +721,8 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
int result)
{
switch (host_byte(result)) {
+ case DID_OK:
+ return BLK_STS_OK;
case DID_TRANSPORT_FAILFAST:
return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:
@@ -1151,6 +1154,7 @@ static void scsi_initialize_rq(struct request *rq)
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
scsi_req_init(&cmd->req);
+ init_rcu_head(&cmd->rcu);
cmd->jiffies_at_alloc = jiffies;
cmd->retries = 0;
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index bff21e636ddd..3541caf3fceb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2595,6 +2595,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
int res;
struct scsi_device *sdp = sdkp->device;
struct scsi_mode_data data;
+ int disk_ro = get_disk_ro(sdkp->disk);
int old_wp = sdkp->write_prot;
set_disk_ro(sdkp->disk, 0);
@@ -2635,7 +2636,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
"Test WP failed, assume Write Enabled\n");
} else {
sdkp->write_prot = ((data.device_specific & 0x80) != 0);
- set_disk_ro(sdkp->disk, sdkp->write_prot);
+ set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro);
if (sdkp->first_scan || old_wp != sdkp->write_prot) {
sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n",
sdkp->write_prot ? "on" : "off");
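Note: re-reading the mode pages used to call set_disk_ro() with only the
hardware write-protect bit, silently clearing a read-only policy the admin had
set (for example via BLKROSET). Revalidate now snapshots get_disk_ro() first
and ORs it back in:

    int user_ro = get_disk_ro(sdkp->disk);  /* admin policy, e.g. BLKROSET */

    set_disk_ro(sdkp->disk, 0);             /* probing needs WP cleared */
    /* ... MODE SENSE fills sdkp->write_prot ... */
    set_disk_ro(sdkp->disk, sdkp->write_prot || user_ro);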
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 6c348a211ebb..89cf4498f535 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -403,7 +403,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
*/
static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
{
- u64 zone_blocks;
+ u64 zone_blocks = 0;
sector_t block = 0;
unsigned char *buf;
unsigned char *rec;
@@ -421,10 +421,8 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
/* Do a report zone to get the same field */
ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0);
- if (ret) {
- zone_blocks = 0;
- goto out;
- }
+ if (ret)
+ goto out_free;
same = buf[4] & 0x0f;
if (same > 0) {
@@ -464,7 +462,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
ret = sd_zbc_report_zones(sdkp, buf,
SD_ZBC_BUF_SIZE, block);
if (ret)
- return ret;
+ goto out_free;
}
} while (block < sdkp->capacity);
@@ -472,35 +470,32 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
zone_blocks = sdkp->zone_blocks;
out:
- kfree(buf);
-
if (!zone_blocks) {
if (sdkp->first_scan)
sd_printk(KERN_NOTICE, sdkp,
"Devices with non constant zone "
"size are not supported\n");
- return -ENODEV;
- }
-
- if (!is_power_of_2(zone_blocks)) {
+ ret = -ENODEV;
+ } else if (!is_power_of_2(zone_blocks)) {
if (sdkp->first_scan)
sd_printk(KERN_NOTICE, sdkp,
"Devices with non power of 2 zone "
"size are not supported\n");
- return -ENODEV;
- }
-
- if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
+ ret = -ENODEV;
+ } else if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
if (sdkp->first_scan)
sd_printk(KERN_NOTICE, sdkp,
"Zone size too large\n");
- return -ENODEV;
+ ret = -ENODEV;
+ } else {
+ sdkp->zone_blocks = zone_blocks;
+ sdkp->zone_shift = ilog2(zone_blocks);
}
- sdkp->zone_blocks = zone_blocks;
- sdkp->zone_shift = ilog2(zone_blocks);
+out_free:
+ kfree(buf);
- return 0;
+ return ret;
}
/**
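Note: sd_zbc_check_zone_size() is restructured to a single exit: zone_blocks
starts at 0, every failure (including the mid-loop report-zones error that
previously did a bare `return ret` and leaked buf) funnels through
out/out_free, and the three validity checks become an else-if chain that sets
ret instead of returning. Shape of the fix:

    buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
    if (!buf)
            return -ENOMEM;

    ret = report_zones(buf);
    if (ret)
            goto out_free;          /* was 'return ret', leaking buf */
    /* ... validation sets ret and falls through ... */
    out_free:
            kfree(buf);             /* every path frees exactly once */
            return ret;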
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 40fc7a590e81..8c51d628b52e 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1311,7 +1311,8 @@ static int storvsc_do_io(struct hv_device *device,
*/
cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num)));
- for_each_cpu(tgt_cpu, &alloced_mask) {
+ for_each_cpu_wrap(tgt_cpu, &alloced_mask,
+ outgoing_channel->target_cpu + 1) {
if (tgt_cpu != outgoing_channel->target_cpu) {
outgoing_channel =
stor_device->stor_chns[tgt_cpu];
@@ -1657,7 +1658,7 @@ static struct scsi_host_template scsi_driver = {
.eh_timed_out = storvsc_eh_timed_out,
.slave_alloc = storvsc_device_alloc,
.slave_configure = storvsc_device_configure,
- .cmd_per_lun = 255,
+ .cmd_per_lun = 2048,
.this_id = -1,
.use_clustering = ENABLE_CLUSTERING,
/* Make sure we dont get a sg segment crosses a page boundary */
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index ca360daa6a25..378af306fda1 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -536,7 +536,7 @@ sym_getsync(struct sym_hcb *np, u_char dt, u_char sfac, u_char *divp, u_char *fa
* Look for the greatest clock divisor that allows an
* input speed faster than the period.
*/
- while (div-- > 0)
+ while (--div > 0)
if (kpc >= (div_10M[div] << 2)) break;
/*
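Note: `while (div-- > 0)` exits with div == -1 when no divisor satisfies the
test, and div is then used as an index into div_10M[], reading before the
array. Pre-decrement caps the exit value at 0. The difference, standalone:

    #include <stdio.h>

    int main(void)
    {
            int a = 4, b = 4;

            while (a-- > 0)
                    ;       /* body sees 3,2,1,0; exits with a == -1 */
            while (--b > 0)
                    ;       /* body sees 3,2,1;   exits with b == 0  */

            printf("post: %d, pre: %d\n", a, b);    /* post: -1, pre: 0 */
            return 0;
    }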
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index a355d989b414..c7da2c185990 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4352,6 +4352,8 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
/* REPORT SUPPORTED OPERATION CODES is not supported */
sdev->no_report_opcodes = 1;
+ /* WRITE_SAME command is not supported */
+ sdev->no_write_same = 1;
ufshcd_set_queue_depth(sdev);
diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c
index e4f5bb056fd2..ba3cfa8e279b 100644
--- a/drivers/soc/fsl/qbman/qman.c
+++ b/drivers/soc/fsl/qbman/qman.c
@@ -2443,39 +2443,21 @@ struct cgr_comp {
struct completion completion;
};
-static int qman_delete_cgr_thread(void *p)
+static void qman_delete_cgr_smp_call(void *p)
{
- struct cgr_comp *cgr_comp = (struct cgr_comp *)p;
- int ret;
-
- ret = qman_delete_cgr(cgr_comp->cgr);
- complete(&cgr_comp->completion);
-
- return ret;
+ qman_delete_cgr((struct qman_cgr *)p);
}
void qman_delete_cgr_safe(struct qman_cgr *cgr)
{
- struct task_struct *thread;
- struct cgr_comp cgr_comp;
-
preempt_disable();
if (qman_cgr_cpus[cgr->cgrid] != smp_processor_id()) {
- init_completion(&cgr_comp.completion);
- cgr_comp.cgr = cgr;
- thread = kthread_create(qman_delete_cgr_thread, &cgr_comp,
- "cgr_del");
-
- if (IS_ERR(thread))
- goto out;
-
- kthread_bind(thread, qman_cgr_cpus[cgr->cgrid]);
- wake_up_process(thread);
- wait_for_completion(&cgr_comp.completion);
+ smp_call_function_single(qman_cgr_cpus[cgr->cgrid],
+ qman_delete_cgr_smp_call, cgr, true);
preempt_enable();
return;
}
-out:
+
qman_delete_cgr(cgr);
preempt_enable();
}
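Note: the old path created and bound a kthread while preemption was disabled,
and kthread_create() sleeps, which is illegal in that context, all just to run
qman_delete_cgr() on the CGR's owning CPU. smp_call_function_single() with
wait=1 does exactly that job from non-preemptible (IRQs-on) context and
removes the completion plumbing:

    static void delete_cgr_on_cpu(void *p)
    {
            qman_delete_cgr(p);     /* runs on the target CPU */
    }

    /* caller, with preemption disabled but IRQs enabled: */
    smp_call_function_single(owning_cpu, delete_cgr_on_cpu, cgr, 1 /* wait */);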
diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c
index 53f7275d6cbd..750f93197411 100644
--- a/drivers/soc/imx/gpc.c
+++ b/drivers/soc/imx/gpc.c
@@ -348,7 +348,7 @@ static int imx_gpc_old_dt_init(struct device *dev, struct regmap *regmap,
if (i == 1) {
domain->supply = devm_regulator_get(dev, "pu");
if (IS_ERR(domain->supply))
- return PTR_ERR(domain->supply);;
+ return PTR_ERR(domain->supply);
ret = imx_pgc_get_clocks(dev, domain);
if (ret)
@@ -470,13 +470,21 @@ static int imx_gpc_probe(struct platform_device *pdev)
static int imx_gpc_remove(struct platform_device *pdev)
{
+ struct device_node *pgc_node;
int ret;
+ pgc_node = of_get_child_by_name(pdev->dev.of_node, "pgc");
+
+ /* bail out if DT too old and doesn't provide the necessary info */
+ if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells") &&
+ !pgc_node)
+ return 0;
+
/*
* If the old DT binding is used the toplevel driver needs to
* de-register the power domains
*/
- if (!of_get_child_by_name(pdev->dev.of_node, "pgc")) {
+ if (!pgc_node) {
of_genpd_del_provider(pdev->dev.of_node);
ret = pm_genpd_remove(&imx_gpc_domains[GPC_PGC_DOMAIN_PU].base);
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index b3f5cae98ea6..9371651d8017 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -117,7 +117,7 @@ config SSB_SERIAL
config SSB_DRIVER_PCICORE_POSSIBLE
bool
- depends on SSB_PCIHOST
+ depends on SSB_PCIHOST && SSB = y
default y
config SSB_DRIVER_PCICORE
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 65420a9f0e82..116594413f66 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -522,7 +522,7 @@ static int ssb_devices_register(struct ssb_bus *bus)
/* Set dev to NULL to not unregister
* dev on error unwinding. */
sdev->dev = NULL;
- kfree(devwrap);
+ put_device(dev);
goto error;
}
dev_idx++;
@@ -1116,7 +1116,7 @@ static bool ssb_dma_translation_special_bit(struct ssb_device *dev)
chip_id == 43231 || chip_id == 43222);
}
- return 0;
+ return false;
}
u32 ssb_dma_translation(struct ssb_device *dev)
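The ssb fix above follows the driver-core rule that once device_register() (or device_initialize()) has run, the struct device is refcounted: even on registration failure it must be released with put_device(), which drops the last reference and invokes the release() callback, whereas a direct kfree() corrupts the kobject lifetime. A hedged sketch, with demo_dev_release() as an assumed release callback that does the final kfree():

	#include <linux/device.h>

	static void demo_dev_release(struct device *dev);	/* kfree()s the container */

	static int demo_register_child(struct device *dev)
	{
		int err;

		dev->release = demo_dev_release;
		err = device_register(dev);
		if (err) {
			/* not kfree(dev): registration may have taken references */
			put_device(dev);
			return err;
		}
		return 0;
	}
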
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index bbdc53b686dd..86580b6df33d 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -326,24 +326,23 @@ static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin)
mutex_lock(&ashmem_mutex);
if (asma->size == 0) {
- ret = -EINVAL;
- goto out;
+ mutex_unlock(&ashmem_mutex);
+ return -EINVAL;
}
if (!asma->file) {
- ret = -EBADF;
- goto out;
+ mutex_unlock(&ashmem_mutex);
+ return -EBADF;
}
+ mutex_unlock(&ashmem_mutex);
+
ret = vfs_llseek(asma->file, offset, origin);
if (ret < 0)
- goto out;
+ return ret;
/** Copy f_pos from backing file, since f_ops->llseek() sets it */
file->f_pos = asma->file->f_pos;
-
-out:
- mutex_unlock(&ashmem_mutex);
return ret;
}
@@ -702,30 +701,30 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd,
size_t pgstart, pgend;
int ret = -EINVAL;
- if (unlikely(!asma->file))
- return -EINVAL;
-
if (unlikely(copy_from_user(&pin, p, sizeof(pin))))
return -EFAULT;
+ mutex_lock(&ashmem_mutex);
+
+ if (unlikely(!asma->file))
+ goto out_unlock;
+
/* per custom, you can pass zero for len to mean "everything onward" */
if (!pin.len)
pin.len = PAGE_ALIGN(asma->size) - pin.offset;
if (unlikely((pin.offset | pin.len) & ~PAGE_MASK))
- return -EINVAL;
+ goto out_unlock;
if (unlikely(((__u32)-1) - pin.offset < pin.len))
- return -EINVAL;
+ goto out_unlock;
if (unlikely(PAGE_ALIGN(asma->size) < pin.offset + pin.len))
- return -EINVAL;
+ goto out_unlock;
pgstart = pin.offset / PAGE_SIZE;
pgend = pgstart + (pin.len / PAGE_SIZE) - 1;
- mutex_lock(&ashmem_mutex);
-
switch (cmd) {
case ASHMEM_PIN:
ret = ashmem_pin(asma, pgstart, pgend);
@@ -738,6 +737,7 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd,
break;
}
+out_unlock:
mutex_unlock(&ashmem_mutex);
return ret;
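Both ashmem hunks apply one locking recipe: faulting user copies happen before the mutex is taken (copy_from_user() can fault and acquire mmap_sem, and ashmem's mmap path takes ashmem_mutex under mmap_sem, so copying with the mutex held would invert the lock order), and every check of shared state such as asma->file moves inside the critical section. A hedged sketch, with illustrative frob_* names:

	#include <linux/fs.h>
	#include <linux/mutex.h>
	#include <linux/uaccess.h>

	struct frob_req { __u32 offset; __u32 len; };
	struct frob_state {
		struct mutex lock;
		struct file *file;
	};

	static long frob_ioctl(struct frob_state *st, void __user *p)
	{
		struct frob_req req;
		long ret = -EINVAL;

		if (copy_from_user(&req, p, sizeof(req)))	/* may fault/sleep */
			return -EFAULT;

		mutex_lock(&st->lock);
		if (!st->file)			/* validate only under the lock */
			goto out_unlock;
		ret = 0;			/* ... act on req ... */
	out_unlock:
		mutex_unlock(&st->lock);
		return ret;
	}
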
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index 94e06925c712..49718c96bf9e 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -12,6 +12,7 @@
#include <linux/err.h>
#include <linux/cma.h>
#include <linux/scatterlist.h>
+#include <linux/highmem.h>
#include "ion.h"
@@ -42,6 +43,22 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
if (!pages)
return -ENOMEM;
+ if (PageHighMem(pages)) {
+ unsigned long nr_clear_pages = nr_pages;
+ struct page *page = pages;
+
+ while (nr_clear_pages > 0) {
+ void *vaddr = kmap_atomic(page);
+
+ memset(vaddr, 0, PAGE_SIZE);
+ kunmap_atomic(vaddr);
+ page++;
+ nr_clear_pages--;
+ }
+ } else {
+ memset(page_address(pages), 0, size);
+ }
+
table = kmalloc(sizeof(*table), GFP_KERNEL);
if (!table)
goto err;
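The ion hunk zeroes freshly allocated CMA pages and has to handle highmem: page_address() is only valid for lowmem pages that live in the kernel's linear mapping, so highmem pages are mapped, cleared, and unmapped one at a time with kmap_atomic(). A hedged sketch of the idiom (assumes a physically contiguous run, as CMA provides):

	#include <linux/highmem.h>
	#include <linux/string.h>

	static void zero_contig_pages(struct page *page, unsigned long nr)
	{
		if (!PageHighMem(page)) {
			/* lowmem: one memset covers the contiguous run */
			memset(page_address(page), 0, nr * PAGE_SIZE);
			return;
		}
		while (nr--) {
			/* highmem: no permanent mapping; map one page at a time */
			void *vaddr = kmap_atomic(page);

			memset(vaddr, 0, PAGE_SIZE);
			kunmap_atomic(vaddr);
			page++;
		}
	}
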
diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c
index e618a87521a3..9d733471ca2e 100644
--- a/drivers/staging/comedi/drivers.c
+++ b/drivers/staging/comedi/drivers.c
@@ -475,8 +475,7 @@ unsigned int comedi_nsamples_left(struct comedi_subdevice *s,
struct comedi_cmd *cmd = &async->cmd;
if (cmd->stop_src == TRIG_COUNT) {
- unsigned int nscans = nsamples / cmd->scan_end_arg;
- unsigned int scans_left = __comedi_nscans_left(s, nscans);
+ unsigned int scans_left = __comedi_nscans_left(s, cmd->stop_arg);
unsigned int scan_pos =
comedi_bytes_to_samples(s, async->scan_progress);
unsigned long long samples_left = 0;
diff --git a/drivers/staging/fsl-mc/bus/Kconfig b/drivers/staging/fsl-mc/bus/Kconfig
index 1f9100049176..b35ef7ee6901 100644
--- a/drivers/staging/fsl-mc/bus/Kconfig
+++ b/drivers/staging/fsl-mc/bus/Kconfig
@@ -7,7 +7,7 @@
config FSL_MC_BUS
bool "QorIQ DPAA2 fsl-mc bus driver"
- depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86 || PPC)))
+ depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86_LOCAL_APIC || PPC)))
select GENERIC_MSI_IRQ_DOMAIN
help
Driver to enable the bus infrastructure for the QorIQ DPAA2
diff --git a/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c b/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
index 5064d5ddf581..fc2013aade51 100644
--- a/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
+++ b/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
@@ -73,6 +73,8 @@ static int __init its_fsl_mc_msi_init(void)
for (np = of_find_matching_node(NULL, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller"))
continue;
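The fsl-mc hunk adds the standard devicetree availability check: of_find_matching_node() also returns nodes whose status property is "disabled", so they must be skipped explicitly. A hedged sketch of the loop shape using the for_each_matching_node() helper:

	#include <linux/of.h>

	static void demo_scan(const struct of_device_id *matches)
	{
		struct device_node *np;

		for_each_matching_node(np, matches) {
			/* honours status = "disabled" in the DT */
			if (!of_device_is_available(np))
				continue;
			/* ... set up this node ... */
		}
	}
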
diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c
index f01595593ce2..425e8b82533b 100644
--- a/drivers/staging/iio/adc/ad7192.c
+++ b/drivers/staging/iio/adc/ad7192.c
@@ -141,6 +141,8 @@
#define AD7192_GPOCON_P1DAT BIT(1) /* P1 state */
#define AD7192_GPOCON_P0DAT BIT(0) /* P0 state */
+#define AD7192_EXT_FREQ_MHZ_MIN 2457600
+#define AD7192_EXT_FREQ_MHZ_MAX 5120000
#define AD7192_INT_FREQ_MHZ 4915200
/* NOTE:
@@ -218,6 +220,12 @@ static int ad7192_calibrate_all(struct ad7192_state *st)
ARRAY_SIZE(ad7192_calib_arr));
}
+static inline bool ad7192_valid_external_frequency(u32 freq)
+{
+ return (freq >= AD7192_EXT_FREQ_MHZ_MIN &&
+ freq <= AD7192_EXT_FREQ_MHZ_MAX);
+}
+
static int ad7192_setup(struct ad7192_state *st,
const struct ad7192_platform_data *pdata)
{
@@ -243,17 +251,20 @@ static int ad7192_setup(struct ad7192_state *st,
id);
switch (pdata->clock_source_sel) {
- case AD7192_CLK_EXT_MCLK1_2:
- case AD7192_CLK_EXT_MCLK2:
- st->mclk = AD7192_INT_FREQ_MHZ;
- break;
case AD7192_CLK_INT:
case AD7192_CLK_INT_CO:
- if (pdata->ext_clk_hz)
- st->mclk = pdata->ext_clk_hz;
- else
- st->mclk = AD7192_INT_FREQ_MHZ;
+ st->mclk = AD7192_INT_FREQ_MHZ;
break;
+ case AD7192_CLK_EXT_MCLK1_2:
+ case AD7192_CLK_EXT_MCLK2:
+ if (ad7192_valid_external_frequency(pdata->ext_clk_hz)) {
+ st->mclk = pdata->ext_clk_hz;
+ break;
+ }
+ dev_err(&st->sd.spi->dev, "Invalid frequency setting %u\n",
+ pdata->ext_clk_hz);
+ ret = -EINVAL;
+ goto out;
default:
ret = -EINVAL;
goto out;
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 2b28fb9c0048..3bcf49466361 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -648,8 +648,6 @@ static int ad5933_register_ring_funcs_and_init(struct iio_dev *indio_dev)
/* Ring buffer functions - here trigger setup related */
indio_dev->setup_ops = &ad5933_ring_setup_ops;
- indio_dev->modes |= INDIO_BUFFER_HARDWARE;
-
return 0;
}
@@ -762,7 +760,7 @@ static int ad5933_probe(struct i2c_client *client,
indio_dev->dev.parent = &client->dev;
indio_dev->info = &ad5933_info;
indio_dev->name = id->name;
- indio_dev->modes = INDIO_DIRECT_MODE;
+ indio_dev->modes = (INDIO_BUFFER_SOFTWARE | INDIO_DIRECT_MODE);
indio_dev->channels = ad5933_channels;
indio_dev->num_channels = ARRAY_SIZE(ad5933_channels);
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 5c0e59e8fe46..cbe98bc2b998 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -2180,6 +2180,12 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
}
if (tty_hung_up_p(file))
break;
+ /*
+ * Abort readers for ttys which never actually
+ * get hung up. See __tty_hangup().
+ */
+ if (test_bit(TTY_HUPPING, &tty->flags))
+ break;
if (!timeout)
break;
if (file->f_flags & O_NONBLOCK) {
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 54adf8d56350..a93f77ab3da0 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -3387,11 +3387,9 @@ static int serial_pci_is_class_communication(struct pci_dev *dev)
/*
* If it is not a communications device or the programming
* interface is greater than 6, give up.
- *
- * (Should we try to make guesses for multiport serial devices
- * later?)
*/
if ((((dev->class >> 8) != PCI_CLASS_COMMUNICATION_SERIAL) &&
+ ((dev->class >> 8) != PCI_CLASS_COMMUNICATION_MULTISERIAL) &&
((dev->class >> 8) != PCI_CLASS_COMMUNICATION_MODEM)) ||
(dev->class & 0xff) > 6)
return -ENODEV;
@@ -3428,6 +3426,12 @@ serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board)
{
int num_iomem, num_port, first_port = -1, i;
+ /*
+ * Should we try to make guesses for multiport serial devices later?
+ */
+ if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_MULTISERIAL)
+ return -ENODEV;
+
num_iomem = num_port = 0;
for (i = 0; i < PCI_NUM_BAR_RESOURCES; i++) {
if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
@@ -4699,6 +4703,17 @@ static const struct pci_device_id serial_pci_tbl[] = {
PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */
pbn_b2_4_115200 },
/*
+ * BrainBoxes UC-260
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0D21,
+ PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
+ pbn_b2_4_115200 },
+ { PCI_VENDOR_ID_INTASHIELD, 0x0E34,
+ PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
+ pbn_b2_4_115200 },
+ /*
* Perle PCI-RAS cards
*/
{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030,
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index df46a9e88c34..e287fe8f10fc 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1734,6 +1734,7 @@ static void atmel_get_ip_name(struct uart_port *port)
switch (version) {
case 0x302:
case 0x10213:
+ case 0x10302:
dev_dbg(port->dev, "This version is usart\n");
atmel_port->has_frac_baudrate = true;
atmel_port->has_hw_timer = true;
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
index 870e84fb6e39..a24278380fec 100644
--- a/drivers/tty/serial/earlycon.c
+++ b/drivers/tty/serial/earlycon.c
@@ -245,11 +245,12 @@ int __init of_setup_earlycon(const struct earlycon_id *match,
}
port->mapbase = addr;
port->uartclk = BASE_BAUD * 16;
- port->membase = earlycon_map(port->mapbase, SZ_4K);
val = of_get_flat_dt_prop(node, "reg-offset", NULL);
if (val)
port->mapbase += be32_to_cpu(*val);
+ port->membase = earlycon_map(port->mapbase, SZ_4K);
+
val = of_get_flat_dt_prop(node, "reg-shift", NULL);
if (val)
port->regshift = be32_to_cpu(*val);
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 1d7ca382bc12..a33c685af990 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -2093,7 +2093,7 @@ static int serial_imx_probe(struct platform_device *pdev)
uart_get_rs485_mode(&pdev->dev, &sport->port.rs485);
if (sport->port.rs485.flags & SER_RS485_ENABLED &&
- (!sport->have_rtscts || !sport->have_rtsgpio))
+ (!sport->have_rtscts && !sport->have_rtsgpio))
dev_err(&pdev->dev, "no RTS control, disabling rs485\n");
imx_rs485_config(&sport->port, &sport->port.rs485);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index c8dde56b532b..35b9201db3b4 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1144,6 +1144,8 @@ static int uart_do_autoconfig(struct tty_struct *tty,struct uart_state *state)
uport->ops->config_port(uport, flags);
ret = uart_startup(tty, state, 1);
+ if (ret == 0)
+ tty_port_set_initialized(port, true);
if (ret > 0)
ret = 0;
}
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 7257c078e155..44adf9db38f8 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -885,6 +885,8 @@ static void sci_receive_chars(struct uart_port *port)
/* Tell the rest of the system the news. New characters! */
tty_flip_buffer_push(tport);
} else {
+ /* TTY buffers full; read from RX reg to prevent lockup */
+ serial_port_in(port, SCxRDR);
serial_port_in(port, SCxSR); /* dummy read */
sci_clear_SCxSR(port, SCxSR_RDxF_CLEAR(port));
}
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index eb9133b472f4..63114ea35ec1 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -586,6 +586,14 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session)
return;
}
+ /*
+ * Some console devices aren't actually hung up for technical and
+ * historical reasons, which can lead to indefinite interruptible
+ * sleep in n_tty_read(). The following explicitly tells
+ * n_tty_read() to abort readers.
+ */
+ set_bit(TTY_HUPPING, &tty->flags);
+
/* inuse_filps is protected by the single tty lock,
this really needs to change if we want to flush the
workqueue with the lock held */
@@ -640,6 +648,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session)
* from the ldisc side, which is now guaranteed.
*/
set_bit(TTY_HUPPED, &tty->flags);
+ clear_bit(TTY_HUPPING, &tty->flags);
tty_unlock(tty);
if (f)
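The tty_io/n_tty pair above introduces a transient "hangup in progress" bit: the teardown side sets it before the slow part of the hangup and wakes waiters, sleeping readers test it and abort instead of dozing forever, and it is cleared once the permanent TTY_HUPPED bit is set. A hedged sketch of the handshake, with illustrative demo_* names:

	#include <linux/bitops.h>
	#include <linux/wait.h>

	#define DEMO_HUPPING	0	/* hangup in progress */
	#define DEMO_HUPPED	1	/* hangup finished */

	struct demo_tty {
		unsigned long flags;
		wait_queue_head_t read_wait;
	};

	static void demo_hangup(struct demo_tty *d)
	{
		set_bit(DEMO_HUPPING, &d->flags);	/* readers: abort */
		wake_up_interruptible(&d->read_wait);
		/* ... lengthy teardown runs here ... */
		set_bit(DEMO_HUPPED, &d->flags);
		clear_bit(DEMO_HUPPING, &d->flags);
	}

	static bool demo_read_should_abort(struct demo_tty *d)
	{
		return test_bit(DEMO_HUPPED, &d->flags) ||
		       test_bit(DEMO_HUPPING, &d->flags);
	}
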
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index f699abab1787..148f3ee70286 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -19,6 +19,12 @@ config USB_EHCI_BIG_ENDIAN_MMIO
config USB_EHCI_BIG_ENDIAN_DESC
bool
+config USB_UHCI_BIG_ENDIAN_MMIO
+ bool
+
+config USB_UHCI_BIG_ENDIAN_DESC
+ bool
+
menuconfig USB_SUPPORT
bool "USB support"
depends on HAS_IOMEM
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 06b3b54a0e68..7b366a6c0b49 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -174,6 +174,7 @@ static int acm_wb_alloc(struct acm *acm)
wb = &acm->wb[wbn];
if (!wb->use) {
wb->use = 1;
+ wb->len = 0;
return wbn;
}
wbn = (wbn + 1) % ACM_NW;
@@ -805,16 +806,18 @@ static int acm_tty_write(struct tty_struct *tty,
static void acm_tty_flush_chars(struct tty_struct *tty)
{
struct acm *acm = tty->driver_data;
- struct acm_wb *cur = acm->putbuffer;
+ struct acm_wb *cur;
int err;
unsigned long flags;
+ spin_lock_irqsave(&acm->write_lock, flags);
+
+ cur = acm->putbuffer;
if (!cur) /* nothing to do */
- return;
+ goto out;
acm->putbuffer = NULL;
err = usb_autopm_get_interface_async(acm->control);
- spin_lock_irqsave(&acm->write_lock, flags);
if (err < 0) {
cur->use = 0;
acm->putbuffer = cur;
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index c64cf6c4a83d..0c11d40a12bc 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -151,6 +151,10 @@ int usb_control_msg(struct usb_device *dev, unsigned int pipe, __u8 request,
ret = usb_internal_control_msg(dev, pipe, dr, data, size, timeout);
+ /* Linger a bit, prior to the next control message. */
+ if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
+ msleep(200);
+
kfree(dr);
return ret;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 4024926c1d68..54b019e267c5 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -226,8 +226,12 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
+ /* Corsair K70 RGB */
+ { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
+
/* Corsair Strafe RGB */
- { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT },
+ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT |
+ USB_QUIRK_DELAY_CTRL_MSG },
/* Corsair K70 LUX */
{ USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT },
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index e4c3ce0de5de..5bcad1d869b5 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1917,7 +1917,9 @@ static void dwc2_hsotg_program_zlp(struct dwc2_hsotg *hsotg,
/* Not specific buffer needed for ep0 ZLP */
dma_addr_t dma = hs_ep->desc_list_dma;
- dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep);
+ if (!index)
+ dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep);
+
dwc2_gadget_config_nonisoc_xfer_ddma(hs_ep, dma, 0);
} else {
dwc2_writel(DXEPTSIZ_MC(1) | DXEPTSIZ_PKTCNT(1) |
@@ -2974,9 +2976,13 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx,
if (ints & DXEPINT_STSPHSERCVD) {
dev_dbg(hsotg->dev, "%s: StsPhseRcvd\n", __func__);
- /* Move to STATUS IN for DDMA */
- if (using_desc_dma(hsotg))
- dwc2_hsotg_ep0_zlp(hsotg, true);
+ /* Safety check EP0 state when STSPHSERCVD asserted */
+ if (hsotg->ep0_state == DWC2_EP0_DATA_OUT) {
+ /* Move to STATUS IN for DDMA */
+ if (using_desc_dma(hsotg))
+ dwc2_hsotg_ep0_zlp(hsotg, true);
+ }
+
}
if (ints & DXEPINT_BACK2BACKSETUP)
@@ -3375,12 +3381,6 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
dwc2_writel(dwc2_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) |
DXEPCTL_USBACTEP, hsotg->regs + DIEPCTL0);
- dwc2_hsotg_enqueue_setup(hsotg);
-
- dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
- dwc2_readl(hsotg->regs + DIEPCTL0),
- dwc2_readl(hsotg->regs + DOEPCTL0));
-
/* clear global NAKs */
val = DCTL_CGOUTNAK | DCTL_CGNPINNAK;
if (!is_usb_reset)
@@ -3391,6 +3391,12 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
mdelay(3);
hsotg->lx_state = DWC2_L0;
+
+ dwc2_hsotg_enqueue_setup(hsotg);
+
+ dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
+ dwc2_readl(hsotg->regs + DIEPCTL0),
+ dwc2_readl(hsotg->regs + DOEPCTL0));
}
static void dwc2_hsotg_core_disconnect(struct dwc2_hsotg *hsotg)
diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index 03fd20f0b496..c4a47496d2fb 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -137,7 +137,7 @@ static void dwc2_set_stm32f4x9_fsotg_params(struct dwc2_hsotg *hsotg)
p->activate_stm_fs_transceiver = true;
}
-static void dwc2_set_stm32f7xx_hsotg_params(struct dwc2_hsotg *hsotg)
+static void dwc2_set_stm32f7_hsotg_params(struct dwc2_hsotg *hsotg)
{
struct dwc2_core_params *p = &hsotg->params;
@@ -164,8 +164,8 @@ const struct of_device_id dwc2_of_match_table[] = {
{ .compatible = "st,stm32f4x9-fsotg",
.data = dwc2_set_stm32f4x9_fsotg_params },
{ .compatible = "st,stm32f4x9-hsotg" },
- { .compatible = "st,stm32f7xx-hsotg",
- .data = dwc2_set_stm32f7xx_hsotg_params },
+ { .compatible = "st,stm32f7-hsotg",
+ .data = dwc2_set_stm32f7_hsotg_params },
{},
};
MODULE_DEVICE_TABLE(of, dwc2_of_match_table);
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index ade2ab00d37a..e94bf91cc58a 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -100,6 +100,8 @@ static void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode)
reg &= ~(DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG));
reg |= DWC3_GCTL_PRTCAPDIR(mode);
dwc3_writel(dwc->regs, DWC3_GCTL, reg);
+
+ dwc->current_dr_role = mode;
}
static void __dwc3_set_mode(struct work_struct *work)
@@ -133,8 +135,6 @@ static void __dwc3_set_mode(struct work_struct *work)
dwc3_set_prtcap(dwc, dwc->desired_dr_role);
- dwc->current_dr_role = dwc->desired_dr_role;
-
spin_unlock_irqrestore(&dwc->lock, flags);
switch (dwc->desired_dr_role) {
@@ -175,7 +175,7 @@ void dwc3_set_mode(struct dwc3 *dwc, u32 mode)
dwc->desired_dr_role = mode;
spin_unlock_irqrestore(&dwc->lock, flags);
- queue_work(system_power_efficient_wq, &dwc->drd_work);
+ queue_work(system_freezable_wq, &dwc->drd_work);
}
u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type)
@@ -219,7 +219,7 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
* XHCI driver will reset the host block. If dwc3 was configured for
* host-only mode, then we can return early.
*/
- if (dwc->dr_mode == USB_DR_MODE_HOST)
+ if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
return 0;
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -234,6 +234,9 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
udelay(1);
} while (--retries);
+ phy_exit(dwc->usb3_generic_phy);
+ phy_exit(dwc->usb2_generic_phy);
+
return -ETIMEDOUT;
}
@@ -483,6 +486,22 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc)
parms->hwparams8 = dwc3_readl(dwc->regs, DWC3_GHWPARAMS8);
}
+static int dwc3_core_ulpi_init(struct dwc3 *dwc)
+{
+ int intf;
+ int ret = 0;
+
+ intf = DWC3_GHWPARAMS3_HSPHY_IFC(dwc->hwparams.hwparams3);
+
+ if (intf == DWC3_GHWPARAMS3_HSPHY_IFC_ULPI ||
+ (intf == DWC3_GHWPARAMS3_HSPHY_IFC_UTMI_ULPI &&
+ dwc->hsphy_interface &&
+ !strncmp(dwc->hsphy_interface, "ulpi", 4)))
+ ret = dwc3_ulpi_init(dwc);
+
+ return ret;
+}
+
/**
* dwc3_phy_setup - Configure USB PHY Interface of DWC3 Core
* @dwc: Pointer to our controller context structure
@@ -494,7 +513,6 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc)
static int dwc3_phy_setup(struct dwc3 *dwc)
{
u32 reg;
- int ret;
reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0));
@@ -565,9 +583,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc)
}
/* FALLTHROUGH */
case DWC3_GHWPARAMS3_HSPHY_IFC_ULPI:
- ret = dwc3_ulpi_init(dwc);
- if (ret)
- return ret;
/* FALLTHROUGH */
default:
break;
@@ -724,6 +739,7 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc)
}
static int dwc3_core_get_phy(struct dwc3 *dwc);
+static int dwc3_core_ulpi_init(struct dwc3 *dwc);
/**
* dwc3_core_init - Low-level initialization of DWC3 Core
@@ -755,17 +771,27 @@ static int dwc3_core_init(struct dwc3 *dwc)
dwc->maximum_speed = USB_SPEED_HIGH;
}
- ret = dwc3_core_get_phy(dwc);
+ ret = dwc3_phy_setup(dwc);
if (ret)
goto err0;
- ret = dwc3_core_soft_reset(dwc);
- if (ret)
- goto err0;
+ if (!dwc->ulpi_ready) {
+ ret = dwc3_core_ulpi_init(dwc);
+ if (ret)
+ goto err0;
+ dwc->ulpi_ready = true;
+ }
- ret = dwc3_phy_setup(dwc);
+ if (!dwc->phys_ready) {
+ ret = dwc3_core_get_phy(dwc);
+ if (ret)
+ goto err0a;
+ dwc->phys_ready = true;
+ }
+
+ ret = dwc3_core_soft_reset(dwc);
if (ret)
- goto err0;
+ goto err0a;
dwc3_core_setup_global_control(dwc);
dwc3_core_num_eps(dwc);
@@ -838,6 +864,9 @@ err1:
phy_exit(dwc->usb2_generic_phy);
phy_exit(dwc->usb3_generic_phy);
+err0a:
+ dwc3_ulpi_exit(dwc);
+
err0:
return ret;
}
@@ -916,7 +945,6 @@ static int dwc3_core_init_mode(struct dwc3 *dwc)
switch (dwc->dr_mode) {
case USB_DR_MODE_PERIPHERAL:
- dwc->current_dr_role = DWC3_GCTL_PRTCAP_DEVICE;
dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE);
if (dwc->usb2_phy)
@@ -932,7 +960,6 @@ static int dwc3_core_init_mode(struct dwc3 *dwc)
}
break;
case USB_DR_MODE_HOST:
- dwc->current_dr_role = DWC3_GCTL_PRTCAP_HOST;
dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST);
if (dwc->usb2_phy)
@@ -1234,7 +1261,6 @@ err4:
err3:
dwc3_free_event_buffers(dwc);
- dwc3_ulpi_exit(dwc);
err2:
pm_runtime_allow(&pdev->dev);
@@ -1284,7 +1310,7 @@ static int dwc3_remove(struct platform_device *pdev)
}
#ifdef CONFIG_PM
-static int dwc3_suspend_common(struct dwc3 *dwc)
+static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
unsigned long flags;
@@ -1296,6 +1322,10 @@ static int dwc3_suspend_common(struct dwc3 *dwc)
dwc3_core_exit(dwc);
break;
case DWC3_GCTL_PRTCAP_HOST:
+ /* do nothing during host runtime_suspend */
+ if (!PMSG_IS_AUTO(msg))
+ dwc3_core_exit(dwc);
+ break;
default:
/* do nothing */
break;
@@ -1304,7 +1334,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc)
return 0;
}
-static int dwc3_resume_common(struct dwc3 *dwc)
+static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg)
{
unsigned long flags;
int ret;
@@ -1320,6 +1350,13 @@ static int dwc3_resume_common(struct dwc3 *dwc)
spin_unlock_irqrestore(&dwc->lock, flags);
break;
case DWC3_GCTL_PRTCAP_HOST:
+ /* nothing to do on host runtime_resume */
+ if (!PMSG_IS_AUTO(msg)) {
+ ret = dwc3_core_init(dwc);
+ if (ret)
+ return ret;
+ }
+ break;
default:
/* do nothing */
break;
@@ -1331,12 +1368,11 @@ static int dwc3_resume_common(struct dwc3 *dwc)
static int dwc3_runtime_checks(struct dwc3 *dwc)
{
switch (dwc->current_dr_role) {
- case USB_DR_MODE_PERIPHERAL:
- case USB_DR_MODE_OTG:
+ case DWC3_GCTL_PRTCAP_DEVICE:
if (dwc->connected)
return -EBUSY;
break;
- case USB_DR_MODE_HOST:
+ case DWC3_GCTL_PRTCAP_HOST:
default:
/* do nothing */
break;
@@ -1353,7 +1389,7 @@ static int dwc3_runtime_suspend(struct device *dev)
if (dwc3_runtime_checks(dwc))
return -EBUSY;
- ret = dwc3_suspend_common(dwc);
+ ret = dwc3_suspend_common(dwc, PMSG_AUTO_SUSPEND);
if (ret)
return ret;
@@ -1369,7 +1405,7 @@ static int dwc3_runtime_resume(struct device *dev)
device_init_wakeup(dev, false);
- ret = dwc3_resume_common(dwc);
+ ret = dwc3_resume_common(dwc, PMSG_AUTO_RESUME);
if (ret)
return ret;
@@ -1416,7 +1452,7 @@ static int dwc3_suspend(struct device *dev)
struct dwc3 *dwc = dev_get_drvdata(dev);
int ret;
- ret = dwc3_suspend_common(dwc);
+ ret = dwc3_suspend_common(dwc, PMSG_SUSPEND);
if (ret)
return ret;
@@ -1432,7 +1468,7 @@ static int dwc3_resume(struct device *dev)
pinctrl_pm_select_default_state(dev);
- ret = dwc3_resume_common(dwc);
+ ret = dwc3_resume_common(dwc, PMSG_RESUME);
if (ret)
return ret;
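The dwc3 PM rework threads a pm_message_t through the common suspend/resume paths so one function can distinguish runtime PM from system sleep via PMSG_IS_AUTO(): in host mode the controller is left alone for runtime suspend but fully torn down for system sleep. A hedged sketch of just that split, with demo_core_exit() assumed:

	#include <linux/device.h>
	#include <linux/pm.h>

	struct demo_dev;					/* driver state, assumed */
	static int demo_core_exit(struct demo_dev *dd);		/* full power-down, assumed */

	static int demo_suspend_common(struct demo_dev *dd, pm_message_t msg)
	{
		if (PMSG_IS_AUTO(msg))		/* runtime PM: do nothing */
			return 0;
		return demo_core_exit(dd);	/* system sleep: tear down */
	}

	static int demo_runtime_suspend(struct device *dev)
	{
		return demo_suspend_common(dev_get_drvdata(dev), PMSG_AUTO_SUSPEND);
	}

	static int demo_suspend(struct device *dev)
	{
		return demo_suspend_common(dev_get_drvdata(dev), PMSG_SUSPEND);
	}
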
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 03c7aaaac926..860d2bc184d1 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -158,13 +158,15 @@
#define DWC3_GDBGFIFOSPACE_TYPE(n) (((n) << 5) & 0x1e0)
#define DWC3_GDBGFIFOSPACE_SPACE_AVAILABLE(n) (((n) >> 16) & 0xffff)
-#define DWC3_TXFIFOQ 1
-#define DWC3_RXFIFOQ 3
-#define DWC3_TXREQQ 5
-#define DWC3_RXREQQ 7
-#define DWC3_RXINFOQ 9
-#define DWC3_DESCFETCHQ 13
-#define DWC3_EVENTQ 15
+#define DWC3_TXFIFOQ 0
+#define DWC3_RXFIFOQ 1
+#define DWC3_TXREQQ 2
+#define DWC3_RXREQQ 3
+#define DWC3_RXINFOQ 4
+#define DWC3_PSTATQ 5
+#define DWC3_DESCFETCHQ 6
+#define DWC3_EVENTQ 7
+#define DWC3_AUXEVENTQ 8
/* Global RX Threshold Configuration Register */
#define DWC3_GRXTHRCFG_MAXRXBURSTSIZE(n) (((n) & 0x1f) << 19)
@@ -795,7 +797,9 @@ struct dwc3_scratchpad_array {
* @usb3_phy: pointer to USB3 PHY
* @usb2_generic_phy: pointer to USB2 PHY
* @usb3_generic_phy: pointer to USB3 PHY
+ * @phys_ready: flag to indicate that PHYs are ready
* @ulpi: pointer to ulpi interface
+ * @ulpi_ready: flag to indicate that ULPI is initialized
* @u2sel: parameter from Set SEL request.
* @u2pel: parameter from Set SEL request.
* @u1sel: parameter from Set SEL request.
@@ -893,7 +897,10 @@ struct dwc3 {
struct phy *usb2_generic_phy;
struct phy *usb3_generic_phy;
+ bool phys_ready;
+
struct ulpi *ulpi;
+ bool ulpi_ready;
void __iomem *regs;
size_t regs_size;
diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c
index 7ae0eefc7cc7..e54c3622eb28 100644
--- a/drivers/usb/dwc3/dwc3-of-simple.c
+++ b/drivers/usb/dwc3/dwc3-of-simple.c
@@ -143,6 +143,7 @@ static int dwc3_of_simple_remove(struct platform_device *pdev)
clk_disable_unprepare(simple->clks[i]);
clk_put(simple->clks[i]);
}
+ simple->num_clocks = 0;
reset_control_assert(simple->resets);
reset_control_put(simple->resets);
diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index a4719e853b85..ed8b86517675 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -582,9 +582,25 @@ static int dwc3_omap_resume(struct device *dev)
return 0;
}
+static void dwc3_omap_complete(struct device *dev)
+{
+ struct dwc3_omap *omap = dev_get_drvdata(dev);
+
+ if (extcon_get_state(omap->edev, EXTCON_USB))
+ dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID);
+ else
+ dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF);
+
+ if (extcon_get_state(omap->edev, EXTCON_USB_HOST))
+ dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND);
+ else
+ dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT);
+}
+
static const struct dev_pm_ops dwc3_omap_dev_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(dwc3_omap_suspend, dwc3_omap_resume)
+ .complete = dwc3_omap_complete,
};
#define DEV_PM_OPS (&dwc3_omap_dev_pm_ops)
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 9c2e4a17918e..18be31d5743a 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -854,7 +854,12 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc,
trb++;
trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
trace_dwc3_complete_trb(ep0, trb);
- ep0->trb_enqueue = 0;
+
+ if (r->direction)
+ dwc->eps[1]->trb_enqueue = 0;
+ else
+ dwc->eps[0]->trb_enqueue = 0;
+
dwc->ep0_bounced = false;
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 616ef49ccb49..2bda4eb1e9ac 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2745,6 +2745,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
break;
}
+ dwc->eps[1]->endpoint.maxpacket = dwc->gadget.ep0->maxpacket;
+
/* Enable USB2 LPM Capability */
if ((dwc->revision > DWC3_REVISION_194A) &&
diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c
index 37557651b600..c13befa31110 100644
--- a/drivers/usb/gadget/function/f_eem.c
+++ b/drivers/usb/gadget/function/f_eem.c
@@ -507,7 +507,6 @@ static int eem_unwrap(struct gether *port,
0,
GFP_ATOMIC);
if (unlikely(!skb3)) {
- DBG(cdev, "unable to realign EEM packet\n");
dev_kfree_skb_any(skb2);
continue;
}
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 8f2cf3baa19c..d2428a9e8900 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1538,7 +1538,6 @@ ffs_fs_kill_sb(struct super_block *sb)
if (sb->s_fs_info) {
ffs_release_dev(sb->s_fs_info);
ffs_data_closed(sb->s_fs_info);
- ffs_data_put(sb->s_fs_info);
}
}
@@ -1855,44 +1854,20 @@ static int ffs_func_eps_enable(struct ffs_function *func)
spin_lock_irqsave(&func->ffs->eps_lock, flags);
while(count--) {
- struct usb_endpoint_descriptor *ds;
- struct usb_ss_ep_comp_descriptor *comp_desc = NULL;
- int needs_comp_desc = false;
- int desc_idx;
-
- if (ffs->gadget->speed == USB_SPEED_SUPER) {
- desc_idx = 2;
- needs_comp_desc = true;
- } else if (ffs->gadget->speed == USB_SPEED_HIGH)
- desc_idx = 1;
- else
- desc_idx = 0;
-
- /* fall-back to lower speed if desc missing for current speed */
- do {
- ds = ep->descs[desc_idx];
- } while (!ds && --desc_idx >= 0);
-
- if (!ds) {
- ret = -EINVAL;
- break;
- }
-
ep->ep->driver_data = ep;
- ep->ep->desc = ds;
- if (needs_comp_desc) {
- comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
- USB_DT_ENDPOINT_SIZE);
- ep->ep->maxburst = comp_desc->bMaxBurst + 1;
- ep->ep->comp_desc = comp_desc;
+ ret = config_ep_by_speed(func->gadget, &func->function, ep->ep);
+ if (ret) {
+ pr_err("%s: config_ep_by_speed(%s) returned %d\n",
+ __func__, ep->ep->name, ret);
+ break;
}
ret = usb_ep_enable(ep->ep);
if (likely(!ret)) {
epfile->ep = ep;
- epfile->in = usb_endpoint_dir_in(ds);
- epfile->isoc = usb_endpoint_xfer_isoc(ds);
+ epfile->in = usb_endpoint_dir_in(ep->ep->desc);
+ epfile->isoc = usb_endpoint_xfer_isoc(ep->ep->desc);
} else {
break;
}
@@ -2979,10 +2954,8 @@ static int _ffs_func_bind(struct usb_configuration *c,
struct ffs_data *ffs = func->ffs;
const int full = !!func->ffs->fs_descs_count;
- const int high = gadget_is_dualspeed(func->gadget) &&
- func->ffs->hs_descs_count;
- const int super = gadget_is_superspeed(func->gadget) &&
- func->ffs->ss_descs_count;
+ const int high = !!func->ffs->hs_descs_count;
+ const int super = !!func->ffs->ss_descs_count;
int fs_len, hs_len, ss_len, ret, i;
struct ffs_ep *eps_ptr;
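The large f_fs hunk drops the hand-rolled speed switch in favour of config_ep_by_speed(), the composite-framework helper that selects the FS/HS/SS descriptor set matching the gadget's negotiated speed and fills in ep->desc and, for SuperSpeed, ep->comp_desc and maxburst. A hedged sketch of the resulting enable path:

	#include <linux/usb/composite.h>
	#include <linux/usb/gadget.h>

	static int demo_enable_ep(struct usb_gadget *g, struct usb_function *f,
				  struct usb_ep *ep)
	{
		int ret;

		ret = config_ep_by_speed(g, f, ep);	/* picks the right descriptor */
		if (ret)
			return ret;			/* no descriptor for this speed */
		return usb_ep_enable(ep);		/* ep->desc is now valid */
	}
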
diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 11fe788b4308..d2dc1f00180b 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -524,6 +524,8 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
return ret;
}
+ iad_desc.bFirstInterface = ret;
+
std_ac_if_desc.bInterfaceNumber = ret;
uac2->ac_intf = ret;
uac2->ac_alt = 0;
diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 1e9567091d86..0875d38476ee 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -274,7 +274,6 @@ config USB_SNP_UDC_PLAT
tristate "Synopsys USB 2.0 Device controller"
depends on USB_GADGET && OF && HAS_DMA
depends on EXTCON || EXTCON=n
- select USB_GADGET_DUALSPEED
select USB_SNP_CORE
default ARCH_BCM_IPROC
help
diff --git a/drivers/usb/gadget/udc/bdc/bdc_pci.c b/drivers/usb/gadget/udc/bdc/bdc_pci.c
index 1e940f054cb8..6dbc489513cd 100644
--- a/drivers/usb/gadget/udc/bdc/bdc_pci.c
+++ b/drivers/usb/gadget/udc/bdc/bdc_pci.c
@@ -77,6 +77,7 @@ static int bdc_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
if (ret) {
dev_err(&pci->dev,
"couldn't add resources to bdc device\n");
+ platform_device_put(bdc);
return ret;
}
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 859d5b11ba4c..1f8b19d9cf97 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -180,8 +180,8 @@ EXPORT_SYMBOL_GPL(usb_ep_alloc_request);
void usb_ep_free_request(struct usb_ep *ep,
struct usb_request *req)
{
- ep->ops->free_request(ep, req);
trace_usb_ep_free_request(ep, req, 0);
+ ep->ops->free_request(ep, req);
}
EXPORT_SYMBOL_GPL(usb_ep_free_request);
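The one-line swap in usb_ep_free_request() is a use-after-free ordering fix: the tracepoint dereferences the request, so it must fire before the request is handed back to the controller driver's free_request(). A hedged sketch of the rule, with trace_demo_free_request() and demo_do_free() as assumed names:

	static void demo_free_req(struct demo_ep *ep, struct demo_req *req)
	{
		trace_demo_free_request(ep, req);	/* reads req's fields */
		demo_do_free(ep, req);			/* frees req; no use after this */
	}
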
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index e5b4ee96c4bf..56b517a38865 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -1305,7 +1305,7 @@ static void udc_reset_ep_queue(struct fsl_udc *udc, u8 pipe)
{
struct fsl_ep *ep = get_ep_by_pipe(udc, pipe);
- if (ep->name)
+ if (ep->ep.name)
nuke(ep, -ESHUTDOWN);
}
@@ -1693,7 +1693,7 @@ static void dtd_complete_irq(struct fsl_udc *udc)
curr_ep = get_ep_by_pipe(udc, i);
/* If the ep is configured */
- if (curr_ep->name == NULL) {
+ if (!curr_ep->ep.name) {
WARNING("Invalid EP?");
continue;
}
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 6e87af248367..409cde4e6a51 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -2410,7 +2410,7 @@ static int renesas_usb3_remove(struct platform_device *pdev)
__renesas_usb3_ep_free_request(usb3->ep0_req);
if (usb3->phy)
phy_put(usb3->phy);
- pm_runtime_disable(usb3_to_dev(usb3));
+ pm_runtime_disable(&pdev->dev);
return 0;
}
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 6150bed7cfa8..4fcfb3084b36 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -633,14 +633,6 @@ config USB_UHCI_ASPEED
bool
default y if ARCH_ASPEED
-config USB_UHCI_BIG_ENDIAN_MMIO
- bool
- default y if SPARC_LEON
-
-config USB_UHCI_BIG_ENDIAN_DESC
- bool
- default y if SPARC_LEON
-
config USB_FHCI_HCD
tristate "Freescale QE USB Host Controller support"
depends on OF_GPIO && QE_GPIO && QUICC_ENGINE
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index facafdf8fb95..d7641cbdee43 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -774,12 +774,12 @@ static struct urb *request_single_step_set_feature_urb(
atomic_inc(&urb->use_count);
atomic_inc(&urb->dev->urbnum);
urb->setup_dma = dma_map_single(
- hcd->self.controller,
+ hcd->self.sysdev,
urb->setup_packet,
sizeof(struct usb_ctrlrequest),
DMA_TO_DEVICE);
urb->transfer_dma = dma_map_single(
- hcd->self.controller,
+ hcd->self.sysdev,
urb->transfer_buffer,
urb->transfer_buffer_length,
DMA_FROM_DEVICE);
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 88158324dcae..327630405695 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -1188,10 +1188,10 @@ static int submit_single_step_set_feature(
* 15 secs after the setup
*/
if (is_setup) {
- /* SETUP pid */
+ /* SETUP pid, and interrupt after SETUP completion */
qtd_fill(ehci, qtd, urb->setup_dma,
sizeof(struct usb_ctrlrequest),
- token | (2 /* "setup" */ << 8), 8);
+ QTD_IOC | token | (2 /* "setup" */ << 8), 8);
submit_async(ehci, urb, &qtd_list, GFP_ATOMIC);
return 0; /*Return now; we shall come back after 15 seconds*/
@@ -1228,12 +1228,8 @@ static int submit_single_step_set_feature(
qtd_prev->hw_next = QTD_NEXT(ehci, qtd->qtd_dma);
list_add_tail(&qtd->qtd_list, head);
- /* dont fill any data in such packets */
- qtd_fill(ehci, qtd, 0, 0, token, 0);
-
- /* by default, enable interrupt on urb completion */
- if (likely(!(urb->transfer_flags & URB_NO_INTERRUPT)))
- qtd->hw_token |= cpu_to_hc32(ehci, QTD_IOC);
+ /* Interrupt after STATUS completion */
+ qtd_fill(ehci, qtd, 0, 0, token | QTD_IOC, 0);
submit_async(ehci, urb, &qtd_list, GFP_KERNEL);
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index ee9676349333..d088c340e4d0 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -74,6 +74,7 @@ static const char hcd_name [] = "ohci_hcd";
#define STATECHANGE_DELAY msecs_to_jiffies(300)
#define IO_WATCHDOG_DELAY msecs_to_jiffies(275)
+#define IO_WATCHDOG_OFF 0xffffff00
#include "ohci.h"
#include "pci-quirks.h"
@@ -231,7 +232,7 @@ static int ohci_urb_enqueue (
}
/* Start up the I/O watchdog timer, if it's not running */
- if (!timer_pending(&ohci->io_watchdog) &&
+ if (ohci->prev_frame_no == IO_WATCHDOG_OFF &&
list_empty(&ohci->eds_in_use) &&
!(ohci->flags & OHCI_QUIRK_QEMU)) {
ohci->prev_frame_no = ohci_frame_no(ohci);
@@ -446,7 +447,8 @@ static int ohci_init (struct ohci_hcd *ohci)
struct usb_hcd *hcd = ohci_to_hcd(ohci);
/* Accept arbitrarily long scatter-gather lists */
- hcd->self.sg_tablesize = ~0;
+ if (!(hcd->driver->flags & HCD_LOCAL_MEM))
+ hcd->self.sg_tablesize = ~0;
if (distrust_firmware)
ohci->flags |= OHCI_QUIRK_HUB_POWER;
@@ -501,6 +503,7 @@ static int ohci_init (struct ohci_hcd *ohci)
return 0;
timer_setup(&ohci->io_watchdog, io_watchdog_func, 0);
+ ohci->prev_frame_no = IO_WATCHDOG_OFF;
ohci->hcca = dma_alloc_coherent (hcd->self.controller,
sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL);
@@ -730,7 +733,7 @@ static void io_watchdog_func(struct timer_list *t)
u32 head;
struct ed *ed;
struct td *td, *td_start, *td_next;
- unsigned frame_no;
+ unsigned frame_no, prev_frame_no = IO_WATCHDOG_OFF;
unsigned long flags;
spin_lock_irqsave(&ohci->lock, flags);
@@ -835,7 +838,7 @@ static void io_watchdog_func(struct timer_list *t)
}
}
if (!list_empty(&ohci->eds_in_use)) {
- ohci->prev_frame_no = frame_no;
+ prev_frame_no = frame_no;
ohci->prev_wdh_cnt = ohci->wdh_cnt;
ohci->prev_donehead = ohci_readl(ohci,
&ohci->regs->donehead);
@@ -845,6 +848,7 @@ static void io_watchdog_func(struct timer_list *t)
}
done:
+ ohci->prev_frame_no = prev_frame_no;
spin_unlock_irqrestore(&ohci->lock, flags);
}
@@ -973,6 +977,7 @@ static void ohci_stop (struct usb_hcd *hcd)
if (quirk_nec(ohci))
flush_work(&ohci->nec_work);
del_timer_sync(&ohci->io_watchdog);
+ ohci->prev_frame_no = IO_WATCHDOG_OFF;
ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable);
ohci_usb_reset(ohci);
diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c
index fb7aaa3b9d06..634f3c7bf774 100644
--- a/drivers/usb/host/ohci-hub.c
+++ b/drivers/usb/host/ohci-hub.c
@@ -311,8 +311,10 @@ static int ohci_bus_suspend (struct usb_hcd *hcd)
rc = ohci_rh_suspend (ohci, 0);
spin_unlock_irq (&ohci->lock);
- if (rc == 0)
+ if (rc == 0) {
del_timer_sync(&ohci->io_watchdog);
+ ohci->prev_frame_no = IO_WATCHDOG_OFF;
+ }
return rc;
}
diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c
index b2ec8c399363..4ccb85a67bb3 100644
--- a/drivers/usb/host/ohci-q.c
+++ b/drivers/usb/host/ohci-q.c
@@ -1019,6 +1019,8 @@ skip_ed:
* have modified this list. normally it's just prepending
* entries (which we'd ignore), but paranoia won't hurt.
*/
+ *last = ed->ed_next;
+ ed->ed_next = NULL;
modified = 0;
/* unlink urbs as requested, but rescan the list after
@@ -1077,21 +1079,22 @@ rescan_this:
goto rescan_this;
/*
- * If no TDs are queued, take ED off the ed_rm_list.
+ * If no TDs are queued, ED is now idle.
* Otherwise, if the HC is running, reschedule.
- * If not, leave it on the list for further dequeues.
+ * If the HC isn't running, add ED back to the
+ * start of the list for later processing.
*/
if (list_empty(&ed->td_list)) {
- *last = ed->ed_next;
- ed->ed_next = NULL;
ed->state = ED_IDLE;
list_del(&ed->in_use_list);
} else if (ohci->rh_state == OHCI_RH_RUNNING) {
- *last = ed->ed_next;
- ed->ed_next = NULL;
ed_schedule(ohci, ed);
} else {
- last = &ed->ed_next;
+ ed->ed_next = ohci->ed_rm_list;
+ ohci->ed_rm_list = ed;
+ /* Don't loop on the same ED */
+ if (last == &ohci->ed_rm_list)
+ last = &ed->ed_next;
}
if (modified)
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 161536717025..67ad4bb6919a 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -66,6 +66,23 @@
#define AX_INDXC 0x30
#define AX_DATAC 0x34
+#define PT_ADDR_INDX 0xE8
+#define PT_READ_INDX 0xE4
+#define PT_SIG_1_ADDR 0xA520
+#define PT_SIG_2_ADDR 0xA521
+#define PT_SIG_3_ADDR 0xA522
+#define PT_SIG_4_ADDR 0xA523
+#define PT_SIG_1_DATA 0x78
+#define PT_SIG_2_DATA 0x56
+#define PT_SIG_3_DATA 0x34
+#define PT_SIG_4_DATA 0x12
+#define PT4_P1_REG 0xB521
+#define PT4_P2_REG 0xB522
+#define PT2_P1_REG 0xD520
+#define PT2_P2_REG 0xD521
+#define PT1_P1_REG 0xD522
+#define PT1_P2_REG 0xD523
+
#define NB_PCIE_INDX_ADDR 0xe0
#define NB_PCIE_INDX_DATA 0xe4
#define PCIE_P_CNTL 0x10040
@@ -513,6 +530,98 @@ void usb_amd_dev_put(void)
EXPORT_SYMBOL_GPL(usb_amd_dev_put);
/*
+ * Check if port is disabled in BIOS on AMD Promontory host.
+ * BIOS-disabled ports may wake on connect/disconnect and need a
+ * driver workaround to keep them disabled.
+ * Returns true if port is marked disabled.
+ */
+bool usb_amd_pt_check_port(struct device *device, int port)
+{
+ unsigned char value, port_shift;
+ struct pci_dev *pdev;
+ u16 reg;
+
+ pdev = to_pci_dev(device);
+ pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_1_ADDR);
+
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+ if (value != PT_SIG_1_DATA)
+ return false;
+
+ pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_2_ADDR);
+
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+ if (value != PT_SIG_2_DATA)
+ return false;
+
+ pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_3_ADDR);
+
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+ if (value != PT_SIG_3_DATA)
+ return false;
+
+ pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_4_ADDR);
+
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+ if (value != PT_SIG_4_DATA)
+ return false;
+
+ /* Check the disabled-port setting; if the bit is set, the port is enabled */
+ switch (pdev->device) {
+ case 0x43b9:
+ case 0x43ba:
+ /*
+ * device is AMD_PROMONTORYA_4(0x43b9) or PROMONTORYA_3(0x43ba)
+ * PT4_P1_REG bits[7..1] represent USB2.0 ports 6 to 0
+ * PT4_P2_REG bits[6..0] represent ports 13 to 7
+ */
+ if (port > 6) {
+ reg = PT4_P2_REG;
+ port_shift = port - 7;
+ } else {
+ reg = PT4_P1_REG;
+ port_shift = port + 1;
+ }
+ break;
+ case 0x43bb:
+ /*
+ * device is AMD_PROMONTORYA_2(0x43bb)
+ * PT2_P1_REG bits[7..5] represent USB2.0 ports 2 to 0
+ * PT2_P2_REG bits[5..0] represent ports 9 to 3
+ */
+ if (port > 2) {
+ reg = PT2_P2_REG;
+ port_shift = port - 3;
+ } else {
+ reg = PT2_P1_REG;
+ port_shift = port + 5;
+ }
+ break;
+ case 0x43bc:
+ /*
+ * device is AMD_PROMONTORYA_1(0x43bc)
+ * PT1_P1_REG bits[7..4] represent USB2.0 ports 3 to 0
+ * PT1_P2_REG bits[5..0] represent ports 9 to 4
+ */
+ if (port > 3) {
+ reg = PT1_P2_REG;
+ port_shift = port - 4;
+ } else {
+ reg = PT1_P1_REG;
+ port_shift = port + 4;
+ }
+ break;
+ default:
+ return false;
+ }
+ pci_write_config_word(pdev, PT_ADDR_INDX, reg);
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+
+ return !(value & BIT(port_shift));
+}
+EXPORT_SYMBOL_GPL(usb_amd_pt_check_port);
+
+/*
* Make sure the controller is completely inactive, unable to
* generate interrupts or do DMA.
*/
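usb_amd_pt_check_port() above repeats one indexed config-space access pattern: write an address to PT_ADDR_INDX, then read the byte back through PT_READ_INDX. A hedged sketch that factors it into a helper (illustrative only; the driver keeps the open-coded form) and reuses the constants defined in the hunk:

	#include <linux/pci.h>

	static u8 pt_indexed_read(struct pci_dev *pdev, u16 addr)
	{
		u8 value;

		pci_write_config_word(pdev, PT_ADDR_INDX, addr);
		pci_read_config_byte(pdev, PT_READ_INDX, &value);
		return value;
	}

	/* the four-byte signature check then collapses to: */
	static bool pt_signature_ok(struct pci_dev *pdev)
	{
		return pt_indexed_read(pdev, PT_SIG_1_ADDR) == PT_SIG_1_DATA &&
		       pt_indexed_read(pdev, PT_SIG_2_ADDR) == PT_SIG_2_DATA &&
		       pt_indexed_read(pdev, PT_SIG_3_ADDR) == PT_SIG_3_DATA &&
		       pt_indexed_read(pdev, PT_SIG_4_ADDR) == PT_SIG_4_DATA;
	}
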
diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h
index b68dcb5dd0fd..4ca0d9b7e463 100644
--- a/drivers/usb/host/pci-quirks.h
+++ b/drivers/usb/host/pci-quirks.h
@@ -17,6 +17,7 @@ void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev);
void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
void sb800_prefetch(struct device *dev, int on);
bool usb_xhci_needs_pci_reset(struct pci_dev *pdev);
+bool usb_amd_pt_check_port(struct device *device, int port);
#else
struct pci_dev;
static inline void usb_amd_quirk_pll_disable(void) {}
@@ -25,6 +26,10 @@ static inline void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) {}
static inline void usb_amd_dev_put(void) {}
static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {}
static inline void sb800_prefetch(struct device *dev, int on) {}
+static inline bool usb_amd_pt_check_port(struct device *device, int port)
+{
+ return false;
+}
#endif /* CONFIG_USB_PCI */
#endif /* __LINUX_USB_PCI_QUIRKS_H */
diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c
index a1ab8acf39ba..c359bae7b754 100644
--- a/drivers/usb/host/xhci-dbgcap.c
+++ b/drivers/usb/host/xhci-dbgcap.c
@@ -328,13 +328,14 @@ dbc_ep_do_queue(struct dbc_ep *dep, struct dbc_request *req)
int dbc_ep_queue(struct dbc_ep *dep, struct dbc_request *req,
gfp_t gfp_flags)
{
+ unsigned long flags;
struct xhci_dbc *dbc = dep->dbc;
int ret = -ESHUTDOWN;
- spin_lock(&dbc->lock);
+ spin_lock_irqsave(&dbc->lock, flags);
if (dbc->state == DS_CONFIGURED)
ret = dbc_ep_do_queue(dep, req);
- spin_unlock(&dbc->lock);
+ spin_unlock_irqrestore(&dbc->lock, flags);
mod_delayed_work(system_wq, &dbc->event_work, 0);
@@ -521,15 +522,16 @@ static void xhci_do_dbc_stop(struct xhci_hcd *xhci)
static int xhci_dbc_start(struct xhci_hcd *xhci)
{
int ret;
+ unsigned long flags;
struct xhci_dbc *dbc = xhci->dbc;
WARN_ON(!dbc);
pm_runtime_get_sync(xhci_to_hcd(xhci)->self.controller);
- spin_lock(&dbc->lock);
+ spin_lock_irqsave(&dbc->lock, flags);
ret = xhci_do_dbc_start(xhci);
- spin_unlock(&dbc->lock);
+ spin_unlock_irqrestore(&dbc->lock, flags);
if (ret) {
pm_runtime_put(xhci_to_hcd(xhci)->self.controller);
@@ -541,6 +543,7 @@ static int xhci_dbc_start(struct xhci_hcd *xhci)
static void xhci_dbc_stop(struct xhci_hcd *xhci)
{
+ unsigned long flags;
struct xhci_dbc *dbc = xhci->dbc;
struct dbc_port *port = &dbc->port;
@@ -551,9 +554,9 @@ static void xhci_dbc_stop(struct xhci_hcd *xhci)
if (port->registered)
xhci_dbc_tty_unregister_device(xhci);
- spin_lock(&dbc->lock);
+ spin_lock_irqsave(&dbc->lock, flags);
xhci_do_dbc_stop(xhci);
- spin_unlock(&dbc->lock);
+ spin_unlock_irqrestore(&dbc->lock, flags);
pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller);
}
@@ -779,14 +782,15 @@ static void xhci_dbc_handle_events(struct work_struct *work)
int ret;
enum evtreturn evtr;
struct xhci_dbc *dbc;
+ unsigned long flags;
struct xhci_hcd *xhci;
dbc = container_of(to_delayed_work(work), struct xhci_dbc, event_work);
xhci = dbc->xhci;
- spin_lock(&dbc->lock);
+ spin_lock_irqsave(&dbc->lock, flags);
evtr = xhci_dbc_do_handle_events(dbc);
- spin_unlock(&dbc->lock);
+ spin_unlock_irqrestore(&dbc->lock, flags);
switch (evtr) {
case EVT_GSER:
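The xhci-dbgcap/dbgtty hunks convert plain spin_lock()/spin_lock_irq() to the irqsave variants because these locks are now taken both from process context and from completion handlers that may run with IRQs already disabled; spin_lock_irqsave() disables interrupts while the lock is held and restores the caller's exact IRQ state, so it is safe from any context. A minimal sketch:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);
	static int demo_count;

	static void demo_touch(void)	/* callable from any context */
	{
		unsigned long flags;

		spin_lock_irqsave(&demo_lock, flags);
		demo_count++;
		spin_unlock_irqrestore(&demo_lock, flags);
	}
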
diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
index 8d47b6fbf973..75f0b92694ba 100644
--- a/drivers/usb/host/xhci-dbgtty.c
+++ b/drivers/usb/host/xhci-dbgtty.c
@@ -92,21 +92,23 @@ static void dbc_start_rx(struct dbc_port *port)
static void
dbc_read_complete(struct xhci_hcd *xhci, struct dbc_request *req)
{
+ unsigned long flags;
struct xhci_dbc *dbc = xhci->dbc;
struct dbc_port *port = &dbc->port;
- spin_lock(&port->port_lock);
+ spin_lock_irqsave(&port->port_lock, flags);
list_add_tail(&req->list_pool, &port->read_queue);
tasklet_schedule(&port->push);
- spin_unlock(&port->port_lock);
+ spin_unlock_irqrestore(&port->port_lock, flags);
}
static void dbc_write_complete(struct xhci_hcd *xhci, struct dbc_request *req)
{
+ unsigned long flags;
struct xhci_dbc *dbc = xhci->dbc;
struct dbc_port *port = &dbc->port;
- spin_lock(&port->port_lock);
+ spin_lock_irqsave(&port->port_lock, flags);
list_add(&req->list_pool, &port->write_pool);
switch (req->status) {
case 0:
@@ -119,7 +121,7 @@ static void dbc_write_complete(struct xhci_hcd *xhci, struct dbc_request *req)
req->status);
break;
}
- spin_unlock(&port->port_lock);
+ spin_unlock_irqrestore(&port->port_lock, flags);
}
static void xhci_dbc_free_req(struct dbc_ep *dep, struct dbc_request *req)
@@ -327,12 +329,13 @@ static void dbc_rx_push(unsigned long _port)
{
struct dbc_request *req;
struct tty_struct *tty;
+ unsigned long flags;
bool do_push = false;
bool disconnect = false;
struct dbc_port *port = (void *)_port;
struct list_head *queue = &port->read_queue;
- spin_lock_irq(&port->port_lock);
+ spin_lock_irqsave(&port->port_lock, flags);
tty = port->port.tty;
while (!list_empty(queue)) {
req = list_first_entry(queue, struct dbc_request, list_pool);
@@ -392,16 +395,17 @@ static void dbc_rx_push(unsigned long _port)
if (!disconnect)
dbc_start_rx(port);
- spin_unlock_irq(&port->port_lock);
+ spin_unlock_irqrestore(&port->port_lock, flags);
}
static int dbc_port_activate(struct tty_port *_port, struct tty_struct *tty)
{
+ unsigned long flags;
struct dbc_port *port = container_of(_port, struct dbc_port, port);
- spin_lock_irq(&port->port_lock);
+ spin_lock_irqsave(&port->port_lock, flags);
dbc_start_rx(port);
- spin_unlock_irq(&port->port_lock);
+ spin_unlock_irqrestore(&port->port_lock, flags);
return 0;
}
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
index e26e685d8a57..5851052d4668 100644
--- a/drivers/usb/host/xhci-debugfs.c
+++ b/drivers/usb/host/xhci-debugfs.c
@@ -211,7 +211,7 @@ static void xhci_ring_dump_segment(struct seq_file *s,
static int xhci_ring_trb_show(struct seq_file *s, void *unused)
{
int i;
- struct xhci_ring *ring = s->private;
+ struct xhci_ring *ring = *(struct xhci_ring **)s->private;
struct xhci_segment *seg = ring->first_seg;
for (i = 0; i < ring->num_segs; i++) {
@@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci,
snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index);
epriv->root = xhci_debugfs_create_ring_dir(xhci,
- &dev->eps[ep_index].new_ring,
+ &dev->eps[ep_index].ring,
epriv->name,
spriv->root);
spriv->eps[ep_index] = epriv;
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 46d5e08f05f1..72ebbc908e19 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1224,17 +1224,17 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
temp = readl(port_array[wIndex]);
break;
}
-
- /* Software should not attempt to set
- * port link state above '3' (U3) and the port
- * must be enabled.
- */
- if ((temp & PORT_PE) == 0 ||
- (link_state > USB_SS_PORT_LS_U3)) {
- xhci_warn(xhci, "Cannot set link state.\n");
+ /* Port must be enabled */
+ if (!(temp & PORT_PE)) {
+ retval = -ENODEV;
+ break;
+ }
+ /* Can't set port link state above '3' (U3) */
+ if (link_state > USB_SS_PORT_LS_U3) {
+ xhci_warn(xhci, "Cannot set port %d link state %d\n",
+ wIndex, link_state);
goto error;
}
-
if (link_state == USB_SS_PORT_LS_U3) {
slot_id = xhci_find_slot_id_by_port(hcd, xhci,
wIndex + 1);
@@ -1522,6 +1522,13 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
t2 |= PORT_WKOC_E | PORT_WKCONN_E;
t2 &= ~PORT_WKDISC_E;
}
+
+ if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) &&
+ (hcd->speed < HCD_USB3)) {
+ if (usb_amd_pt_check_port(hcd->self.controller,
+ port_index))
+ t2 &= ~PORT_WAKE_BITS;
+ }
} else
t2 &= ~PORT_WAKE_BITS;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 6c79037876db..d9f831b67e57 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -42,6 +42,10 @@
#define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8
#define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc
#define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142
static const char hcd_name[] = "xhci_hcd";
@@ -122,9 +126,19 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
xhci->quirks |= XHCI_AMD_PLL_FIX;
+ if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x43bb)
+ xhci->quirks |= XHCI_SUSPEND_DELAY;
+
if (pdev->vendor == PCI_VENDOR_ID_AMD)
xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+ if ((pdev->vendor == PCI_VENDOR_ID_AMD) &&
+ ((pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_3) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1)))
+ xhci->quirks |= XHCI_U2_DISABLE_WAKE;
+
if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
xhci->quirks |= XHCI_LPM_SUPPORT;
xhci->quirks |= XHCI_INTEL_HOST;
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 6f038306c14d..6652e2d5bd2e 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -360,7 +360,6 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
{
struct usb_hcd *hcd = dev_get_drvdata(dev);
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
- int ret;
/*
* xhci_suspend() needs `do_wakeup` to know whether host is allowed
@@ -370,12 +369,7 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
* reconsider this when xhci_plat_suspend enlarges its scope, e.g.,
* also applies to runtime suspend.
*/
- ret = xhci_suspend(xhci, device_may_wakeup(dev));
-
- if (!device_may_wakeup(dev) && !IS_ERR(xhci->clk))
- clk_disable_unprepare(xhci->clk);
-
- return ret;
+ return xhci_suspend(xhci, device_may_wakeup(dev));
}
static int __maybe_unused xhci_plat_resume(struct device *dev)
@@ -384,9 +378,6 @@ static int __maybe_unused xhci_plat_resume(struct device *dev)
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
int ret;
- if (!device_may_wakeup(dev) && !IS_ERR(xhci->clk))
- clk_prepare_enable(xhci->clk);
-
ret = xhci_priv_resume_quirk(hcd);
if (ret)
return ret;
diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c
index f0b559660007..f33ffc2bc4ed 100644
--- a/drivers/usb/host/xhci-rcar.c
+++ b/drivers/usb/host/xhci-rcar.c
@@ -83,6 +83,10 @@ static const struct soc_device_attribute rcar_quirks_match[] = {
.soc_id = "r8a7796",
.data = (void *)RCAR_XHCI_FIRMWARE_V3,
},
+ {
+ .soc_id = "r8a77965",
+ .data = (void *)RCAR_XHCI_FIRMWARE_V3,
+ },
{ /* sentinel */ },
};
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 1eeb3396300f..5d37700ae4b0 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -646,8 +646,6 @@ static void xhci_stop(struct usb_hcd *hcd)
return;
}
- xhci_debugfs_exit(xhci);
-
xhci_dbc_exit(xhci);
spin_lock_irq(&xhci->lock);
@@ -680,6 +678,7 @@ static void xhci_stop(struct usb_hcd *hcd)
xhci_dbg_trace(xhci, trace_xhci_dbg_init, "cleaning up memory");
xhci_mem_cleanup(xhci);
+ xhci_debugfs_exit(xhci);
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
"xhci_stop completed - status = %x",
readl(&xhci->op_regs->status));
@@ -878,6 +877,9 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
del_timer_sync(&xhci->shared_hcd->rh_timer);
+ if (xhci->quirks & XHCI_SUSPEND_DELAY)
+ usleep_range(1000, 1500);
+
spin_lock_irq(&xhci->lock);
clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
clear_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags);
@@ -1014,6 +1016,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
xhci_dbg(xhci, "cleaning up memory\n");
xhci_mem_cleanup(xhci);
+ xhci_debugfs_exit(xhci);
xhci_dbg(xhci, "xhci_stop completed - status = %x\n",
readl(&xhci->op_regs->status));
@@ -3544,12 +3547,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
virt_dev->eps[i].ep_state &= ~EP_STOP_CMD_PENDING;
del_timer_sync(&virt_dev->eps[i].stop_cmd_timer);
}
-
+ xhci_debugfs_remove_slot(xhci, udev->slot_id);
ret = xhci_disable_slot(xhci, udev->slot_id);
- if (ret) {
- xhci_debugfs_remove_slot(xhci, udev->slot_id);
+ if (ret)
xhci_free_virt_device(xhci, udev->slot_id);
- }
}
int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 96099a245c69..866e141d4972 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -718,11 +718,12 @@ struct xhci_ep_ctx {
/* bits 10:14 are Max Primary Streams */
/* bit 15 is Linear Stream Array */
/* Interval - period between requests to an endpoint - 125u increments. */
-#define EP_INTERVAL(p) (((p) & 0xff) << 16)
-#define EP_INTERVAL_TO_UFRAMES(p) (1 << (((p) >> 16) & 0xff))
-#define CTX_TO_EP_INTERVAL(p) (((p) >> 16) & 0xff)
-#define EP_MAXPSTREAMS_MASK (0x1f << 10)
-#define EP_MAXPSTREAMS(p) (((p) << 10) & EP_MAXPSTREAMS_MASK)
+#define EP_INTERVAL(p) (((p) & 0xff) << 16)
+#define EP_INTERVAL_TO_UFRAMES(p) (1 << (((p) >> 16) & 0xff))
+#define CTX_TO_EP_INTERVAL(p) (((p) >> 16) & 0xff)
+#define EP_MAXPSTREAMS_MASK (0x1f << 10)
+#define EP_MAXPSTREAMS(p) (((p) << 10) & EP_MAXPSTREAMS_MASK)
+#define CTX_TO_EP_MAXPSTREAMS(p) (((p) & EP_MAXPSTREAMS_MASK) >> 10)
/* Endpoint is set up with a Linear Stream Array (vs. Secondary Stream Array) */
#define EP_HAS_LSA (1 << 15)
/* hosts with LEC=1 use bits 31:24 as ESIT high bits. */
@@ -1822,9 +1823,10 @@ struct xhci_hcd {
/* For controller with a broken Port Disable implementation */
#define XHCI_BROKEN_PORT_PED (1 << 25)
#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26)
-/* Reserved. It was XHCI_U2_DISABLE_WAKE */
+#define XHCI_U2_DISABLE_WAKE (1 << 27)
#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28)
#define XHCI_HW_LPM_DISABLE (1 << 29)
+#define XHCI_SUSPEND_DELAY (1 << 30)
unsigned int num_active_eps;
unsigned int limit_active_eps;
@@ -2549,21 +2551,22 @@ static inline const char *xhci_decode_ep_context(u32 info, u32 info2, u64 deq,
u8 burst;
u8 cerr;
u8 mult;
- u8 lsa;
- u8 hid;
+
+ bool lsa;
+ bool hid;
esit = CTX_TO_MAX_ESIT_PAYLOAD_HI(info) << 16 |
CTX_TO_MAX_ESIT_PAYLOAD(tx_info);
ep_state = info & EP_STATE_MASK;
- max_pstr = info & EP_MAXPSTREAMS_MASK;
+ max_pstr = CTX_TO_EP_MAXPSTREAMS(info);
interval = CTX_TO_EP_INTERVAL(info);
mult = CTX_TO_EP_MULT(info) + 1;
- lsa = info & EP_HAS_LSA;
+ lsa = !!(info & EP_HAS_LSA);
cerr = (info2 & (3 << 1)) >> 1;
ep_type = CTX_TO_EP_TYPE(info2);
- hid = info2 & (1 << 7);
+ hid = !!(info2 & (1 << 7));
burst = CTX_TO_MAX_BURST(info2);
maxp = MAX_PACKET_DECODED(info2);
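
With bits 14:10 of the info dword carrying MaxPStreams, the new CTX_TO_EP_MAXPSTREAMS() yields the field value itself (info = 0x0c00 decodes to 3), whereas the old `info & EP_MAXPSTREAMS_MASK` left the value still shifted. A minimal sketch of a platform glue driver opting in to the reinstated and new quirk bits; the setup hook name is hypothetical, while hcd_to_xhci() and the quirks field are the driver's own:

    static int xhci_foo_plat_setup(struct usb_hcd *hcd)
    {
            struct xhci_hcd *xhci = hcd_to_xhci(hcd);

            /* wait ~1 ms before halting the host in xhci_suspend() */
            xhci->quirks |= XHCI_SUSPEND_DELAY;
            /* bit 27 is a live flag again, no longer reserved */
            xhci->quirks |= XHCI_U2_DISABLE_WAKE;
            return 0;
    }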
diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c
index 63b9e85dc0e9..236a60f53099 100644
--- a/drivers/usb/misc/ldusb.c
+++ b/drivers/usb/misc/ldusb.c
@@ -42,6 +42,9 @@
#define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033 /* USB Product ID of Micro-CASSY Time (reserved) */
#define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035 /* USB Product ID of Micro-CASSY Temperature */
#define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038 /* USB Product ID of Micro-CASSY pH */
+#define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040 /* USB Product ID of Power Analyser CASSY */
+#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042 /* USB Product ID of Converter Controller CASSY */
+#define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043 /* USB Product ID of Machine Test CASSY */
#define USB_DEVICE_ID_LD_JWM 0x1080 /* USB Product ID of Joule and Wattmeter */
#define USB_DEVICE_ID_LD_DMMP 0x1081 /* USB Product ID of Digital Multimeter P (reserved) */
#define USB_DEVICE_ID_LD_UMIP 0x1090 /* USB Product ID of UMI P */
@@ -84,6 +87,9 @@ static const struct usb_device_id ld_usb_table[] = {
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) },
+ { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) },
+ { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) },
+ { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index f5e1bb5e5217..984f7e12a6a5 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -85,6 +85,8 @@ struct mon_reader_text {
wait_queue_head_t wait;
int printf_size;
+ size_t printf_offset;
+ size_t printf_togo;
char *printf_buf;
struct mutex printf_lock;
@@ -376,75 +378,103 @@ err_alloc:
return rc;
}
-/*
- * For simplicity, we read one record in one system call and throw out
- * what does not fit. This means that the following does not work:
- * dd if=/dbg/usbmon/0t bs=10
- * Also, we do not allow seeks and do not bother advancing the offset.
- */
+static ssize_t mon_text_copy_to_user(struct mon_reader_text *rp,
+ char __user * const buf, const size_t nbytes)
+{
+ const size_t togo = min(nbytes, rp->printf_togo);
+
+ if (copy_to_user(buf, &rp->printf_buf[rp->printf_offset], togo))
+ return -EFAULT;
+ rp->printf_togo -= togo;
+ rp->printf_offset += togo;
+ return togo;
+}
+
+/* ppos is not advanced since the llseek operation is not permitted. */
static ssize_t mon_text_read_t(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
+ size_t nbytes, loff_t *ppos)
{
struct mon_reader_text *rp = file->private_data;
struct mon_event_text *ep;
struct mon_text_ptr ptr;
+ ssize_t ret;
- ep = mon_text_read_wait(rp, file);
- if (IS_ERR(ep))
- return PTR_ERR(ep);
mutex_lock(&rp->printf_lock);
- ptr.cnt = 0;
- ptr.pbuf = rp->printf_buf;
- ptr.limit = rp->printf_size;
-
- mon_text_read_head_t(rp, &ptr, ep);
- mon_text_read_statset(rp, &ptr, ep);
- ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt,
- " %d", ep->length);
- mon_text_read_data(rp, &ptr, ep);
-
- if (copy_to_user(buf, rp->printf_buf, ptr.cnt))
- ptr.cnt = -EFAULT;
+
+ if (rp->printf_togo == 0) {
+
+ ep = mon_text_read_wait(rp, file);
+ if (IS_ERR(ep)) {
+ mutex_unlock(&rp->printf_lock);
+ return PTR_ERR(ep);
+ }
+ ptr.cnt = 0;
+ ptr.pbuf = rp->printf_buf;
+ ptr.limit = rp->printf_size;
+
+ mon_text_read_head_t(rp, &ptr, ep);
+ mon_text_read_statset(rp, &ptr, ep);
+ ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt,
+ " %d", ep->length);
+ mon_text_read_data(rp, &ptr, ep);
+
+ rp->printf_togo = ptr.cnt;
+ rp->printf_offset = 0;
+
+ kmem_cache_free(rp->e_slab, ep);
+ }
+
+ ret = mon_text_copy_to_user(rp, buf, nbytes);
mutex_unlock(&rp->printf_lock);
- kmem_cache_free(rp->e_slab, ep);
- return ptr.cnt;
+ return ret;
}
+/* ppos is not advanced since the llseek operation is not permitted. */
static ssize_t mon_text_read_u(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
+ size_t nbytes, loff_t *ppos)
{
struct mon_reader_text *rp = file->private_data;
struct mon_event_text *ep;
struct mon_text_ptr ptr;
+ ssize_t ret;
- ep = mon_text_read_wait(rp, file);
- if (IS_ERR(ep))
- return PTR_ERR(ep);
mutex_lock(&rp->printf_lock);
- ptr.cnt = 0;
- ptr.pbuf = rp->printf_buf;
- ptr.limit = rp->printf_size;
- mon_text_read_head_u(rp, &ptr, ep);
- if (ep->type == 'E') {
- mon_text_read_statset(rp, &ptr, ep);
- } else if (ep->xfertype == USB_ENDPOINT_XFER_ISOC) {
- mon_text_read_isostat(rp, &ptr, ep);
- mon_text_read_isodesc(rp, &ptr, ep);
- } else if (ep->xfertype == USB_ENDPOINT_XFER_INT) {
- mon_text_read_intstat(rp, &ptr, ep);
- } else {
- mon_text_read_statset(rp, &ptr, ep);
+ if (rp->printf_togo == 0) {
+
+ ep = mon_text_read_wait(rp, file);
+ if (IS_ERR(ep)) {
+ mutex_unlock(&rp->printf_lock);
+ return PTR_ERR(ep);
+ }
+ ptr.cnt = 0;
+ ptr.pbuf = rp->printf_buf;
+ ptr.limit = rp->printf_size;
+
+ mon_text_read_head_u(rp, &ptr, ep);
+ if (ep->type == 'E') {
+ mon_text_read_statset(rp, &ptr, ep);
+ } else if (ep->xfertype == USB_ENDPOINT_XFER_ISOC) {
+ mon_text_read_isostat(rp, &ptr, ep);
+ mon_text_read_isodesc(rp, &ptr, ep);
+ } else if (ep->xfertype == USB_ENDPOINT_XFER_INT) {
+ mon_text_read_intstat(rp, &ptr, ep);
+ } else {
+ mon_text_read_statset(rp, &ptr, ep);
+ }
+ ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt,
+ " %d", ep->length);
+ mon_text_read_data(rp, &ptr, ep);
+
+ rp->printf_togo = ptr.cnt;
+ rp->printf_offset = 0;
+
+ kmem_cache_free(rp->e_slab, ep);
}
- ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt,
- " %d", ep->length);
- mon_text_read_data(rp, &ptr, ep);
- if (copy_to_user(buf, rp->printf_buf, ptr.cnt))
- ptr.cnt = -EFAULT;
+ ret = mon_text_copy_to_user(rp, buf, nbytes);
mutex_unlock(&rp->printf_lock);
- kmem_cache_free(rp->e_slab, ep);
- return ptr.cnt;
+ return ret;
}
static struct mon_event_text *mon_text_read_wait(struct mon_reader_text *rp,
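
With the printf_offset/printf_togo bookkeeping, a formatted record is handed out across as many read() calls as the caller needs instead of being truncated to the first buffer, so the small-block case called out in the deleted comment (dd with bs=10) now works. A runnable userspace sketch — the debugfs mount point may differ on your system:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[10];   /* deliberately smaller than one record */
            ssize_t n;
            int fd = open("/sys/kernel/debug/usb/usbmon/0u", O_RDONLY);

            if (fd < 0)
                    return 1;
            while ((n = read(fd, buf, sizeof(buf))) > 0)
                    fwrite(buf, 1, n, stdout);      /* records arrive intact */
            close(fd);
            return 0;
    }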
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 968bf1e8b0fe..4d723077be2b 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1756,6 +1756,7 @@ vbus_show(struct device *dev, struct device_attribute *attr, char *buf)
int vbus;
u8 devctl;
+ pm_runtime_get_sync(dev);
spin_lock_irqsave(&musb->lock, flags);
val = musb->a_wait_bcon;
vbus = musb_platform_get_vbus_status(musb);
@@ -1769,6 +1770,7 @@ vbus_show(struct device *dev, struct device_attribute *attr, char *buf)
vbus = 0;
}
spin_unlock_irqrestore(&musb->lock, flags);
+ pm_runtime_put_sync(dev);
return sprintf(buf, "Vbus %s, timeout %lu msec\n",
vbus ? "on" : "off", val);
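
The shape of this fix is the usual runtime-PM rule: a sysfs handler that reads controller registers must hold a runtime-PM reference for the duration of the access, because the controller may be autosuspended when userspace polls the attribute. As a generic sketch (read_hw_register() is a stand-in, not a musb call):

    static ssize_t foo_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
    {
            u32 val;

            pm_runtime_get_sync(dev);       /* resume hardware if suspended */
            val = read_hw_register(dev);    /* hypothetical register access */
            pm_runtime_put_sync(dev);       /* allow autosuspend again */
            return sprintf(buf, "%u\n", val);
    }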
@@ -2471,11 +2473,11 @@ static int musb_remove(struct platform_device *pdev)
musb_disable_interrupts(musb);
musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
spin_unlock_irqrestore(&musb->lock, flags);
+ musb_platform_exit(musb);
pm_runtime_dont_use_autosuspend(musb->controller);
pm_runtime_put_sync(musb->controller);
pm_runtime_disable(musb->controller);
- musb_platform_exit(musb);
musb_phy_callback = NULL;
if (musb->dma_controller)
musb_dma_controller_destroy(musb->dma_controller);
@@ -2708,7 +2710,8 @@ static int musb_resume(struct device *dev)
if ((devctl & mask) != (musb->context.devctl & mask))
musb->port1_status = 0;
- musb_start(musb);
+ musb_enable_interrupts(musb);
+ musb_platform_enable(musb);
spin_lock_irqsave(&musb->lock, flags);
error = musb_run_resume_work(musb);
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 394b4ac86161..45ed32c2cba9 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -391,13 +391,7 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb,
}
}
- /*
- * The pipe must be broken if current urb->status is set, so don't
- * start next urb.
- * TODO: to minimize the risk of regression, only check urb->status
- * for RX, until we have a test case to understand the behavior of TX.
- */
- if ((!status || !is_in) && qh && qh->is_ready) {
+ if (qh != NULL && qh->is_ready) {
musb_dbg(musb, "... next ep%d %cX urb %p",
hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh));
musb_start_urb(musb, is_in, qh);
diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
index da031c45395a..fbec863350f6 100644
--- a/drivers/usb/phy/phy-mxs-usb.c
+++ b/drivers/usb/phy/phy-mxs-usb.c
@@ -602,6 +602,9 @@ static enum usb_charger_type mxs_phy_charger_detect(struct usb_phy *phy)
void __iomem *base = phy->io_priv;
enum usb_charger_type chgr_type = UNKNOWN_TYPE;
+ if (!regmap)
+ return UNKNOWN_TYPE;
+
if (mxs_charger_data_contact_detect(mxs_phy))
return chgr_type;
diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index 5925d111bd47..39fa2fc1b8b7 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -982,6 +982,10 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt,
if ((uintptr_t)pkt->buf & (USBHS_USB_DMAC_XFER_SIZE - 1))
goto usbhsf_pio_prepare_pop;
+ /* return at this time if the pipe is running */
+ if (usbhs_pipe_is_running(pipe))
+ return 0;
+
usbhs_pipe_config_change_bfre(pipe, 1);
ret = usbhsf_fifo_select(pipe, fifo, 0);
@@ -1172,6 +1176,7 @@ static int usbhsf_dma_pop_done_with_usb_dmac(struct usbhs_pkt *pkt,
usbhsf_fifo_clear(pipe, fifo);
pkt->actual = usbhs_dma_calc_received_size(pkt, chan, rcv_len);
+ usbhs_pipe_running(pipe, 0);
usbhsf_dma_stop(pipe, fifo);
usbhsf_dma_unmap(pkt);
usbhsf_fifo_unselect(pipe, pipe->fifo);
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 5db8ed517e0e..2d8d9150da0c 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -241,6 +241,7 @@ static void option_instat_callback(struct urb *urb);
#define QUECTEL_PRODUCT_EC21 0x0121
#define QUECTEL_PRODUCT_EC25 0x0125
#define QUECTEL_PRODUCT_BG96 0x0296
+#define QUECTEL_PRODUCT_EP06 0x0306
#define CMOTECH_VENDOR_ID 0x16d8
#define CMOTECH_PRODUCT_6001 0x6001
@@ -689,6 +690,10 @@ static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
.reserved = BIT(1) | BIT(4),
};
+static const struct option_blacklist_info quectel_ep06_blacklist = {
+ .reserved = BIT(4) | BIT(5),
+};
+
static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1203,6 +1208,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06),
+ .driver_info = (kernel_ulong_t)&quectel_ep06_blacklist },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 3b1b9695177a..6034c39b67d1 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -1076,7 +1076,7 @@ static int uas_post_reset(struct usb_interface *intf)
return 0;
err = uas_configure_endpoints(devinfo);
- if (err && err != ENODEV)
+ if (err && err != -ENODEV)
shost_printk(KERN_ERR, shost,
"%s: alloc streams error %d after reset",
__func__, err);
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 264af199aec8..747d3a9596d9 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2118,6 +2118,13 @@ UNUSUAL_DEV( 0x152d, 0x2566, 0x0114, 0x0114,
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_BROKEN_FUA ),
+/* Reported by Teijo Kinnunen <[email protected]> */
+UNUSUAL_DEV( 0x152d, 0x2567, 0x0117, 0x0117,
+ "JMicron",
+ "USB to ATA/ATAPI Bridge",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_BROKEN_FUA ),
+
/* Reported-by George Cherian <[email protected]> */
UNUSUAL_DEV(0x152d, 0x9561, 0x0000, 0x9999,
"JMicron",
diff --git a/drivers/usb/typec/fusb302/fusb302.c b/drivers/usb/typec/fusb302/fusb302.c
index 9ce4756adad6..dcd8ef085b30 100644
--- a/drivers/usb/typec/fusb302/fusb302.c
+++ b/drivers/usb/typec/fusb302/fusb302.c
@@ -1857,7 +1857,8 @@ static int fusb302_probe(struct i2c_client *client,
chip->tcpm_port = tcpm_register_port(&client->dev, &chip->tcpc_dev);
if (IS_ERR(chip->tcpm_port)) {
ret = PTR_ERR(chip->tcpm_port);
- dev_err(dev, "cannot register tcpm port, ret=%d", ret);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "cannot register tcpm port, ret=%d", ret);
goto destroy_workqueue;
}
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index f4d563ee7690..8b637a4b474b 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -252,9 +252,6 @@ struct tcpm_port {
unsigned int nr_src_pdo;
u32 snk_pdo[PDO_MAX_OBJECTS];
unsigned int nr_snk_pdo;
- unsigned int nr_fixed; /* number of fixed sink PDOs */
- unsigned int nr_var; /* number of variable sink PDOs */
- unsigned int nr_batt; /* number of battery sink PDOs */
u32 snk_vdo[VDO_MAX_OBJECTS];
unsigned int nr_snk_vdo;
@@ -1770,90 +1767,39 @@ static int tcpm_pd_check_request(struct tcpm_port *port)
return 0;
}
-#define min_power(x, y) min(pdo_max_power(x), pdo_max_power(y))
-#define min_current(x, y) min(pdo_max_current(x), pdo_max_current(y))
-
-static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo,
- int *src_pdo)
+static int tcpm_pd_select_pdo(struct tcpm_port *port)
{
- unsigned int i, j, max_mw = 0, max_mv = 0, mw = 0, mv = 0, ma = 0;
+ unsigned int i, max_mw = 0, max_mv = 0;
int ret = -EINVAL;
/*
- * Select the source PDO providing the most power which has a
- * matchig sink cap.
+ * Select the source PDO providing the most power while staying within
+ * the board's voltage limits. Prefer higher voltages if available.
*/
for (i = 0; i < port->nr_source_caps; i++) {
u32 pdo = port->source_caps[i];
enum pd_pdo_type type = pdo_type(pdo);
+ unsigned int mv, ma, mw;
- if (type == PDO_TYPE_FIXED) {
- for (j = 0; j < port->nr_fixed; j++) {
- if (pdo_fixed_voltage(pdo) ==
- pdo_fixed_voltage(port->snk_pdo[j])) {
- ma = min_current(pdo, port->snk_pdo[j]);
- mv = pdo_fixed_voltage(pdo);
- mw = ma * mv / 1000;
- if (mw > max_mw ||
- (mw == max_mw && mv > max_mv)) {
- ret = 0;
- *src_pdo = i;
- *sink_pdo = j;
- max_mw = mw;
- max_mv = mv;
- }
- /* There could only be one fixed pdo
- * at a specific voltage level.
- * So breaking here.
- */
- break;
- }
- }
- } else if (type == PDO_TYPE_BATT) {
- for (j = port->nr_fixed;
- j < port->nr_fixed +
- port->nr_batt;
- j++) {
- if (pdo_min_voltage(pdo) >=
- pdo_min_voltage(port->snk_pdo[j]) &&
- pdo_max_voltage(pdo) <=
- pdo_max_voltage(port->snk_pdo[j])) {
- mw = min_power(pdo, port->snk_pdo[j]);
- mv = pdo_min_voltage(pdo);
- if (mw > max_mw ||
- (mw == max_mw && mv > max_mv)) {
- ret = 0;
- *src_pdo = i;
- *sink_pdo = j;
- max_mw = mw;
- max_mv = mv;
- }
- }
- }
- } else if (type == PDO_TYPE_VAR) {
- for (j = port->nr_fixed +
- port->nr_batt;
- j < port->nr_fixed +
- port->nr_batt +
- port->nr_var;
- j++) {
- if (pdo_min_voltage(pdo) >=
- pdo_min_voltage(port->snk_pdo[j]) &&
- pdo_max_voltage(pdo) <=
- pdo_max_voltage(port->snk_pdo[j])) {
- ma = min_current(pdo, port->snk_pdo[j]);
- mv = pdo_min_voltage(pdo);
- mw = ma * mv / 1000;
- if (mw > max_mw ||
- (mw == max_mw && mv > max_mv)) {
- ret = 0;
- *src_pdo = i;
- *sink_pdo = j;
- max_mw = mw;
- max_mv = mv;
- }
- }
- }
+ if (type == PDO_TYPE_FIXED)
+ mv = pdo_fixed_voltage(pdo);
+ else
+ mv = pdo_min_voltage(pdo);
+
+ if (type == PDO_TYPE_BATT) {
+ mw = pdo_max_power(pdo);
+ } else {
+ ma = min(pdo_max_current(pdo),
+ port->max_snk_ma);
+ mw = ma * mv / 1000;
+ }
+
+ /* Prefer higher voltages if available */
+ if ((mw > max_mw || (mw == max_mw && mv > max_mv)) &&
+ mv <= port->max_snk_mv) {
+ ret = i;
+ max_mw = mw;
+ max_mv = mv;
}
}
@@ -1865,14 +1811,13 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
unsigned int mv, ma, mw, flags;
unsigned int max_ma, max_mw;
enum pd_pdo_type type;
- int src_pdo_index, snk_pdo_index;
- u32 pdo, matching_snk_pdo;
+ int index;
+ u32 pdo;
- if (tcpm_pd_select_pdo(port, &snk_pdo_index, &src_pdo_index) < 0)
+ index = tcpm_pd_select_pdo(port);
+ if (index < 0)
return -EINVAL;
-
- pdo = port->source_caps[src_pdo_index];
- matching_snk_pdo = port->snk_pdo[snk_pdo_index];
+ pdo = port->source_caps[index];
type = pdo_type(pdo);
if (type == PDO_TYPE_FIXED)
@@ -1880,28 +1825,26 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
else
mv = pdo_min_voltage(pdo);
- /* Select maximum available current within the sink pdo's limit */
+ /* Select maximum available current within the board's power limit */
if (type == PDO_TYPE_BATT) {
- mw = min_power(pdo, matching_snk_pdo);
- ma = 1000 * mw / mv;
+ mw = pdo_max_power(pdo);
+ ma = 1000 * min(mw, port->max_snk_mw) / mv;
} else {
- ma = min_current(pdo, matching_snk_pdo);
- mw = ma * mv / 1000;
+ ma = min(pdo_max_current(pdo),
+ 1000 * port->max_snk_mw / mv);
}
+ ma = min(ma, port->max_snk_ma);
flags = RDO_USB_COMM | RDO_NO_SUSPEND;
/* Set mismatch bit if offered power is less than operating power */
+ mw = ma * mv / 1000;
max_ma = ma;
max_mw = mw;
if (mw < port->operating_snk_mw) {
flags |= RDO_CAP_MISMATCH;
- if (type == PDO_TYPE_BATT &&
- (pdo_max_power(matching_snk_pdo) > pdo_max_power(pdo)))
- max_mw = pdo_max_power(matching_snk_pdo);
- else if (pdo_max_current(matching_snk_pdo) >
- pdo_max_current(pdo))
- max_ma = pdo_max_current(matching_snk_pdo);
+ max_mw = port->operating_snk_mw;
+ max_ma = max_mw * 1000 / mv;
}
tcpm_log(port, "cc=%d cc1=%d cc2=%d vbus=%d vconn=%s polarity=%d",
@@ -1910,16 +1853,16 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
port->polarity);
if (type == PDO_TYPE_BATT) {
- *rdo = RDO_BATT(src_pdo_index + 1, mw, max_mw, flags);
+ *rdo = RDO_BATT(index + 1, mw, max_mw, flags);
tcpm_log(port, "Requesting PDO %d: %u mV, %u mW%s",
- src_pdo_index, mv, mw,
+ index, mv, mw,
flags & RDO_CAP_MISMATCH ? " [mismatch]" : "");
} else {
- *rdo = RDO_FIXED(src_pdo_index + 1, ma, max_ma, flags);
+ *rdo = RDO_FIXED(index + 1, ma, max_ma, flags);
tcpm_log(port, "Requesting PDO %d: %u mV, %u mA%s",
- src_pdo_index, mv, ma,
+ index, mv, ma,
flags & RDO_CAP_MISMATCH ? " [mismatch]" : "");
}
@@ -3650,19 +3593,6 @@ int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo,
}
EXPORT_SYMBOL_GPL(tcpm_update_sink_capabilities);
-static int nr_type_pdos(const u32 *pdo, unsigned int nr_pdo,
- enum pd_pdo_type type)
-{
- int count = 0;
- int i;
-
- for (i = 0; i < nr_pdo; i++) {
- if (pdo_type(pdo[i]) == type)
- count++;
- }
- return count;
-}
-
struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
{
struct tcpm_port *port;
@@ -3708,15 +3638,6 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
tcpc->config->nr_src_pdo);
port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, tcpc->config->snk_pdo,
tcpc->config->nr_snk_pdo);
- port->nr_fixed = nr_type_pdos(port->snk_pdo,
- port->nr_snk_pdo,
- PDO_TYPE_FIXED);
- port->nr_var = nr_type_pdos(port->snk_pdo,
- port->nr_snk_pdo,
- PDO_TYPE_VAR);
- port->nr_batt = nr_type_pdos(port->snk_pdo,
- port->nr_snk_pdo,
- PDO_TYPE_BATT);
port->nr_snk_vdo = tcpm_copy_vdos(port->snk_vdo, tcpc->config->snk_vdo,
tcpc->config->nr_snk_vdo);
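
A worked example of the restored selection rule, with invented capabilities:

    u32 src_caps[] = {
            PDO_FIXED(5000, 3000, 0),       /* 5 V * 3 A = 15 W */
            PDO_FIXED(9000, 3000, 0),       /* 9 V * 3 A = 27 W */
    };
    /*
     * With max_snk_mv = 9000 and max_snk_ma = 3000 both entries pass
     * the board limit; 27 W > 15 W, so tcpm_pd_select_pdo() returns
     * index 1 (on a power tie the higher voltage wins). If
     * operating_snk_mw exceeded the offered 27 W,
     * tcpm_pd_build_request() would set RDO_CAP_MISMATCH and report
     * operating_snk_mw as max_mw in the RDO.
     */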
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index 49e552472c3f..dd8ef36ab10e 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -73,6 +73,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
goto err;
sdev->ud.tcp_socket = socket;
+ sdev->ud.sockfd = sockfd;
spin_unlock_irq(&sdev->ud.lock);
@@ -172,6 +173,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
if (ud->tcp_socket) {
sockfd_put(ud->tcp_socket);
ud->tcp_socket = NULL;
+ ud->sockfd = -1;
}
/* 3. free used data */
@@ -266,6 +268,7 @@ static struct stub_device *stub_device_alloc(struct usb_device *udev)
sdev->ud.status = SDEV_ST_AVAILABLE;
spin_lock_init(&sdev->ud.lock);
sdev->ud.tcp_socket = NULL;
+ sdev->ud.sockfd = -1;
INIT_LIST_HEAD(&sdev->priv_init);
INIT_LIST_HEAD(&sdev->priv_tx);
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index c3e1008aa491..20e3d4609583 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -984,6 +984,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
if (vdev->ud.tcp_socket) {
sockfd_put(vdev->ud.tcp_socket);
vdev->ud.tcp_socket = NULL;
+ vdev->ud.sockfd = -1;
}
pr_info("release socket\n");
@@ -1030,6 +1031,7 @@ static void vhci_device_reset(struct usbip_device *ud)
if (ud->tcp_socket) {
sockfd_put(ud->tcp_socket);
ud->tcp_socket = NULL;
+ ud->sockfd = -1;
}
ud->status = VDEV_ST_NULL;
diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c
index d86f72bbbb91..6dcd3ff655c3 100644
--- a/drivers/usb/usbip/vudc_sysfs.c
+++ b/drivers/usb/usbip/vudc_sysfs.c
@@ -105,10 +105,14 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
if (rv != 0)
return -EINVAL;
+ if (!udc) {
+ dev_err(dev, "no device");
+ return -ENODEV;
+ }
spin_lock_irqsave(&udc->lock, flags);
/* Don't export what we don't have */
- if (!udc || !udc->driver || !udc->pullup) {
- dev_err(dev, "no device or gadget not bound");
+ if (!udc->driver || !udc->pullup) {
+ dev_err(dev, "gadget not bound");
ret = -ENODEV;
goto unlock;
}
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index e30e29ae4819..45657e2b1ff7 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -338,11 +338,12 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
{
struct page *page[1];
struct vm_area_struct *vma;
+ struct vm_area_struct *vmas[1];
int ret;
if (mm == current->mm) {
- ret = get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE),
- page);
+ ret = get_user_pages_longterm(vaddr, 1, !!(prot & IOMMU_WRITE),
+ page, vmas);
} else {
unsigned int flags = 0;
@@ -351,7 +352,18 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
down_read(&mm->mmap_sem);
ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
- NULL, NULL);
+ vmas, NULL);
+ /*
+ * The lifetime of a vaddr_get_pfn() page pin is
+ * userspace-controlled. In the fs-dax case this could
+ * lead to indefinite stalls in filesystem operations.
+ * Disallow attempts to pin fs-dax pages via this
+ * interface.
+ */
+ if (ret > 0 && vma_is_fsdax(vmas[0])) {
+ ret = -EOPNOTSUPP;
+ put_page(page[0]);
+ }
up_read(&mm->mmap_sem);
}
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index b5fb56b822fd..a31d9b240af8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -170,7 +170,7 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) {
ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head,
vhost_net_buf_get_size(rxq),
- __skb_array_destroy_skb);
+ tun_ptr_free);
rxq->head = rxq->tail = 0;
}
}
@@ -948,6 +948,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
n->vqs[i].done_idx = 0;
n->vqs[i].vhost_hlen = 0;
n->vqs[i].sock_hlen = 0;
+ n->vqs[i].rx_ring = NULL;
vhost_net_buf_init(&n->vqs[i].rxq);
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
@@ -972,6 +973,7 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
vhost_net_disable_vq(n, vq);
vq->private_data = NULL;
vhost_net_buf_unproduce(nvq);
+ nvq->rx_ring = NULL;
mutex_unlock(&vq->mutex);
return sock;
}
@@ -1160,14 +1162,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
vhost_net_disable_vq(n, vq);
vq->private_data = sock;
vhost_net_buf_unproduce(nvq);
- if (index == VHOST_NET_VQ_RX)
- nvq->rx_ring = get_tap_ptr_ring(fd);
r = vhost_vq_init_access(vq);
if (r)
goto err_used;
r = vhost_net_enable_vq(n, vq);
if (r)
goto err_used;
+ if (index == VHOST_NET_VQ_RX)
+ nvq->rx_ring = get_tap_ptr_ring(fd);
oldubufs = nvq->ubufs;
nvq->ubufs = ubufs;
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 0d14e2ff19f1..0898dbdbf955 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -61,9 +61,9 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
if (other_cid == 0)
continue;
- if (other_cid == guest_cid) {
+ if (other_cid == guest_cid)
return vsock;
- }
+
}
return NULL;
diff --git a/drivers/video/fbdev/geode/video_gx.c b/drivers/video/fbdev/geode/video_gx.c
index 6082f653c68a..67773e8bbb95 100644
--- a/drivers/video/fbdev/geode/video_gx.c
+++ b/drivers/video/fbdev/geode/video_gx.c
@@ -127,7 +127,7 @@ void gx_set_dclk_frequency(struct fb_info *info)
int timeout = 1000;
/* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */
- if (cpu_data(0).x86_mask == 1) {
+ if (cpu_data(0).x86_stepping == 1) {
pll_table = gx_pll_table_14MHz;
pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz);
} else {
diff --git a/drivers/video/fbdev/sbuslib.c b/drivers/video/fbdev/sbuslib.c
index af6fc97f4ba4..a436d44f1b7f 100644
--- a/drivers/video/fbdev/sbuslib.c
+++ b/drivers/video/fbdev/sbuslib.c
@@ -122,7 +122,7 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg,
unsigned char __user *ured;
unsigned char __user *ugreen;
unsigned char __user *ublue;
- int index, count, i;
+ unsigned int index, count, i;
if (get_user(index, &c->index) ||
__get_user(count, &c->count) ||
@@ -161,7 +161,7 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg,
unsigned char __user *ugreen;
unsigned char __user *ublue;
struct fb_cmap *cmap = &info->cmap;
- int index, count, i;
+ unsigned int index, count, i;
u8 red, green, blue;
if (get_user(index, &c->index) ||
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index eb30f3e09a47..71458f493cf8 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -428,8 +428,6 @@ unmap_release:
i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
}
- vq->vq.num_free += total_sg;
-
if (indirect)
kfree(desc);
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index aff773bcebdb..37460cd6cabb 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -226,6 +226,7 @@ config ZIIRAVE_WATCHDOG
config RAVE_SP_WATCHDOG
tristate "RAVE SP Watchdog timer"
depends on RAVE_SP_CORE
+ depends on NVMEM || !NVMEM
select WATCHDOG_CORE
help
Support for the watchdog on RAVE SP device.
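
The added `depends on NVMEM || !NVMEM` is the standard Kconfig idiom for an optional dependency: the expression is y when NVMEM is built in or disabled, but only m when NVMEM=m, which restricts this driver to m in that case and prevents a built-in driver from referencing module-only NVMEM symbols.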
@@ -903,6 +904,7 @@ config F71808E_WDT
config SP5100_TCO
tristate "AMD/ATI SP5100 TCO Timer/Watchdog"
depends on X86 && PCI
+ select WATCHDOG_CORE
---help---
Hardware watchdog driver for the AMD/ATI SP5100 chipset. The TCO
(Total Cost of Ownership) timer is a watchdog timer that will reboot
@@ -1008,6 +1010,7 @@ config WAFER_WDT
config I6300ESB_WDT
tristate "Intel 6300ESB Timer/Watchdog"
depends on PCI
+ select WATCHDOG_CORE
---help---
Hardware driver for the watchdog timer built into the Intel
6300ESB controller hub.
@@ -1837,6 +1840,7 @@ config WATCHDOG_SUN4V
config XEN_WDT
tristate "Xen Watchdog support"
depends on XEN
+ select WATCHDOG_CORE
help
Say Y here to support the hypervisor watchdog capability provided
by Xen 4.0 and newer. The watchdog timeout period is normally one
diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index e0678c14480f..3a33c5344bd5 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c
@@ -566,7 +566,8 @@ static ssize_t watchdog_write(struct file *file, const char __user *buf,
char c;
if (get_user(c, buf + i))
return -EFAULT;
- expect_close = (c == 'V');
+ if (c == 'V')
+ expect_close = true;
}
/* Properly order writes across fork()ed processes */
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index f1f00dfc0e68..b0a158073abd 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -28,16 +28,7 @@
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/watchdog.h>
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#include <linux/dmi.h>
-#include <linux/spinlock.h>
-#include <linux/nmi.h>
-#include <linux/kdebug.h>
-#include <linux/notifier.h>
-#include <asm/set_memory.h>
-#endif /* CONFIG_HPWDT_NMI_DECODING */
#include <asm/nmi.h>
-#include <asm/frame.h>
#define HPWDT_VERSION "1.4.0"
#define SECS_TO_TICKS(secs) ((secs) * 1000 / 128)
@@ -48,6 +39,9 @@
static unsigned int soft_margin = DEFAULT_MARGIN; /* in seconds */
static unsigned int reload; /* the computed soft_margin */
static bool nowayout = WATCHDOG_NOWAYOUT;
+#ifdef CONFIG_HPWDT_NMI_DECODING
+static unsigned int allow_kdump = 1;
+#endif
static char expect_release;
static unsigned long hpwdt_is_open;
@@ -63,373 +57,6 @@ static const struct pci_device_id hpwdt_devices[] = {
};
MODULE_DEVICE_TABLE(pci, hpwdt_devices);
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#define PCI_BIOS32_SD_VALUE 0x5F32335F /* "_32_" */
-#define CRU_BIOS_SIGNATURE_VALUE 0x55524324
-#define PCI_BIOS32_PARAGRAPH_LEN 16
-#define PCI_ROM_BASE1 0x000F0000
-#define ROM_SIZE 0x10000
-
-struct bios32_service_dir {
- u32 signature;
- u32 entry_point;
- u8 revision;
- u8 length;
- u8 checksum;
- u8 reserved[5];
-};
-
-/* type 212 */
-struct smbios_cru64_info {
- u8 type;
- u8 byte_length;
- u16 handle;
- u32 signature;
- u64 physical_address;
- u32 double_length;
- u32 double_offset;
-};
-#define SMBIOS_CRU64_INFORMATION 212
-
-/* type 219 */
-struct smbios_proliant_info {
- u8 type;
- u8 byte_length;
- u16 handle;
- u32 power_features;
- u32 omega_features;
- u32 reserved;
- u32 misc_features;
-};
-#define SMBIOS_ICRU_INFORMATION 219
-
-
-struct cmn_registers {
- union {
- struct {
- u8 ral;
- u8 rah;
- u16 rea2;
- };
- u32 reax;
- } u1;
- union {
- struct {
- u8 rbl;
- u8 rbh;
- u8 reb2l;
- u8 reb2h;
- };
- u32 rebx;
- } u2;
- union {
- struct {
- u8 rcl;
- u8 rch;
- u16 rec2;
- };
- u32 recx;
- } u3;
- union {
- struct {
- u8 rdl;
- u8 rdh;
- u16 red2;
- };
- u32 redx;
- } u4;
-
- u32 resi;
- u32 redi;
- u16 rds;
- u16 res;
- u32 reflags;
-} __attribute__((packed));
-
-static unsigned int hpwdt_nmi_decoding;
-static unsigned int allow_kdump = 1;
-static unsigned int is_icru;
-static unsigned int is_uefi;
-static DEFINE_SPINLOCK(rom_lock);
-static void *cru_rom_addr;
-static struct cmn_registers cmn_regs;
-
-extern asmlinkage void asminline_call(struct cmn_registers *pi86Regs,
- unsigned long *pRomEntry);
-
-#ifdef CONFIG_X86_32
-/* --32 Bit Bios------------------------------------------------------------ */
-
-#define HPWDT_ARCH 32
-
-asm(".text \n\t"
- ".align 4 \n\t"
- ".globl asminline_call \n"
- "asminline_call: \n\t"
- "pushl %ebp \n\t"
- "movl %esp, %ebp \n\t"
- "pusha \n\t"
- "pushf \n\t"
- "push %es \n\t"
- "push %ds \n\t"
- "pop %es \n\t"
- "movl 8(%ebp),%eax \n\t"
- "movl 4(%eax),%ebx \n\t"
- "movl 8(%eax),%ecx \n\t"
- "movl 12(%eax),%edx \n\t"
- "movl 16(%eax),%esi \n\t"
- "movl 20(%eax),%edi \n\t"
- "movl (%eax),%eax \n\t"
- "push %cs \n\t"
- "call *12(%ebp) \n\t"
- "pushf \n\t"
- "pushl %eax \n\t"
- "movl 8(%ebp),%eax \n\t"
- "movl %ebx,4(%eax) \n\t"
- "movl %ecx,8(%eax) \n\t"
- "movl %edx,12(%eax) \n\t"
- "movl %esi,16(%eax) \n\t"
- "movl %edi,20(%eax) \n\t"
- "movw %ds,24(%eax) \n\t"
- "movw %es,26(%eax) \n\t"
- "popl %ebx \n\t"
- "movl %ebx,(%eax) \n\t"
- "popl %ebx \n\t"
- "movl %ebx,28(%eax) \n\t"
- "pop %es \n\t"
- "popf \n\t"
- "popa \n\t"
- "leave \n\t"
- "ret \n\t"
- ".previous");
-
-
-/*
- * cru_detect
- *
- * Routine Description:
- * This function uses the 32-bit BIOS Service Directory record to
- * search for a $CRU record.
- *
- * Return Value:
- * 0 : SUCCESS
- * <0 : FAILURE
- */
-static int cru_detect(unsigned long map_entry,
- unsigned long map_offset)
-{
- void *bios32_map;
- unsigned long *bios32_entrypoint;
- unsigned long cru_physical_address;
- unsigned long cru_length;
- unsigned long physical_bios_base = 0;
- unsigned long physical_bios_offset = 0;
- int retval = -ENODEV;
-
- bios32_map = ioremap(map_entry, (2 * PAGE_SIZE));
-
- if (bios32_map == NULL)
- return -ENODEV;
-
- bios32_entrypoint = bios32_map + map_offset;
-
- cmn_regs.u1.reax = CRU_BIOS_SIGNATURE_VALUE;
-
- set_memory_x((unsigned long)bios32_map, 2);
- asminline_call(&cmn_regs, bios32_entrypoint);
-
- if (cmn_regs.u1.ral != 0) {
- pr_warn("Call succeeded but with an error: 0x%x\n",
- cmn_regs.u1.ral);
- } else {
- physical_bios_base = cmn_regs.u2.rebx;
- physical_bios_offset = cmn_regs.u4.redx;
- cru_length = cmn_regs.u3.recx;
- cru_physical_address =
- physical_bios_base + physical_bios_offset;
-
- /* If the values look OK, then map it in. */
- if ((physical_bios_base + physical_bios_offset)) {
- cru_rom_addr =
- ioremap(cru_physical_address, cru_length);
- if (cru_rom_addr) {
- set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK,
- (cru_length + PAGE_SIZE - 1) >> PAGE_SHIFT);
- retval = 0;
- }
- }
-
- pr_debug("CRU Base Address: 0x%lx\n", physical_bios_base);
- pr_debug("CRU Offset Address: 0x%lx\n", physical_bios_offset);
- pr_debug("CRU Length: 0x%lx\n", cru_length);
- pr_debug("CRU Mapped Address: %p\n", &cru_rom_addr);
- }
- iounmap(bios32_map);
- return retval;
-}
-
-/*
- * bios_checksum
- */
-static int bios_checksum(const char __iomem *ptr, int len)
-{
- char sum = 0;
- int i;
-
- /*
- * calculate checksum of size bytes. This should add up
- * to zero if we have a valid header.
- */
- for (i = 0; i < len; i++)
- sum += ptr[i];
-
- return ((sum == 0) && (len > 0));
-}
-
-/*
- * bios32_present
- *
- * Routine Description:
- * This function finds the 32-bit BIOS Service Directory
- *
- * Return Value:
- * 0 : SUCCESS
- * <0 : FAILURE
- */
-static int bios32_present(const char __iomem *p)
-{
- struct bios32_service_dir *bios_32_ptr;
- int length;
- unsigned long map_entry, map_offset;
-
- bios_32_ptr = (struct bios32_service_dir *) p;
-
- /*
- * Search for signature by checking equal to the swizzled value
- * instead of calling another routine to perform a strcmp.
- */
- if (bios_32_ptr->signature == PCI_BIOS32_SD_VALUE) {
- length = bios_32_ptr->length * PCI_BIOS32_PARAGRAPH_LEN;
- if (bios_checksum(p, length)) {
- /*
- * According to the spec, we're looking for the
- * first 4KB-aligned address below the entrypoint
- * listed in the header. The Service Directory code
- * is guaranteed to occupy no more than 2 4KB pages.
- */
- map_entry = bios_32_ptr->entry_point & ~(PAGE_SIZE - 1);
- map_offset = bios_32_ptr->entry_point - map_entry;
-
- return cru_detect(map_entry, map_offset);
- }
- }
- return -ENODEV;
-}
-
-static int detect_cru_service(void)
-{
- char __iomem *p, *q;
- int rc = -1;
-
- /*
- * Search from 0x0f0000 through 0x0fffff, inclusive.
- */
- p = ioremap(PCI_ROM_BASE1, ROM_SIZE);
- if (p == NULL)
- return -ENOMEM;
-
- for (q = p; q < p + ROM_SIZE; q += 16) {
- rc = bios32_present(q);
- if (!rc)
- break;
- }
- iounmap(p);
- return rc;
-}
-/* ------------------------------------------------------------------------- */
-#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_64
-/* --64 Bit Bios------------------------------------------------------------ */
-
-#define HPWDT_ARCH 64
-
-asm(".text \n\t"
- ".align 4 \n\t"
- ".globl asminline_call \n\t"
- ".type asminline_call, @function \n\t"
- "asminline_call: \n\t"
- FRAME_BEGIN
- "pushq %rax \n\t"
- "pushq %rbx \n\t"
- "pushq %rdx \n\t"
- "pushq %r12 \n\t"
- "pushq %r9 \n\t"
- "movq %rsi, %r12 \n\t"
- "movq %rdi, %r9 \n\t"
- "movl 4(%r9),%ebx \n\t"
- "movl 8(%r9),%ecx \n\t"
- "movl 12(%r9),%edx \n\t"
- "movl 16(%r9),%esi \n\t"
- "movl 20(%r9),%edi \n\t"
- "movl (%r9),%eax \n\t"
- "call *%r12 \n\t"
- "pushfq \n\t"
- "popq %r12 \n\t"
- "movl %eax, (%r9) \n\t"
- "movl %ebx, 4(%r9) \n\t"
- "movl %ecx, 8(%r9) \n\t"
- "movl %edx, 12(%r9) \n\t"
- "movl %esi, 16(%r9) \n\t"
- "movl %edi, 20(%r9) \n\t"
- "movq %r12, %rax \n\t"
- "movl %eax, 28(%r9) \n\t"
- "popq %r9 \n\t"
- "popq %r12 \n\t"
- "popq %rdx \n\t"
- "popq %rbx \n\t"
- "popq %rax \n\t"
- FRAME_END
- "ret \n\t"
- ".previous");
-
-/*
- * dmi_find_cru
- *
- * Routine Description:
- * This function checks whether or not a SMBIOS/DMI record is
- * the 64bit CRU info or not
- */
-static void dmi_find_cru(const struct dmi_header *dm, void *dummy)
-{
- struct smbios_cru64_info *smbios_cru64_ptr;
- unsigned long cru_physical_address;
-
- if (dm->type == SMBIOS_CRU64_INFORMATION) {
- smbios_cru64_ptr = (struct smbios_cru64_info *) dm;
- if (smbios_cru64_ptr->signature == CRU_BIOS_SIGNATURE_VALUE) {
- cru_physical_address =
- smbios_cru64_ptr->physical_address +
- smbios_cru64_ptr->double_offset;
- cru_rom_addr = ioremap(cru_physical_address,
- smbios_cru64_ptr->double_length);
- set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK,
- smbios_cru64_ptr->double_length >> PAGE_SHIFT);
- }
- }
-}
-
-static int detect_cru_service(void)
-{
- cru_rom_addr = NULL;
-
- dmi_walk(dmi_find_cru, NULL);
-
- /* if cru_rom_addr has been set then we found a CRU service */
- return ((cru_rom_addr != NULL) ? 0 : -ENODEV);
-}
-/* ------------------------------------------------------------------------- */
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_HPWDT_NMI_DECODING */
/*
* Watchdog operations
@@ -486,30 +113,12 @@ static int hpwdt_my_nmi(void)
*/
static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
{
- unsigned long rom_pl;
- static int die_nmi_called;
-
- if (!hpwdt_nmi_decoding)
- return NMI_DONE;
-
if ((ulReason == NMI_UNKNOWN) && !hpwdt_my_nmi())
return NMI_DONE;
- spin_lock_irqsave(&rom_lock, rom_pl);
- if (!die_nmi_called && !is_icru && !is_uefi)
- asminline_call(&cmn_regs, cru_rom_addr);
- die_nmi_called = 1;
- spin_unlock_irqrestore(&rom_lock, rom_pl);
-
if (allow_kdump)
hpwdt_stop();
- if (!is_icru && !is_uefi) {
- if (cmn_regs.u1.ral == 0) {
- nmi_panic(regs, "An NMI occurred, but unable to determine source.\n");
- return NMI_HANDLED;
- }
- }
nmi_panic(regs, "An NMI occurred. Depending on your system the reason "
"for the NMI is logged in any one of the following "
"resources:\n"
@@ -675,84 +284,11 @@ static struct miscdevice hpwdt_miscdev = {
* Init & Exit
*/
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef CONFIG_X86_LOCAL_APIC
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
- /*
- * If nmi_watchdog is turned off then we can turn on
- * our nmi decoding capability.
- */
- hpwdt_nmi_decoding = 1;
-}
-#else
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
- dev_warn(&dev->dev, "NMI decoding is disabled. "
- "Your kernel does not support a NMI Watchdog.\n");
-}
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-/*
- * dmi_find_icru
- *
- * Routine Description:
- * This function checks whether or not we are on an iCRU-based server.
- * This check is independent of architecture and needs to be made for
- * any ProLiant system.
- */
-static void dmi_find_icru(const struct dmi_header *dm, void *dummy)
-{
- struct smbios_proliant_info *smbios_proliant_ptr;
-
- if (dm->type == SMBIOS_ICRU_INFORMATION) {
- smbios_proliant_ptr = (struct smbios_proliant_info *) dm;
- if (smbios_proliant_ptr->misc_features & 0x01)
- is_icru = 1;
- if (smbios_proliant_ptr->misc_features & 0x1400)
- is_uefi = 1;
- }
-}
static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
{
+#ifdef CONFIG_HPWDT_NMI_DECODING
int retval;
-
- /*
- * On typical CRU-based systems we need to map that service in
- * the BIOS. For 32 bit Operating Systems we need to go through
- * the 32 Bit BIOS Service Directory. For 64 bit Operating
- * Systems we get that service through SMBIOS.
- *
- * On systems that support the new iCRU service all we need to
- * do is call dmi_walk to get the supported flag value and skip
- * the old cru detect code.
- */
- dmi_walk(dmi_find_icru, NULL);
- if (!is_icru && !is_uefi) {
-
- /*
- * We need to map the ROM to get the CRU service.
- * For 32 bit Operating Systems we need to go through the 32 Bit
- * BIOS Service Directory
- * For 64 bit Operating Systems we get that service through SMBIOS.
- */
- retval = detect_cru_service();
- if (retval < 0) {
- dev_warn(&dev->dev,
- "Unable to detect the %d Bit CRU Service.\n",
- HPWDT_ARCH);
- return retval;
- }
-
- /*
- * We know this is the only CRU call we need to make so lets keep as
- * few instructions as possible once the NMI comes in.
- */
- cmn_regs.u1.rah = 0x0D;
- cmn_regs.u1.ral = 0x02;
- }
-
/*
* Only one function can register for NMI_UNKNOWN
*/
@@ -780,45 +316,26 @@ error:
dev_warn(&dev->dev,
"Unable to register a die notifier (err=%d).\n",
retval);
- if (cru_rom_addr)
- iounmap(cru_rom_addr);
return retval;
+#endif /* CONFIG_HPWDT_NMI_DECODING */
+ return 0;
}
static void hpwdt_exit_nmi_decoding(void)
{
+#ifdef CONFIG_HPWDT_NMI_DECODING
unregister_nmi_handler(NMI_UNKNOWN, "hpwdt");
unregister_nmi_handler(NMI_SERR, "hpwdt");
unregister_nmi_handler(NMI_IO_CHECK, "hpwdt");
- if (cru_rom_addr)
- iounmap(cru_rom_addr);
-}
-#else /* !CONFIG_HPWDT_NMI_DECODING */
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
-}
-
-static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
-{
- return 0;
+#endif
}
-static void hpwdt_exit_nmi_decoding(void)
-{
-}
-#endif /* CONFIG_HPWDT_NMI_DECODING */
-
static int hpwdt_init_one(struct pci_dev *dev,
const struct pci_device_id *ent)
{
int retval;
/*
- * Check if we can do NMI decoding or not
- */
- hpwdt_check_nmi_decoding(dev);
-
- /*
* First let's find out if we are on an iLO2+ server. We will
* not run on a legacy ASM box.
* So we only support the G5 ProLiant servers and higher.
@@ -922,6 +439,6 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
#ifdef CONFIG_HPWDT_NMI_DECODING
module_param(allow_kdump, int, 0);
MODULE_PARM_DESC(allow_kdump, "Start a kernel dump after NMI occurs");
-#endif /* !CONFIG_HPWDT_NMI_DECODING */
+#endif /* CONFIG_HPWDT_NMI_DECODING */
module_pci_driver(hpwdt_driver);
diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c
index 316c2eb122d2..e8bd9887c566 100644
--- a/drivers/watchdog/sbsa_gwdt.c
+++ b/drivers/watchdog/sbsa_gwdt.c
@@ -50,6 +50,7 @@
*/
#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -159,7 +160,7 @@ static unsigned int sbsa_gwdt_get_timeleft(struct watchdog_device *wdd)
!(readl(gwdt->control_base + SBSA_GWDT_WCS) & SBSA_GWDT_WCS_WS0))
timeleft += readl(gwdt->control_base + SBSA_GWDT_WOR);
- timeleft += readq(gwdt->control_base + SBSA_GWDT_WCV) -
+ timeleft += lo_hi_readq(gwdt->control_base + SBSA_GWDT_WCV) -
arch_counter_get_cntvct();
do_div(timeleft, gwdt->clk);
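
Not every 32-bit architecture provides a native readq(); the new include supplies lo_hi_readq(), which always splits the access into two 32-bit reads, low word first — safe for registers that tolerate split access, as the SBSA watchdog's do:

    #include <linux/io-64-nonatomic-lo-hi.h>

    /* lo_hi_readq(addr) is roughly: */
    u64 lo = readl(addr);
    u64 hi = readl(addr + 4);
    u64 val = lo | (hi << 32);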
diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
index 6d1fbda0f461..0da9943d405f 100644
--- a/drivers/watchdog/wdat_wdt.c
+++ b/drivers/watchdog/wdat_wdt.c
@@ -392,7 +392,7 @@ static int wdat_wdt_probe(struct platform_device *pdev)
memset(&r, 0, sizeof(r));
r.start = gas->address;
- r.end = r.start + gas->access_width;
+ r.end = r.start + gas->access_width - 1;
if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
r.flags = IORESOURCE_MEM;
} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
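
struct resource ranges are inclusive at both ends, so a region of access_width bytes spans [start, start + width - 1]; the old arithmetic overshot by one byte and could collide with a neighbouring resource. For instance:

    /* a 4-byte register block at 0x1000 */
    struct resource r = DEFINE_RES_MEM(0x1000, 4);
    /* r.start == 0x1000, r.end == 0x1003, resource_size(&r) == 4 */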
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 1ab4bd11f5f3..762378f1811c 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -755,8 +755,8 @@ out:
mutex_unlock(&irq_mapping_update_lock);
return irq;
error_irq:
- for (; i >= 0; i--)
- __unbind_from_irq(irq + i);
+ while (nvec--)
+ __unbind_from_irq(irq + nvec);
mutex_unlock(&irq_mapping_update_lock);
return ret;
}
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 156e5aea36db..b1092fbefa63 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -416,7 +416,7 @@ static int pvcalls_back_connect(struct xenbus_device *dev,
sock);
if (!map) {
ret = -EFAULT;
- sock_release(map->sock);
+ sock_release(sock);
}
out:
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 753d9cb437d0..2f11ca72a281 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -60,6 +60,7 @@ struct sock_mapping {
bool active_socket;
struct list_head list;
struct socket *sock;
+ atomic_t refcount;
union {
struct {
int irq;
@@ -72,20 +73,25 @@ struct sock_mapping {
wait_queue_head_t inflight_conn_req;
} active;
struct {
- /* Socket status */
+ /*
+ * Socket status, needs to be 64-bit aligned due to the
+ * test_and_* functions which have this requirement on arm64.
+ */
#define PVCALLS_STATUS_UNINITALIZED 0
#define PVCALLS_STATUS_BIND 1
#define PVCALLS_STATUS_LISTEN 2
- uint8_t status;
+ uint8_t status __attribute__((aligned(8)));
/*
* Internal state-machine flags.
* Only one accept operation can be inflight for a socket.
* Only one poll operation can be inflight for a given socket.
+ * flags needs to be 64-bit aligned due to the test_and_*
+ * functions which have this requirement on arm64.
*/
#define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
#define PVCALLS_FLAG_POLL_INFLIGHT 1
#define PVCALLS_FLAG_POLL_RET 2
- uint8_t flags;
+ uint8_t flags __attribute__((aligned(8)));
uint32_t inflight_req_id;
struct sock_mapping *accept_map;
wait_queue_head_t inflight_accept_req;
@@ -93,6 +99,32 @@ struct sock_mapping {
};
};
+static inline struct sock_mapping *pvcalls_enter_sock(struct socket *sock)
+{
+ struct sock_mapping *map;
+
+ if (!pvcalls_front_dev ||
+ dev_get_drvdata(&pvcalls_front_dev->dev) == NULL)
+ return ERR_PTR(-ENOTCONN);
+
+ map = (struct sock_mapping *)sock->sk->sk_send_head;
+ if (map == NULL)
+ return ERR_PTR(-ENOTSOCK);
+
+ pvcalls_enter();
+ atomic_inc(&map->refcount);
+ return map;
+}
+
+static inline void pvcalls_exit_sock(struct socket *sock)
+{
+ struct sock_mapping *map;
+
+ map = (struct sock_mapping *)sock->sk->sk_send_head;
+ atomic_dec(&map->refcount);
+ pvcalls_exit();
+}
+
static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
{
*req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
@@ -369,31 +401,23 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
return -EOPNOTSUPP;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *)sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
ret = create_active(map, &evtchn);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
@@ -423,7 +447,7 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
smp_rmb();
ret = bedata->rsp[req_id].ret;
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
@@ -488,23 +512,15 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB))
return -EOPNOTSUPP;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
mutex_lock(&map->active.out_mutex);
if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) {
mutex_unlock(&map->active.out_mutex);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EAGAIN;
}
if (len > INT_MAX)
@@ -526,7 +542,7 @@ again:
tot_sent = sent;
mutex_unlock(&map->active.out_mutex);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return tot_sent;
}
@@ -591,19 +607,11 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
return -EOPNOTSUPP;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
mutex_lock(&map->active.in_mutex);
if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
@@ -623,7 +631,7 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
ret = 0;
mutex_unlock(&map->active.in_mutex);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
@@ -637,24 +645,16 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
return -EOPNOTSUPP;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (map == NULL) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
req = RING_GET_REQUEST(&bedata->ring, req_id);
@@ -684,7 +684,7 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
map->passive.status = PVCALLS_STATUS_BIND;
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return 0;
}
@@ -695,21 +695,13 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
struct xen_pvcalls_request *req;
int notify, req_id, ret;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
if (map->passive.status != PVCALLS_STATUS_BIND) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EOPNOTSUPP;
}
@@ -717,7 +709,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
req = RING_GET_REQUEST(&bedata->ring, req_id);
@@ -741,7 +733,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
map->passive.status = PVCALLS_STATUS_LISTEN;
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
@@ -753,21 +745,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
struct xen_pvcalls_request *req;
int notify, req_id, ret, evtchn, nonblock;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
if (map->passive.status != PVCALLS_STATUS_LISTEN) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EINVAL;
}
@@ -785,13 +769,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
goto received;
}
if (nonblock) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EAGAIN;
}
if (wait_event_interruptible(map->passive.inflight_accept_req,
!test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags))) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EINTR;
}
}
@@ -802,7 +786,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
@@ -810,7 +794,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -ENOMEM;
}
ret = create_active(map2, &evtchn);
@@ -819,7 +803,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
list_add_tail(&map2->list, &bedata->socket_mappings);
@@ -841,13 +825,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
/* We could check if we have received a response before returning. */
if (nonblock) {
WRITE_ONCE(map->passive.inflight_req_id, req_id);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EAGAIN;
}
if (wait_event_interruptible(bedata->inflight_req,
READ_ONCE(bedata->rsp[req_id].req_id) == req_id)) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EINTR;
}
/* read req_id, then the content */
@@ -862,7 +846,7 @@ received:
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
pvcalls_front_free_map(bedata, map2);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -ENOMEM;
}
newsock->sk->sk_send_head = (void *)map2;
@@ -874,7 +858,7 @@ received:
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags);
wake_up(&map->passive.inflight_accept_req);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
@@ -965,23 +949,16 @@ __poll_t pvcalls_front_poll(struct file *file, struct socket *sock,
struct sock_mapping *map;
__poll_t ret;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
return EPOLLNVAL;
- }
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return EPOLLNVAL;
- }
if (map->active_socket)
ret = pvcalls_front_poll_active(file, bedata, map, wait);
else
ret = pvcalls_front_poll_passive(file, bedata, map, wait);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
@@ -995,25 +972,20 @@ int pvcalls_front_release(struct socket *sock)
if (sock->sk == NULL)
return 0;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -EIO;
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map)) {
+ if (PTR_ERR(map) == -ENOTCONN)
+ return -EIO;
+ else
+ return 0;
}
-
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (map == NULL) {
- pvcalls_exit();
- return 0;
- }
-
spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
sock->sk->sk_send_head = NULL;
@@ -1043,14 +1015,20 @@ int pvcalls_front_release(struct socket *sock)
/*
* We need to make sure that sendmsg/recvmsg on this socket have
* not started before we've cleared sk_send_head here. The
- * easiest (though not optimal) way to guarantee this is to see
- * that no pvcall (other than us) is in progress.
+ * easiest way to guarantee this is to see that no pvcall
+ * (other than us) is in progress on this socket.
*/
- while (atomic_read(&pvcalls_refcount) > 1)
+ while (atomic_read(&map->refcount) > 1)
cpu_relax();
pvcalls_front_free_map(bedata, map);
} else {
+ wake_up(&bedata->inflight_req);
+ wake_up(&map->passive.inflight_accept_req);
+
+ while (atomic_read(&map->refcount) > 1)
+ cpu_relax();
+
spin_lock(&bedata->socket_lock);
list_del(&map->list);
spin_unlock(&bedata->socket_lock);
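
The pvcalls_enter_sock()/pvcalls_exit_sock() helpers this patch switches to are defined outside the hunks shown here. A minimal sketch of the contract the converted call sites assume (field names taken from the hunks above, everything else inferred, not the actual driver code):

    /*
     * Sketch only: per-socket reference helpers assumed by the call
     * sites above. The error values mirror how pvcalls_front_release()
     * interprets PTR_ERR(map).
     */
    static inline struct sock_mapping *pvcalls_enter_sock(struct socket *sock)
    {
            struct sock_mapping *map;

            if (!pvcalls_front_dev ||
                dev_get_drvdata(&pvcalls_front_dev->dev) == NULL)
                    return ERR_PTR(-ENOTCONN);      /* frontend gone */

            map = (struct sock_mapping *)sock->sk->sk_send_head;
            if (map == NULL)
                    return ERR_PTR(-ENOTSOCK);      /* no mapping bound */

            pvcalls_enter();
            atomic_inc(&map->refcount);             /* paired with exit below */
            return map;
    }

    static inline void pvcalls_exit_sock(struct socket *sock)
    {
            struct sock_mapping *map =
                    (struct sock_mapping *)sock->sk->sk_send_head;

            atomic_dec(&map->refcount);
            pvcalls_exit();
    }
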
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index bf13d1ec51f3..04e7b3b29bac 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -284,6 +284,10 @@ static int tmem_frontswap_store(unsigned type, pgoff_t offset,
int pool = tmem_frontswap_poolid;
int ret;
+ /* THP isn't supported */
+ if (PageTransHuge(page))
+ return -1;
+
if (pool < 0)
return -1;
if (ind64 != ind)
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
index 149c5e7efc89..092981171df1 100644
--- a/drivers/xen/xenbus/xenbus.h
+++ b/drivers/xen/xenbus/xenbus.h
@@ -76,6 +76,7 @@ struct xb_req_data {
struct list_head list;
wait_queue_head_t wq;
struct xsd_sockmsg msg;
+ uint32_t caller_req_id;
enum xsd_sockmsg_type type;
char *body;
const struct kvec *vec;
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 5b081a01779d..d239fc3c5e3d 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -309,6 +309,7 @@ static int process_msg(void)
goto out;
if (req->state == xb_req_state_wait_reply) {
+ req->msg.req_id = req->caller_req_id;
req->msg.type = state.msg.type;
req->msg.len = state.msg.len;
req->body = state.body;
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 74888cacd0b0..ec9eb4fba59c 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -466,8 +466,11 @@ int xenbus_probe_node(struct xen_bus_type *bus,
/* Register with generic device framework. */
err = device_register(&xendev->dev);
- if (err)
+ if (err) {
+ put_device(&xendev->dev);
+ xendev = NULL;
goto fail;
+ }
return 0;
fail:
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 3e59590c7254..3f3b29398ab8 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -227,6 +227,8 @@ static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg)
req->state = xb_req_state_queued;
init_waitqueue_head(&req->wq);
+ /* Save the caller req_id and restore it later in the reply */
+ req->caller_req_id = req->msg.req_id;
req->msg.req_id = xs_request_enter(req);
mutex_lock(&xb_write_mutex);
@@ -310,6 +312,7 @@ static void *xs_talkv(struct xenbus_transaction t,
req->num_vecs = num_vecs;
req->cb = xs_wake_up;
+ msg.req_id = 0;
msg.tx_id = t.id;
msg.type = type;
msg.len = 0;
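
Taken together, the xenbus hunks make the caller's req_id survive the round trip: the wire id is chosen by the driver via xs_request_enter(), so the caller's id must be stashed before queuing and put back when the reply is matched. A condensed illustration of that invariant (not the driver code itself):

    /*
     * Condensed illustration of the req_id save/restore added above.
     */
    static void send_request(struct xb_req_data *req)
    {
            req->caller_req_id = req->msg.req_id;    /* what the caller sent */
            req->msg.req_id = xs_request_enter(req); /* wire id, driver-chosen */
    }

    static void complete_request(struct xb_req_data *req,
                                 const struct xsd_sockmsg *reply)
    {
            req->msg.type = reply->type;
            req->msg.len = reply->len;
            req->msg.req_id = req->caller_req_id;    /* caller sees its id again */
    }
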
diff --git a/fs/aio.c b/fs/aio.c
index a062d75109cb..6bcd3fb5265a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,9 +68,9 @@ struct aio_ring {
#define AIO_RING_PAGES 8
struct kioctx_table {
- struct rcu_head rcu;
- unsigned nr;
- struct kioctx *table[];
+ struct rcu_head rcu;
+ unsigned nr;
+ struct kioctx __rcu *table[];
};
struct kioctx_cpu {
@@ -115,7 +115,8 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct work_struct free_work;
+ struct rcu_head free_rcu;
+ struct work_struct free_work; /* see free_ioctx() */
/*
* signals when all in-flight requests are done
@@ -329,7 +330,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
- ctx = table->table[i];
+ ctx = rcu_dereference(table->table[i]);
if (ctx && ctx->aio_ring_file == file) {
if (!atomic_read(&ctx->dead)) {
ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -588,6 +589,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
return cancel(&kiocb->common);
}
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
+ * ->free_work.
+ */
static void free_ioctx(struct work_struct *work)
{
struct kioctx *ctx = container_of(work, struct kioctx, free_work);
@@ -601,6 +608,14 @@ static void free_ioctx(struct work_struct *work)
kmem_cache_free(kioctx_cachep, ctx);
}
+static void free_ioctx_rcufn(struct rcu_head *head)
+{
+ struct kioctx *ctx = container_of(head, struct kioctx, free_rcu);
+
+ INIT_WORK(&ctx->free_work, free_ioctx);
+ schedule_work(&ctx->free_work);
+}
+
static void free_ioctx_reqs(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
@@ -609,8 +624,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
complete(&ctx->rq_wait->comp);
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
+ /* Synchronize against RCU protected table->table[] dereferences */
+ call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
}
/*
@@ -651,9 +666,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
while (1) {
if (table)
for (i = 0; i < table->nr; i++)
- if (!table->table[i]) {
+ if (!rcu_access_pointer(table->table[i])) {
ctx->id = i;
- table->table[i] = ctx;
+ rcu_assign_pointer(table->table[i], ctx);
spin_unlock(&mm->ioctx_lock);
/* While kioctx setup is in progress,
@@ -834,11 +849,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
}
table = rcu_dereference_raw(mm->ioctx_table);
- WARN_ON(ctx != table->table[ctx->id]);
- table->table[ctx->id] = NULL;
+ WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+ RCU_INIT_POINTER(table->table[ctx->id], NULL);
spin_unlock(&mm->ioctx_lock);
- /* percpu_ref_kill() will do the necessary call_rcu() */
+ /* free_ioctx_reqs() will do the necessary RCU synchronization */
wake_up_all(&ctx->wait);
/*
@@ -880,7 +895,8 @@ void exit_aio(struct mm_struct *mm)
skipped = 0;
for (i = 0; i < table->nr; ++i) {
- struct kioctx *ctx = table->table[i];
+ struct kioctx *ctx =
+ rcu_dereference_protected(table->table[i], true);
if (!ctx) {
skipped++;
@@ -1069,7 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
if (!table || id >= table->nr)
goto out;
- ctx = table->table[id];
+ ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
percpu_ref_get(&ctx->users);
ret = ctx;
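
The free_ioctx() comment above describes a reusable pattern: teardown must wait out an RCU grace period (for lookup_ioctx() readers) and must then run in process context (aio_free_ring() can sleep), hence the two-step bounce. A generic sketch of the call_rcu()-then-schedule_work() chain, with stand-in types:

    /*
     * Generic form of the chaining used for kioctx above: readers under
     * rcu_read_lock() are guaranteed to be done before free_obj_work()
     * runs in process context.
     */
    struct obj {
            struct rcu_head free_rcu;
            struct work_struct free_work;
            /* ... payload ... */
    };

    static void free_obj_work(struct work_struct *work)
    {
            struct obj *o = container_of(work, struct obj, free_work);

            kfree(o);                       /* may sleep: process context */
    }

    static void free_obj_rcufn(struct rcu_head *head)
    {
            struct obj *o = container_of(head, struct obj, free_rcu);

            /* softirq context: only bounce to a workqueue */
            INIT_WORK(&o->free_work, free_obj_work);
            schedule_work(&o->free_work);
    }

    static void release_obj(struct obj *o)
    {
            call_rcu(&o->free_rcu, free_obj_rcufn); /* wait out RCU readers */
    }
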
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4a181fcb5175..fe09ef9c21f3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1058,6 +1058,27 @@ retry:
return 0;
}
+static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
+{
+ struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
+
+ if (!disk)
+ return NULL;
+ /*
+	 * Now that we hold a gendisk reference, make sure the bdev we looked
+	 * up is not stale. If it is, the device was removed and re-created
+	 * before we looked up the gendisk, and we fail the open in that case.
+	 * Associating an unhashed bdev with a newly created gendisk could
+	 * lead to two bdevs (and thus two independent caches) being
+	 * associated with one device, which is bad.
+ */
+ if (inode_unhashed(bdev->bd_inode)) {
+ put_disk_and_module(disk);
+ return NULL;
+ }
+ return disk;
+}
+
/**
* bd_start_claiming - start claiming a block device
* @bdev: block device of interest
@@ -1094,7 +1115,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
* @bdev might not have been initialized properly yet, look up
* and grab the outer block device the hard way.
*/
- disk = get_gendisk(bdev->bd_dev, &partno);
+ disk = bdev_get_gendisk(bdev, &partno);
if (!disk)
return ERR_PTR(-ENXIO);
@@ -1111,8 +1132,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
else
whole = bdgrab(bdev);
- module_put(disk->fops->owner);
- put_disk(disk);
+ put_disk_and_module(disk);
if (!whole)
return ERR_PTR(-ENOMEM);
@@ -1407,10 +1427,10 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
struct gendisk *disk;
- struct module *owner;
int ret;
int partno;
int perm = 0;
+ bool first_open = false;
if (mode & FMODE_READ)
perm |= MAY_READ;
@@ -1430,14 +1450,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
restart:
ret = -ENXIO;
- disk = get_gendisk(bdev->bd_dev, &partno);
+ disk = bdev_get_gendisk(bdev, &partno);
if (!disk)
goto out;
- owner = disk->fops->owner;
disk_block_events(disk);
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
+ first_open = true;
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
@@ -1463,8 +1483,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_queue = NULL;
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
- put_disk(disk);
- module_put(owner);
+ put_disk_and_module(disk);
goto restart;
}
}
@@ -1524,15 +1543,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (ret)
goto out_unlock_bdev;
}
- /* only one opener holds refs to the module and disk */
- put_disk(disk);
- module_put(owner);
}
bdev->bd_openers++;
if (for_part)
bdev->bd_part_count++;
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
+ /* only one opener holds refs to the module and disk */
+ if (!first_open)
+ put_disk_and_module(disk);
return 0;
out_clear:
@@ -1546,8 +1565,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
- put_disk(disk);
- module_put(owner);
+ put_disk_and_module(disk);
out:
bdput(bdev);
@@ -1770,8 +1788,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
disk->fops->release(disk, mode);
}
if (!bdev->bd_openers) {
- struct module *owner = disk->fops->owner;
-
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
bdev->bd_disk = NULL;
@@ -1779,8 +1795,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
- put_disk(disk);
- module_put(owner);
+ put_disk_and_module(disk);
}
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
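
put_disk_and_module() itself is added elsewhere in this series (in the genhd code, not shown here). Judging from the open-coded pairs it replaces above, it presumably amounts to:

    /*
     * Presumed shape of the helper the hunks above switch to: drop the
     * disk reference, then the module reference, exactly like the
     * removed put_disk()/module_put() pairs.
     */
    void put_disk_and_module(struct gendisk *disk)
    {
            if (disk) {
                    struct module *owner = disk->fops->owner;

                    put_disk(disk);
                    module_put(owner);
            }
    }
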
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e4054e533f6d..26484648d090 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1264,7 +1264,16 @@ again:
while (node) {
ref = rb_entry(node, struct prelim_ref, rbnode);
node = rb_next(&ref->rbnode);
- WARN_ON(ref->count < 0);
+ /*
+ * ref->count < 0 can happen here if there are delayed
+ * refs with a node->action of BTRFS_DROP_DELAYED_REF.
+ * prelim_ref_insert() relies on this when merging
+ * identical refs to keep the overall count correct.
+ * prelim_ref_insert() will merge only those refs
+ * which compare identically. Any refs having
+ * e.g. different offsets would not be merged,
+ * and would retain their original ref->count < 0.
+ */
if (roots && ref->count && ref->root_id && ref->parent == 0) {
if (sc && sc->root_objectid &&
ref->root_id != sc->root_objectid) {
@@ -1510,6 +1519,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
if (!node)
break;
bytenr = node->val;
+ shared.share_count = 0;
cond_resched();
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1a462ab85c49..da308774b8a4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2974,7 +2974,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
security_free_mnt_opts(&fs_info->security_opts);
- kfree(fs_info);
+ kvfree(fs_info);
}
/* tree mod log functions from ctree.c */
@@ -3095,7 +3095,10 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
u64 inode_objectid, u64 ref_objectid, int ins_len,
int cow);
-int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
+int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot,
+ const char *name,
+ int name_len, struct btrfs_inode_ref **ref_ret);
+int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
u64 ref_objectid, const char *name,
int name_len,
struct btrfs_inode_extref **extref_ret);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index a1a40cf382e3..7ab5e0128f0c 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -821,7 +821,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
spin_unlock(&delayed_refs->lock);
if (qrecord_inserted)
- return btrfs_qgroup_trace_extent_post(fs_info, record);
+ btrfs_qgroup_trace_extent_post(fs_info, record);
+
return 0;
free_head_ref:
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 05751a677da4..c1618ab9fecf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2147,6 +2147,10 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 bytes;
struct request_queue *req_q;
+ if (!stripe->dev->bdev) {
+ ASSERT(btrfs_test_opt(fs_info, DEGRADED));
+ continue;
+ }
req_q = bdev_get_queue(stripe->dev->bdev);
if (!blk_queue_discard(req_q))
continue;
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 39c968f80157..65e1a76bf755 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -22,10 +22,10 @@
#include "transaction.h"
#include "print-tree.h"
-static int find_name_in_backref(struct btrfs_path *path, const char *name,
- int name_len, struct btrfs_inode_ref **ref_ret)
+int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot,
+ const char *name,
+ int name_len, struct btrfs_inode_ref **ref_ret)
{
- struct extent_buffer *leaf;
struct btrfs_inode_ref *ref;
unsigned long ptr;
unsigned long name_ptr;
@@ -33,9 +33,8 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
u32 cur_offset = 0;
int len;
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ item_size = btrfs_item_size_nr(leaf, slot);
+ ptr = btrfs_item_ptr_offset(leaf, slot);
while (cur_offset < item_size) {
ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
len = btrfs_inode_ref_name_len(leaf, ref);
@@ -44,18 +43,19 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
if (len != name_len)
continue;
if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
- *ref_ret = ref;
+ if (ref_ret)
+ *ref_ret = ref;
return 1;
}
}
return 0;
}
-int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
+int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
+ u64 ref_objectid,
const char *name, int name_len,
struct btrfs_inode_extref **extref_ret)
{
- struct extent_buffer *leaf;
struct btrfs_inode_extref *extref;
unsigned long ptr;
unsigned long name_ptr;
@@ -63,9 +63,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
u32 cur_offset = 0;
int ref_name_len;
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ item_size = btrfs_item_size_nr(leaf, slot);
+ ptr = btrfs_item_ptr_offset(leaf, slot);
/*
* Search all extended backrefs in this item. We're only
@@ -113,7 +112,9 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
if (ret > 0)
return NULL;
- if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref))
+ if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
+ ref_objectid, name, name_len,
+ &extref))
return NULL;
return extref;
}
@@ -155,7 +156,8 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
* This should always succeed so error here will make the FS
* readonly.
*/
- if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
+ if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
+ ref_objectid,
name, name_len, &extref)) {
btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
ret = -EROFS;
@@ -225,7 +227,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
} else if (ret < 0) {
goto out;
}
- if (!find_name_in_backref(path, name, name_len, &ref)) {
+ if (!btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, name_len, &ref)) {
ret = -ENOENT;
search_ext_refs = 1;
goto out;
@@ -293,7 +296,9 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len);
if (ret == -EEXIST) {
- if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+ if (btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0],
+ ref_objectid,
name, name_len, NULL))
goto out;
@@ -351,7 +356,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
if (ret == -EEXIST) {
u32 old_size;
- if (find_name_in_backref(path, name, name_len, &ref))
+ if (btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, name_len, &ref))
goto out;
old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
@@ -365,7 +371,9 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
ret = 0;
} else if (ret < 0) {
if (ret == -EOVERFLOW) {
- if (find_name_in_backref(path, name, name_len, &ref))
+ if (btrfs_find_name_in_backref(path->nodes[0],
+ path->slots[0],
+ name, name_len, &ref))
ret = -EEXIST;
else
ret = -EMLINK;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 53ca025655fc..f53470112670 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1335,8 +1335,11 @@ next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
- if (ret < 0)
+ if (ret < 0) {
+ if (cow_start != (u64)-1)
+ cur_offset = cow_start;
goto error;
+ }
if (ret > 0)
break;
leaf = path->nodes[0];
@@ -2040,12 +2043,15 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
struct inode *inode, struct list_head *list)
{
struct btrfs_ordered_sum *sum;
+ int ret;
list_for_each_entry(sum, list, list) {
trans->adding_csums = true;
- btrfs_csum_file_blocks(trans,
+ ret = btrfs_csum_file_blocks(trans,
BTRFS_I(inode)->root->fs_info->csum_root, sum);
trans->adding_csums = false;
+ if (ret)
+ return ret;
}
return 0;
}
@@ -3059,7 +3065,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- add_pending_csums(trans, inode, &ordered_extent->list);
+ ret = add_pending_csums(trans, inode, &ordered_extent->list);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode_fallback(trans, root, inode);
@@ -3385,6 +3395,11 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_reserve_metadata(trans, inode);
ASSERT(!ret);
if (ret) {
+ /*
+ * dec doesn't need spin_lock as ->orphan_block_rsv
+ * would be released only if ->orphan_inodes is
+ * zero.
+ */
atomic_dec(&root->orphan_inodes);
clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags);
@@ -3399,12 +3414,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret) {
- atomic_dec(&root->orphan_inodes);
if (reserve) {
clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags);
btrfs_orphan_release_metadata(inode);
}
+ /*
+			 * btrfs_orphan_commit_root may race with us and set
+			 * ->orphan_block_rsv to zero; to avoid that, decrease
+			 * ->orphan_inodes only after everything is done.
+ */
+ atomic_dec(&root->orphan_inodes);
if (ret != -EEXIST) {
clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&inode->runtime_flags);
@@ -3436,28 +3456,26 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = inode->root;
int delete_item = 0;
- int release_rsv = 0;
int ret = 0;
- spin_lock(&root->orphan_lock);
if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&inode->runtime_flags))
delete_item = 1;
+ if (delete_item && trans)
+ ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
+
if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags))
- release_rsv = 1;
- spin_unlock(&root->orphan_lock);
+ btrfs_orphan_release_metadata(inode);
- if (delete_item) {
+ /*
+	 * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
+	 * to zero; to avoid that, decrease ->orphan_inodes only after
+	 * everything is done.
+ */
+ if (delete_item)
atomic_dec(&root->orphan_inodes);
- if (trans)
- ret = btrfs_del_orphan_item(trans, root,
- btrfs_ino(inode));
- }
-
- if (release_rsv)
- btrfs_orphan_release_metadata(inode);
return ret;
}
@@ -5281,7 +5299,7 @@ void btrfs_evict_inode(struct inode *inode)
trace_btrfs_inode_evict(inode);
if (!root) {
- kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+ clear_inode(inode);
return;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9e61dd624f7b..aa259d6986e1 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1442,8 +1442,13 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
int ret;
ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
- if (ret < 0)
- return ret;
+ if (ret < 0) {
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ btrfs_warn(fs_info,
+"error accounting new delayed refs extent (err code: %d), quota inconsistent",
+ ret);
+ return 0;
+ }
/*
* Here we don't need to get the lock of
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index dec0907dfb8a..fcfc20de2df3 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1370,6 +1370,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
+ stripe->dev->bdev &&
bio->bi_disk == stripe->dev->bdev->bd_disk &&
bio->bi_partno == stripe->dev->bdev->bd_partno) {
return i;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f0c3f00e97cb..cd2298d185dd 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3268,8 +3268,22 @@ static int relocate_file_extent_cluster(struct inode *inode,
nr++;
}
- btrfs_set_extent_delalloc(inode, page_start, page_end, 0, NULL,
- 0);
+ ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
+ NULL, 0);
+ if (ret) {
+ unlock_page(page);
+ put_page(page);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ PAGE_SIZE);
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ PAGE_SIZE);
+
+ clear_extent_bits(&BTRFS_I(inode)->io_tree,
+ page_start, page_end,
+ EXTENT_LOCKED | EXTENT_BOUNDARY);
+ goto out;
+
+ }
set_page_dirty(page);
unlock_extent(&BTRFS_I(inode)->io_tree,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f306c608dc28..484e2af793de 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5005,6 +5005,9 @@ static int send_hole(struct send_ctx *sctx, u64 end)
u64 len;
int ret = 0;
+ if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
+ return send_update_extent(sctx, offset, end - offset);
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6e71a2a78363..4b817947e00f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1545,7 +1545,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
* it for searching for existing supers, so this lets us do that and
* then open_ctree will properly initialize everything later.
*/
- fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
+ fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
if (!fs_info) {
error = -ENOMEM;
goto error_sec_opts;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index afadaadab18e..434457794c27 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -29,6 +29,7 @@
#include "hash.h"
#include "compression.h"
#include "qgroup.h"
+#include "inode-map.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
@@ -966,7 +967,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
if (key->type == BTRFS_INODE_EXTREF_KEY) {
- if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+ if (btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0],
+ ref_objectid,
name, namelen, NULL))
match = 1;
@@ -1190,7 +1193,8 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
read_extent_buffer(eb, *name, (unsigned long)&extref->name,
*namelen);
- *index = btrfs_inode_extref_index(eb, extref);
+ if (index)
+ *index = btrfs_inode_extref_index(eb, extref);
if (parent_objectid)
*parent_objectid = btrfs_inode_extref_parent(eb, extref);
@@ -1211,12 +1215,102 @@ static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
- *index = btrfs_inode_ref_index(eb, ref);
+ if (index)
+ *index = btrfs_inode_ref_index(eb, ref);
return 0;
}
/*
+ * Take an inode reference item from the log tree and iterate all names from the
+ * inode reference item in the subvolume tree with the same key (if it exists).
+ * For any name that is not in the inode reference item from the log tree, do a
+ * proper unlink of that name (that is, remove its entry from the inode
+ * reference item and both dir index keys).
+ */
+static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_inode *inode,
+ struct extent_buffer *log_eb,
+ int log_slot,
+ struct btrfs_key *key)
+{
+ int ret;
+ unsigned long ref_ptr;
+ unsigned long ref_end;
+ struct extent_buffer *eb;
+
+again:
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ if (ret < 0)
+ goto out;
+
+ eb = path->nodes[0];
+ ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
+ ref_end = ref_ptr + btrfs_item_size_nr(eb, path->slots[0]);
+ while (ref_ptr < ref_end) {
+ char *name = NULL;
+ int namelen;
+ u64 parent_id;
+
+ if (key->type == BTRFS_INODE_EXTREF_KEY) {
+ ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
+ NULL, &parent_id);
+ } else {
+ parent_id = key->offset;
+ ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
+ NULL);
+ }
+ if (ret)
+ goto out;
+
+ if (key->type == BTRFS_INODE_EXTREF_KEY)
+ ret = btrfs_find_name_in_ext_backref(log_eb, log_slot,
+ parent_id, name,
+ namelen, NULL);
+ else
+ ret = btrfs_find_name_in_backref(log_eb, log_slot, name,
+ namelen, NULL);
+
+ if (!ret) {
+ struct inode *dir;
+
+ btrfs_release_path(path);
+ dir = read_one_inode(root, parent_id);
+ if (!dir) {
+ ret = -ENOENT;
+ kfree(name);
+ goto out;
+ }
+ ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ inode, name, namelen);
+ kfree(name);
+ iput(dir);
+ if (ret)
+ goto out;
+ goto again;
+ }
+
+ kfree(name);
+ ref_ptr += namelen;
+ if (key->type == BTRFS_INODE_EXTREF_KEY)
+ ref_ptr += sizeof(struct btrfs_inode_extref);
+ else
+ ref_ptr += sizeof(struct btrfs_inode_ref);
+ }
+ ret = 0;
+ out:
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
* replay one inode back reference item found in the log tree.
* eb, slot and key refer to the buffer and key found in the log tree.
* root is the destination we are replaying into, and path is for temp
@@ -1344,6 +1438,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
}
+ /*
+ * Before we overwrite the inode reference item in the subvolume tree
+ * with the item from the log tree, we must unlink all names from the
+ * parent directory that are in the subvolume's tree inode reference
+ * item, otherwise we end up with an inconsistent subvolume tree where
+ * dir index entries exist for a name but there is no inode reference
+ * item with the same name.
+ */
+ ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot,
+ key);
+ if (ret)
+ goto out;
+
/* finally write the back reference in the inode */
ret = overwrite_item(trans, root, path, eb, slot, key);
out:
@@ -2472,6 +2579,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(root_owner !=
@@ -2552,6 +2662,9 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
@@ -2630,6 +2743,9 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(log->root_key.objectid !=
@@ -3018,13 +3134,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
+ 0, &start, &end,
+ EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT,
NULL);
if (ret)
break;
clear_extent_bits(&log->dirty_log_pages, start, end,
- EXTENT_DIRTY | EXTENT_NEW);
+ EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
}
/*
@@ -5677,6 +5794,23 @@ again:
path);
}
+ if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
+ struct btrfs_root *root = wc.replay_dest;
+
+ btrfs_release_path(path);
+
+ /*
+ * We have just replayed everything, and the highest
+ * objectid of fs roots probably has changed in case
+ * some inode_item's got replayed.
+ *
+ * root->objectid_mutex is not acquired as log replay
+ * could only happen during mount.
+ */
+ ret = btrfs_find_highest_objectid(root,
+ &root->highest_objectid);
+ }
+
key.offset = found_key.offset - 1;
wc.replay_dest->log_root = NULL;
free_extent_buffer(log->node);
@@ -5825,7 +5959,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
* this will force the logging code to walk the dentry chain
* up for the file
*/
- if (S_ISREG(inode->vfs_inode.i_mode))
+ if (!S_ISDIR(inode->vfs_inode.i_mode))
inode->last_unlink_trans = trans->transid;
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b5036bd69e6a..b2d05c6b1c56 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -645,6 +645,7 @@ static void btrfs_free_stale_devices(const char *path,
btrfs_sysfs_remove_fsid(fs_devs);
list_del(&fs_devs->list);
free_fs_devices(fs_devs);
+ break;
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
@@ -4828,10 +4829,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
ndevs = min(ndevs, devs_max);
/*
- * the primary goal is to maximize the number of stripes, so use as many
- * devices as possible, even if the stripes are not maximum sized.
+ * The primary goal is to maximize the number of stripes, so use as
+ * many devices as possible, even if the stripes are not maximum sized.
+ *
+	 * The DUP profile stores more than one stripe per device; the
+	 * max_avail is the total size, so we have to adjust.
*/
- stripe_size = devices_info[ndevs-1].max_avail;
+ stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
num_stripes = ndevs * dev_stripes;
/*
@@ -4866,8 +4870,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
stripe_size = devices_info[ndevs-1].max_avail;
}
- stripe_size = div_u64(stripe_size, dev_stripes);
-
/* align to BTRFS_STRIPE_LEN */
stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
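
The DUP sizing fix is plain arithmetic: max_avail is the free space on one device, and DUP places dev_stripes (two) stripes on that same device, so the division has to happen before the stripes are laid out. A standalone illustration (not kernel code), assuming 10GiB free and dev_stripes == 2:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t max_avail = 10ULL << 30;       /* free bytes on the device */
            uint64_t dev_stripes = 2;               /* DUP: two stripes per device */

            uint64_t old_stripe_size = max_avail;               /* buggy sizing */
            uint64_t new_stripe_size = max_avail / dev_stripes; /* fixed sizing */

            printf("old: need %llu bytes, have %llu\n",
                   (unsigned long long)(old_stripe_size * dev_stripes),
                   (unsigned long long)max_avail);
            printf("new: need %llu bytes, have %llu\n",
                   (unsigned long long)(new_stripe_size * dev_stripes),
                   (unsigned long long)max_avail);
            return 0;
    }
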
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6582c4507e6c..0e5bd3e3344e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3965,6 +3965,32 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
}
/*
+ * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
+ * looks like the link count will hit 0, drop any other caps (other
+ * than PIN) we don't specifically want (due to the file still being
+ * open).
+ */
+int ceph_drop_caps_for_unlink(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (inode->i_nlink == 1) {
+ drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
+
+ ci->i_ceph_flags |= CEPH_I_NODELAY;
+ if (__ceph_caps_dirty(ci)) {
+ struct ceph_mds_client *mdsc =
+ ceph_inode_to_client(inode)->mdsc;
+ __cap_delay_requeue_front(mdsc, ci);
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ return drop;
+}
+
+/*
* Helpers for embedding cap and dentry lease releases into mds
* requests.
*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0c4346806e17..f1d9c6cc0491 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1003,26 +1003,6 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
}
/*
- * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
- * looks like the link count will hit 0, drop any other caps (other
- * than PIN) we don't specifically want (due to the file still being
- * open).
- */
-static int drop_caps_for_unlink(struct inode *inode)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
-
- spin_lock(&ci->i_ceph_lock);
- if (inode->i_nlink == 1) {
- drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
- ci->i_ceph_flags |= CEPH_I_NODELAY;
- }
- spin_unlock(&ci->i_ceph_lock);
- return drop;
-}
-
-/*
 * rmdir and unlink differ only by the metadata op code
*/
static int ceph_unlink(struct inode *dir, struct dentry *dentry)
@@ -1056,7 +1036,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
- req->r_inode_drop = drop_caps_for_unlink(inode);
+ req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err && !req->r_reply_info.head->is_dentry)
d_delete(dentry);
@@ -1104,8 +1084,10 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_RDCACHE on source inode (mds will lock it) */
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
- if (d_really_is_positive(new_dentry))
- req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry));
+ if (d_really_is_positive(new_dentry)) {
+ req->r_inode_drop =
+ ceph_drop_caps_for_unlink(d_inode(new_dentry));
+ }
err = ceph_mdsc_do_request(mdsc, old_dir, req);
if (!err && !req->r_reply_info.head->is_dentry) {
/*
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a62d2a9841dc..fb2bc9c15a23 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -225,6 +225,7 @@ static int parse_fsopt_token(char *c, void *private)
return -ENOMEM;
break;
case Opt_mds_namespace:
+ kfree(fsopt->mds_namespace);
fsopt->mds_namespace = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
@@ -232,6 +233,7 @@ static int parse_fsopt_token(char *c, void *private)
return -ENOMEM;
break;
case Opt_fscache_uniq:
+ kfree(fsopt->fscache_uniq);
fsopt->fscache_uniq = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
@@ -711,14 +713,17 @@ static int __init init_caches(void)
goto bad_dentry;
ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
-
if (!ceph_file_cachep)
goto bad_file;
- if ((error = ceph_fscache_register()))
- goto bad_file;
+ error = ceph_fscache_register();
+ if (error)
+ goto bad_fscache;
return 0;
+
+bad_fscache:
+ kmem_cache_destroy(ceph_file_cachep);
bad_file:
kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
@@ -836,7 +841,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
int err;
unsigned long started = jiffies; /* note the start time */
struct dentry *root;
- int first = 0; /* first vfsmount for this super_block */
dout("mount start %p\n", fsc);
mutex_lock(&fsc->client->mount_mutex);
@@ -861,17 +865,17 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
path = fsc->mount_options->server_path + 1;
dout("mount opening path %s\n", path);
}
+
+ err = ceph_fs_debugfs_init(fsc);
+ if (err < 0)
+ goto out;
+
root = open_root_dentry(fsc, path, started);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto out;
}
fsc->sb->s_root = dget(root);
- first = 1;
-
- err = ceph_fs_debugfs_init(fsc);
- if (err < 0)
- goto fail;
} else {
root = dget(fsc->sb->s_root);
}
@@ -881,11 +885,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
mutex_unlock(&fsc->client->mount_mutex);
return root;
-fail:
- if (first) {
- dput(fsc->sb->s_root);
- fsc->sb->s_root = NULL;
- }
out:
mutex_unlock(&fsc->client->mount_mutex);
return ERR_PTR(err);
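
The two kfree() calls added to parse_fsopt_token() apply the usual rule for string options that may be given more than once: release the previous copy before duplicating the new value, otherwise the earlier allocation leaks. The general shape, as a sketch:

    /*
     * General form of the leak fix above: a repeated "key=value" option
     * must free the old copy first. kfree(NULL) is a no-op, so the
     * first occurrence needs no special case.
     */
    static int set_string_opt(char **slot, const char *val, size_t len)
    {
            kfree(*slot);
            *slot = kstrndup(val, len, GFP_KERNEL);
            return *slot ? 0 : -ENOMEM;
    }
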
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 21b2e5b004eb..1c2086e0fec2 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -987,7 +987,7 @@ extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session);
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
-
+extern int ceph_drop_caps_for_unlink(struct inode *inode);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
int mds, int drop, int unless, int force);
extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
diff --git a/fs/dcache.c b/fs/dcache.c
index 7c38f39958bc..8945e6cabd93 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -647,11 +647,16 @@ again:
spin_unlock(&parent->d_lock);
goto again;
}
- rcu_read_unlock();
- if (parent != dentry)
+ if (parent != dentry) {
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- else
+ if (unlikely(dentry->d_lockref.count < 0)) {
+ spin_unlock(&parent->d_lock);
+ parent = NULL;
+ }
+ } else {
parent = NULL;
+ }
+ rcu_read_unlock();
return parent;
}
@@ -2474,7 +2479,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
retry:
rcu_read_lock();
- seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
+ seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
r_seq = read_seqbegin(&rename_lock);
dentry = __d_lookup_rcu(parent, name, &d_seq);
if (unlikely(dentry)) {
@@ -2495,8 +2500,14 @@ retry:
rcu_read_unlock();
goto retry;
}
+
+ if (unlikely(seq & 1)) {
+ rcu_read_unlock();
+ goto retry;
+ }
+
hlist_bl_lock(b);
- if (unlikely(parent->d_inode->i_dir_seq != seq)) {
+ if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
hlist_bl_unlock(b);
rcu_read_unlock();
goto retry;
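
i_dir_seq acts like a seqcount: an odd value means a directory insertion is in flight. Masking the low bit off up front, as the old code did, let a lookup pair a stale even value with a later recheck; keeping the raw value and retrying on odd is the standard read-side protocol, roughly:

    /*
     * Schematic of the retry protocol d_alloc_parallel() now follows.
     * Types and helpers are stand-ins; the real code interleaves the
     * __d_lookup_rcu() phase and rename_lock checks with this.
     */
    static bool insert_once(struct inode *dir, struct hlist_bl_head *b)
    {
            unsigned int seq;

            seq = smp_load_acquire(&dir->i_dir_seq); /* raw, low bit kept */
            /* ... RCU lookup phase runs here ... */
            if (seq & 1)                    /* odd: insertion in flight */
                    return false;           /* caller retries from the top */

            hlist_bl_lock(b);
            if (READ_ONCE(dir->i_dir_seq) != seq) { /* changed since sampled */
                    hlist_bl_unlock(b);
                    return false;
            }
            /* ... safe to insert under the bucket lock ... */
            hlist_bl_unlock(b);
            return true;
    }
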
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a0ca9e48e993..1357ef563893 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1274,8 +1274,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
*/
if (dio->is_async && iov_iter_rw(iter) == WRITE) {
retval = 0;
- if ((iocb->ki_filp->f_flags & O_DSYNC) ||
- IS_SYNC(iocb->ki_filp->f_mapping->host))
+ if (iocb->ki_flags & IOCB_DSYNC)
retval = dio_set_defer_completion(dio);
else if (!dio->inode->i_sb->s_dio_done_wq) {
/*
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 5f22e74bbade..8e568428c88b 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -8,6 +8,7 @@
*/
#include <linux/efi.h>
+#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/mount.h>
@@ -74,6 +75,11 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
ssize_t size = 0;
int err;
+ while (!__ratelimit(&file->f_cred->user->ratelimit)) {
+ if (!msleep_interruptible(50))
+ return -EINTR;
+ }
+
err = efivar_entry_size(var, &datasize);
/*
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 86863792f36a..51f940e76c5e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -716,7 +716,7 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
__be64 *ptr;
sector_t lblock;
sector_t lend;
- int ret;
+ int ret = 0;
int eob;
unsigned int len;
struct buffer_head *bh;
@@ -728,12 +728,14 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
goto out;
}
- if ((flags & IOMAP_REPORT) && gfs2_is_stuffed(ip)) {
- gfs2_stuffed_iomap(inode, iomap);
- if (pos >= iomap->length)
- return -ENOENT;
- ret = 0;
- goto out;
+ if (gfs2_is_stuffed(ip)) {
+ if (flags & IOMAP_REPORT) {
+ gfs2_stuffed_iomap(inode, iomap);
+ if (pos >= iomap->length)
+ ret = -ENOENT;
+ goto out;
+ }
+ BUG_ON(!(flags & IOMAP_WRITE));
}
lblock = pos >> inode->i_blkbits;
@@ -744,7 +746,7 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
iomap->type = IOMAP_HOLE;
iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
iomap->flags = IOMAP_F_MERGED;
- bmap_lock(ip, 0);
+ bmap_lock(ip, flags & IOMAP_WRITE);
/*
* Directory data blocks have a struct gfs2_meta_header header, so the
@@ -787,27 +789,25 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
iomap->flags |= IOMAP_F_BOUNDARY;
iomap->length = (u64)len << inode->i_blkbits;
- ret = 0;
-
out_release:
release_metapath(&mp);
- bmap_unlock(ip, 0);
+ bmap_unlock(ip, flags & IOMAP_WRITE);
out:
trace_gfs2_iomap_end(ip, iomap, ret);
return ret;
do_alloc:
- if (!(flags & IOMAP_WRITE)) {
- if (pos >= i_size_read(inode)) {
+ if (flags & IOMAP_WRITE) {
+ ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+ } else if (flags & IOMAP_REPORT) {
+ loff_t size = i_size_read(inode);
+ if (pos >= size)
ret = -ENOENT;
- goto out_release;
- }
- ret = 0;
- iomap->length = hole_size(inode, lblock, &mp);
- goto out_release;
+ else if (height <= ip->i_height)
+ iomap->length = hole_size(inode, lblock, &mp);
+ else
+ iomap->length = size - pos;
}
-
- ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
goto out_release;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8fe1b0aa2896..b9a254dcc0e7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
@@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_ops = &hugetlb_vm_ops;
/*
- * Offset passed to mmap (before page shift) could have been
- * negative when represented as a (l)off_t.
+	 * The page-based offset in vm_pgoff could be sufficiently large to
+	 * overflow a (l)off_t when converted to a byte offset.
*/
- if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
return -EINVAL;
+ /* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
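
The PGOFF_LOFFT_MAX check is easier to see with numbers. On 64-bit with 4KiB pages it sets the top PAGE_SHIFT + 1 = 13 bits, i.e. exactly those vm_pgoff bits whose byte offset would reach the sign bit of a loff_t. A small userspace check of the arithmetic (64-bit, PAGE_SHIFT = 12 assumed):

    #include <stdio.h>
    #include <stdint.h>

    #define BITS_PER_LONG   64
    #define PAGE_SHIFT      12
    #define PGOFF_LOFFT_MAX \
            (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))

    int main(void)
    {
            /* byte offset 2^62: positive, fits in a signed loff_t */
            unsigned long ok  = 1UL << (BITS_PER_LONG - PAGE_SHIFT - 2);
            /* byte offset 2^63: lands on the sign bit, must be rejected */
            unsigned long bad = 1UL << (BITS_PER_LONG - PAGE_SHIFT - 1);

            printf("mask       = %#lx\n", PGOFF_LOFFT_MAX);
            printf("ok  & mask = %#lx\n", ok & PGOFF_LOFFT_MAX);
            printf("bad & mask = %#lx\n", bad & PGOFF_LOFFT_MAX);
            return 0;
    }
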
diff --git a/fs/namei.c b/fs/namei.c
index 921ae32dbc80..cafa365eeb70 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -559,9 +559,10 @@ static int __nd_alloc_stack(struct nameidata *nd)
static bool path_connected(const struct path *path)
{
struct vfsmount *mnt = path->mnt;
+ struct super_block *sb = mnt->mnt_sb;
- /* Only bind mounts can have disconnected paths */
- if (mnt->mnt_root == mnt->mnt_sb->s_root)
+ /* Bind mounts and multi-root filesystems can have disconnected paths */
+ if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
return true;
return is_subdir(path->dentry, mnt->mnt_root);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2435af56b87e..a50d7813e3ea 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -572,7 +572,7 @@ out:
}
static bool
-validate_bitmap_values(unsigned long mask)
+validate_bitmap_values(unsigned int mask)
{
return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
}
@@ -596,17 +596,15 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
goto out;
status = cpu_to_be32(NFS4_OK);
- if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
- &args->craa_type_mask))
+ if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_RDATA_DLG))
flags = FMODE_READ;
- if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
- &args->craa_type_mask))
+ if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_WDATA_DLG))
flags |= FMODE_WRITE;
- if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
- &args->craa_type_mask))
- pnfs_recall_all_layouts(cps->clp);
if (flags)
nfs_expire_unused_delegation_types(cps->clp, flags);
+
+ if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
+ pnfs_recall_all_layouts(cps->clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
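
Moving away from test_bit() here is more than style: craa_type_mask is a 32-bit field, and casting its address to unsigned long * makes test_bit() read a full long, which is an out-of-bounds access and, on 64-bit big-endian, tests the wrong bytes entirely. A userspace model of the failure (simplified test_bit; the bit number is arbitrary for illustration):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* mimics the kernel's test_bit: operates on long-sized words */
    static int test_bit(unsigned int nr, const unsigned long *addr)
    {
            return (addr[nr / (8 * sizeof(long))] >>
                    (nr % (8 * sizeof(long)))) & 1;
    }

    int main(void)
    {
            unsigned long word = 0;         /* padded storage the cast implies */
            uint32_t mask = 1u << 4;        /* some flag bit */

            memcpy(&word, &mask, sizeof(mask));
            printf("test_bit sees %d (1 on little-endian, 0 on big-endian)\n",
                   test_bit(4, &word));
            printf("mask & BIT(4) is %d on every machine\n",
                   !!(mask & (1u << 4)));
            return 0;
    }
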
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8c10b0562e75..621c517b325c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -86,10 +86,10 @@ struct nfs_direct_req {
struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
int mirror_count;
+ loff_t io_start; /* Start offset for I/O */
ssize_t count, /* bytes actually processed */
max_count, /* max expected count */
bytes_left, /* bytes left to be sent */
- io_start, /* start of IO */
error; /* any reported error */
struct completion completion; /* wait for i/o completion */
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 49f848fd1f04..7327930ad970 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -873,7 +873,7 @@ static void nfs3_nlm_release_call(void *data)
}
}
-const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
+static const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
.nlmclnt_alloc_call = nfs3_nlm_alloc_call,
.nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare,
.nlmclnt_release_call = nfs3_nlm_release_call,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 04612c24d394..979631411a0e 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -868,8 +868,10 @@ static int nfs4_set_client(struct nfs_server *server,
if (IS_ERR(clp))
return PTR_ERR(clp);
- if (server->nfs_client == clp)
+ if (server->nfs_client == clp) {
+ nfs_put_client(clp);
return -ELOOP;
+ }
/*
* Query for the lease time on clientid setup or renewal
@@ -1244,11 +1246,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
clp->cl_proto, clnt->cl_timeout,
clp->cl_minorversion, net);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
- nfs_put_client(clp);
if (error != 0) {
nfs_server_insert_lists(server);
return error;
}
+ nfs_put_client(clp);
if (server->nfs_client->cl_hostname == NULL)
server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c13e826614b5..ee723aa153a3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -292,8 +292,11 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
- struct inode *inode = lo->plh_inode;
+ struct inode *inode;
+ if (!lo)
+ return;
+ inode = lo->plh_inode;
pnfs_layoutreturn_before_put_layout_hdr(lo);
if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
@@ -1241,10 +1244,12 @@ retry:
spin_lock(&ino->i_lock);
lo = nfsi->layout;
if (!lo || !pnfs_layout_is_valid(lo) ||
- test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ lo = NULL;
goto out_noroc;
+ }
+ pnfs_get_layout_hdr(lo);
if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
- pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
TASK_UNINTERRUPTIBLE);
@@ -1312,10 +1317,12 @@ out_noroc:
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
if (ld->prepare_layoutreturn)
ld->prepare_layoutreturn(args);
+ pnfs_put_layout_hdr(lo);
return true;
}
if (layoutreturn)
pnfs_send_layoutreturn(lo, &stateid, iomode, true);
+ pnfs_put_layout_hdr(lo);
return false;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 29bacdc56f6a..5e470e233c83 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2631,6 +2631,8 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
/* initial superblock/root creation */
mount_info->fill_super(s, mount_info);
nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
+ if (!(server->flags & NFS_MOUNT_UNSHARED))
+ s->s_iflags |= SB_I_MULTIROOT;
}
mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7428a669d7a7..e7d8ceae8f26 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1876,40 +1876,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
return status;
}
-int nfs_commit_inode(struct inode *inode, int how)
+static int __nfs_commit_inode(struct inode *inode, int how,
+ struct writeback_control *wbc)
{
LIST_HEAD(head);
struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
- int error = 0;
- int res;
+ int ret, nscan;
nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_commit_begin(cinfo.mds);
- res = nfs_scan_commit(inode, &head, &cinfo);
- if (res)
- error = nfs_generic_commit_list(inode, &head, how, &cinfo);
+ for (;;) {
+ ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
+ if (ret <= 0)
+ break;
+ ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
+ if (ret < 0)
+ break;
+ ret = 0;
+ if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
+ if (nscan < wbc->nr_to_write)
+ wbc->nr_to_write -= nscan;
+ else
+ wbc->nr_to_write = 0;
+ }
+ if (nscan < INT_MAX)
+ break;
+ cond_resched();
+ }
nfs_commit_end(cinfo.mds);
- if (res == 0)
- return res;
- if (error < 0)
- goto out_error;
- if (!may_wait)
- goto out_mark_dirty;
- error = wait_on_commit(cinfo.mds);
- if (error < 0)
- return error;
- return res;
-out_error:
- res = error;
- /* Note: If we exit without ensuring that the commit is complete,
- * we must mark the inode as dirty. Otherwise, future calls to
- * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
- * that the data is on the disk.
- */
-out_mark_dirty:
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- return res;
+ if (ret || !may_wait)
+ return ret;
+ return wait_on_commit(cinfo.mds);
+}
+
+int nfs_commit_inode(struct inode *inode, int how)
+{
+ return __nfs_commit_inode(inode, how, NULL);
}
EXPORT_SYMBOL_GPL(nfs_commit_inode);
@@ -1919,11 +1922,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
int flags = FLUSH_SYNC;
int ret = 0;
- /* no commits means nothing needs to be done */
- if (!atomic_long_read(&nfsi->commit_info.ncommit))
- return ret;
-
if (wbc->sync_mode == WB_SYNC_NONE) {
+ /* no commits means nothing needs to be done */
+ if (!atomic_long_read(&nfsi->commit_info.ncommit))
+ goto check_requests_outstanding;
+
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
@@ -1934,16 +1937,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
flags = 0;
}
- ret = nfs_commit_inode(inode, flags);
- if (ret >= 0) {
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (ret < wbc->nr_to_write)
- wbc->nr_to_write -= ret;
- else
- wbc->nr_to_write = 0;
- }
- return 0;
- }
+ ret = __nfs_commit_inode(inode, flags, wbc);
+ if (!ret) {
+ if (flags & FLUSH_SYNC)
+ return 0;
+ } else if (atomic_long_read(&nfsi->commit_info.ncommit))
+ goto out_mark_dirty;
+
+check_requests_outstanding:
+ if (!atomic_read(&nfsi->commit_info.rpcs_out))
+ return ret;
out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 150521c9671b..61b770e39809 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -268,6 +268,35 @@ free_blocked_lock(struct nfsd4_blocked_lock *nbl)
kfree(nbl);
}
+static void
+remove_blocked_locks(struct nfs4_lockowner *lo)
+{
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct nfsd4_blocked_lock *nbl;
+ LIST_HEAD(reaplist);
+
+ /* Dequeue all blocked locks */
+ spin_lock(&nn->blocked_locks_lock);
+ while (!list_empty(&lo->lo_blocked)) {
+ nbl = list_first_entry(&lo->lo_blocked,
+ struct nfsd4_blocked_lock,
+ nbl_list);
+ list_del_init(&nbl->nbl_list);
+ list_move(&nbl->nbl_lru, &reaplist);
+ }
+ spin_unlock(&nn->blocked_locks_lock);
+
+ /* Now free them */
+ while (!list_empty(&reaplist)) {
+ nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
+ nbl_lru);
+ list_del_init(&nbl->nbl_lru);
+ posix_unblock_lock(&nbl->nbl_lock);
+ free_blocked_lock(nbl);
+ }
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -1866,6 +1895,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
static void
__destroy_client(struct nfs4_client *clp)
{
+ int i;
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
struct list_head reaplist;
@@ -1895,6 +1925,16 @@ __destroy_client(struct nfs4_client *clp)
nfs4_get_stateowner(&oo->oo_owner);
release_openowner(oo);
}
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ struct nfs4_stateowner *so, *tmp;
+
+ list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
+ so_strhash) {
+ /* Should be no openowners at this point */
+ WARN_ON_ONCE(so->so_is_open_owner);
+ remove_blocked_locks(lockowner(so));
+ }
+ }
nfsd4_return_all_client_layouts(clp);
nfsd4_shutdown_callback(clp);
if (clp->cl_cb_conn.cb_xprt)
@@ -6355,6 +6395,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
}
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
+ remove_blocked_locks(lo);
nfs4_put_stateowner(&lo->lo_owner);
return status;
@@ -7140,6 +7181,8 @@ nfs4_state_destroy_net(struct net *net)
}
}
+ WARN_ON(!list_empty(&nn->blocked_locks_lru));
+
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&nn->unconf_id_hashtbl[i])) {
clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -7206,7 +7249,6 @@ nfs4_state_shutdown_net(struct net *net)
struct nfs4_delegation *dp = NULL;
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct nfsd4_blocked_lock *nbl;
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -7227,24 +7269,6 @@ nfs4_state_shutdown_net(struct net *net)
nfs4_put_stid(&dp->dl_stid);
}
- BUG_ON(!list_empty(&reaplist));
- spin_lock(&nn->blocked_locks_lock);
- while (!list_empty(&nn->blocked_locks_lru)) {
- nbl = list_first_entry(&nn->blocked_locks_lru,
- struct nfsd4_blocked_lock, nbl_lru);
- list_move(&nbl->nbl_lru, &reaplist);
- list_del_init(&nbl->nbl_list);
- }
- spin_unlock(&nn->blocked_locks_lock);
-
- while (!list_empty(&reaplist)) {
- nbl = list_first_entry(&reaplist,
- struct nfsd4_blocked_lock, nbl_lru);
- list_del_init(&nbl->nbl_lru);
- posix_unblock_lock(&nbl->nbl_lock);
- free_blocked_lock(nbl);
- }
-
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
}
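
remove_blocked_locks() above uses the common two-phase teardown: detach everything onto a private reaplist while holding the spinlock, then do the blocking work (posix_unblock_lock(), freeing) with the lock dropped. Generically, with stand-in types:

    /*
     * Generic form of the detach-then-reap pattern used by
     * remove_blocked_locks(). "struct item" and destroy_item() are
     * hypothetical stand-ins for the nfsd4_blocked_lock handling.
     */
    struct item {
            struct list_head list;
            /* ... payload ... */
    };

    static void reap_all(struct list_head *src, spinlock_t *lock)
    {
            LIST_HEAD(reaplist);
            struct item *it, *tmp;

            spin_lock(lock);                /* phase 1: detach under the lock */
            list_splice_init(src, &reaplist);
            spin_unlock(lock);

            /* phase 2: blocking cleanup with the lock dropped */
            list_for_each_entry_safe(it, tmp, &reaplist, list) {
                    list_del_init(&it->list);
                    destroy_item(it);       /* may sleep or take other locks */
            }
    }
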
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 36b0772701a0..60702d677bd4 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -184,6 +184,7 @@ int open_related_ns(struct ns_common *ns,
return fd;
}
+EXPORT_SYMBOL_GPL(open_related_ns);
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 406e72de88f6..ce6ff5a0a6e4 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -24,6 +24,8 @@ config OVERLAY_FS_REDIRECT_DIR
an overlay which has redirects on a kernel that doesn't support this
feature will have unexpected results.
+ If unsure, say N.
+
config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
bool "Overlayfs: follow redirects even if redirects are turned off"
default y
@@ -32,8 +34,13 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
Disable this to get a possibly more secure configuration, but that
might not be backward compatible with previous kernels.
+ If backward compatibility is not an issue, then it is safe and
+ recommended to say N here.
+
For more information, see Documentation/filesystems/overlayfs.txt
+ If unsure, say Y.
+
config OVERLAY_FS_INDEX
bool "Overlayfs: turn on inodes index feature by default"
depends on OVERLAY_FS
@@ -51,6 +58,8 @@ config OVERLAY_FS_INDEX
That is, mounting an overlay which has an inodes index on a kernel
that doesn't support this feature will have unexpected results.
+ If unsure, say N.
+
config OVERLAY_FS_NFS_EXPORT
bool "Overlayfs: turn on NFS export feature by default"
depends on OVERLAY_FS
@@ -72,3 +81,8 @@ config OVERLAY_FS_NFS_EXPORT
Note, that the NFS export feature is not backward compatible.
That is, mounting an overlay which has a full index on a kernel
that doesn't support this feature will have unexpected results.
+
+ Most users should say N here and enable this feature on a case-by-
+ case basis with the "nfs_export=on" mount option.
+
+ Say N unless you fully understand the consequences.
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index bb94ce9da5c8..87bd4148f4fb 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -19,6 +19,142 @@
#include <linux/ratelimit.h>
#include "overlayfs.h"
+static int ovl_encode_maybe_copy_up(struct dentry *dentry)
+{
+ int err;
+
+ if (ovl_dentry_upper(dentry))
+ return 0;
+
+ err = ovl_want_write(dentry);
+ if (!err) {
+ err = ovl_copy_up(dentry);
+ ovl_drop_write(dentry);
+ }
+
+ if (err) {
+ pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+ dentry, err);
+ }
+
+ return err;
+}
+
+/*
+ * Before encoding a non-upper directory file handle from real layer N, we need
+ * to check if it will be possible to reconnect an overlay dentry from the real
+ * lower decoded dentry. This is done by following the overlay ancestry up to a
+ * "layer N connected" ancestor and verifying that all parents along the way are
+ * "layer N connectable". If an ancestor that is NOT "layer N connectable" is
+ * found, we need to copy up an ancestor, which is "layer N connectable", thus
+ * making that ancestor "layer N connected". For example:
+ *
+ * layer 1: /a
+ * layer 2: /a/b/c
+ *
+ * The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is
+ * copied up and renamed, upper dir /a will be indexed by lower dir /a from
+ * layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*)
+ * in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay
+ * dentry from the connected lower dentry /a/b/c.
+ *
+ * To avoid this problem at decode time, we need to copy up an ancestor of
+ * /a/b/c, which is "layer 2 connectable", at encode time. That ancestor is
+ * /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected"
+ * and when the time comes to decode the file handle from lower dentry /a/b/c,
+ * ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding
+ * a connected overlay dentry will succeed.
+ *
+ * (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an
+ * entry /a in the lower layers above layer N and find the indexed dir /a from
+ * layer 1. If that improvement is made, then the check for "layer N connected"
+ * will need to verify there are no redirects in lower layers above N. In the
+ * example above, /a will be "layer 2 connectable". However, if layer 2 dir /a
+ * is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable":
+ *
+ * layer 1: /A (redirect = /a)
+ * layer 2: /a/b/c
+ */
+
+/* Return the lowest layer for encoding a connectable file handle */
+static int ovl_connectable_layer(struct dentry *dentry)
+{
+ struct ovl_entry *oe = OVL_E(dentry);
+
+ /* We can get overlay root from root of any layer */
+ if (dentry == dentry->d_sb->s_root)
+ return oe->numlower;
+
+ /*
+ * If it's an unindexed merge dir, then it's not connectable with any
+ * lower layer
+ */
+ if (ovl_dentry_upper(dentry) &&
+ !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ return 0;
+
+ /* We can get upper/overlay path from indexed/lower dentry */
+ return oe->lowerstack[0].layer->idx;
+}
+
+/*
+ * @dentry is "connected" if all ancestors up to root or a "connected" ancestor
+ * have the same uppermost lower layer as the origin's layer. We may need to
+ * copy up a "connectable" ancestor to make it "connected". A "connected" dentry
+ * cannot become non "connected", so cache positive result in dentry flags.
+ *
+ * Return the connected origin layer or < 0 on error.
+ */
+static int ovl_connect_layer(struct dentry *dentry)
+{
+ struct dentry *next, *parent = NULL;
+ int origin_layer;
+ int err = 0;
+
+ if (WARN_ON(dentry == dentry->d_sb->s_root) ||
+ WARN_ON(!ovl_dentry_lower(dentry)))
+ return -EIO;
+
+ origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx;
+ if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry))
+ return origin_layer;
+
+ /* Find the topmost origin layer connectable ancestor of @dentry */
+ next = dget(dentry);
+ for (;;) {
+ parent = dget_parent(next);
+ if (WARN_ON(parent == next)) {
+ err = -EIO;
+ break;
+ }
+
+ /*
+ * If @parent is not origin layer connectable, then copy up
+ * @next which is origin layer connectable and we are done.
+ */
+ if (ovl_connectable_layer(parent) < origin_layer) {
+ err = ovl_encode_maybe_copy_up(next);
+ break;
+ }
+
+ /* If @parent is connected or indexed we are done */
+ if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) ||
+ ovl_test_flag(OVL_INDEX, d_inode(parent)))
+ break;
+
+ dput(next);
+ next = parent;
+ }
+
+ dput(parent);
+ dput(next);
+
+ if (!err)
+ ovl_dentry_set_flag(OVL_E_CONNECTED, dentry);
+
+ return err ?: origin_layer;
+}
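The "err ?: origin_layer" return above uses GNU C's two-operand conditional (the "Elvis" operator): "a ?: b" evaluates to a when a is non-zero and to b otherwise, evaluating a only once. A minimal userspace sketch of the idiom, illustration only and not part of the patch (compile with gcc or clang):

#include <stdio.h>

static int connect_result(int err, int origin_layer)
{
	/* err when non-zero (error path), otherwise the layer index */
	return err ?: origin_layer;
}

int main(void)
{
	printf("%d\n", connect_result(0, 2));	/* 2: success, layer index */
	printf("%d\n", connect_result(-5, 2));	/* -5: error propagated */
	return 0;
}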
+
/*
* We only need to encode origin if there is a chance that the same object was
* encoded pre copy up and then we need to stay consistent with the same
@@ -41,73 +177,59 @@
* L = lower file handle
*
* (*) Connecting an overlay dir from real lower dentry is not always
- * possible when there are redirects in lower layers. To mitigate this case,
- * we copy up the lower dir first and then encode an upper dir file handle.
+ * possible when there are redirects in lower layers and non-indexed merge dirs.
+ * To mitigate those cases, we may copy up the lower dir ancestor before
+ * encoding a lower dir file handle.
+ *
+ * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
*/
-static bool ovl_should_encode_origin(struct dentry *dentry)
+static int ovl_check_encode_origin(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ /* Upper file handle for pure upper */
if (!ovl_dentry_lower(dentry))
- return false;
+ return 0;
/*
- * Decoding a merge dir, whose origin's parent is under a redirected
- * lower dir is not always possible. As a simple aproximation, we do
- * not encode lower dir file handles when overlay has multiple lower
- * layers and origin is below the topmost lower layer.
+ * Upper file handle for non-indexed upper.
*
- * TODO: copy up only the parent that is under redirected lower.
+ * Root is never indexed, so if there's an upper layer, encode upper for
+ * root.
*/
- if (d_is_dir(dentry) && ofs->upper_mnt &&
- OVL_E(dentry)->lowerstack[0].layer->idx > 1)
- return false;
-
- /* Decoding a non-indexed upper from origin is not implemented */
if (ovl_dentry_upper(dentry) &&
!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
- return false;
-
- return true;
-}
-
-static int ovl_encode_maybe_copy_up(struct dentry *dentry)
-{
- int err;
-
- if (ovl_dentry_upper(dentry))
return 0;
- err = ovl_want_write(dentry);
- if (err)
- return err;
-
- err = ovl_copy_up(dentry);
+ /*
+ * Decoding a merge dir, whose origin's ancestor is under a redirected
+ * lower dir or under a non-indexed upper is not always possible.
+ * ovl_connect_layer() will try to make origin's layer "connected" by
+ * copying up a "connectable" ancestor.
+ */
+ if (d_is_dir(dentry) && ofs->upper_mnt)
+ return ovl_connect_layer(dentry);
- ovl_drop_write(dentry);
- return err;
+ /* Lower file handle for indexed and non-upper dir/non-dir */
+ return 1;
}
static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
{
- struct dentry *origin = ovl_dentry_lower(dentry);
struct ovl_fh *fh = NULL;
- int err;
+ int err, enc_lower;
/*
- * If we should not encode a lower dir file handle, copy up and encode
- * an upper dir file handle.
+ * Check if we should encode a lower or upper file handle and maybe
+ * copy up an ancestor to make lower file handle connectable.
*/
- if (!ovl_should_encode_origin(dentry)) {
- err = ovl_encode_maybe_copy_up(dentry);
- if (err)
- goto fail;
-
- origin = NULL;
- }
+ err = enc_lower = ovl_check_encode_origin(dentry);
+ if (enc_lower < 0)
+ goto fail;
- /* Encode an upper or origin file handle */
- fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin);
+ /* Encode an upper or lower file handle */
+ fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) :
+ ovl_dentry_upper(dentry), !enc_lower);
err = PTR_ERR(fh);
if (IS_ERR(fh))
goto fail;
@@ -355,8 +477,8 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
dput(upper);
}
- if (!this)
- return NULL;
+ if (IS_ERR_OR_NULL(this))
+ return this;
if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
dput(this);
@@ -498,7 +620,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
if (err == -ECHILD) {
this = ovl_lookup_real_ancestor(sb, real,
layer);
- err = IS_ERR(this) ? PTR_ERR(this) : 0;
+ err = PTR_ERR_OR_ZERO(this);
}
if (!err) {
dput(connected);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index fcd97b783fa1..3b1bd469accd 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -669,38 +669,59 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
return inode;
}
+/*
+ * Does overlay inode need to be hashed by lower inode?
+ */
+static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
+ struct dentry *lower, struct dentry *index)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ /* No, if pure upper */
+ if (!lower)
+ return false;
+
+ /* Yes, if already indexed */
+ if (index)
+ return true;
+
+ /* Yes, if won't be copied up */
+ if (!ofs->upper_mnt)
+ return true;
+
+ /* No, if lower hardlink is or will be broken on copy up */
+ if ((upper || !ovl_indexdir(sb)) &&
+ !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+ return false;
+
+ /* No, if non-indexed upper with NFS export */
+ if (sb->s_export_op && upper)
+ return false;
+
+ /* Otherwise, hash by lower inode for fsnotify */
+ return true;
+}
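For readers tracing the decision order above, here is a toy userspace model of ovl_hash_bylower() with plain booleans standing in for the kernel structures; the field names are invented for this sketch and the kernel function remains the authority:

#include <stdbool.h>
#include <stdio.h>

struct toy {
	bool lower, index, upper_mnt, upper, indexdir, is_dir, export_op;
	unsigned int nlink;
};

static bool hash_bylower(const struct toy *t)
{
	if (!t->lower)				/* pure upper */
		return false;
	if (t->index)				/* already indexed */
		return true;
	if (!t->upper_mnt)			/* will never be copied up */
		return true;
	if ((t->upper || !t->indexdir) &&	/* hardlink is/will be broken */
	    !t->is_dir && t->nlink > 1)
		return false;
	if (t->export_op && t->upper)		/* non-indexed upper, NFS export */
		return false;
	return true;				/* hash by lower for fsnotify */
}

int main(void)
{
	struct toy pure_upper = { .lower = false };
	struct toy lower_only = { .lower = true, .upper_mnt = false };

	printf("%d %d\n", hash_bylower(&pure_upper),
	       hash_bylower(&lower_only));	/* prints "0 1" */
	return 0;
}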
+
struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
struct dentry *lowerdentry, struct dentry *index,
unsigned int numlower)
{
- struct ovl_fs *ofs = sb->s_fs_info;
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
- /* Already indexed or could be indexed on copy up? */
- bool indexed = (index || (ovl_indexdir(sb) && !upperdentry));
- struct dentry *origin = indexed ? lowerdentry : NULL;
+ bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index);
bool is_dir;
- if (WARN_ON(upperdentry && indexed && !lowerdentry))
- return ERR_PTR(-EIO);
-
if (!realinode)
realinode = d_inode(lowerdentry);
/*
- * Copy up origin (lower) may exist for non-indexed non-dir upper, but
- * we must not use lower as hash key in that case.
- * Hash non-dir that is or could be indexed by origin inode.
- * Hash dir that is or could be merged by origin inode.
- * Hash pure upper and non-indexed non-dir by upper inode.
- * Hash non-indexed dir by upper inode for NFS export.
+ * Copy up origin (lower) may exist for non-indexed upper, but we must
+ * not use lower as hash key if this is a broken hardlink.
*/
is_dir = S_ISDIR(realinode->i_mode);
- if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt))
- origin = lowerdentry;
-
- if (upperdentry || origin) {
- struct inode *key = d_inode(origin ?: upperdentry);
+ if (upperdentry || bylower) {
+ struct inode *key = d_inode(bylower ? lowerdentry :
+ upperdentry);
unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
inode = iget5_locked(sb, (unsigned long) key,
@@ -728,6 +749,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
set_nlink(inode, nlink);
} else {
+ /* Lower hardlink that will be broken on copy up */
inode = new_inode(sb);
if (!inode)
goto out_nomem;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index de3e6da1d5a5..70fcfcc684cc 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -913,9 +913,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
stack[ctr].layer = lower.layer;
ctr++;
- if (d.stop)
- break;
-
/*
* Following redirects can have security consequences: it's like
* a symlink into the lower layer without the permission checks.
@@ -933,6 +930,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
goto out_put;
}
+ if (d.stop)
+ break;
+
if (d.redirect && d.redirect[0] == '/' && poe != roe) {
poe = roe;
/* Find the current layer on the root dentry */
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 0df25a9c94bd..225ff1171147 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -40,6 +40,7 @@ enum ovl_inode_flag {
enum ovl_entry_flag {
OVL_E_UPPER_ALIAS,
OVL_E_OPAQUE,
+ OVL_E_CONNECTED,
};
/*
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 9ee37c76091d..7c24619ae7fc 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1359,6 +1359,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
/* Root is always merge -> can have whiteouts */
ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
+ ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
ovl_dentry_lower(root_dentry));
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e8a93bc8285d..d1e82761de81 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -510,6 +510,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
/* we have to zero-fill user buffer even if no read */
if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
+ } else if (m->type == KCORE_USER) {
+ /* User page is handled prior to normal kernel page: */
+ if (copy_to_user(buffer, (char *)start, tsz))
+ return -EFAULT;
} else {
if (kern_addr_valid(start)) {
/*
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 9990957264e3..76bf9cc62074 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -118,13 +118,22 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
#endif
#ifdef BUS_MCEERR_AO
- /*
+ /*
+ * Other callers might not initialize the si_lsb field,
+ * so check explicitly for the right codes here.
+ */
+ if (kinfo->si_signo == SIGBUS &&
+ kinfo->si_code == BUS_MCEERR_AO)
+ err |= __put_user((short) kinfo->si_addr_lsb,
+ &uinfo->ssi_addr_lsb);
+#endif
+#ifdef BUS_MCEERR_AR
+ /*
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
if (kinfo->si_signo == SIGBUS &&
- (kinfo->si_code == BUS_MCEERR_AR ||
- kinfo->si_code == BUS_MCEERR_AO))
+ kinfo->si_code == BUS_MCEERR_AR)
err |= __put_user((short) kinfo->si_addr_lsb,
&uinfo->ssi_addr_lsb);
#endif
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 8664db25a9a6..215c225b2ca1 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -106,6 +106,7 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
{
return sysfs_do_create_link(kobj, target, name, 0);
}
+EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn);
/**
* sysfs_delete_link - remove symlink in object's directory.
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index fd975524f460..05c66e05ae20 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -767,7 +767,7 @@ int
xfs_scrub_agfl(
struct xfs_scrub_context *sc)
{
- struct xfs_scrub_agfl_info sai = { 0 };
+ struct xfs_scrub_agfl_info sai;
struct xfs_agf *agf;
xfs_agnumber_t agno;
unsigned int agflcount;
@@ -795,6 +795,7 @@ xfs_scrub_agfl(
xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
goto out;
}
+ memset(&sai, 0, sizeof(sai));
sai.sz_entries = agflcount;
sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS);
if (!sai.entries) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 66e1edbfb2b2..046469fcc1b8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,15 +955,29 @@ static inline bool imap_needs_alloc(struct inode *inode,
(IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
}
+static inline bool needs_cow_for_zeroing(struct xfs_bmbt_irec *imap, int nimaps)
+{
+ return nimaps &&
+ imap->br_startblock != HOLESTARTBLOCK &&
+ imap->br_state != XFS_EXT_UNWRITTEN;
+}
+
static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
{
/*
- * COW writes will allocate delalloc space, so we need to make sure
- * to take the lock exclusively here.
+ * COW writes may allocate delalloc space or convert unwritten COW
+ * extents, so we need to make sure to take the lock exclusively here.
*/
if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
return true;
- if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE))
+
+ /*
+ * Extents not yet cached require exclusive access; don't block.
+ * This is an open-coded xfs_ilock_data_map_shared() to cater for the
+ * non-blocking behaviour.
+ */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ !(ip->i_df.if_flags & XFS_IFEXTENTS))
return true;
return false;
}
@@ -993,16 +1007,18 @@ xfs_file_iomap_begin(
return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
}
- if (need_excl_ilock(ip, flags)) {
+ if (need_excl_ilock(ip, flags))
lockmode = XFS_ILOCK_EXCL;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- } else {
- lockmode = xfs_ilock_data_map_shared(ip);
- }
+ else
+ lockmode = XFS_ILOCK_SHARED;
- if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
- error = -EAGAIN;
- goto out_unlock;
+ if (flags & IOMAP_NOWAIT) {
+ if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
+ return -EAGAIN;
+ if (!xfs_ilock_nowait(ip, lockmode))
+ return -EAGAIN;
+ } else {
+ xfs_ilock(ip, lockmode);
}
ASSERT(offset <= mp->m_super->s_maxbytes);
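The IOMAP_NOWAIT handling above converts a sleeping lock acquisition into a trylock that fails fast with -EAGAIN so the caller can fall back to a blocking retry. A hedged userspace sketch of the same shape, with a pthread mutex standing in for the XFS inode lock:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;

static int iomap_begin(int nowait)
{
	if (nowait) {
		if (pthread_mutex_trylock(&ilock) != 0)
			return -EAGAIN;	/* caller retries in blocking mode */
	} else {
		pthread_mutex_lock(&ilock);
	}

	/* ... extent lookup would happen here ... */

	pthread_mutex_unlock(&ilock);
	return 0;
}

int main(void)
{
	printf("%d\n", iomap_begin(1));	/* 0: uncontended trylock succeeds */
	return 0;
}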
@@ -1024,7 +1040,9 @@ xfs_file_iomap_begin(
goto out_unlock;
}
- if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+ if (xfs_is_reflink_inode(ip) &&
+ ((flags & IOMAP_WRITE) ||
+ ((flags & IOMAP_ZERO) && needs_cow_for_zeroing(&imap, nimaps)))) {
if (flags & IOMAP_DIRECT) {
/*
* A reflinked inode will result in CoW alloc.
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 3a55d6fc271b..7a39f40645f7 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -23,6 +23,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
@@ -456,10 +457,12 @@ xfs_cui_recover(
* transaction. Normally, any work that needs to be deferred
* gets attached to the same defer_ops that scheduled the
* refcount update. However, we're in log recovery here, so we
- * we create our own defer_ops and use that to finish up any
- * work that doesn't fit.
+ * use the passed-in defer_ops to finish up any work that
+ * doesn't fit. We need to reserve enough blocks to handle a
+ * full btree split on either end of the refcount range.
*/
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
if (error)
return error;
cudp = xfs_trans_get_cud(tp, cuip);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index f3b139c9aa16..49d3124863a8 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -23,6 +23,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
@@ -470,7 +471,8 @@ xfs_rui_recover(
}
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
if (error)
return error;
rudp = xfs_trans_get_rud(tp, ruip);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 7aba628dc527..93588ea3d3d2 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -250,6 +250,7 @@ xfs_parseargs(
return -EINVAL;
break;
case Opt_logdev:
+ kfree(mp->m_logname);
mp->m_logname = match_strdup(args);
if (!mp->m_logname)
return -ENOMEM;
@@ -258,6 +259,7 @@ xfs_parseargs(
xfs_warn(mp, "%s option not allowed on this system", p);
return -EINVAL;
case Opt_rtdev:
+ kfree(mp->m_rtname);
mp->m_rtname = match_strdup(args);
if (!mp->m_rtname)
return -ENOMEM;
diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index bc397573c43a..67ab280ad134 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -7,7 +7,8 @@
* @nr: Bit to set
* @addr: Address to count from
*
- * This operation is atomic and provides acquire barrier semantics.
+ * This operation is atomic and provides acquire barrier semantics if
+ * the returned value is 0.
* It can be used to implement bit locks.
*/
#define test_and_set_bit_lock(nr, addr) test_and_set_bit(nr, addr)
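The corrected comment states exactly the contract lock implementations rely on: acquire ordering is only needed on the successful 0 -> 1 transition. A minimal C11 userspace analogue of a bit lock built on that contract (C11 atomics stand in for the kernel's bitops here):

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag lock = ATOMIC_FLAG_INIT;

static void bit_spin_lock(void)
{
	/* spin until the old value was 0; the winning iteration acquires */
	while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire))
		;
}

static void bit_spin_unlock(void)
{
	atomic_flag_clear_explicit(&lock, memory_order_release);
}

int main(void)
{
	bit_spin_lock();
	puts("in critical section");
	bit_spin_unlock();
	return 0;
}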
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 963b755d19b0..a7613e1b0c87 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -52,6 +52,7 @@ struct bug_entry {
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
+ barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 2cfa3075d148..bfbb44a5ad38 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -983,6 +983,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud);
+int pmd_free_pte_page(pmd_t *pmd);
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
@@ -1008,6 +1010,14 @@ static inline int pmd_clear_huge(pmd_t *pmd)
{
return 0;
}
+static inline int pud_free_pmd_page(pud_t *pud)
+{
+ return 0;
+}
+static inline int pmd_free_pte_page(pmd_t *pmd)
+{
+ return 0;
+}
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 1c27526c499e..cf13842a6dbd 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -134,6 +134,15 @@ struct drm_crtc_commit {
* &drm_pending_vblank_event pointer to clean up private events.
*/
struct drm_pending_vblank_event *event;
+
+ /**
+ * @abort_completion:
+ *
+ * A flag that's set after drm_atomic_helper_setup_commit() takes a second
+ * reference for the completion of &drm_crtc_state.event. It's used by
+ * the free code to remove the second reference if commit fails.
+ */
+ bool abort_completion;
};
struct __drm_planes_state {
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index 76e237bd989b..6914633037a5 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -77,5 +77,6 @@ void drm_kms_helper_hotplug_event(struct drm_device *dev);
void drm_kms_helper_poll_disable(struct drm_device *dev);
void drm_kms_helper_poll_enable(struct drm_device *dev);
+bool drm_kms_helper_is_poll_worker(void);
#endif
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index d32b688eb346..d23dcdd1bd95 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -56,6 +56,7 @@ struct drm_printer;
#define DRIVER_ATOMIC 0x10000
#define DRIVER_KMS_LEGACY_CONTEXT 0x20000
#define DRIVER_SYNCOBJ 0x40000
+#define DRIVER_PREFER_XBGR_30BPP 0x80000
/**
* struct drm_driver - DRM driver structure
diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h
index 172744a72eb7..7b1656427cbe 100644
--- a/include/dt-bindings/net/ti-dp83867.h
+++ b/include/dt-bindings/net/ti-dp83867.h
@@ -42,4 +42,18 @@
#define DP83867_RGMIIDCTL_3_75_NS 0xe
#define DP83867_RGMIIDCTL_4_00_NS 0xf
+/* IO_MUX_CFG - Clock output selection */
+#define DP83867_CLK_O_SEL_CHN_A_RCLK 0x0
+#define DP83867_CLK_O_SEL_CHN_B_RCLK 0x1
+#define DP83867_CLK_O_SEL_CHN_C_RCLK 0x2
+#define DP83867_CLK_O_SEL_CHN_D_RCLK 0x3
+#define DP83867_CLK_O_SEL_CHN_A_RCLK_DIV5 0x4
+#define DP83867_CLK_O_SEL_CHN_B_RCLK_DIV5 0x5
+#define DP83867_CLK_O_SEL_CHN_C_RCLK_DIV5 0x6
+#define DP83867_CLK_O_SEL_CHN_D_RCLK_DIV5 0x7
+#define DP83867_CLK_O_SEL_CHN_A_TCLK 0x8
+#define DP83867_CLK_O_SEL_CHN_B_TCLK 0x9
+#define DP83867_CLK_O_SEL_CHN_C_TCLK 0xA
+#define DP83867_CLK_O_SEL_CHN_D_TCLK 0xB
+#define DP83867_CLK_O_SEL_REF_CLK 0xC
#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cdbd142ca7f2..02924ae2527e 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -360,6 +360,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu);
bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 64e10746f282..968173ec2726 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -587,7 +587,7 @@ extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
const struct device *dev);
-void *acpi_get_match_data(const struct device *dev);
+const void *acpi_device_get_match_data(const struct device *dev);
extern bool acpi_driver_match_device(struct device *dev,
const struct device_driver *drv);
int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
@@ -766,7 +766,7 @@ static inline const struct acpi_device_id *acpi_match_device(
return NULL;
}
-static inline void *acpi_get_match_data(const struct device *dev)
+static inline const void *acpi_device_get_match_data(const struct device *dev)
{
return NULL;
}
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 4d356e168692..40373920ea58 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -113,10 +113,12 @@ extern void aarp_proto_init(void);
/* Inter module exports */
/* Give a device find its atif control structure */
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
static inline struct atalk_iface *atalk_find_dev(struct net_device *dev)
{
return dev->atalk_ptr;
}
+#endif
extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev);
extern struct net_device *atrtr_get_dev(struct atalk_addr *sa);
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 3ce61342fa31..b0a7f315bfbe 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,15 +136,21 @@ enum virtchnl_ops {
VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
VIRTCHNL_OP_REQUEST_QUEUES = 29,
+ VIRTCHNL_OP_ENABLE_CHANNELS = 30,
+ VIRTCHNL_OP_DISABLE_CHANNELS = 31,
+ VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
+ VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
};
-/* This macro is used to generate a compilation error if a structure
+/* These macros are used to generate compilation errors if a structure/union
* is not exactly the correct length. It gives a divide by zero error if the
- * structure is not of the correct size, otherwise it creates an enum that is
- * never used.
+ * structure/union is not of the correct size, otherwise it creates an enum
+ * that is never used.
*/
#define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \
{ virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_assert_enum_##X \
+ { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) }
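Reduced to a standalone toy, the divide-by-zero trick looks like this: when the size matches, the divisor is 1 and the enum is a harmless constant; when it does not, the divisor is 0 and the initializer stops being a valid constant expression, so the build fails. Macro and struct names below are invented for the sketch:

#include <stdint.h>

#define CHECK_LEN(n, T) \
	enum { check_len_##T = (n) / ((sizeof(struct T) == (n)) ? 1 : 0) }

struct pair {
	uint32_t a;
	uint32_t b;
};

CHECK_LEN(8, pair);	/* compiles: sizeof(struct pair) == 8 */
/* CHECK_LEN(12, pair) would fail to compile with a division by zero */

int main(void)
{
	return 0;
}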
/* Virtual channel message descriptor. This overlays the admin queue
* descriptor. All other data is passed in external buffers.
@@ -244,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000
#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000
#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000
+#define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000
#define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
VIRTCHNL_VF_OFFLOAD_VLAN | \
@@ -496,6 +503,81 @@ struct virtchnl_rss_hena {
VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
+/* VIRTCHNL_OP_ENABLE_CHANNELS
+ * VIRTCHNL_OP_DISABLE_CHANNELS
+ * VF sends these messages to enable or disable channels based on
+ * the user specified queue count and queue offset for each traffic class.
+ * This struct encompasses all the information that the PF needs from
+ * the VF to create a channel.
+ */
+struct virtchnl_channel_info {
+ u16 count; /* number of queues in a channel */
+ u16 offset; /* queues in a channel start from 'offset' */
+ u32 pad;
+ u64 max_tx_rate;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info);
+
+struct virtchnl_tc_info {
+ u32 num_tc;
+ u32 pad;
+ struct virtchnl_channel_info list[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info);
+
+/* VIRTCHNL_ADD_CLOUD_FILTER
+ * VIRTCHNL_DEL_CLOUD_FILTER
+ * VF sends these messages to add or delete a cloud filter based on the
+ * user specified match and action filters. These structures encompass
+ * all the information that the PF needs from the VF to add/delete a
+ * cloud filter.
+ */
+
+struct virtchnl_l4_spec {
+ u8 src_mac[ETH_ALEN];
+ u8 dst_mac[ETH_ALEN];
+ __be16 vlan_id;
+ __be16 pad; /* reserved for future use */
+ __be32 src_ip[4];
+ __be32 dst_ip[4];
+ __be16 src_port;
+ __be16 dst_port;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(52, virtchnl_l4_spec);
+
+union virtchnl_flow_spec {
+ struct virtchnl_l4_spec tcp_spec;
+ u8 buffer[128]; /* reserved for future use */
+};
+
+VIRTCHNL_CHECK_UNION_LEN(128, virtchnl_flow_spec);
+
+enum virtchnl_action {
+ /* action types */
+ VIRTCHNL_ACTION_DROP = 0,
+ VIRTCHNL_ACTION_TC_REDIRECT,
+};
+
+enum virtchnl_flow_type {
+ /* flow types */
+ VIRTCHNL_TCP_V4_FLOW = 0,
+ VIRTCHNL_TCP_V6_FLOW,
+};
+
+struct virtchnl_filter {
+ union virtchnl_flow_spec data;
+ union virtchnl_flow_spec mask;
+ enum virtchnl_flow_type flow_type;
+ enum virtchnl_action action;
+ u32 action_meta;
+ __u8 field_flags;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
+
/* VIRTCHNL_OP_EVENT
* PF sends this message to inform the VF driver of events that may affect it.
* No direct response is expected from the VF, though it may generate other
@@ -711,6 +793,25 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_REQUEST_QUEUES:
valid_len = sizeof(struct virtchnl_vf_res_request);
break;
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ valid_len = sizeof(struct virtchnl_tc_info);
+ if (msglen >= valid_len) {
+ struct virtchnl_tc_info *vti =
+ (struct virtchnl_tc_info *)msg;
+ valid_len += vti->num_tc *
+ sizeof(struct virtchnl_channel_info);
+ if (vti->num_tc == 0)
+ err_msg_format = true;
+ }
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+ valid_len = sizeof(struct virtchnl_filter);
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+ valid_len = sizeof(struct virtchnl_filter);
+ break;
/* These are always errors coming from the VF. */
case VIRTCHNL_OP_EVENT:
case VIRTCHNL_OP_UNKNOWN:
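The ENABLE_CHANNELS case is the standard two-step check for variable-length messages: validate the fixed header first, and only then derive the full expected length from the element count the header carries. A simplified userspace sketch of that pattern, with invented structure names:

#include <stdio.h>
#include <stdlib.h>

struct elem { unsigned short count, offset; };
struct msg { unsigned int num; struct elem list[]; };

static int validate(const struct msg *m, size_t msglen)
{
	size_t valid_len = sizeof(*m);

	if (msglen < valid_len)
		return -1;		/* truncated header */
	valid_len += m->num * sizeof(struct elem);
	if (m->num == 0 || msglen < valid_len)
		return -1;		/* bogus element count */
	return 0;
}

int main(void)
{
	size_t len = sizeof(struct msg) + 2 * sizeof(struct elem);
	struct msg *m = calloc(1, len);

	if (!m)
		return 1;
	m->num = 2;
	printf("%d\n", validate(m, len));	/* 0: length checks out */
	free(m);
	return 0;
}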
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d0eb659fa733..ce547a25e8ae 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -511,6 +511,7 @@ void zero_fill_bio(struct bio *bio);
extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
extern unsigned int bvec_nr_vecs(unsigned short idx);
+extern const char *bio_devname(struct bio *bio, char *buffer);
#define bio_set_dev(bio, bdev) \
do { \
@@ -529,9 +530,6 @@ do { \
#define bio_dev(bio) \
disk_devt((bio)->bi_disk)
-#define bio_devname(bio, buf) \
- __bdevname(bio_dev(bio), (buf))
-
#ifdef CONFIG_BLK_CGROUP
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
void bio_disassociate_task(struct bio *bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4f3df807cf8f..ed63f3b69c12 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -49,7 +49,7 @@ struct blk_stat_callback;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
-/* Must be consisitent with blk_mq_poll_stats_bkt() */
+/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
/*
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a7f16e0f8d68..8a4566691c8f 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled && sk) { \
+ if (cgroup_bpf_enabled) { \
__ret = __cgroup_bpf_run_filter_sk(sk, \
BPF_CGROUP_INET_SOCK_CREATE); \
} \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 66df387106de..819229c80eca 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -21,6 +21,7 @@ struct bpf_verifier_env;
struct perf_event;
struct bpf_prog;
struct bpf_map;
+struct sock;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 19b8349a3809..5e2e8a49fb21 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -13,6 +13,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
#endif
#ifdef CONFIG_BPF_EVENTS
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 9f242b876fde..f8e76d01a5ad 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -755,13 +755,13 @@ struct sock_cgroup_data {
* updaters and return part of the previous pointer as the prioidx or
* classid. Such races are short-lived and the result isn't critical.
*/
-static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd)
+static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
{
/* fallback to 1 which is always the ID of the root cgroup */
return (skcd->is_data & 1) ? skcd->prioidx : 1;
}
-static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
+static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
{
/* fallback to 0 which is the unconfigured default classid */
return (skcd->is_data & 1) ? skcd->classid : 0;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 8a9643857c4a..16c3027074a2 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -17,6 +17,7 @@
#include <linux/if.h>
#include <linux/fs.h>
#include <linux/aio_abi.h> /* for aio_context_t */
+#include <linux/uaccess.h>
#include <linux/unistd.h>
#include <asm/compat.h>
@@ -229,13 +230,13 @@ typedef struct compat_siginfo {
short int _addr_lsb; /* Valid LSB of the reported address. */
/* used when si_code=SEGV_BNDERR */
struct {
- short _dummy_bnd;
+ compat_uptr_t _dummy_bnd;
compat_uptr_t _lower;
compat_uptr_t _upper;
} _addr_bnd;
/* used when si_code=SEGV_PKUERR */
struct {
- short _dummy_pkey;
+ compat_uptr_t _dummy_pkey;
u32 _pkey;
} _addr_pkey;
};
@@ -550,8 +551,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat);
-extern int put_compat_sigset(compat_sigset_t __user *compat,
- const sigset_t *set, unsigned int size);
+
+/*
+ * Defined inline such that size can be a compile-time constant, which avoids
+ * CONFIG_HARDENED_USERCOPY complaining about copies from task_struct
+ */
+static inline int
+put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
+ unsigned int size)
+{
+ /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */
+#ifdef __BIG_ENDIAN
+ compat_sigset_t v;
+ switch (_NSIG_WORDS) {
+ case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
+ case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2];
+ case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1];
+ case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0];
+ }
+ return copy_to_user(compat, &v, size) ? -EFAULT : 0;
+#else
+ return copy_to_user(compat, set, size) ? -EFAULT : 0;
+#endif
+}
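What the __BIG_ENDIAN branch does per word, in isolation: each 64-bit sigset word is stored as two 32-bit compat words, low half first, so 32-bit userspace sees the same layout on either endianness. A small standalone demonstration of the split:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sig = 0x1122334455667788ULL;
	uint32_t v[2];

	v[1] = (uint32_t)(sig >> 32);	/* high half goes second */
	v[0] = (uint32_t)sig;		/* low half goes first */

	printf("%08x %08x\n", v[0], v[1]);	/* 55667788 11223344 */
	return 0;
}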
asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d02a4df3f473..d3f264a5b04d 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -27,3 +27,8 @@
#if __has_feature(address_sanitizer)
#define __SANITIZE_ADDRESS__
#endif
+
+/* Clang doesn't have a way to turn retpolines off per-function, yet. */
+#ifdef __noretpoline
+#undef __noretpoline
+#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 631354acfa72..e2c7f4369eff 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -93,6 +93,10 @@
#define __weak __attribute__((weak))
#define __alias(symbol) __attribute__((alias(#symbol)))
+#ifdef RETPOLINE
+#define __noretpoline __attribute__((indirect_branch("keep")))
+#endif
+
/*
* it doesn't make sense on ARM (currently the only user of __naked)
* to trace naked functions because then mcount is called without
@@ -167,8 +171,6 @@
#if GCC_VERSION >= 40100
# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-
-#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
#endif
#if GCC_VERSION >= 40300
@@ -196,6 +198,11 @@
#endif /* __CHECKER__ */
#endif /* GCC_VERSION >= 40300 */
+#if GCC_VERSION >= 40400
+#define __optimize(level) __attribute__((__optimize__(level)))
+#define __nostackprotector __optimize("no-stack-protector")
+#endif /* GCC_VERSION >= 40400 */
+
#if GCC_VERSION >= 40500
#ifndef __CHECKER__
@@ -205,6 +212,15 @@
#endif
/*
+ * Calling noreturn functions, __builtin_unreachable() and __builtin_trap()
+ * confuses the stack allocation in gcc, leading to overly large stack
+ * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365
+ *
+ * Adding an empty inline assembly before it works around the problem
+ */
+#define barrier_before_unreachable() asm volatile("")
+
+/*
* Mark a position in code as unreachable. This can be used to
* suppress control flow warnings after asm blocks that transfer
* control elsewhere.
@@ -214,7 +230,11 @@
* unreleased. Really, we need to have autoconf for the kernel.
*/
#define unreachable() \
- do { annotate_unreachable(); __builtin_unreachable(); } while (0)
+ do { \
+ annotate_unreachable(); \
+ barrier_before_unreachable(); \
+ __builtin_unreachable(); \
+ } while (0)
/* Mark a function definition as prohibited from being cloned. */
#define __noclone __attribute__((__noclone__, __optimize__("no-tracer")))
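The pattern being wired up here, as a self-contained userspace sketch: the empty asm is an optimization barrier that keeps gcc from merging stack allocations across the "impossible" path (gcc PR82365). This shows the shape of the workaround only, not a measurable reproduction of the bug:

#include <stdio.h>
#include <stdlib.h>

#define barrier_before_unreachable() asm volatile("")

static void die(const char *msg)
{
	fprintf(stderr, "fatal: %s\n", msg);
	barrier_before_unreachable();
	abort();	/* noreturn, like panic() in the kernel BUG() path */
}

int main(void)
{
	if (0)
		die("never happens");
	puts("ok");
	return 0;
}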
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c2cc57a2f508..ab4711c63601 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -86,6 +86,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
# define barrier_data(ptr) barrier()
#endif
+/* workaround for GCC PR82365 if needed */
+#ifndef barrier_before_unreachable
+# define barrier_before_unreachable() do { } while (0)
+#endif
+
/* Unreachable code */
#ifdef CONFIG_STACK_VALIDATION
/*
@@ -277,6 +282,10 @@ unsigned long read_word_at_a_time(const void *addr)
#endif /* __ASSEMBLY__ */
+#ifndef __optimize
+# define __optimize(level)
+#endif
+
/* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 871f9e21810c..0b3fc229086c 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -225,7 +225,7 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev,
}
#endif
-#ifdef CONFIG_ARCH_HAS_CPU_RELAX
+#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX)
void cpuidle_poll_state_init(struct cpuidle_driver *drv);
#else
static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {}
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d4a2a7dcd72d..bf53d893ad02 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -170,6 +170,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#define for_each_cpu_not(cpu, mask) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_wrap(cpu, mask, start) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
#define for_each_cpu_and(cpu, mask, and) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
#else
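The UP stub follows the convention already used in this header: iterate exactly once over CPU 0 and consume the extra arguments with (void) casts, so they are still evaluated but produce no unused-variable warnings. A standalone illustration of the macro's behaviour:

#include <stdio.h>

#define for_each_cpu_wrap(cpu, mask, start) \
	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)(mask), (void)(start))

int main(void)
{
	int cpu, mask = 0, start = 3;

	for_each_cpu_wrap(cpu, mask, start)
		printf("cpu %d\n", cpu);	/* body runs once, for CPU 0 */
	return 0;
}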
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 34fe8463d10e..eb9eab4ecd6d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -578,7 +578,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
/*
* This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please
- * don't use this is new code.
+ * don't use this in new code.
*/
#ifndef arch_dma_supported
#define arch_dma_supported(dev, mask) (1)
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 2ec41a7eb54f..ebe41811ed34 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -371,6 +371,11 @@ struct ethtool_ops {
u8 *hfunc);
int (*set_rxfh)(struct net_device *, const u32 *indir,
const u8 *key, const u8 hfunc);
+ int (*get_rxfh_context)(struct net_device *, u32 *indir, u8 *key,
+ u8 *hfunc, u32 rss_context);
+ int (*set_rxfh_context)(struct net_device *, const u32 *indir,
+ const u8 *key, const u8 hfunc,
+ u32 *rss_context, bool delete);
void (*get_channels)(struct net_device *, struct ethtool_channels *);
int (*set_channels)(struct net_device *, struct ethtool_channels *);
int (*get_dump_flag)(struct net_device *, struct ethtool_dump *);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 276932d75975..109d05ccea9a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -20,7 +20,6 @@
#include <linux/set_memory.h>
#include <linux/kallsyms.h>
-#include <net/xdp.h>
#include <net/sch_generic.h>
#include <uapi/linux/filter.h>
@@ -30,6 +29,7 @@ struct sk_buff;
struct sock;
struct seccomp_data;
struct bpf_prog_aux;
+struct xdp_rxq_info;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -507,6 +507,22 @@ struct xdp_buff {
struct xdp_rxq_info *rxq;
};
+struct sk_msg_buff {
+ void *data;
+ void *data_end;
+ __u32 apply_bytes;
+ __u32 cork_bytes;
+ int sg_copybreak;
+ int sg_start;
+ int sg_curr;
+ int sg_end;
+ struct scatterlist sg_data[MAX_SKB_FRAGS];
+ bool sg_copy[MAX_SKB_FRAGS];
+ __u32 key;
+ __u32 flags;
+ struct bpf_map *map;
+};
+
/* Compute the linear packet data range [data, data_end) which
* will be accessed by various program types (cls_bpf, act_bpf,
* lwt, ...). Subsystems allowing direct data access must (!)
@@ -771,6 +787,7 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
void bpf_warn_invalid_xdp_action(u32 act);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
+struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2a815560fda0..c6baf767619e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1317,6 +1317,7 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */
#define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */
#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */
+#define SB_I_MULTIROOT 0x00000008 /* Multiple roots to the dentry tree */
/* sb->s_iflags to limit user namespace mounts */
#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
@@ -3198,7 +3199,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma)
if (!vma_is_dax(vma))
return false;
inode = file_inode(vma->vm_file);
- if (inode->i_mode == S_IFCHR)
+ if (S_ISCHR(inode->i_mode))
return false; /* device-dax */
return true;
}
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 4fa1a489efe4..4fe8f289b3f6 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -73,8 +73,8 @@ struct fwnode_operations {
struct fwnode_handle *(*get)(struct fwnode_handle *fwnode);
void (*put)(struct fwnode_handle *fwnode);
bool (*device_is_available)(const struct fwnode_handle *fwnode);
- void *(*device_get_match_data)(const struct fwnode_handle *fwnode,
- const struct device *dev);
+ const void *(*device_get_match_data)(const struct fwnode_handle *fwnode,
+ const struct device *dev);
bool (*property_present)(const struct fwnode_handle *fwnode,
const char *propname);
int (*property_read_int_array)(const struct fwnode_handle *fwnode,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 5e3531027b51..c826b0b5232a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -198,6 +198,7 @@ struct gendisk {
void *private_data;
int flags;
+ struct rw_semaphore lookup_sem;
struct kobject *slave_dir;
struct timer_rand_state *random;
@@ -600,8 +601,9 @@ extern void delete_partition(struct gendisk *, int);
extern void printk_all_partitions(void);
extern struct gendisk *__alloc_disk_node(int minors, int node_id);
-extern struct kobject *get_disk(struct gendisk *disk);
+extern struct kobject *get_disk_and_module(struct gendisk *disk);
extern void put_disk(struct gendisk *disk);
+extern void put_disk_and_module(struct gendisk *disk);
extern void blk_register_region(dev_t devt, unsigned long range,
struct module *module,
struct kobject *(*probe)(dev_t, int *, void *),
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ee6657a0ed69..8fe7e4306816 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -8,6 +8,7 @@
* Copyright (c) 2006, Michael Wu <[email protected]>
* Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright (c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -2111,7 +2112,7 @@ enum ieee80211_key_len {
#define FILS_ERP_MAX_REALM_LEN 253
#define FILS_ERP_MAX_RRK_LEN 64
-#define PMK_MAX_LEN 48
+#define PMK_MAX_LEN 64
/* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
enum ieee80211_pub_actioncode {
@@ -2502,6 +2503,17 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr)
}
/**
+ * ieee80211_get_tid - get qos TID
+ * @hdr: the frame
+ */
+static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
+{
+ u8 *qc = ieee80211_get_qos_ctl(hdr);
+
+ return qc[0] & IEEE80211_QOS_CTL_TID_MASK;
+}
+
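A sketch of what the new helper computes: the TID is the low four bits of the first QoS Control byte (IEEE80211_QOS_CTL_TID_MASK is the low-nibble mask). A userspace illustration with a made-up QoS Control field:

#include <stdint.h>
#include <stdio.h>

#define QOS_CTL_TID_MASK 0x0f	/* stand-in for IEEE80211_QOS_CTL_TID_MASK */

int main(void)
{
	uint8_t qos_ctl[2] = { 0x25, 0x00 };	/* example QoS Control bytes */

	printf("tid = %u\n", qos_ctl[0] & QOS_CTL_TID_MASK);	/* tid = 5 */
	return 0;
}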
+/**
* ieee80211_get_SA - get pointer to SA
* @hdr: the frame
*
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index c5b0a75a7812..fd00170b494f 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -25,6 +25,7 @@ struct ptr_ring *tun_get_tx_ring(struct file *file);
bool tun_is_xdp_buff(void *ptr);
void *tun_xdp_to_ptr(void *ptr);
void *tun_ptr_to_xdp(void *ptr);
+void tun_ptr_free(void *ptr);
#else
#include <linux/err.h>
#include <linux/errno.h>
@@ -50,5 +51,8 @@ static inline void *tun_ptr_to_xdp(void *ptr)
{
return NULL;
}
+static inline void tun_ptr_free(void *ptr)
+{
+}
#endif /* CONFIG_TUN */
#endif /* __IF_TUN_H */
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 5e6a2d4dc366..c4a1cff9c768 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -300,30 +300,34 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features,
}
/**
- * __vlan_insert_tag - regular VLAN tag inserting
+ * __vlan_insert_inner_tag - inner VLAN tag inserting
* @skb: skbuff to tag
* @vlan_proto: VLAN encapsulation protocol
* @vlan_tci: VLAN TCI to insert
+ * @mac_len: MAC header length including outer vlan headers
*
- * Inserts the VLAN tag into @skb as part of the payload
+ * Inserts the VLAN tag into @skb as part of the payload at offset mac_len
* Returns error if skb_cow_head failes.
*
* Does not change skb->protocol so this function can be used during receive.
*/
-static inline int __vlan_insert_tag(struct sk_buff *skb,
- __be16 vlan_proto, u16 vlan_tci)
+static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
+ __be16 vlan_proto, u16 vlan_tci,
+ unsigned int mac_len)
{
struct vlan_ethhdr *veth;
if (skb_cow_head(skb, VLAN_HLEN) < 0)
return -ENOMEM;
- veth = skb_push(skb, VLAN_HLEN);
+ skb_push(skb, VLAN_HLEN);
- /* Move the mac addresses to the beginning of the new header. */
- memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
+ /* Move the mac header sans proto to the beginning of the new header. */
+ memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN);
skb->mac_header -= VLAN_HLEN;
+ veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN);
+
/* first, the ethernet type */
veth->h_vlan_proto = vlan_proto;
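The memmove above shifts only the mac header minus the 2-byte type field, opening a 4-byte gap directly in front of the type where the new VLAN tag lands. A userspace model of the byte shuffling, with toy buffer contents rather than real frame data:

#include <stdio.h>
#include <string.h>

#define VLAN_HLEN 4
#define ETH_TLEN  2	/* size of the EtherType field */

int main(void)
{
	/* 4 bytes of headroom, then a toy 14-byte mac header:
	 * 12 address bytes "AABBCCDDEEFF" and the 2-byte type "TT". */
	char buf[] = "....AABBCCDDEEFFTT";
	unsigned int mac_len = 14;
	char *data = buf;	/* skb->data after skb_push(skb, VLAN_HLEN) */

	/* shift everything except the type field into the headroom ... */
	memmove(data, data + VLAN_HLEN, mac_len - ETH_TLEN);
	/* ... and write the 4-byte VLAN tag into the gap before the type */
	memcpy(data + mac_len - ETH_TLEN, "VTCI", VLAN_HLEN);

	printf("%s\n", data);	/* AABBCCDDEEFFVTCITT */
	return 0;
}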
@@ -334,12 +338,30 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
}
/**
- * vlan_insert_tag - regular VLAN tag inserting
+ * __vlan_insert_tag - regular VLAN tag inserting
* @skb: skbuff to tag
* @vlan_proto: VLAN encapsulation protocol
* @vlan_tci: VLAN TCI to insert
*
* Inserts the VLAN tag into @skb as part of the payload
+ * Returns error if skb_cow_head fails.
+ *
+ * Does not change skb->protocol so this function can be used during receive.
+ */
+static inline int __vlan_insert_tag(struct sk_buff *skb,
+ __be16 vlan_proto, u16 vlan_tci)
+{
+ return __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN);
+}
+
+/**
+ * vlan_insert_inner_tag - inner VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_proto: VLAN encapsulation protocol
+ * @vlan_tci: VLAN TCI to insert
+ * @mac_len: MAC header length including outer vlan headers
+ *
+ * Inserts the VLAN tag into @skb as part of the payload at offset mac_len
* Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
*
* Following the skb_unshare() example, in case of error, the calling function
@@ -347,12 +369,14 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
*
* Does not change skb->protocol so this function can be used during receive.
*/
-static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
- __be16 vlan_proto, u16 vlan_tci)
+static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb,
+ __be16 vlan_proto,
+ u16 vlan_tci,
+ unsigned int mac_len)
{
int err;
- err = __vlan_insert_tag(skb, vlan_proto, vlan_tci);
+ err = __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, mac_len);
if (err) {
dev_kfree_skb_any(skb);
return NULL;
@@ -361,6 +385,26 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
}
/**
+ * vlan_insert_tag - regular VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_proto: VLAN encapsulation protocol
+ * @vlan_tci: VLAN TCI to insert
+ *
+ * Inserts the VLAN tag into @skb as part of the payload
+ * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ *
+ * Following the skb_unshare() example, in case of error, the calling function
+ * doesn't have to worry about freeing the original skb.
+ *
+ * Does not change skb->protocol so this function can be used during receive.
+ */
+static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
+ __be16 vlan_proto, u16 vlan_tci)
+{
+ return vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN);
+}
+
+/**
* vlan_insert_tag_set_proto - regular VLAN tag inserting
* @skb: skbuff to tag
* @vlan_proto: VLAN encapsulation protocol
diff --git a/include/linux/init.h b/include/linux/init.h
index 506a98151131..bc27cf03c41e 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -6,10 +6,10 @@
#include <linux/types.h>
/* Built-in __init functions needn't be compiled with retpoline */
-#if defined(RETPOLINE) && !defined(MODULE)
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#if defined(__noretpoline) && !defined(MODULE)
+#define __noinitretpoline __noretpoline
#else
-#define __noretpoline
+#define __noinitretpoline
#endif
/* These macros are used to mark some functions or
@@ -47,7 +47,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(.init.text) __cold __latent_entropy __noretpoline
+#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline
#define __initdata __section(.init.data)
#define __initconst __section(.init.rodata)
#define __exitdata __section(.exit.data)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index c00c4c33e432..b26eccc78fb1 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -503,6 +503,7 @@
#define ICH_HCR_EN (1 << 0)
#define ICH_HCR_UIE (1 << 1)
+#define ICH_HCR_NPIE (1 << 3)
#define ICH_HCR_TC (1 << 10)
#define ICH_HCR_TALL0 (1 << 11)
#define ICH_HCR_TALL1 (1 << 12)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index d3453ee072fc..68d8b1f73682 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -84,6 +84,7 @@
#define GICH_HCR_EN (1 << 0)
#define GICH_HCR_UIE (1 << 1)
+#define GICH_HCR_NPIE (1 << 3)
#define GICH_LR_VIRTUALID (0x3ff << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT (10)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index b6a29c126cc4..2168cc6b8b30 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[];
extern struct jump_entry __stop___jump_table[];
extern void jump_label_init(void);
+extern void jump_label_invalidate_init(void);
extern void jump_label_lock(void);
extern void jump_label_unlock(void);
extern void arch_jump_label_transform(struct jump_entry *entry,
@@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void)
static_key_initialized = true;
}
+static inline void jump_label_invalidate_init(void) {}
+
static __always_inline bool static_key_false(struct static_key *key)
{
if (unlikely(static_key_count(key) > 0))
diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h
index fec5076eda91..dcde9471897d 100644
--- a/include/linux/kconfig.h
+++ b/include/linux/kconfig.h
@@ -4,6 +4,12 @@
#include <generated/autoconf.h>
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#else
+#define __LITTLE_ENDIAN 1234
+#endif
+
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
@@ -64,4 +70,7 @@
*/
#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
+/* Make sure we always have all types and struct attributes defined. */
+#include <linux/compiler_types.h>
+
#endif /* __LINUX_KCONFIG_H */
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 7ff25a808fef..80db19d3a505 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -10,6 +10,7 @@ enum kcore_type {
KCORE_VMALLOC,
KCORE_RAM,
KCORE_VMEMMAP,
+ KCORE_USER,
KCORE_OTHER,
};
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ce51455e2adf..3fd291503576 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option);
extern char *next_arg(char *args, char **param, char **val);
extern int core_kernel_text(unsigned long addr);
+extern int init_kernel_text(unsigned long addr);
extern int core_kernel_data(unsigned long addr);
extern int __kernel_text_address(unsigned long addr);
extern int kernel_text_address(unsigned long addr);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ac0062b74aed..6930c63126c7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1105,7 +1105,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
{
}
#endif
-void kvm_arch_irq_routing_update(struct kvm *kvm);
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
@@ -1114,6 +1113,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
#endif /* CONFIG_HAVE_KVM_EVENTFD */
+void kvm_arch_irq_routing_update(struct kvm *kvm);
+
static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
/*
@@ -1272,4 +1273,7 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
}
#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+
#endif
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8be5077efb5f..f92ea7783652 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -187,7 +187,6 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
unsigned long *end_pfn);
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
unsigned long *out_end_pfn, int *out_nid);
-unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
/**
* for_each_mem_pfn_range - early memory pfn range iterator
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 882046863581..c46016bb25eb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -523,9 +523,11 @@ static inline void __mod_memcg_state(struct mem_cgroup *memcg,
static inline void mod_memcg_state(struct mem_cgroup *memcg,
int idx, int val)
{
- preempt_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
__mod_memcg_state(memcg, idx, val);
- preempt_enable();
+ local_irq_restore(flags);
}
/**
@@ -606,9 +608,11 @@ static inline void __mod_lruvec_state(struct lruvec *lruvec,
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
- preempt_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
__mod_lruvec_state(lruvec, idx, val);
- preempt_enable();
+ local_irq_restore(flags);
}
static inline void __mod_lruvec_page_state(struct page *page,
@@ -630,9 +634,11 @@ static inline void __mod_lruvec_page_state(struct page *page,
static inline void mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
- preempt_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
__mod_lruvec_page_state(page, idx, val);
- preempt_enable();
+ local_irq_restore(flags);
}
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
@@ -659,9 +665,11 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
static inline void count_memcg_events(struct mem_cgroup *memcg,
int idx, unsigned long count)
{
- preempt_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
__count_memcg_events(memcg, idx, count);
- preempt_enable();
+ local_irq_restore(flags);
}
/* idx can be of type enum memcg_event_item or vm_event_item */
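/*
 * [Editor's note, illustrative only] The switch from preempt_disable() to
 * local_irq_save() above matters because the underlying __mod_*_state()
 * helpers perform non-atomic read-modify-write updates that may also run
 * from interrupt context; disabling preemption alone cannot exclude an
 * interrupt on the same CPU. A sketch of the pattern being protected:
 */
static inline void example_mod_counter(long *counter, int val)
{
	unsigned long flags;

	local_irq_save(flags);	/* an IRQ here could otherwise interleave */
	*counter += val;	/* non-atomic RMW, now safe on this CPU */
	local_irq_restore(flags);
}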
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
new file mode 100644
index 000000000000..70e7e5673ce9
--- /dev/null
+++ b/include/linux/mlx5/accel.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_H__
+#define __MLX5_ACCEL_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_accel_esp_aes_gcm_keymat_iv_algo {
+ MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ,
+};
+
+enum mlx5_accel_esp_flags {
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0,
+ MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1,
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2,
+};
+
+enum mlx5_accel_esp_action {
+ MLX5_ACCEL_ESP_ACTION_DECRYPT,
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT,
+};
+
+enum mlx5_accel_esp_keymats {
+ MLX5_ACCEL_ESP_KEYMAT_AES_NONE,
+ MLX5_ACCEL_ESP_KEYMAT_AES_GCM,
+};
+
+enum mlx5_accel_esp_replay {
+ MLX5_ACCEL_ESP_REPLAY_NONE,
+ MLX5_ACCEL_ESP_REPLAY_BMP,
+};
+
+struct aes_gcm_keymat {
+ u64 seq_iv;
+ enum mlx5_accel_esp_aes_gcm_keymat_iv_algo iv_algo;
+
+ u32 salt;
+ u32 icv_len;
+
+ u32 key_len;
+ u32 aes_key[256 / 32];
+};
+
+struct mlx5_accel_esp_xfrm_attrs {
+ enum mlx5_accel_esp_action action;
+ u32 esn;
+ u32 spi;
+ u32 seq;
+ u32 tfc_pad;
+ u32 flags;
+ u32 sa_handle;
+ enum mlx5_accel_esp_replay replay_type;
+ union {
+ struct {
+ u32 size;
+
+ } bmp;
+ } replay;
+ enum mlx5_accel_esp_keymats keymat_type;
+ union {
+ struct aes_gcm_keymat aes_gcm;
+ } keymat;
+};
+
+struct mlx5_accel_esp_xfrm {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+};
+
+enum {
+ MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0,
+};
+
+enum mlx5_accel_ipsec_cap {
+ MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0,
+ MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1,
+ MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2,
+ MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3,
+ MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4,
+ MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5,
+ MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 6,
+ MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7,
+};
+
+#ifdef CONFIG_MLX5_ACCEL
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags);
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
+
+#else
+
+static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {}
+static inline int
+mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; }
+
+#endif
+#endif
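/*
 * [Editor's sketch, hypothetical caller] Typical use of the xfrm API
 * declared above: create, use, then destroy. Note the !CONFIG_MLX5_ACCEL
 * stub returns ERR_PTR(-EOPNOTSUPP), so callers must check with IS_ERR()
 * rather than for NULL. The flag value here is chosen for illustration.
 */
static int example_setup_esp(struct mlx5_core_dev *mdev,
			     const struct mlx5_accel_esp_xfrm_attrs *attrs)
{
	struct mlx5_accel_esp_xfrm *xfrm;

	xfrm = mlx5_accel_esp_create_xfrm(mdev, attrs,
					  MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
	if (IS_ERR(xfrm))
		return PTR_ERR(xfrm);

	/* ... attach the xfrm to a flow rule, use it, then tear it down ... */
	mlx5_accel_esp_destroy_xfrm(xfrm);
	return 0;
}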
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 48c181a2acc9..445ad194e0fe 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -60,6 +60,7 @@ struct mlx5_core_cq {
} tasklet_ctx;
int reset_notify_added;
struct list_head reset_notify;
+ struct mlx5_eq *eq;
};
@@ -171,8 +172,17 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
}
-int mlx5_init_cq_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
+static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
+{
+ refcount_inc(&cq->refcount);
+}
+
+static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
+{
+ if (refcount_dec_and_test(&cq->refcount))
+ complete(&cq->free);
+}
+
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen);
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
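/*
 * [Editor's note, illustrative only] mlx5_cq_hold()/mlx5_cq_put() above
 * replace the old per-device CQ table helpers with an explicit refcount:
 * the final put completes cq->free, which the destroy path waits on. A
 * hypothetical handler pinning a CQ while it is being processed:
 */
static void example_handle_cq_event(struct mlx5_core_cq *cq)
{
	mlx5_cq_hold(cq);	/* keep the CQ alive across the handler */
	/* ... process the completion or event ... */
	mlx5_cq_put(cq);	/* last put completes &cq->free */
}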
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index e5258ee4e38b..4b5939c78cdd 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1013,6 +1013,7 @@ enum mlx5_cap_type {
MLX5_CAP_RESERVED,
MLX5_CAP_VECTOR_CALC,
MLX5_CAP_QOS,
+ MLX5_CAP_DEBUG,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1140,6 +1141,9 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_QOS(mdev, cap)\
MLX5_GET(qos_cap, mdev->caps.hca_cur[MLX5_CAP_QOS], cap)
+#define MLX5_CAP_DEBUG(mdev, cap)\
+ MLX5_GET(debug_cap, mdev->caps.hca_cur[MLX5_CAP_DEBUG], cap)
+
#define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6ed79a8a8318..cded85ab6fe4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -345,13 +345,6 @@ struct mlx5_buf_list {
dma_addr_t map;
};
-struct mlx5_buf {
- struct mlx5_buf_list direct;
- int npages;
- int size;
- u8 page_shift;
-};
-
struct mlx5_frag_buf {
struct mlx5_buf_list *frags;
int npages;
@@ -359,6 +352,15 @@ struct mlx5_frag_buf {
u8 page_shift;
};
+struct mlx5_frag_buf_ctrl {
+ struct mlx5_frag_buf frag_buf;
+ u32 sz_m1;
+ u32 frag_sz_m1;
+ u8 log_sz;
+ u8 log_stride;
+ u8 log_frag_strides;
+};
+
struct mlx5_eq_tasklet {
struct list_head list;
struct list_head process_list;
@@ -375,11 +377,18 @@ struct mlx5_eq_pagefault {
mempool_t *pool;
};
+struct mlx5_cq_table {
+ /* protect radix tree */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
+
struct mlx5_eq {
struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
__be32 __iomem *doorbell;
u32 cons_index;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
int size;
unsigned int irqn;
u8 eqn;
@@ -453,8 +462,8 @@ struct mlx5_core_srq {
struct mlx5_core_rsc_common common; /* must be first */
u32 srqn;
int max;
- int max_gs;
- int max_avail_gather;
+ size_t max_gs;
+ size_t max_avail_gather;
int wqe_shift;
void (*event) (struct mlx5_core_srq *, enum mlx5_event);
@@ -526,13 +535,6 @@ struct mlx5_core_health {
struct delayed_work recover_work;
};
-struct mlx5_cq_table {
- /* protect radix tree
- */
- spinlock_t lock;
- struct radix_tree_root tree;
-};
-
struct mlx5_qp_table {
/* protect radix tree
*/
@@ -654,10 +656,6 @@ struct mlx5_priv {
struct dentry *cmdif_debugfs;
/* end: qp staff */
- /* start: cq staff */
- struct mlx5_cq_table cq_table;
- /* end: cq staff */
-
/* start: mkey staff */
struct mlx5_mkey_table mkey_table;
/* end: mkey staff */
@@ -936,9 +934,9 @@ struct mlx5_hca_vport_context {
bool grh_required;
};
-static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset)
{
- return buf->direct.buf + offset;
+ return buf->frags->buf + offset;
}
#define STRUCT_FIELD(header, field) \
@@ -977,6 +975,25 @@ static inline u32 mlx5_base_mkey(const u32 key)
return key & 0xffffff00u;
}
+static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
+ void *cqc)
+{
+ fbc->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
+ fbc->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
+ fbc->sz_m1 = (1 << fbc->log_sz) - 1;
+ fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
+ fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1;
+}
+
+static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
+ u32 ix)
+{
+ unsigned int frag = (ix >> fbc->log_frag_strides);
+
+ return fbc->frag_buf.frags[frag].buf +
+ ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+}
+
int mlx5_cmd_init(struct mlx5_core_dev *dev);
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
@@ -1002,9 +1019,10 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_buf *buf, int node);
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+ struct mlx5_frag_buf *buf, int node);
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf);
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node);
void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
@@ -1049,22 +1067,12 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
void mlx5_register_debugfs(void);
void mlx5_unregister_debugfs(void);
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn);
int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1076,14 +1084,6 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
int size_in, void *data_out, int size_out,
u16 reg_num, int arg, int write);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
int node);
@@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+ (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+ mlx5_core_is_pf(mdev))
+
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
new file mode 100644
index 000000000000..d3c9db492b30
--- /dev/null
+++ b/include/linux/mlx5/eswitch.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+ SRIOV_NONE,
+ SRIOV_LEGACY,
+ SRIOV_OFFLOADS
+};
+
+enum {
+ REP_ETH,
+ REP_IB,
+ NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+ int (*load)(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep);
+ void (*unload)(struct mlx5_eswitch_rep *rep);
+ void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+ void *priv;
+ bool valid;
+};
+
+struct mlx5_eswitch_rep {
+ struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+ u16 vport;
+ u8 hw_id[ETH_ALEN];
+ u16 vlan;
+ u32 vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ struct mlx5_eswitch_rep_if *rep_if,
+ u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+ int vport, u32 sqn);
+#endif
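/*
 * [Editor's sketch, hypothetical consumer] Registering a representor
 * interface with the eswitch API above. The load/unload callbacks run
 * when the vport representor is brought up or torn down.
 */
static int example_rep_load(struct mlx5_core_dev *dev,
			    struct mlx5_eswitch_rep *rep)
{
	/* allocate per-rep state (e.g. a netdev) and stash it in rep_if priv */
	return 0;
}

static void example_rep_unload(struct mlx5_eswitch_rep *rep)
{
	/* free whatever example_rep_load() set up */
}

static void example_register(struct mlx5_eswitch *esw, int vport)
{
	struct mlx5_eswitch_rep_if rep_if = {
		.load   = example_rep_load,
		.unload = example_rep_unload,
	};

	mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
}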
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index a0b48afcb422..47aecc4fa8c2 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -40,6 +40,8 @@
enum {
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16,
+ MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17,
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT = 1 << 18,
};
enum {
@@ -69,6 +71,7 @@ enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_ESW_INGRESS,
MLX5_FLOW_NAMESPACE_SNIFFER_RX,
MLX5_FLOW_NAMESPACE_SNIFFER_TX,
+ MLX5_FLOW_NAMESPACE_EGRESS,
};
struct mlx5_flow_table;
@@ -139,11 +142,20 @@ struct mlx5_flow_group *
mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in);
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
+struct mlx5_fs_vlan {
+ u16 ethtype;
+ u16 vid;
+ u8 prio;
+};
+
struct mlx5_flow_act {
u32 action;
+ bool has_flow_tag;
u32 flow_tag;
u32 encap_id;
u32 modify_id;
+ uintptr_t esp_id;
+ struct mlx5_fs_vlan vlan;
};
#define MLX5_DECLARE_FLOW_ACT(name) \
diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h
new file mode 100644
index 000000000000..7b476bbae731
--- /dev/null
+++ b/include/linux/mlx5/fs_helpers.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_HELPERS_
+#define _MLX5_FS_HELPERS_
+
+#include <linux/mlx5/mlx5_ifc.h>
+
+#define MLX5_FS_IPV4_VERSION 4
+#define MLX5_FS_IPV6_VERSION 6
+
+static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c,
+ const u32 *match_v, u8 match)
+{
+ const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+
+ return MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_protocol) == 0xff &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol) == match;
+}
+
+static inline bool mlx5_fs_is_outer_tcp_flow(const u32 *match_c,
+ const u32 *match_v)
+{
+ return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_TCP);
+}
+
+static inline bool mlx5_fs_is_outer_udp_flow(const u32 *match_c,
+ const u32 *match_v)
+{
+ return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_UDP);
+}
+
+static inline bool mlx5_fs_is_vxlan_flow(const u32 *match_c)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters);
+
+ return MLX5_GET(fte_match_set_misc, misc_params_c, vxlan_vni);
+}
+
+static inline bool _mlx5_fs_is_outer_ipv_flow(struct mlx5_core_dev *mdev,
+ const u32 *match_c,
+ const u32 *match_v, int version)
+{
+ int match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+
+ if (!match_ipv) {
+ u16 ethertype;
+
+ switch (version) {
+ case MLX5_FS_IPV4_VERSION:
+ ethertype = ETH_P_IP;
+ break;
+ case MLX5_FS_IPV6_VERSION:
+ ethertype = ETH_P_IPV6;
+ break;
+ default:
+ return false;
+ }
+
+ return MLX5_GET(fte_match_set_lyr_2_4, headers_c,
+ ethertype) == 0xffff &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v,
+ ethertype) == ethertype;
+ }
+
+ return MLX5_GET(fte_match_set_lyr_2_4, headers_c,
+ ip_version) == 0xf &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v,
+ ip_version) == version;
+}
+
+static inline bool
+mlx5_fs_is_outer_ipv4_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
+ const u32 *match_v)
+{
+ return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v,
+ MLX5_FS_IPV4_VERSION);
+}
+
+static inline bool
+mlx5_fs_is_outer_ipv6_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
+ const u32 *match_v)
+{
+ return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v,
+ MLX5_FS_IPV6_VERSION);
+}
+
+static inline bool mlx5_fs_is_outer_ipsec_flow(const u32 *match_c)
+{
+ void *misc_params_c =
+ MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+
+ return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+}
+
+#endif
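/*
 * [Editor's note, illustrative only] The helpers above inspect a flow
 * steering match (criteria bits in match_c, values in match_v). A
 * hypothetical classifier deciding whether a rule targets outer
 * IPv4 ESP traffic:
 */
static bool example_is_ipv4_esp_rule(struct mlx5_core_dev *mdev,
				     const u32 *match_c, const u32 *match_v)
{
	return mlx5_fs_is_outer_ipv4_flow(mdev, match_c, match_v) &&
	       mlx5_fs_is_outer_ipsec_flow(match_c);
}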
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f4e417686f62..c19e611d2782 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -143,6 +143,7 @@ enum {
MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT = 0x763,
MLX5_CMD_OP_QUERY_HCA_VPORT_GID = 0x764,
MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY = 0x765,
+ MLX5_CMD_OP_QUERY_VNIC_ENV = 0x76f,
MLX5_CMD_OP_QUERY_VPORT_COUNTER = 0x770,
MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771,
MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772,
@@ -295,7 +296,9 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
u8 inner_tcp_dport[0x1];
u8 inner_tcp_flags[0x1];
u8 reserved_at_37[0x9];
- u8 reserved_at_40[0x1a];
+ u8 reserved_at_40[0x17];
+ u8 outer_esp_spi[0x1];
+ u8 reserved_at_58[0x2];
u8 bth_dst_qp[0x1];
u8 reserved_at_5b[0x25];
@@ -311,7 +314,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 flow_table_modify[0x1];
u8 encap[0x1];
u8 decap[0x1];
- u8 reserved_at_9[0x17];
+ u8 reserved_at_9[0x1];
+ u8 pop_vlan[0x1];
+ u8 push_vlan[0x1];
+ u8 reserved_at_c[0x14];
u8 reserved_at_20[0x2];
u8 log_max_ft_size[0x6];
@@ -437,7 +443,9 @@ struct mlx5_ifc_fte_match_set_misc_bits {
u8 reserved_at_120[0x28];
u8 bth_dst_qp[0x18];
- u8 reserved_at_160[0xa0];
+ u8 reserved_at_160[0x20];
+ u8 outer_esp_spi[0x20];
+ u8 reserved_at_1a0[0x60];
};
struct mlx5_ifc_cmd_pas_bits {
@@ -589,6 +597,16 @@ struct mlx5_ifc_qos_cap_bits {
u8 reserved_at_100[0x700];
};
+struct mlx5_ifc_debug_cap_bits {
+ u8 reserved_at_0[0x20];
+
+ u8 reserved_at_20[0x2];
+ u8 stall_detect[0x1];
+ u8 reserved_at_23[0x1d];
+
+ u8 reserved_at_40[0x7c0];
+};
+
struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 csum_cap[0x1];
u8 vlan_cap[0x1];
@@ -851,7 +869,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 out_of_seq_cnt[0x1];
u8 vport_counters[0x1];
u8 retransmission_q_counters[0x1];
- u8 reserved_at_183[0x1];
+ u8 debug[0x1];
u8 modify_rq_counter_set_id[0x1];
u8 rq_delay_drop[0x1];
u8 max_qp_cnt[0xa];
@@ -861,7 +879,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 vhca_group_manager[0x1];
u8 ib_virt[0x1];
u8 eth_virt[0x1];
- u8 reserved_at_1a4[0x1];
+ u8 vnic_env_queue_counters[0x1];
u8 ets[0x1];
u8 nic_flow_table[0x1];
u8 eswitch_flow_table[0x1];
@@ -993,7 +1011,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_330[0xb];
u8 log_max_xrcd[0x5];
- u8 reserved_at_340[0x8];
+ u8 nic_receive_steering_discard[0x1];
+ u8 receive_discard_vport_down[0x1];
+ u8 transmit_discard_vport_down[0x1];
+ u8 reserved_at_343[0x5];
u8 log_max_flow_counter_bulk[0x8];
u8 max_flow_counter_15_0[0x10];
@@ -1091,6 +1112,7 @@ enum mlx5_flow_destination_type {
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1,
MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2,
+ MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99,
MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100,
};
@@ -1567,7 +1589,17 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
u8 rx_pause_transition_low[0x20];
- u8 reserved_at_3c0[0x400];
+ u8 reserved_at_3c0[0x40];
+
+ u8 device_stall_minor_watermark_cnt_high[0x20];
+
+ u8 device_stall_minor_watermark_cnt_low[0x20];
+
+ u8 device_stall_critical_watermark_cnt_high[0x20];
+
+ u8 device_stall_critical_watermark_cnt_low[0x20];
+
+ u8 reserved_at_480[0x340];
};
struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
@@ -2282,10 +2314,19 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10,
MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20,
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40,
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP = 0x80,
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100,
+};
+
+struct mlx5_ifc_vlan_bits {
+ u8 ethtype[0x10];
+ u8 prio[0x3];
+ u8 cfi[0x1];
+ u8 vid[0xc];
};
struct mlx5_ifc_flow_context_bits {
- u8 reserved_at_0[0x20];
+ struct mlx5_ifc_vlan_bits push_vlan;
u8 group_id[0x20];
@@ -2361,6 +2402,24 @@ struct mlx5_ifc_xrc_srqc_bits {
u8 reserved_at_180[0x80];
};
+struct mlx5_ifc_vnic_diagnostic_statistics_bits {
+ u8 counter_error_queues[0x20];
+
+ u8 total_error_queues[0x20];
+
+ u8 send_queue_priority_update_flow[0x20];
+
+ u8 reserved_at_60[0x20];
+
+ u8 nic_receive_steering_discard[0x40];
+
+ u8 receive_discard_vport_down[0x40];
+
+ u8 transmit_discard_vport_down[0x40];
+
+ u8 reserved_at_140[0xec0];
+};
+
struct mlx5_ifc_traffic_counter_bits {
u8 packets[0x40];
@@ -3636,6 +3695,35 @@ struct mlx5_ifc_query_vport_state_in_bits {
u8 reserved_at_60[0x20];
};
+struct mlx5_ifc_query_vnic_env_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_vnic_diagnostic_statistics_bits vport_env;
+};
+
+enum {
+ MLX5_QUERY_VNIC_ENV_IN_OP_MOD_VPORT_DIAG_STATISTICS = 0x0,
+};
+
+struct mlx5_ifc_query_vnic_env_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 other_vport[0x1];
+ u8 reserved_at_41[0xf];
+ u8 vport_number[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
struct mlx5_ifc_query_vport_counter_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -7808,7 +7896,11 @@ struct mlx5_ifc_pifr_reg_bits {
struct mlx5_ifc_pfcc_reg_bits {
u8 reserved_at_0[0x8];
u8 local_port[0x8];
- u8 reserved_at_10[0x10];
+ u8 reserved_at_10[0xb];
+ u8 ppan_mask_n[0x1];
+ u8 minor_stall_mask[0x1];
+ u8 critical_stall_mask[0x1];
+ u8 reserved_at_1e[0x2];
u8 ppan[0x4];
u8 reserved_at_24[0x4];
@@ -7818,17 +7910,22 @@ struct mlx5_ifc_pfcc_reg_bits {
u8 pptx[0x1];
u8 aptx[0x1];
- u8 reserved_at_42[0x6];
+ u8 pptx_mask_n[0x1];
+ u8 reserved_at_43[0x5];
u8 pfctx[0x8];
u8 reserved_at_50[0x10];
u8 pprx[0x1];
u8 aprx[0x1];
- u8 reserved_at_62[0x6];
+ u8 pprx_mask_n[0x1];
+ u8 reserved_at_63[0x5];
u8 pfcrx[0x8];
u8 reserved_at_70[0x10];
- u8 reserved_at_80[0x80];
+ u8 device_stall_minor_watermark[0x10];
+ u8 device_stall_critical_watermark[0x10];
+
+ u8 reserved_at_a0[0x60];
};
struct mlx5_ifc_pelc_reg_bits {
@@ -7869,8 +7966,10 @@ struct mlx5_ifc_peir_reg_bits {
};
struct mlx5_ifc_pcam_enhanced_features_bits {
- u8 reserved_at_0[0x7b];
+ u8 reserved_at_0[0x76];
+ u8 pfcc_mask[0x1];
+ u8 reserved_at_77[0x4];
u8 rx_buffer_fullness_counters[0x1];
u8 ptys_connector_type[0x1];
u8 reserved_at_7d[0x1];
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 255a88d08078..ec052491ba3d 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -373,7 +373,10 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
struct mlx5_ifc_ipsec_extended_cap_bits {
u8 encapsulation[0x20];
- u8 reserved_0[0x15];
+ u8 reserved_0[0x12];
+ u8 v2_command[0x1];
+ u8 udp_encap[0x1];
+ u8 rx_no_trailer[0x1];
u8 ipv4_fragment[0x1];
u8 ipv6[0x1];
u8 esn[0x1];
@@ -429,4 +432,91 @@ struct mlx5_ifc_ipsec_counters_bits {
u8 dropped_cmd[0x40];
};
+enum mlx5_ifc_fpga_ipsec_response_syndrome {
+ MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0,
+ MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
+ MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2,
+ MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+};
+
+struct mlx5_ifc_fpga_ipsec_cmd_resp {
+ __be32 syndrome;
+ union {
+ __be32 sw_sa_handle;
+ __be32 flags;
+ };
+ u8 reserved[24];
+} __packed;
+
+enum mlx5_ifc_fpga_ipsec_cmd_opcode {
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0,
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1,
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2,
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3,
+ MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4,
+ MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5,
+};
+
+enum mlx5_ifc_fpga_ipsec_cap {
+ MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0),
+};
+
+struct mlx5_ifc_fpga_ipsec_cmd_cap {
+ __be32 cmd;
+ __be32 flags;
+ u8 reserved[24];
+} __packed;
+
+enum mlx5_ifc_fpga_ipsec_sa_flags {
+ MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0),
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1),
+ MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2),
+ MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3),
+ MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4),
+ MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5),
+ MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6),
+ MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7),
+};
+
+enum mlx5_ifc_fpga_ipsec_sa_enc_mode {
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0,
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1,
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3,
+};
+
+struct mlx5_ifc_fpga_ipsec_sa_v1 {
+ __be32 cmd;
+ u8 key_enc[32];
+ u8 key_auth[32];
+ __be32 sip[4];
+ __be32 dip[4];
+ union {
+ struct {
+ __be32 reserved;
+ u8 salt_iv[8];
+ __be32 salt;
+ } __packed gcm;
+ struct {
+ u8 salt[16];
+ } __packed cbc;
+ };
+ __be32 spi;
+ __be32 sw_sa_handle;
+ __be16 tfclen;
+ u8 enc_mode;
+ u8 reserved1[2];
+ u8 flags;
+ u8 reserved2[2];
+};
+
+struct mlx5_ifc_fpga_ipsec_sa {
+ struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1;
+ __be16 udp_sp;
+ __be16 udp_dp;
+ u8 reserved1[4];
+ __be32 esn;
+ __be16 vid; /* only 12 bits, rest is reserved */
+ __be16 reserved2;
+} __packed;
+
#endif /* MLX5_IFC_FPGA_H */
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 035f0d4dc9fe..34aed6032f86 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -151,6 +151,12 @@ int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx);
int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx,
u8 *pfc_en_rx);
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 stall_critical_watermark,
+ u16 stall_minor_watermark);
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 *stall_critical_watermark, u16 *stall_minor_watermark);
+
int mlx5_max_tc(struct mlx5_core_dev *mdev);
int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 64e193e87394..9208cb8809ac 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -107,6 +107,9 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
+int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
+ u64 *rx_discard_vport_down,
+ u64 *tx_discard_vport_down);
int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
int vf, u8 port_num, void *out,
size_t out_sz);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c30b32e3c862..10191c28fc04 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page)
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
-#ifdef arch_unmap_kpfn
-extern void arch_unmap_kpfn(unsigned long pfn);
-#else
-static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
-#endif
-
#endif
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 5396521a776a..9a36fad9e068 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -4,11 +4,10 @@
#include <linux/in.h>
#include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/fib_notifier.h>
#include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IP_MROUTE
static inline int ip_mroute_opt(int opt)
@@ -56,56 +55,8 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
}
#endif
-struct vif_device {
- struct net_device *dev; /* Device we are using */
- struct netdev_phys_item_id dev_parent_id; /* Device parent ID */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- __be32 local,remote; /* Addresses(remote for tunnels)*/
- int link; /* Physical interface index */
-};
-
-struct vif_entry_notifier_info {
- struct fib_notifier_info info;
- struct net_device *dev;
- vifi_t vif_index;
- unsigned short vif_flags;
- u32 tb_id;
-};
-
#define VIFF_STATIC 0x8000
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock __rcu *mroute_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc_unres_queue;
- struct vif_device vif_table[MAXVIFS];
- struct rhltable mfc_hash;
- struct list_head mfc_cache_list;
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
- int mroute_reg_vif_num;
-};
-
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
- MFC_STATIC = BIT(0),
- MFC_OFFLOAD = BIT(1),
-};
-
struct mfc_cache_cmp_arg {
__be32 mfc_mcastgrp;
__be32 mfc_origin;
@@ -113,28 +64,13 @@ struct mfc_cache_cmp_arg {
/**
* struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
* @mfc_mcastgrp: destination multicast group address
* @mfc_origin: source address
* @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
*/
struct mfc_cache {
- struct rhlist_head mnode;
+ struct mr_mfc _c;
union {
struct {
__be32 mfc_mcastgrp;
@@ -142,57 +78,10 @@ struct mfc_cache {
};
struct mfc_cache_cmp_arg cmparg;
};
- vifi_t mfc_parent;
- int mfc_flags;
-
- union {
- struct {
- unsigned long expires;
- struct sk_buff_head unresolved;
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXVIFS];
- refcount_t refcount;
- } res;
- } mfc_un;
- struct list_head list;
- struct rcu_head rcu;
-};
-
-struct mfc_entry_notifier_info {
- struct fib_notifier_info info;
- struct mfc_cache *mfc;
- u32 tb_id;
};
struct rtmsg;
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid);
-
-#ifdef CONFIG_IP_MROUTE
-void ipmr_cache_free(struct mfc_cache *mfc_cache);
-#else
-static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
-{
-}
-#endif
-
-static inline void ipmr_cache_put(struct mfc_cache *c)
-{
- if (refcount_dec_and_test(&c->mfc_un.res.refcount))
- ipmr_cache_free(c);
-}
-static inline void ipmr_cache_hold(struct mfc_cache *c)
-{
- refcount_inc(&c->mfc_un.res.refcount);
-}
-
#endif
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 3014c52bfd86..c4a45859f586 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -7,6 +7,8 @@
#include <linux/skbuff.h> /* for struct sk_buff_head */
#include <net/net_namespace.h>
#include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
+#include <net/fib_rules.h>
#ifdef CONFIG_IPV6_MROUTE
static inline int ip6_mroute_opt(int opt)
@@ -62,57 +64,33 @@ static inline void ip6_mr_cleanup(void)
}
#endif
-struct mif_device {
- struct net_device *dev; /* Device we are using */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- int link; /* Physical interface index */
-};
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+bool ip6mr_rule_default(const struct fib_rule *rule);
+#else
+static inline bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+ return true;
+}
+#endif
#define VIFF_STATIC 0x8000
-struct mfc6_cache {
- struct list_head list;
- struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */
- struct in6_addr mf6c_origin; /* Source of packet */
- mifi_t mf6c_parent; /* Source interface */
- int mfc_flags; /* Flags on line */
+struct mfc6_cache_cmp_arg {
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+};
+struct mfc6_cache {
+ struct mr_mfc _c;
union {
struct {
- unsigned long expires;
- struct sk_buff_head unresolved; /* Unresolved buffers */
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXMIFS]; /* TTL thresholds */
- } res;
- } mfc_un;
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+ };
+ struct mfc6_cache_cmp_arg cmparg;
+ };
};
-#define MFC_STATIC 1
-#define MFC_NOTIFY 2
-
-#define MFC6_LINES 64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
- (__force u32)(a)->s6_addr32[1] ^ \
- (__force u32)(a)->s6_addr32[2] ^ \
- (__force u32)(a)->s6_addr32[3] ^ \
- (__force u32)(g)->s6_addr32[0] ^ \
- (__force u32)(g)->s6_addr32[1] ^ \
- (__force u32)(g)->s6_addr32[2] ^ \
- (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
#define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */
struct rtmsg;
@@ -120,12 +98,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
struct rtmsg *rtm, u32 portid);
#ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
extern int ip6mr_sk_done(struct sock *sk);
#else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- return NULL;
+ return false;
}
static inline int ip6mr_sk_done(struct sock *sk)
{
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
new file mode 100644
index 000000000000..d617fe45543e
--- /dev/null
+++ b/include/linux/mroute_base.h
@@ -0,0 +1,474 @@
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/spinlock.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/fib_notifier.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egressing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (not implemented)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+ struct net_device *dev;
+ unsigned long bytes_in, bytes_out;
+ unsigned long pkt_in, pkt_out;
+ unsigned long rate_limit;
+ unsigned char threshold;
+ unsigned short flags;
+ int link;
+
+ /* Currently only used by ipmr */
+ struct netdev_phys_item_id dev_parent_id;
+ __be32 local, remote;
+};
+
+struct vif_entry_notifier_info {
+ struct fib_notifier_info info;
+ struct net_device *dev;
+ unsigned short vif_index;
+ unsigned short vif_flags;
+ u32 tb_id;
+};
+
+static inline int mr_call_vif_notifier(struct notifier_block *nb,
+ struct net *net,
+ unsigned short family,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ unsigned short vif_index, u32 tb_id)
+{
+ struct vif_entry_notifier_info info = {
+ .info = {
+ .family = family,
+ .net = net,
+ },
+ .dev = vif->dev,
+ .vif_index = vif_index,
+ .vif_flags = vif->flags,
+ .tb_id = tb_id,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static inline int mr_call_vif_notifiers(struct net *net,
+ unsigned short family,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ unsigned short vif_index, u32 tb_id,
+ unsigned int *ipmr_seq)
+{
+ struct vif_entry_notifier_info info = {
+ .info = {
+ .family = family,
+ .net = net,
+ },
+ .dev = vif->dev,
+ .vif_index = vif_index,
+ .vif_flags = vif->flags,
+ .tb_id = tb_id,
+ };
+
+ ASSERT_RTNL();
+ (*ipmr_seq)++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+#ifndef MAXVIFS
+/* This one is nasty; the value is defined in uapi using different symbols
+ * for mroute and mroute6, but both map onto the same value of 32.
+ */
+#define MAXVIFS 32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+ MFC_STATIC = BIT(0),
+ MFC_OFFLOAD = BIT(1),
+};
+
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ * @free: Operation used for freeing an entry under RCU
+ */
+struct mr_mfc {
+ struct rhlist_head mnode;
+ unsigned short mfc_parent;
+ int mfc_flags;
+
+ union {
+ struct {
+ unsigned long expires;
+ struct sk_buff_head unresolved;
+ } unres;
+ struct {
+ unsigned long last_assert;
+ int minvif;
+ int maxvif;
+ unsigned long bytes;
+ unsigned long pkt;
+ unsigned long wrong_if;
+ unsigned long lastuse;
+ unsigned char ttls[MAXVIFS];
+ refcount_t refcount;
+ } res;
+ } mfc_un;
+ struct list_head list;
+ struct rcu_head rcu;
+ void (*free)(struct rcu_head *head);
+};
+
+static inline void mr_cache_put(struct mr_mfc *c)
+{
+ if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+ call_rcu(&c->rcu, c->free);
+}
+
+static inline void mr_cache_hold(struct mr_mfc *c)
+{
+ refcount_inc(&c->mfc_un.res.refcount);
+}
+
+struct mfc_entry_notifier_info {
+ struct fib_notifier_info info;
+ struct mr_mfc *mfc;
+ u32 tb_id;
+};
+
+static inline int mr_call_mfc_notifier(struct notifier_block *nb,
+ struct net *net,
+ unsigned short family,
+ enum fib_event_type event_type,
+ struct mr_mfc *mfc, u32 tb_id)
+{
+ struct mfc_entry_notifier_info info = {
+ .info = {
+ .family = family,
+ .net = net,
+ },
+ .mfc = mfc,
+ .tb_id = tb_id
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static inline int mr_call_mfc_notifiers(struct net *net,
+ unsigned short family,
+ enum fib_event_type event_type,
+ struct mr_mfc *mfc, u32 tb_id,
+ unsigned int *ipmr_seq)
+{
+ struct mfc_entry_notifier_info info = {
+ .info = {
+ .family = family,
+ .net = net,
+ },
+ .mfc = mfc,
+ .tb_id = tb_id
+ };
+
+ ASSERT_RTNL();
+ (*ipmr_seq)++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+ const struct rhashtable_params *rht_params;
+ void *cmparg_any;
+};
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @ops: protocol specific operations
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resolved routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+ struct list_head list;
+ possible_net_t net;
+ struct mr_table_ops ops;
+ u32 id;
+ struct sock __rcu *mroute_sk;
+ struct timer_list ipmr_expire_timer;
+ struct list_head mfc_unres_queue;
+ struct vif_device vif_table[MAXVIFS];
+ struct rhltable mfc_hash;
+ struct list_head mfc_cache_list;
+ int maxvif;
+ atomic_t cache_resolve_queue_len;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
+ int mroute_reg_vif_num;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid the need for
+ * explicit casts they simply return 'void *'.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock);
+
+int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
+ int (*rules_dump)(struct net *net,
+ struct notifier_block *nb),
+ struct mr_table *(*mr_iter)(struct net *net,
+ struct mr_table *mrt),
+ rwlock_t *mrt_lock);
+#else
+static inline void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+}
+
+static inline void *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+ int vifi)
+{
+ return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+ int vifi, void *hasharg)
+{
+ return NULL;
+}
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ return -EINVAL;
+}
+
+static inline int mr_dump(struct net *net, struct notifier_block *nb,
+ unsigned short family,
+ int (*rules_dump)(struct net *net,
+ struct notifier_block *nb),
+ struct mr_table *(*mr_iter)(struct net *net,
+ struct mr_table *mrt),
+ rwlock_t *mrt_lock)
+{
+ return -EINVAL;
+}
+#endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+ return mr_mfc_find_parent(mrt, hasharg, -1);
+}
+
+#ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ int ct;
+};
+
+struct mr_mfc_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ struct list_head *cache;
+
+ /* Lock protecting the mr_table's unresolved queue */
+ spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+/* These actually return 'struct mr_mfc *', but to avoid the need for
+ * explicit casts they simply return 'void *'.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ struct mr_mfc_iter *it = seq->private;
+
+ it->mrt = mrt;
+ it->cache = NULL;
+ it->lock = lock;
+
+ return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ spin_unlock_bh(it->lock);
+ else if (it->cache == &mrt->mfc_cache_list)
+ rcu_read_unlock();
+}
+#else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+ loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+ void *v, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
+#endif
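/*
 * [Editor's note, illustrative only] The new struct mr_mfc is embedded as
 * the *first* member of both mfc_cache (IPv4) and mfc6_cache (IPv6); that
 * is what lets the shared mr_* helpers return 'void *' which callers cast
 * back to their protocol-specific type. A sketch of the pattern:
 */
static struct mfc_cache *example_lookup(struct mr_table *mrt,
					struct mfc_cache_cmp_arg *arg)
{
	/* mr_mfc_find() returns the embedded mr_mfc; since it sits at
	 * offset 0, the cast to the containing mfc_cache is valid.
	 */
	return (struct mfc_cache *)mr_mfc_find(mrt, arg);
}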
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f25c13423bd4..cb3bbed4e633 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -66,6 +66,11 @@ struct mutex {
#endif
};
+/*
+ * Internal helper function; C doesn't allow us to hide it :/
+ *
+ * DO NOT USE (outside of mutex code).
+ */
static inline struct task_struct *__mutex_owner(struct mutex *lock)
{
return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
diff --git a/include/linux/net.h b/include/linux/net.h
index 000d1aada74f..2248a052061d 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -222,6 +222,7 @@ enum {
int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
int sock_register(const struct net_proto_family *fam);
void sock_unregister(int family);
+bool sock_is_registered(int family);
int __sock_create(struct net *net, int family, int type, int proto,
struct socket **res, int kern);
int sock_create(int family, int type, int proto, struct socket **res);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5eef6c8e2741..2a2d9cf50aa2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -585,6 +585,15 @@ struct netdev_queue {
#endif
} ____cacheline_aligned_in_smp;
+extern int sysctl_fb_tunnels_only_for_init_net;
+
+static inline bool net_has_fallback_tunnels(const struct net *net)
+{
+ return net == &init_net ||
+ !IS_ENABLED(CONFIG_SYSCTL) ||
+ !sysctl_fb_tunnels_only_for_init_net;
+}
+
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
{
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
@@ -1381,8 +1390,6 @@ struct net_device_ops {
* @IFF_MACVLAN: Macvlan device
* @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account
* underlying stacked devices
- * @IFF_IPVLAN_MASTER: IPvlan master device
- * @IFF_IPVLAN_SLAVE: IPvlan slave device
* @IFF_L3MDEV_MASTER: device is an L3 master device
* @IFF_NO_QUEUE: device can run without qdisc attached
* @IFF_OPENVSWITCH: device is a Open vSwitch master
@@ -1392,6 +1399,7 @@ struct net_device_ops {
* @IFF_PHONY_HEADROOM: the headroom value is controlled by an external
* entity (i.e. the master device for bridged veth)
* @IFF_MACSEC: device is a MACsec device
+ * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1412,16 +1420,15 @@ enum netdev_priv_flags {
IFF_LIVE_ADDR_CHANGE = 1<<15,
IFF_MACVLAN = 1<<16,
IFF_XMIT_DST_RELEASE_PERM = 1<<17,
- IFF_IPVLAN_MASTER = 1<<18,
- IFF_IPVLAN_SLAVE = 1<<19,
- IFF_L3MDEV_MASTER = 1<<20,
- IFF_NO_QUEUE = 1<<21,
- IFF_OPENVSWITCH = 1<<22,
- IFF_L3MDEV_SLAVE = 1<<23,
- IFF_TEAM = 1<<24,
- IFF_RXFH_CONFIGURED = 1<<25,
- IFF_PHONY_HEADROOM = 1<<26,
- IFF_MACSEC = 1<<27,
+ IFF_L3MDEV_MASTER = 1<<18,
+ IFF_NO_QUEUE = 1<<19,
+ IFF_OPENVSWITCH = 1<<20,
+ IFF_L3MDEV_SLAVE = 1<<21,
+ IFF_TEAM = 1<<22,
+ IFF_RXFH_CONFIGURED = 1<<23,
+ IFF_PHONY_HEADROOM = 1<<24,
+ IFF_MACSEC = 1<<25,
+ IFF_NO_RX_HANDLER = 1<<26,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1442,8 +1449,6 @@ enum netdev_priv_flags {
#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE
#define IFF_MACVLAN IFF_MACVLAN
#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM
-#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER
-#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE
#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER
#define IFF_NO_QUEUE IFF_NO_QUEUE
#define IFF_OPENVSWITCH IFF_OPENVSWITCH
@@ -1451,6 +1456,7 @@ enum netdev_priv_flags {
#define IFF_TEAM IFF_TEAM
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
#define IFF_MACSEC IFF_MACSEC
+#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
/**
* struct net_device - The DEVICE structure.
@@ -1798,11 +1804,17 @@ struct net_device {
#if IS_ENABLED(CONFIG_TIPC)
struct tipc_bearer __rcu *tipc_ptr;
#endif
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
void *atalk_ptr;
+#endif
struct in_device __rcu *ip_ptr;
+#if IS_ENABLED(CONFIG_DECNET)
struct dn_dev __rcu *dn_ptr;
+#endif
struct inet6_dev __rcu *ip6_ptr;
+#if IS_ENABLED(CONFIG_AX25)
void *ax25_ptr;
+#endif
struct wireless_dev *ieee80211_ptr;
struct wpan_dev *ieee802154_ptr;
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
@@ -2300,43 +2312,45 @@ struct netdev_lag_lower_state_info {
#include <linux/notifier.h>
-/* netdevice notifier chain. Please remember to update the rtnetlink
- * notification exclusion list in rtnetlink_event() when adding new
- * types.
+/* netdevice notifier chain. Please remember to update netdev_cmd_to_name()
+ * and the rtnetlink notification exclusion list in rtnetlink_event() when
+ * adding new types.
*/
-#define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */
-#define NETDEV_DOWN 0x0002
-#define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface
+enum netdev_cmd {
+ NETDEV_UP = 1, /* For now you can't veto a device up/down */
+ NETDEV_DOWN,
+ NETDEV_REBOOT, /* Tell a protocol stack a network interface
detected a hardware crash and restarted
- we can use this eg to kick tcp sessions
once done */
-#define NETDEV_CHANGE 0x0004 /* Notify device state change */
-#define NETDEV_REGISTER 0x0005
-#define NETDEV_UNREGISTER 0x0006
-#define NETDEV_CHANGEMTU 0x0007 /* notify after mtu change happened */
-#define NETDEV_CHANGEADDR 0x0008
-#define NETDEV_GOING_DOWN 0x0009
-#define NETDEV_CHANGENAME 0x000A
-#define NETDEV_FEAT_CHANGE 0x000B
-#define NETDEV_BONDING_FAILOVER 0x000C
-#define NETDEV_PRE_UP 0x000D
-#define NETDEV_PRE_TYPE_CHANGE 0x000E
-#define NETDEV_POST_TYPE_CHANGE 0x000F
-#define NETDEV_POST_INIT 0x0010
-#define NETDEV_UNREGISTER_FINAL 0x0011
-#define NETDEV_RELEASE 0x0012
-#define NETDEV_NOTIFY_PEERS 0x0013
-#define NETDEV_JOIN 0x0014
-#define NETDEV_CHANGEUPPER 0x0015
-#define NETDEV_RESEND_IGMP 0x0016
-#define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */
-#define NETDEV_CHANGEINFODATA 0x0018
-#define NETDEV_BONDING_INFO 0x0019
-#define NETDEV_PRECHANGEUPPER 0x001A
-#define NETDEV_CHANGELOWERSTATE 0x001B
-#define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C
-#define NETDEV_UDP_TUNNEL_DROP_INFO 0x001D
-#define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E
+ NETDEV_CHANGE, /* Notify device state change */
+ NETDEV_REGISTER,
+ NETDEV_UNREGISTER,
+ NETDEV_CHANGEMTU, /* notify after mtu change happened */
+ NETDEV_CHANGEADDR,
+ NETDEV_GOING_DOWN,
+ NETDEV_CHANGENAME,
+ NETDEV_FEAT_CHANGE,
+ NETDEV_BONDING_FAILOVER,
+ NETDEV_PRE_UP,
+ NETDEV_PRE_TYPE_CHANGE,
+ NETDEV_POST_TYPE_CHANGE,
+ NETDEV_POST_INIT,
+ NETDEV_RELEASE,
+ NETDEV_NOTIFY_PEERS,
+ NETDEV_JOIN,
+ NETDEV_CHANGEUPPER,
+ NETDEV_RESEND_IGMP,
+ NETDEV_PRECHANGEMTU, /* notify before mtu change happened */
+ NETDEV_CHANGEINFODATA,
+ NETDEV_BONDING_INFO,
+ NETDEV_PRECHANGEUPPER,
+ NETDEV_CHANGELOWERSTATE,
+ NETDEV_UDP_TUNNEL_PUSH_INFO,
+ NETDEV_UDP_TUNNEL_DROP_INFO,
+ NETDEV_CHANGE_TX_QUEUE_LEN,
+};
+const char *netdev_cmd_to_name(enum netdev_cmd cmd);
int register_netdevice_notifier(struct notifier_block *nb);
int unregister_netdevice_notifier(struct notifier_block *nb);
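
A hedged sketch of a consumer of the new enum and the netdev_cmd_to_name()
helper declared above; the module and callback names are illustrative, not
part of this patch:

	#include <linux/netdevice.h>
	#include <linux/notifier.h>

	static int example_netdev_event(struct notifier_block *nb,
					unsigned long event, void *ptr)
	{
		struct net_device *dev = netdev_notifier_info_to_dev(ptr);

		/* event is one of the enum netdev_cmd values above */
		pr_info("%s: %s\n", dev->name, netdev_cmd_to_name(event));
		return NOTIFY_DONE;
	}

	static struct notifier_block example_nb = {
		.notifier_call = example_netdev_event,
	};

	/* register_netdevice_notifier(&example_nb) at module init,
	 * unregister_netdevice_notifier(&example_nb) at exit. */
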
@@ -4217,16 +4231,6 @@ static inline bool netif_is_macvlan_port(const struct net_device *dev)
return dev->priv_flags & IFF_MACVLAN_PORT;
}
-static inline bool netif_is_ipvlan(const struct net_device *dev)
-{
- return dev->priv_flags & IFF_IPVLAN_SLAVE;
-}
-
-static inline bool netif_is_ipvlan_port(const struct net_device *dev)
-{
- return dev->priv_flags & IFF_IPVLAN_MASTER;
-}
-
static inline bool netif_is_bond_master(const struct net_device *dev)
{
return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING;
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 1313b35c3ab7..14529511c4b8 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -285,6 +285,8 @@ unsigned int *xt_alloc_entry_offsets(unsigned int size);
bool xt_find_jump_offset(const unsigned int *offsets,
unsigned int target, unsigned int size);
+int xt_check_proc_name(const char *name, unsigned int size);
+
int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
bool inv_proto);
int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index b99bced39ac2..e791ebc65c9c 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -5,6 +5,7 @@
#ifndef _LINUX_NOSPEC_H
#define _LINUX_NOSPEC_H
+#include <asm/barrier.h>
/**
* array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
@@ -20,20 +21,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
unsigned long size)
{
/*
- * Warn developers about inappropriate array_index_nospec() usage.
- *
- * Even if the CPU speculates past the WARN_ONCE branch, the
- * sign bit of @index is taken into account when generating the
- * mask.
- *
- * This warning is compiled out when the compiler can infer that
- * @index and @size are less than LONG_MAX.
- */
- if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
- "array_index_nospec() limited to range of [0, LONG_MAX]\n"))
- return 0;
-
- /*
* Always calculate and emit the mask even if the compiler
* thinks the mask is not needed. The compiler does not take
* into account the value of @index under speculation.
@@ -66,7 +53,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
\
- _i &= _mask; \
- _i; \
+ (typeof(_i)) (_i & _mask); \
})
#endif /* _LINUX_NOSPEC_H */
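
A minimal usage sketch (assumed consumer code, not from this patch): with the
WARN_ONCE gone and the macro now returning (typeof(_i)) (_i & _mask),
array_index_nospec() preserves the index type and fits the classic
bounds-check-then-access pattern:

	#include <linux/errno.h>
	#include <linux/kernel.h>
	#include <linux/nospec.h>

	static const int example_table[16];

	static int example_lookup(unsigned long idx)
	{
		if (idx >= ARRAY_SIZE(example_table))
			return -EINVAL;
		/* Clamp idx under speculation before the dependent load. */
		idx = array_index_nospec(idx, ARRAY_SIZE(example_table));
		return example_table[idx];
	}
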
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 88865e0ebf4d..091033a6b836 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -13,7 +13,6 @@ struct device_node;
struct device_node *of_pci_find_child_device(struct device_node *parent,
unsigned int devfn);
int of_pci_get_devfn(struct device_node *np);
-int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
int of_get_pci_domain_nr(struct device_node *node);
int of_pci_get_max_link_speed(struct device_node *node);
@@ -34,12 +33,6 @@ static inline int of_pci_get_devfn(struct device_node *np)
}
static inline int
-of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
-{
- return 0;
-}
-
-static inline int
of_pci_parse_bus_range(struct device_node *node, struct resource *res)
{
return -EINVAL;
@@ -67,6 +60,16 @@ of_pci_get_max_link_speed(struct device_node *node)
static inline void of_pci_check_probe_only(void) { }
#endif
+#if IS_ENABLED(CONFIG_OF_IRQ)
+int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
+#else
+static inline int
+of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+ return 0;
+}
+#endif
+
#if defined(CONFIG_OF_ADDRESS)
int of_pci_get_host_bridge_resources(struct device_node *dev,
unsigned char busno, unsigned char bus_max,
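
A hedged sketch of the intended call site: with the declaration now gated on
CONFIG_OF_IRQ (and a 0-returning stub otherwise), a DT-based host controller
can assign the helper unconditionally. The probe function below is
illustrative only:

	#include <linux/of_pci.h>
	#include <linux/pci.h>
	#include <linux/platform_device.h>

	static int example_host_probe(struct platform_device *pdev)
	{
		struct pci_host_bridge *bridge;

		bridge = devm_pci_alloc_host_bridge(&pdev->dev, 0);
		if (!bridge)
			return -ENOMEM;

		bridge->map_irq = of_irq_parse_and_map_pci;
		bridge->swizzle_irq = pci_common_swizzle;
		/* ... fill resources and ops, then scan the root bus ... */
		return 0;
	}
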
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 864d167a1073..009cdf3d65b6 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -30,10 +30,14 @@
* calls io_destroy() or the process exits.
*
* In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
- * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove
- * the kioctx from the proccess's list of kioctxs - after that, there can't be
- * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop
- * the initial ref with percpu_ref_put().
+ * removes the kioctx from the process's table of kioctxs and kills the percpu_ref.
+ * After that, there can't be any new users of the kioctx (from lookup_ioctx())
+ * and it's then safe to drop the initial ref with percpu_ref_put().
+ *
+ * Note that the free path, free_ioctx(), needs to go through explicit call_rcu()
+ * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't
+ * imply RCU grace periods of any kind and if a user wants to combine percpu_ref
+ * with RCU protection, it must be done explicitly.
*
* Code that does a two stage shutdown like this often needs some kind of
* explicit synchronization to ensure the initial refcount can only be dropped
@@ -113,8 +117,10 @@ void percpu_ref_reinit(struct percpu_ref *ref);
* Must be used to drop the initial ref on a percpu refcount; must be called
* precisely once before shutdown.
*
- * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the
- * percpu counters and dropping the initial ref.
+ * Switches @ref into atomic mode before gathering up the percpu counters
+ * and dropping the initial ref.
+ *
+ * There are no implied RCU grace periods between kill and release.
*/
static inline void percpu_ref_kill(struct percpu_ref *ref)
{
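
The pattern the updated comment describes, as a hedged sketch (names
illustrative): since percpu_ref_kill() no longer implies any RCU grace
period, a user whose lookup side is RCU-protected must supply its own
call_rcu() on the free path:

	#include <linux/percpu-refcount.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct example_ctx {
		struct percpu_ref ref;
		struct rcu_head rcu;
	};

	static void example_free_rcu(struct rcu_head *rcu)
	{
		kfree(container_of(rcu, struct example_ctx, rcu));
	}

	static void example_release(struct percpu_ref *ref)	/* count hit 0 */
	{
		struct example_ctx *ctx = container_of(ref, struct example_ctx, ref);

		call_rcu(&ctx->rcu, example_free_rcu);
	}

	static void example_destroy(struct example_ctx *ctx)
	{
		/* unlink ctx from the RCU-protected lookup structure first */
		percpu_ref_kill(&ctx->ref);	/* drops the initial ref */
	}
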
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index af0f44effd44..40036a57d072 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -14,26 +14,10 @@
#include <linux/interrupt.h>
#include <linux/perf_event.h>
+#include <linux/platform_device.h>
#include <linux/sysfs.h>
#include <asm/cputype.h>
-/*
- * struct arm_pmu_platdata - ARM PMU platform data
- *
- * @handle_irq: an optional handler which will be called from the
- * interrupt and passed the address of the low level handler,
- * and can be used to implement any platform specific handling
- * before or after calling it.
- *
- * @irq_flags: if non-zero, these flags will be passed to request_irq
- * when requesting interrupts for this PMU device.
- */
-struct arm_pmu_platdata {
- irqreturn_t (*handle_irq)(int irq, void *dev,
- irq_handler_t pmu_handler);
- unsigned long irq_flags;
-};
-
#ifdef CONFIG_ARM_PMU
/*
@@ -92,7 +76,6 @@ enum armpmu_attr_groups {
struct arm_pmu {
struct pmu pmu;
- cpumask_t active_irqs;
cpumask_t supported_cpus;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
@@ -174,12 +157,11 @@ static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
+struct arm_pmu *armpmu_alloc_atomic(void);
void armpmu_free(struct arm_pmu *pmu);
int armpmu_register(struct arm_pmu *pmu);
-int armpmu_request_irqs(struct arm_pmu *armpmu);
-void armpmu_free_irqs(struct arm_pmu *armpmu);
-int armpmu_request_irq(struct arm_pmu *armpmu, int cpu);
-void armpmu_free_irq(struct arm_pmu *armpmu, int cpu);
+int armpmu_request_irq(int irq, int cpu);
+void armpmu_free_irq(int irq, int cpu);
#define ARMV8_PMU_PDEV_NAME "armv8-pmu"
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5a0c3e53e7c2..f0b5870a6d40 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -924,6 +924,7 @@ void phy_device_remove(struct phy_device *phydev);
int phy_init_hw(struct phy_device *phydev);
int phy_suspend(struct phy_device *phydev);
int phy_resume(struct phy_device *phydev);
+int __phy_resume(struct phy_device *phydev);
int phy_loopback(struct phy_device *phydev, bool enable);
struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
phy_interface_t interface);
@@ -983,6 +984,10 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev)
{
return 0;
}
+int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad,
+ u16 regnum);
+int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
+ u16 regnum, u16 val);
/* Clause 45 PHY */
int genphy_c45_restart_aneg(struct phy_device *phydev);
@@ -994,6 +999,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev);
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
int genphy_c45_read_mdix(struct phy_device *phydev);
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
static inline int phy_read_status(struct phy_device *phydev)
{
if (!phydev->drv)
@@ -1011,7 +1024,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
int phy_drivers_register(struct phy_driver *new_driver, int n,
struct module *owner);
void phy_state_machine(struct work_struct *work);
-void phy_change(struct phy_device *phydev);
void phy_change_work(struct work_struct *work);
void phy_mac_interrupt(struct phy_device *phydev);
void phy_start_machine(struct phy_device *phydev);
diff --git a/include/linux/property.h b/include/linux/property.h
index 769d372c1edf..2eea4b310fc2 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -283,7 +283,7 @@ bool device_dma_supported(struct device *dev);
enum dev_dma_attr device_get_dma_attr(struct device *dev);
-void *device_get_match_data(struct device *dev);
+const void *device_get_match_data(struct device *dev);
int device_get_phy_mode(struct device *dev);
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index a079656b614c..059242030631 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -75,5 +75,9 @@ void __init ptp_classifier_init(void);
static inline void ptp_classifier_init(void)
{
}
+static inline unsigned int ptp_classify_raw(struct sk_buff *skb)
+{
+ return PTP_CLASS_NONE;
+}
#endif
#endif /* _PTP_CLASSIFY_H_ */
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index b884b7794187..6894976b54e3 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -296,13 +296,14 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
void *ptr;
+ /* The READ_ONCE in __ptr_ring_peek guarantees that anyone
+ * accessing data through the pointer is up to date. Pairs
+ * with smp_wmb in __ptr_ring_produce.
+ */
ptr = __ptr_ring_peek(r);
if (ptr)
__ptr_ring_discard_one(r);
- /* Make sure anyone accessing data through the pointer is up to date. */
- /* Pairs with smp_wmb in __ptr_ring_produce. */
- smp_read_barrier_depends();
return ptr;
}
@@ -469,7 +470,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
*/
static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
{
- if (size * sizeof(void *) > KMALLOC_MAX_SIZE)
+ if (size > KMALLOC_MAX_SIZE / sizeof(void *))
return NULL;
return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
}
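
A worked note on why the check is rewritten (reasoning added here, not part
of the patch): with a 32-bit size_t, a caller-supplied size of 0x20000001
multiplied by sizeof(void *) == 8 wraps to 8 and would sneak past the old
size * sizeof(void *) > KMALLOC_MAX_SIZE test, whereas the division form
size > KMALLOC_MAX_SIZE / sizeof(void *) cannot overflow and correctly
rejects it.
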
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index c9df2527e0cd..668a21f04b09 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -766,8 +766,10 @@ slow_path:
if (!key ||
(params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, head)) :
- rhashtable_compare(&arg, rht_obj(ht, head))))
+ rhashtable_compare(&arg, rht_obj(ht, head)))) {
+ pprev = &head->next;
continue;
+ }
data = rht_obj(ht, head);
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1fdcde96eb65..c7d1e4689325 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -33,9 +33,10 @@ extern void rtnl_lock(void);
extern void rtnl_unlock(void);
extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
+extern int rtnl_lock_killable(void);
extern wait_queue_head_t netdev_unregistering_wq;
-extern struct mutex net_mutex;
+extern struct rw_semaphore pernet_ops_rwsem;
#ifdef CONFIG_PROVE_LOCKING
extern bool lockdep_rtnl_is_held(void);
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1149533aa2fa..9806184bb3d5 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -36,7 +36,18 @@ static inline void mmgrab(struct mm_struct *mm)
atomic_inc(&mm->mm_count);
}
-extern void mmdrop(struct mm_struct *mm);
+extern void __mmdrop(struct mm_struct *mm);
+
+static inline void mmdrop(struct mm_struct *mm)
+{
+ /*
+ * The implicit full barrier implied by atomic_dec_and_test() is
+ * required by the membarrier system call before returning to
+ * user-space, after storing to rq->curr.
+ */
+ if (unlikely(atomic_dec_and_test(&mm->mm_count)))
+ __mmdrop(mm);
+}
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
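
A hedged usage sketch (function name illustrative): the usual
mmgrab()/mmdrop() pairing, which now benefits from mmdrop() being inline on
the common non-zero path:

	static struct mm_struct *example_pin_mm(struct task_struct *tsk)
	{
		struct mm_struct *mm;

		task_lock(tsk);
		mm = tsk->mm;
		if (mm)
			mmgrab(mm);	/* pins mm_count, not the address space */
		task_unlock(tsk);
		return mm;
	}

	/* ... later, on the release path: mmdrop(mm); */
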
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 0dcf4e480ef7..96fe289c4c6e 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/ratelimit.h>
struct key;
@@ -41,6 +42,9 @@ struct user_struct {
defined(CONFIG_NET)
atomic_long_t locked_vm;
#endif
+
+ /* Miscellaneous per-user rate limit */
+ struct ratelimit_state ratelimit;
};
extern int uids_sysfs_init(void);
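
A hedged sketch of how the new per-user field is meant to be consumed (the
helper name is illustrative; interval/burst initialization happens wherever
the user_struct is set up):

	#include <linux/ratelimit.h>
	#include <linux/sched/user.h>

	static bool example_user_allowed(struct user_struct *user)
	{
		/* true while this user is still under its rate limit */
		return __ratelimit(&user->ratelimit);
	}
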
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index dc368b8ce215..11c86fbfeb98 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -4,7 +4,7 @@
*
* Distributed under the terms of the GNU GPL, version 2
*
- * Please see kernel/semaphore.c for documentation of these functions
+ * Please see kernel/locking/semaphore.c for documentation of these functions
*/
#ifndef __LINUX_SEMAPHORE_H
#define __LINUX_SEMAPHORE_H
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index e724d5a3dd80..ebce9e24906a 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
#if IS_ENABLED(CONFIG_SFP)
int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id);
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes);
int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
return PORT_OTHER;
}
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- return PHY_INTERFACE_MODE_NA;
-}
-
static inline void sfp_parse_support(struct sfp_bus *bus,
const struct sfp_eeprom_id *id,
unsigned long *support)
{
}
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ return PHY_INTERFACE_MODE_NA;
+}
+
static inline int sfp_get_module_info(struct sfp_bus *bus,
struct ethtool_modinfo *modinfo)
{
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5ebc0f869720..47082f54ec1f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -466,6 +466,9 @@ struct ubuf_info {
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
+void mm_unaccount_pinned_pages(struct mmpin *mmp);
+
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg);
@@ -3285,8 +3288,7 @@ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
void skb_scrub_packet(struct sk_buff *skb, bool xnet);
-unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
-bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu);
+bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
@@ -3646,7 +3648,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
return true;
}
-/* For small packets <= CHECKSUM_BREAK peform checksum complete directly
+/* For small packets <= CHECKSUM_BREAK perform checksum complete directly
* in checksum_init.
*/
#define CHECKSUM_BREAK 76
@@ -4038,6 +4040,12 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
}
+/* Note: Should be called only if skb_is_gso(skb) is true */
+static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
+}
+
static inline void skb_gso_reset(struct sk_buff *skb)
{
skb_shinfo(skb)->gso_size = 0;
@@ -4045,6 +4053,22 @@ static inline void skb_gso_reset(struct sk_buff *skb)
skb_shinfo(skb)->gso_type = 0;
}
+static inline void skb_increase_gso_size(struct skb_shared_info *shinfo,
+ u16 increment)
+{
+ if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
+ return;
+ shinfo->gso_size += increment;
+}
+
+static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
+ u16 decrement)
+{
+ if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
+ return;
+ shinfo->gso_size -= decrement;
+}
+
void __skb_warn_lro_forwarding(const struct sk_buff *skb);
static inline bool skb_warn_if_lro(const struct sk_buff *skb)
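
A hedged sketch of a caller (illustrative): a path that adds encap_len bytes
of headers per segment would shrink gso_size via the new helper, which
refuses to touch the GSO_BY_FRAGS sentinel:

	static void example_account_encap(struct sk_buff *skb, u16 encap_len)
	{
		if (skb_is_gso(skb))
			skb_decrease_gso_size(skb_shinfo(skb), encap_len);
	}
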
@@ -4104,38 +4128,6 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb)
return !skb->head_frag || skb_cloned(skb);
}
-/**
- * skb_gso_network_seglen - Return length of individual segments of a gso packet
- *
- * @skb: GSO skb
- *
- * skb_gso_network_seglen is used to determine the real size of the
- * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
- *
- * The MAC/L2 header is not accounted for.
- */
-static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
-{
- unsigned int hdr_len = skb_transport_header(skb) -
- skb_network_header(skb);
- return hdr_len + skb_gso_transport_seglen(skb);
-}
-
-/**
- * skb_gso_mac_seglen - Return length of individual segments of a gso packet
- *
- * @skb: GSO skb
- *
- * skb_gso_mac_seglen is used to determine the real size of the
- * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
- * headers (TCP/UDP).
- */
-static inline unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
-{
- unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
- return hdr_len + skb_gso_transport_seglen(skb);
-}
-
/* Local Checksum Offload.
* Compute outer checksum based on the assumption that the
* inner checksum will be offloaded later.
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 9286a5a8c60c..60e01482a9c4 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
#define MSG_EOF MSG_FIN
+#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
@@ -353,4 +354,6 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen
unsigned int flags, struct timespec *timeout);
extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags);
+
+extern struct ns_common *get_net_ns(struct ns_common *ns);
#endif /* _LINUX_SOCKET_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7b6a59f722a3..a1a3f4ed94ce 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -337,8 +337,6 @@ extern void deactivate_file_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
extern void swap_setup(void);
-extern void add_page_to_unevictable_list(struct page *page);
-
extern void lru_cache_add_active_or_unevictable(struct page *page,
struct vm_area_struct *vma);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 0a6c71e0ad01..47f8af22f216 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -364,6 +364,7 @@ struct tty_file_private {
#define TTY_PTY_LOCK 16 /* pty private */
#define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */
#define TTY_HUPPED 18 /* Post driver->hangup() */
+#define TTY_HUPPING 19 /* Hangup in progress */
#define TTY_LDISC_HALTED 22 /* Line discipline is halted */
/* Values for tty->flow_change */
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index 5bdbd9f49395..07ee0f84a46c 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -90,6 +90,28 @@ static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
#endif
}
+static inline unsigned long
+u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+{
+ unsigned long flags = 0;
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ local_irq_save(flags);
+ write_seqcount_begin(&syncp->seq);
+#endif
+ return flags;
+}
+
+static inline void
+u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
+ unsigned long flags)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_end(&syncp->seq);
+ local_irq_restore(flags);
+#endif
+}
+
static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
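
A hedged sketch of the intended use (names illustrative): a writer that can
run in both process and hard-IRQ context brackets its update with the new
variants; on 64-bit or !SMP they compile down to nothing, like the plain
helpers:

	struct example_stats {
		u64			packets;
		struct u64_stats_sync	syncp;
	};

	static void example_stats_inc(struct example_stats *s)
	{
		unsigned long flags;

		flags = u64_stats_update_begin_irqsave(&s->syncp);
		s->packets++;
		u64_stats_update_end_irqrestore(&s->syncp, flags);
	}
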
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index f1fcec2fd5f8..b7a99ce56bc9 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -63,4 +63,7 @@
*/
#define USB_QUIRK_DISCONNECT_SUSPEND BIT(12)
+/* Device needs a pause after every control message. */
+#define USB_QUIRK_DELAY_CTRL_MSG BIT(13)
+
#endif /* __LINUX_USB_QUIRKS_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4a54ef96aff5..0c3301421c57 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -456,7 +456,6 @@ extern int schedule_on_each_cpu(work_func_t func);
int execute_in_process_context(work_func_t fn, struct execute_work *);
extern bool flush_work(struct work_struct *work);
-extern bool cancel_work(struct work_struct *work);
extern bool cancel_work_sync(struct work_struct *work);
extern bool flush_delayed_work(struct delayed_work *dwork);
@@ -465,6 +464,7 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active);
+extern struct work_struct *current_work(void);
extern bool current_is_workqueue_rescuer(void);
extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
extern unsigned int work_busy(struct work_struct *work);
diff --git a/include/media/demux.h b/include/media/demux.h
index c4df6cee48e6..bf00a5a41a90 100644
--- a/include/media/demux.h
+++ b/include/media/demux.h
@@ -117,7 +117,7 @@ struct dmx_ts_feed {
* specified by @filter_value that will be used on the filter
* match logic.
* @filter_mode: Contains a 16 bytes (128 bits) filter mode.
- * @parent: Pointer to struct dmx_section_feed.
+ * @parent: Back-pointer to struct dmx_section_feed.
* @priv: Pointer to private data of the API client.
*
*
@@ -130,8 +130,9 @@ struct dmx_section_filter {
u8 filter_value[DMX_MAX_FILTER_SIZE];
u8 filter_mask[DMX_MAX_FILTER_SIZE];
u8 filter_mode[DMX_MAX_FILTER_SIZE];
- struct dmx_section_feed *parent; /* Back-pointer */
- void *priv; /* Pointer to private data of the API client */
+ struct dmx_section_feed *parent;
+
+ void *priv;
};
/**
@@ -193,6 +194,10 @@ struct dmx_section_feed {
* @buffer2: Pointer to the tail of the filtered TS packets, or NULL.
* @buffer2_length: Length of the TS data in buffer2.
* @source: Indicates which TS feed is the source of the callback.
+ * @buffer_flags: Address where buffer flags are stored. Those are
+ *		used to report discontinuities to users via the DVB
+ *		memory-mapped API, as defined by
+ *		&enum dmx_buffer_flags.
*
* This function callback prototype, provided by the client of the demux API,
* is called from the demux code. The function is only called when filtering
@@ -245,7 +250,8 @@ typedef int (*dmx_ts_cb)(const u8 *buffer1,
size_t buffer1_length,
const u8 *buffer2,
size_t buffer2_length,
- struct dmx_ts_feed *source);
+ struct dmx_ts_feed *source,
+ u32 *buffer_flags);
/**
* typedef dmx_section_cb - DVB demux TS filter callback function prototype
@@ -261,6 +267,10 @@ typedef int (*dmx_ts_cb)(const u8 *buffer1,
* including headers and CRC.
* @source: Indicates which section feed is the source of the
* callback.
+ * @buffer_flags: Address where buffer flags are stored. Those are
+ *		used to report discontinuities to users via the DVB
+ *		memory-mapped API, as defined by
+ *		&enum dmx_buffer_flags.
*
* This function callback prototype, provided by the client of the demux API,
* is called from the demux code. The function is only called when
@@ -286,7 +296,8 @@ typedef int (*dmx_section_cb)(const u8 *buffer1,
size_t buffer1_len,
const u8 *buffer2,
size_t buffer2_len,
- struct dmx_section_filter *source);
+ struct dmx_section_filter *source,
+ u32 *buffer_flags);
/*
* DVB Front-End
diff --git a/include/media/dmxdev.h b/include/media/dmxdev.h
index 2f5cb2c7b6a7..baafa3b8aca4 100644
--- a/include/media/dmxdev.h
+++ b/include/media/dmxdev.h
@@ -163,6 +163,7 @@ struct dmxdev_filter {
* @demux: pointer to &struct dmx_demux.
* @filternum: number of filters.
* @capabilities: demux capabilities as defined by &enum dmx_demux_caps.
+ * @may_do_mmap: flag used to indicate if the device may do mmap.
* @exit: flag to indicate that the demux is being released.
* @dvr_orig_fe: pointer to &struct dmx_frontend.
* @dvr_buffer: embedded &struct dvb_ringbuffer for DVB output.
@@ -180,6 +181,7 @@ struct dmxdev {
int filternum;
int capabilities;
+ unsigned int may_do_mmap:1;
unsigned int exit:1;
#define DMXDEV_CAP_DUPLEX 1
struct dmx_frontend *dvr_orig_fe;
diff --git a/include/media/dvb_demux.h b/include/media/dvb_demux.h
index b07092038f4b..3b6aeca7a49e 100644
--- a/include/media/dvb_demux.h
+++ b/include/media/dvb_demux.h
@@ -115,6 +115,8 @@ struct dvb_demux_filter {
* @pid: PID to be filtered.
* @timeout: feed timeout.
* @filter: pointer to &struct dvb_demux_filter.
+ * @buffer_flags: Buffer flags used to report discontinuities to users via
+ *	the DVB memory-mapped API, as defined by &enum dmx_buffer_flags.
* @ts_type: type of TS, as defined by &enum ts_filter_type.
* @pes_type: type of PES, as defined by &enum dmx_ts_pes.
* @cc: MPEG-TS packet continuity counter
@@ -145,6 +147,8 @@ struct dvb_demux_feed {
ktime_t timeout;
struct dvb_demux_filter *filter;
+ u32 buffer_flags;
+
enum ts_filter_type ts_type;
enum dmx_ts_pes pes_type;
diff --git a/include/media/dvb_vb2.h b/include/media/dvb_vb2.h
index 01d1202d1a55..8cb88452cd6c 100644
--- a/include/media/dvb_vb2.h
+++ b/include/media/dvb_vb2.h
@@ -85,6 +85,12 @@ struct dvb_buffer {
* @nonblocking:
* If different than zero, device is operating on non-blocking
* mode.
+ * @flags: buffer flags as defined by &enum dmx_buffer_flags.
+ * Filled only at &DMX_DQBUF. &DMX_QBUF should zero this field.
+ * @count: monotonic counter for filled buffers. Helps to identify
+ *	data stream losses. Filled only at &DMX_DQBUF. &DMX_QBUF should
+ *	zero this field.
+ *
* @name: name of the device type. Currently, it can either be
* "dvr" or "demux_filter".
*/
@@ -100,10 +106,14 @@ struct dvb_vb2_ctx {
int buf_siz;
int buf_cnt;
int nonblocking;
+
+ enum dmx_buffer_flags flags;
+ u32 count;
+
char name[DVB_VB2_NAME_MAX + 1];
};
-#ifndef DVB_MMAP
+#ifndef CONFIG_DVB_MMAP
static inline int dvb_vb2_init(struct dvb_vb2_ctx *ctx,
const char *name, int non_blocking)
{
@@ -114,7 +124,7 @@ static inline int dvb_vb2_release(struct dvb_vb2_ctx *ctx)
return 0;
};
#define dvb_vb2_is_streaming(ctx) (0)
-#define dvb_vb2_fill_buffer(ctx, file, wait) (0)
+#define dvb_vb2_fill_buffer(ctx, file, wait, flags) (0)
static inline __poll_t dvb_vb2_poll(struct dvb_vb2_ctx *ctx,
struct file *file,
@@ -153,9 +163,13 @@ int dvb_vb2_is_streaming(struct dvb_vb2_ctx *ctx);
* @ctx: control struct for VB2 handler
* @src: place where the data is stored
* @len: number of bytes to be copied from @src
+ * @buffer_flags:
+ *	pointer to buffer flags as defined by &enum dmx_buffer_flags.
+ *	Can be NULL.
*/
int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx,
- const unsigned char *src, int len);
+ const unsigned char *src, int len,
+ enum dmx_buffer_flags *buffer_flags);
/**
* dvb_vb2_poll - Wrapper to vb2_core_streamon() for Digital TV
diff --git a/include/net/Space.h b/include/net/Space.h
index 27fb5c937c4f..9cce0d80d37a 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -20,8 +20,6 @@ struct net_device *cs89x0_probe(int unit);
struct net_device *mvme147lance_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
-struct net_device *mac8390_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
struct net_device *cops_probe(int unit);
struct net_device *ltpc_probe(void);
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 6ed9692f20bd..9e59ebfded62 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -87,13 +87,17 @@ struct tc_action_ops {
struct tcf_result *);
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
void (*cleanup)(struct tc_action *);
- int (*lookup)(struct net *, struct tc_action **, u32);
+ int (*lookup)(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack);
int (*init)(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act, int ovr,
- int bind);
+ int bind, struct netlink_ext_ack *extack);
int (*walk)(struct net *, struct sk_buff *,
- struct netlink_callback *, int, const struct tc_action_ops *);
+ struct netlink_callback *, int,
+ const struct tc_action_ops *,
+ struct netlink_ext_ack *);
void (*stats_update)(struct tc_action *, u64, u32, u64);
+ size_t (*get_fill_size)(const struct tc_action *act);
struct net_device *(*get_dev)(const struct tc_action *a);
};
@@ -137,14 +141,14 @@ static inline void tc_action_net_exit(struct list_head *net_list,
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops);
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack);
int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
int bind);
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats);
-void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est);
void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
@@ -162,10 +166,12 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions);
+ struct list_head *actions, size_t *attr_size,
+ struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind);
+ char *name, int ovr, int bind,
+ struct netlink_ext_ack *extack);
int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index c4185a7b0e90..132e5b95167a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg);
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags);
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 76fb39c272a7..c91bc87931c7 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -318,10 +318,12 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *);
extern ax25_dev *ax25_dev_list;
extern spinlock_t ax25_dev_lock;
+#if IS_ENABLED(CONFIG_AX25)
static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev)
{
return dev->ax25_ptr;
}
+#endif
ax25_dev *ax25_addr_ax25dev(ax25_address *);
void ax25_dev_device_up(struct net_device *);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 81174f9b8d14..fc40843baed3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1147,6 +1147,7 @@ struct cfg80211_tid_stats {
* @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
* @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last
* (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs.
+ * @ack_signal: signal strength (in dBm) of the last ACK frame.
*/
struct station_info {
u64 filled;
@@ -1191,6 +1192,7 @@ struct station_info {
u64 rx_duration;
u8 rx_beacon_signal_avg;
struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
+ s8 ack_signal;
};
#if IS_ENABLED(CONFIG_CFG80211)
@@ -1905,11 +1907,16 @@ struct cfg80211_auth_request {
* @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n)
* @ASSOC_REQ_DISABLE_VHT: Disable VHT
* @ASSOC_REQ_USE_RRM: Declare RRM capability in this association
+ * @CONNECT_REQ_EXTERNAL_AUTH_SUPPORT: User space indicates external
+ * authentication capability. Drivers can offload authentication to
+ * userspace if this flag is set. Only applicable for cfg80211_connect()
+ * request (connect callback).
*/
enum cfg80211_assoc_req_flags {
- ASSOC_REQ_DISABLE_HT = BIT(0),
- ASSOC_REQ_DISABLE_VHT = BIT(1),
- ASSOC_REQ_USE_RRM = BIT(2),
+ ASSOC_REQ_DISABLE_HT = BIT(0),
+ ASSOC_REQ_DISABLE_VHT = BIT(1),
+ ASSOC_REQ_USE_RRM = BIT(2),
+ CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3),
};
/**
@@ -2601,6 +2608,33 @@ struct cfg80211_pmk_conf {
};
/**
+ * struct cfg80211_external_auth_params - Trigger External authentication.
+ *
+ * Commonly used across the external auth request and event interfaces.
+ *
+ * @action: action type / trigger for external authentication. Only significant
+ * for the authentication request event interface (driver to user space).
+ * @bssid: BSSID of the peer with which the authentication has
+ * to happen. Used by both the authentication request event and
+ * authentication response command interface.
+ * @ssid: SSID of the AP. Used by both the authentication request event and
+ * authentication response command interface.
+ * @key_mgmt_suite: AKM suite of the respective authentication. Used by the
+ * authentication request event interface.
+ * @status: status code, %WLAN_STATUS_SUCCESS for successful authentication,
+ * use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you
+ * the real status code for failures. Used only for the authentication
+ * response command interface (user space to driver).
+ */
+struct cfg80211_external_auth_params {
+ enum nl80211_external_auth_action action;
+ u8 bssid[ETH_ALEN] __aligned(2);
+ struct cfg80211_ssid ssid;
+ unsigned int key_mgmt_suite;
+ u16 status;
+};
+
+/**
* struct cfg80211_ops - backend description for wireless configuration
*
* This struct is registered by fullmac card drivers and/or wireless stacks
@@ -2923,6 +2957,9 @@ struct cfg80211_pmk_conf {
* (invoked with the wireless_dev mutex held)
* @del_pmk: delete the previously configured PMK for the given authenticator.
* (invoked with the wireless_dev mutex held)
+ *
+ * @external_auth: indicates result of offloaded authentication processing from
+ * user space
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3216,6 +3253,8 @@ struct cfg80211_ops {
const struct cfg80211_pmk_conf *conf);
int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev,
const u8 *aa);
+ int (*external_auth)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_external_auth_params *params);
};
/*
@@ -3517,6 +3556,35 @@ enum wiphy_vendor_command_flags {
};
/**
+ * enum wiphy_opmode_flag - Station's ht/vht operation mode information flags
+ *
+ * @STA_OPMODE_MAX_BW_CHANGED: Max Bandwidth changed
+ * @STA_OPMODE_SMPS_MODE_CHANGED: SMPS mode changed
+ * @STA_OPMODE_N_SS_CHANGED: max N_SS (number of spatial streams) changed
+ *
+ */
+enum wiphy_opmode_flag {
+ STA_OPMODE_MAX_BW_CHANGED = BIT(0),
+ STA_OPMODE_SMPS_MODE_CHANGED = BIT(1),
+ STA_OPMODE_N_SS_CHANGED = BIT(2),
+};
+
+/**
+ * struct sta_opmode_info - Station's ht/vht operation mode information
+ * @changed: contains value from &enum wiphy_opmode_flag
+ * @smps_mode: New SMPS mode of a station
+ * @bw: new max bandwidth value of a station
+ * @rx_nss: new rx_nss value of a station
+ */
+
+struct sta_opmode_info {
+ u32 changed;
+ u8 smps_mode;
+ u8 bw;
+ u8 rx_nss;
+};
+
+/**
* struct wiphy_vendor_command - vendor command definition
* @info: vendor command identifying information, as used in nl80211
* @flags: flags, see &enum wiphy_vendor_command_flags
@@ -4342,10 +4410,12 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
* of it being pushed into the SKB
* @addr: the device MAC address
* @iftype: the virtual interface type
+ * @data_offset: offset of payload after the 802.11 header
* Return: 0 on success. Non-zero on error.
*/
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype);
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset);
/**
* ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -4357,7 +4427,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
enum nl80211_iftype iftype)
{
- return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype);
+ return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
}
/**
@@ -5685,6 +5755,20 @@ void cfg80211_radar_event(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef, gfp_t gfp);
/**
+ * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event
+ * @dev: network device
+ * @mac: MAC address of a station which opmode got modified
+ * @sta_opmode: station's current opmode value
+ * @gfp: context flags
+ *
+ * Driver should call this function when station's opmode modified via action
+ * frame.
+ */
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+ struct sta_opmode_info *sta_opmode,
+ gfp_t gfp);
+
+/**
* cfg80211_cac_event - Channel availability check (CAC) event
* @netdev: network device
* @chandef: chandef for the current channel
@@ -5758,10 +5842,13 @@ bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
* @addr: the address of the peer
* @cookie: the cookie filled in @probe_client previously
* @acked: indicates whether probe was acked or not
+ * @ack_signal: signal strength (in dBm) of the ACK frame.
+ * @is_valid_ack_signal: indicates the ack_signal is valid or not.
* @gfp: allocation flags
*/
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
- u64 cookie, bool acked, gfp_t gfp);
+ u64 cookie, bool acked, s32 ack_signal,
+ bool is_valid_ack_signal, gfp_t gfp);
/**
* cfg80211_report_obss_beacon - report beacon from other APs
@@ -6202,6 +6289,17 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
/* ethtool helper */
void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
+/**
+ * cfg80211_external_auth_request - userspace request for authentication
+ * @netdev: network device
+ * @params: External authentication parameters
+ * @gfp: allocation flags
+ * Returns: 0 on success, < 0 on error
+ */
+int cfg80211_external_auth_request(struct net_device *netdev,
+ struct cfg80211_external_auth_params *params,
+ gfp_t gfp);
+
/* Logging, debugging and troubleshooting/diagnostic helpers. */
/* wiphy_printk helpers, similar to dev_printk */
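
A hedged driver-side sketch (values illustrative, assuming the companion
nl80211 patch defines NL80211_EXTERNAL_AUTH_START): when firmware wants SAE
handled in user space, the driver forwards a request built from the new
struct:

	#include <net/cfg80211.h>

	static int example_start_external_auth(struct net_device *netdev,
					       const u8 *bssid,
					       const struct cfg80211_ssid *ssid)
	{
		struct cfg80211_external_auth_params params = {
			.action = NL80211_EXTERNAL_AUTH_START,
			.key_mgmt_suite = WLAN_AKM_SUITE_SAE,
		};

		memcpy(params.bssid, bssid, ETH_ALEN);
		params.ssid = *ssid;
		return cfg80211_external_auth_request(netdev, &params,
						      GFP_KERNEL);
	}
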
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6545b03e97f7..e21d8cadd480 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -234,13 +234,9 @@ struct devlink_dpipe_headers {
/**
* struct devlink_resource_ops - resource ops
* @occ_get: get the occupied size
- * @size_validate: validate the size of the resource before update, reload
- * is needed for changes to take place
*/
struct devlink_resource_ops {
u64 (*occ_get)(struct devlink *devlink);
- int (*size_validate)(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack);
};
/**
@@ -257,6 +253,18 @@ struct devlink_resource_size_params {
enum devlink_resource_unit unit;
};
+static inline void
+devlink_resource_size_params_init(struct devlink_resource_size_params *size_params,
+ u64 size_min, u64 size_max,
+ u64 size_granularity,
+ enum devlink_resource_unit unit)
+{
+ size_params->size_min = size_min;
+ size_params->size_max = size_max;
+ size_params->size_granularity = size_granularity;
+ size_params->unit = unit;
+}
+
/**
* struct devlink_resource - devlink resource
* @name: name of the resource
@@ -278,7 +286,7 @@ struct devlink_resource {
u64 size_new;
bool size_valid;
struct devlink_resource *parent;
- struct devlink_resource_size_params *size_params;
+ struct devlink_resource_size_params size_params;
struct list_head list;
struct list_head resource_list;
const struct devlink_resource_ops *resource_ops;
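
A hedged sketch of a registration site (sizes and the EXAMPLE_RESOURCE_ID
constant are illustrative) using the new initializer together with the
reworked devlink_resource_register() signature; a NULL ops pointer assumes
no occupancy getter is needed:

	static int example_register_resource(struct devlink *devlink)
	{
		struct devlink_resource_size_params params;

		devlink_resource_size_params_init(&params, 64, 4096, 64,
						  DEVLINK_RESOURCE_UNIT_ENTRY);
		return devlink_resource_register(devlink, "example", 1024,
						 EXAMPLE_RESOURCE_ID,
						 DEVLINK_RESOURCE_ID_PARENT_TOP,
						 &params, NULL);
	}
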
@@ -398,11 +406,10 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
- struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_size_params *size_params,
const struct devlink_resource_ops *resource_ops);
void devlink_resources_unregister(struct devlink *devlink,
struct devlink_resource *resource);
@@ -552,11 +559,10 @@ devlink_dpipe_match_put(struct sk_buff *skb,
static inline int
devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
- struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_size_params *size_params,
const struct devlink_resource_ops *resource_ops)
{
return 0;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6cb602dd970c..60fb4ec8ba61 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <linux/of.h>
#include <linux/ethtool.h>
+#include <linux/net_tstamp.h>
#include <net/devlink.h>
#include <net/switchdev.h>
@@ -101,6 +102,7 @@ struct dsa_platform_data {
};
struct packet_type;
+struct dsa_switch;
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -357,7 +359,7 @@ struct dsa_switch_ops {
void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
void (*get_ethtool_stats)(struct dsa_switch *ds,
int port, uint64_t *data);
- int (*get_sset_count)(struct dsa_switch *ds);
+ int (*get_sset_count)(struct dsa_switch *ds, int port);
/*
* ethtool Wake-on-LAN
@@ -368,6 +370,12 @@ struct dsa_switch_ops {
struct ethtool_wolinfo *w);
/*
+ * ethtool timestamp info
+ */
+ int (*get_ts_info)(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *ts);
+
+ /*
* Suspend and resume
*/
int (*suspend)(struct dsa_switch *ds);
@@ -469,6 +477,18 @@ struct dsa_switch_ops {
int port, struct net_device *br);
void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
int port, struct net_device *br);
+
+ /*
+ * PTP functionality
+ */
+ int (*port_hwtstamp_get)(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+ int (*port_hwtstamp_set)(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+ bool (*port_txtstamp)(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+ bool (*port_rxtstamp)(struct dsa_switch *ds, int port,
+ struct sk_buff *skb, unsigned int type);
};
struct dsa_switch_driver {
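
A hedged sketch (callback names illustrative, bodies elided) of a switch
driver wiring up the new timestamping hooks:

	static int example_get_ts_info(struct dsa_switch *ds, int port,
				       struct ethtool_ts_info *ts);
	static int example_hwtstamp_get(struct dsa_switch *ds, int port,
					struct ifreq *ifr);
	static int example_hwtstamp_set(struct dsa_switch *ds, int port,
					struct ifreq *ifr);
	static bool example_txtstamp(struct dsa_switch *ds, int port,
				     struct sk_buff *clone, unsigned int type);
	static bool example_rxtstamp(struct dsa_switch *ds, int port,
				     struct sk_buff *skb, unsigned int type);

	static const struct dsa_switch_ops example_ops = {
		.get_ts_info		= example_get_ts_info,
		.port_hwtstamp_get	= example_hwtstamp_get,
		.port_hwtstamp_set	= example_hwtstamp_set,
		.port_txtstamp		= example_txtstamp,
		.port_rxtstamp		= example_rxtstamp,
	};
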
diff --git a/include/net/dst.h b/include/net/dst.h
index c63d2c37f6e9..b3219cd8a5a1 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -356,6 +356,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
* skb_tunnel_rx - prepare skb for rx reinsert
* @skb: buffer
* @dev: tunnel device
+ * @net: netns for packet i/o
*
* After decapsulation, packet is going to re-enter (netif_rx()) our stack,
* so make some cleanups, and perform accounting.
diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index 72fd5067c353..67634675e919 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -54,7 +54,7 @@ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
* local BH must be disabled.
*/
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr);
+ const struct in6_addr *saddr);
/**
* dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
@@ -71,7 +71,7 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
* dst_cache_reset - invalidate the cache contents
* @dst_cache: the cache
*
- * This do not free the cached dst to avoid races and contentions.
+ * This does not free the cached dst to avoid races and contentions.
* the dst will be freed on later cache lookup.
*/
static inline void dst_cache_reset(struct dst_cache *dst_cache)
diff --git a/include/net/ethoc.h b/include/net/ethoc.h
index bb7f467da7fc..29ba069a1d93 100644
--- a/include/net/ethoc.h
+++ b/include/net/ethoc.h
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
};
#endif /* !LINUX_NET_ETHOC_H */
-
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 648caf90ec07..e5cfcfc7dd93 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -26,7 +26,8 @@ struct fib_rule {
u32 table;
u8 action;
u8 l3mdev;
- /* 2 bytes hole, try to use */
+ u8 proto;
+ u8 ip_proto;
u32 target;
__be64 tun_id;
struct fib_rule __rcu *ctarget;
@@ -39,11 +40,14 @@ struct fib_rule {
char iifname[IFNAMSIZ];
char oifname[IFNAMSIZ];
struct fib_kuid_range uid_range;
+ struct fib_rule_port_range sport_range;
+ struct fib_rule_port_range dport_range;
struct rcu_head rcu;
};
struct fib_lookup_arg {
void *lookup_ptr;
+ const void *lookup_data;
void *result;
struct fib_rule *rule;
u32 table;
@@ -108,7 +112,12 @@ struct fib_rule_notifier_info {
[FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
[FRA_GOTO] = { .type = NLA_U32 }, \
[FRA_L3MDEV] = { .type = NLA_U8 }, \
- [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }
+ [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
+ [FRA_PROTOCOL] = { .type = NLA_U8 }, \
+ [FRA_IP_PROTO] = { .type = NLA_U8 }, \
+ [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
static inline void fib_rule_get(struct fib_rule *rule)
{
@@ -142,6 +151,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
return frh->table;
}
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+ return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+ __be16 port)
+{
+ return ntohs(port) >= a->start &&
+ ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+ return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+ a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+ struct fib_rule_port_range *b)
+{
+ return a->start == b->start &&
+ a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+ return rule->ip_proto ||
+ fib_rule_port_range_set(&rule->sport_range) ||
+ fib_rule_port_range_set(&rule->dport_range);
+}
+
struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
struct net *);
void fib_rules_unregister(struct fib_rules_ops *);
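
A hedged sketch tying the new helpers together (function name illustrative):
validate a configured range once, then test packet ports against it on
lookup:

	static int example_match_sport(const struct fib_rule_port_range *range,
				       __be16 sport)
	{
		if (!fib_rule_port_range_valid(range))
			return -EINVAL;
		return fib_rule_port_inrange(range, sport) ? 1 : 0;
	}
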
diff --git a/include/net/flow.h b/include/net/flow.h
index f1624fd5b1d0..8ce21793094e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
fl4->daddr = daddr;
fl4->saddr = saddr;
}
-
+
struct flowi6 {
struct flowi_common __fl_common;
@@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family)
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
-static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
-{
- struct flow_keys keys;
-
- return __get_hash_from_flowi6(fl6, &keys);
-}
-
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
-
-static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
-{
- struct flow_keys keys;
-
- return __get_hash_from_flowi4(fl4, &keys);
-}
-
#endif
diff --git a/include/net/gre.h b/include/net/gre.h
index f90585decbce..797142eee9cd 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs);
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
static inline int gre_calc_hlen(__be16 o_flags)
{
int addend = 4;
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index d91f9e7f4d71..960236fb1681 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -149,6 +149,8 @@ enum ieee80211_radiotap_ampdu_flags {
IEEE80211_RADIOTAP_AMPDU_IS_LAST = 0x0008,
IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR = 0x0010,
IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN = 0x0020,
+ IEEE80211_RADIOTAP_AMPDU_EOF = 0x0040,
+ IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN = 0x0080,
};
/* for IEEE80211_RADIOTAP_VHT */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c1a93ce35e62..b68fea022a82 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
u16 net_header_len;
u16 net_frag_header_len;
u16 sockaddr_len;
- int (*setsockopt)(struct sock *sk, int level, int optname,
+ int (*setsockopt)(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
- int (*getsockopt)(struct sock *sk, int level, int optname,
+ int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPAT
int (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
/** inet_connection_sock - INET connection oriented sock
*
- * @icsk_accept_queue: FIFO of established children
+ * @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
- __u16 rcv_mss; /* MSS used for delayed ACK decisions */
+ __u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
int enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
-
+
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
icsk->icsk_pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
diff --git a/include/net/ip.h b/include/net/ip.h
index 746abff9ce51..36f8f7811093 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -91,6 +91,17 @@ static inline int inet_sdif(struct sk_buff *skb)
return 0;
}
+/* Special input handler for packets caught by router alert option.
+   They are selected only by protocol field, and then processed like
+   local ones; but only if someone wants them! Otherwise, a router
+   not running rsvpd will kill RSVP.
+
+   What to do with them is a user-level problem.
+   I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
+   but the receiver should be clever enough e.g. to forward mtrace requests,
+   sent to a multicast group, to reach the destination designated router.
+ */
+
struct ip_ra_chain {
struct ip_ra_chain __rcu *next;
struct sock *sk;
@@ -101,8 +112,6 @@ struct ip_ra_chain {
struct rcu_head rcu;
};
-extern struct ip_ra_chain __rcu *ip_ra_chain;
-
/* IP flags. */
#define IP_CE 0x8000 /* Flag: "Congestion" */
#define IP_DF 0x4000 /* Flag: "Don't Fragment" */
@@ -186,15 +195,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void ip4_datagram_release_cb(struct sock *sk);
struct ip_reply_arg {
- struct kvec iov[1];
+ struct kvec iov[1];
int flags;
__wsum csum;
int csumoffset; /* u16 offset of csum in iov[0].iov_base */
- /* -1 if not needed */
+ /* -1 if not needed */
int bound_dev_if;
u8 tos;
kuid_t uid;
-};
+};
#define IP_REPLY_ARG_NOSRCCHECK 1
@@ -328,6 +337,13 @@ int ip_decrease_ttl(struct iphdr *iph)
return --iph->ttl;
}
+static inline int ip_mtu_locked(const struct dst_entry *dst)
+{
+ const struct rtable *rt = (const struct rtable *)dst;
+
+ return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
+}
+
static inline
int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
{
@@ -335,7 +351,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
return pmtudisc == IP_PMTUDISC_DO ||
(pmtudisc == IP_PMTUDISC_WANT &&
- !(dst_metric_locked(dst, RTAX_MTU)));
+ !ip_mtu_locked(dst));
}
static inline bool ip_sk_accept_pmtu(const struct sock *sk)
@@ -361,7 +377,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
struct net *net = dev_net(dst->dev);
if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
- dst_metric_locked(dst, RTAX_MTU) ||
+ ip_mtu_locked(dst) ||
!forwarding)
return dst_mtu(dst);
@@ -577,13 +593,13 @@ int ip_frag_mem(struct net *net);
/*
* Functions provided by ip_forward.c
*/
-
+
int ip_forward(struct sk_buff *skb);
-
+
/*
* Functions provided by ip_options.c
*/
-
+
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 34ec321d6a03..5e86fd9dc857 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -350,7 +350,8 @@ struct fib6_table {
typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *,
- struct flowi6 *, int);
+ struct flowi6 *,
+ const struct sk_buff *, int);
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
@@ -364,6 +365,7 @@ struct fib6_entry_notifier_info {
struct fib6_table *fib6_get_table(struct net *net, u32 id);
struct fib6_table *fib6_new_table(struct net *net, u32 id);
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup);
struct fib6_node *fib6_lookup(struct fib6_node *root,
@@ -415,6 +417,24 @@ void fib6_rules_cleanup(void);
bool fib6_rule_default(const struct fib_rule *rule);
int fib6_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv6.fib6_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
#else
static inline int fib6_rules_init(void)
{
@@ -436,5 +456,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net)
{
return 0;
}
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#endif
#endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 27d23a65f3cd..0084013d6bed 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags);
+ struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags);
@@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags);
+ const struct sk_buff *skb, int flags);
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int ifindex, struct flowi6 *fl6, int flags);
+ int ifindex, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
void ip6_route_init_special_entries(void);
int ip6_route_init(void);
@@ -126,8 +128,10 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
}
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int flags);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
@@ -179,6 +183,9 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event);
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
void rt6_multipath_rebalance(struct rt6_info *rt);
+void rt6_uncached_list_add(struct rt6_info *rt);
+void rt6_uncached_list_del(struct rt6_info *rt);
+
static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
{
const struct dst_entry *dst = skb_dst(skb);
@@ -266,4 +273,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
}
+
#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f80524396c06..81d0f2107ff1 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -59,6 +59,7 @@ struct fib_nh_exception {
int fnhe_genid;
__be32 fnhe_daddr;
u32 fnhe_pmtu;
+ bool fnhe_mtu_locked;
__be32 fnhe_gw;
unsigned long fnhe_expires;
struct rtable __rcu *fnhe_rth_input;
@@ -157,7 +158,7 @@ struct fib_result_nl {
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
- int err;
+ int err;
};
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -293,6 +294,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
return 0;
}
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
@@ -341,6 +349,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
int fib4_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib4_rules_seq_read(struct net *net);
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv4.fib_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
+
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
@@ -370,8 +396,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb);
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_path(struct net *net, struct fib_result *res,
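/*
 * Editorial sketch (not part of the diff): expected call pattern for the
 * new fib4_rules_early_flow_dissect() helper. The dissection runs once,
 * only when a FIB rule actually matches on ports or protocol, and the
 * resulting keys can be reused by fib_multipath_hash(). The "example_"
 * name is an assumption for illustration.
 */
static struct flow_keys *example_maybe_dissect(struct net *net,
					       struct sk_buff *skb,
					       struct flowi4 *fl4,
					       struct flow_keys *flkeys)
{
	if (!fib4_rules_early_flow_dissect(net, skb, fl4, flkeys))
		return NULL;	/* no rule needs dissected keys */
	return flkeys;		/* fl4 now carries ports and protocol */
}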
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1f16773cfd76..540a4b4417bf 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -180,8 +180,10 @@ struct tnl_ptk_info {
struct ip_tunnel_net {
struct net_device *fb_tunnel_dev;
+ struct rtnl_link_ops *rtnl_link_ops;
struct hlist_head tunnels[IP_TNL_HASH_SIZE];
struct ip_tunnel __rcu *collect_md_tun;
+ int type;
};
static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
@@ -254,6 +256,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
#ifdef CONFIG_INET
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+ int proto,
+ __be32 daddr, __be32 saddr,
+ __be32 key, __u8 tos, int oif,
+ __u32 mark)
+{
+ memset(fl4, 0, sizeof(*fl4));
+ fl4->flowi4_oif = oif;
+ fl4->daddr = daddr;
+ fl4->saddr = saddr;
+ fl4->flowi4_tos = tos;
+ fl4->flowi4_proto = proto;
+ fl4->fl4_gre_key = key;
+ fl4->flowi4_mark = mark;
+}
+
int ip_tunnel_init(struct net_device *dev);
void ip_tunnel_uninit(struct net_device *dev);
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
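/*
 * Editorial sketch (not part of the diff): ip_tunnel_init_flow() replaces
 * the memset-plus-assignments pattern each tunnel driver used to open-code
 * before routing a tunnel packet. The surrounding function is an
 * assumption for illustration.
 */
static struct rtable *example_tunnel_route(struct net *net,
					   const struct ip_tunnel_key *key,
					   int oif, __u32 mark)
{
	struct flowi4 fl4;

	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst,
			    key->u.ipv4.src, tunnel_id_to_key32(key->tun_id),
			    key->tos, oif, mark);
	return ip_route_output_key(net, &fl4);
}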
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7a98cd583c73..50a6f0ddb878 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -105,8 +105,8 @@
#define IPV6_ADDR_ANY 0x0000U
-#define IPV6_ADDR_UNICAST 0x0001U
-#define IPV6_ADDR_MULTICAST 0x0002U
+#define IPV6_ADDR_UNICAST 0x0001U
+#define IPV6_ADDR_MULTICAST 0x0002U
#define IPV6_ADDR_LOOPBACK 0x0010U
#define IPV6_ADDR_LINKLOCAL 0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
#endif
}
-static inline void ipv6_addr_prefix(struct in6_addr *pfx,
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
const struct in6_addr *addr,
int plen)
{
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
addr[1] = wl;
}
-static inline void ipv6_addr_set(struct in6_addr *addr,
+static inline void ipv6_addr_set(struct in6_addr *addr,
__be32 w1, __be32 w2,
__be32 w3, __be32 w4)
{
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
}
/*
- * we should *never* get to this point since that
+ * we should *never* get to this point since that
* would mean the addrs are equal
*
* However, we do get to it 8) And exactly, when
@@ -888,6 +888,17 @@ static inline int ip6_default_np_autolabel(struct net *net)
}
#endif
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ip6_multipath_hash_policy(const struct net *net)
+{
+ return net->ipv6.sysctl.multipath_hash_policy;
+}
+#else
+static inline int ip6_multipath_hash_policy(const struct net *net)
+{
+ return 0;
+}
+#endif
/*
* Header manipulation
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index d747ef975cd8..33fd9ba7e0e5 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -127,6 +127,17 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int lwtunnel_input(struct sk_buff *skb);
int lwtunnel_xmit(struct sk_buff *skb);
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+ if (lwtunnel_output_redirect(dst->lwtstate)) {
+ dst->lwtstate->orig_output = dst->output;
+ dst->output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(dst->lwtstate)) {
+ dst->lwtstate->orig_input = dst->input;
+ dst->input = lwtunnel_input;
+ }
+}
#else
static inline void lwtstate_free(struct lwtunnel_state *lws)
@@ -158,6 +169,10 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
return false;
}
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+}
+
static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
unsigned int mtu)
{
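/*
 * Editorial sketch (not part of the diff): lwtunnel_set_redirect() above
 * collapses the output/input redirect pattern that route assembly code
 * previously open-coded per address family. Illustrative caller; the
 * function name is an assumption.
 */
static void example_finish_route(struct rtable *rt)
{
	/* One call installs both lwtunnel_output and lwtunnel_input hooks
	 * when the route's lwtstate requests them; the CONFIG_LWTUNNEL=n
	 * stub keeps the caller free of #ifdefs.
	 */
	lwtunnel_set_redirect(&rt->dst);
}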
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 906e90223066..2449982daf75 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6,6 +6,7 @@
* Copyright 2007-2010 Johannes Berg <[email protected]>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -934,6 +935,7 @@ struct ieee80211_tx_info {
u8 ampdu_len;
u8 antenna;
u16 tx_time;
+ bool is_valid_ack_signal;
void *status_driver_data[19 / sizeof(void *)];
} status;
struct {
@@ -1098,6 +1100,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* the first subframe.
* @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
* be done in the hardware.
+ * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
+ * frame
+ * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
*/
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
@@ -1124,6 +1129,8 @@ enum mac80211_rx_flags {
RX_FLAG_MIC_STRIPPED = BIT(21),
RX_FLAG_ALLOW_SAME_PN = BIT(22),
RX_FLAG_ICV_STRIPPED = BIT(23),
+ RX_FLAG_AMPDU_EOF_BIT = BIT(24),
+ RX_FLAG_AMPDU_EOF_BIT_KNOWN = BIT(25),
};
/**
@@ -2063,6 +2070,17 @@ struct ieee80211_txq {
* @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
* TDLS links.
*
+ * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the
+ * mgd_prepare_tx() callback to be called before transmission of a
+ * deauthentication frame in case the association was completed but no
+ * beacon was heard. This is required in multi-channel scenarios, where the
+ * virtual interface might not be given air time for the transmission of
+ * the frame, as it is not synced with the AP/P2P GO yet, and thus the
+ * deauthentication frame might not be transmitted.
+ *
+ * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
+ * support QoS NDP for AP probing - that's most likely a driver bug.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2106,6 +2124,8 @@ enum ieee80211_hw_flags {
IEEE80211_HW_REPORTS_LOW_ACK,
IEEE80211_HW_SUPPORTS_TX_FRAG,
IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
+ IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
+ IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -3350,6 +3370,9 @@ enum ieee80211_reconfig_type {
* management frame prior to having successfully associated to allow the
* driver to give it channel time for the transmission, to get a response
* and to be able to synchronize with the GO.
+ * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211
+ * would also call this function before transmitting a deauthentication
+ * frame in case that no beacon was heard from the AP/P2P GO.
* The callback will be called before each transmission and upon return
* mac80211 will transmit the frame right away.
* The callback is optional and can (should!) sleep.
@@ -4149,7 +4172,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid);
* The TX headroom reserved by mac80211 for its own tx_status functions.
* This is enough for the radiotap header.
*/
-#define IEEE80211_TX_STATUS_HEADROOM 14
+#define IEEE80211_TX_STATUS_HEADROOM ALIGN(14, 4)
/**
* ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index f306b2aa15a4..1ab4f920f109 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -40,7 +40,7 @@ struct net_device;
struct sock;
struct ctl_table_header;
struct net_generic;
-struct sock;
+struct uevent_sock;
struct netns_ipvs;
@@ -59,8 +59,13 @@ struct net {
atomic64_t cookie_gen;
struct list_head list; /* list of network namespaces */
- struct list_head cleanup_list; /* namespaces on death row */
- struct list_head exit_list; /* Use only net_mutex */
+ struct list_head exit_list; /* Linked to invoke pernet exit
+ * methods on a dead net
+ * (pernet_ops_rwsem read locked),
+ * or to unregister pernet ops
+ * (pernet_ops_rwsem write locked).
+ */
+ struct llist_node cleanup_list; /* namespaces on death row */
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
@@ -79,6 +84,8 @@ struct net {
struct sock *rtnl; /* rtnetlink socket */
struct sock *genl_sock;
+ struct uevent_sock *uevent_sock; /* uevent socket */
+
struct list_head dev_base_head;
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
@@ -89,8 +96,9 @@ struct net {
/* core fib_rules */
struct list_head rules_ops;
- struct list_head fib_notifier_ops; /* protected by net_mutex */
-
+ struct list_head fib_notifier_ops; /* Populated by
+ * register_pernet_subsys()
+ */
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
struct netns_mib mib;
@@ -308,6 +316,24 @@ struct net *get_net_ns_by_id(struct net *net, int id);
struct pernet_operations {
struct list_head list;
+ /*
+ * The methods below are called without any exclusive locks.
+ * More than one net may be constructed and destructed
+ * in parallel on several cpus. Every pernet_operations
+ * instance has to keep all other pernet_operations in
+ * mind and introduce locking if they share common resources.
+ *
+ * The only time they are called with an exclusive lock is
+ * from register_pernet_subsys(), unregister_pernet_subsys(),
+ * register_pernet_device() and unregister_pernet_device().
+ *
+ * Exit methods using blocking RCU primitives, such as
+ * synchronize_rcu(), should be implemented via exit_batch.
+ * Then destruction of a group of nets requires a single
+ * synchronize_rcu() for these pernet_operations, instead
+ * of a separate synchronize_rcu() for every net.
+ * Please avoid synchronize_rcu() altogether where possible.
+ */
int (*init)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
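/*
 * Editorial sketch (not part of the diff): a pernet_operations instance
 * following the comment above — per-net state only, with exit_batch so a
 * single synchronize_rcu() covers a whole batch of dying namespaces. All
 * "example_" names and the empty bodies are assumptions for illustration.
 */
static int __net_init example_net_init(struct net *net)
{
	return 0;			/* set up per-net state here */
}

static void __net_exit example_net_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	synchronize_rcu();		/* once for the whole batch */
	list_for_each_entry(net, net_exit_list, exit_list)
		;			/* tear down per-net state here */
}

static struct pernet_operations example_net_ops = {
	.init		= example_net_init,
	.exit_batch	= example_net_exit_batch,
};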
diff --git a/include/net/netevent.h b/include/net/netevent.h
index 40e7bab68490..d9918261701c 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -26,7 +26,8 @@ enum netevent_notif_type {
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
- NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
+ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+ NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
};
int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 44668c29701a..8491bc9c86b1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -49,9 +49,12 @@ struct netns_ipv4 {
#endif
struct ipv4_devconf *devconf_all;
struct ipv4_devconf *devconf_dflt;
+ struct ip_ra_chain __rcu *ra_chain;
+ struct mutex ra_mutex;
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
bool fib_has_custom_rules;
+ unsigned int fib_rules_require_fldissect;
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
#endif
@@ -167,6 +170,9 @@ struct netns_ipv4 {
atomic_t tfo_active_disable_times;
unsigned long tfo_active_disable_stamp;
+ int sysctl_udp_wmem_min;
+ int sysctl_udp_rmem_min;
+
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc4569cb8..c29f09cfc9d7 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
+ int multipath_hash_policy;
int flowlabel_consistency;
int auto_flowlabels;
int icmpv6_time;
@@ -71,7 +72,8 @@ struct netns_ipv6 {
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- bool fib6_has_custom_rules;
+ unsigned int fib6_rules_require_fldissect;
+ bool fib6_has_custom_rules;
struct rt6_info *ip6_prohibit_entry;
struct rt6_info *ip6_blk_hole_entry;
struct fib6_table *fib6_local_tbl;
@@ -84,7 +86,7 @@ struct netns_ipv6 {
struct sock *mc_autojoin_sk;
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
- struct mr6_table *mrt6;
+ struct mr_table *mrt6;
#else
struct list_head mr6_tables;
struct fib_rules_ops *mr6_rules_ops;
@@ -94,6 +96,8 @@ struct netns_ipv6 {
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
struct fib_notifier_ops *notifier_ops;
+ struct fib_notifier_ops *ip6mr_notifier_ops;
+ unsigned int ipmr_seq; /* protected by rtnl_mutex */
struct {
struct hlist_head head;
spinlock_t lock;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 87406252f0a3..e828d31be5da 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -806,6 +806,7 @@ enum tc_prio_command {
TC_PRIO_REPLACE,
TC_PRIO_DESTROY,
TC_PRIO_STATS,
+ TC_PRIO_GRAFT,
};
struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
struct gnet_stats_queue *qstats;
};
+struct tc_prio_qopt_offload_graft_params {
+ u8 band;
+ u32 child_handle;
+};
+
struct tc_prio_qopt_offload {
enum tc_prio_command command;
u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
union {
struct tc_prio_qopt_offload_params replace_params;
struct tc_qopt_offload_stats stats;
+ struct tc_prio_qopt_offload_graft_params graft_params;
};
};
+
#endif
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index ebc5a2ed8631..f83cacce3308 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -78,7 +78,7 @@ struct regulatory_request {
int wiphy_idx;
enum nl80211_reg_initiator initiator;
enum nl80211_user_reg_hint_type user_reg_hint_type;
- char alpha2[2];
+ char alpha2[3];
enum nl80211_dfs_regions dfs_region;
bool intersect;
bool processed;
diff --git a/include/net/route.h b/include/net/route.h
index 1eb9ce470e25..dbb032d5921b 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -63,9 +63,8 @@ struct rtable {
__be32 rt_gateway;
/* Miscellaneous cached information */
- u32 rt_pmtu;
-
- u32 rt_table_id;
+ u32 rt_mtu_locked:1,
+ rt_pmtu:31;
struct list_head rt_uncached;
struct uncached_list *rt_uncached_list;
@@ -227,6 +226,9 @@ struct in_ifaddr;
void fib_add_ifaddr(struct in_ifaddr *);
void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
+void rt_add_uncached_list(struct rtable *rt);
+void rt_del_uncached_list(struct rtable *rt);
+
static inline void ip_rt_put(struct rtable *rt)
{
/* dst_release() accepts a NULL parameter.
diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h
new file mode 100644
index 000000000000..040f07b47f1f
--- /dev/null
+++ b/include/net/rsi_91x.h
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) 2017 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __RSI_HEADER_H__
+#define __RSI_HEADER_H__
+
+#include <linux/skbuff.h>
+
+/* HAL queue information */
+#define RSI_COEX_Q 0x0
+#define RSI_BT_Q 0x2
+#define RSI_WLAN_Q 0x3
+#define RSI_WIFI_MGMT_Q 0x4
+#define RSI_WIFI_DATA_Q 0x5
+#define RSI_BT_MGMT_Q 0x6
+#define RSI_BT_DATA_Q 0x7
+
+enum rsi_coex_queues {
+ RSI_COEX_Q_INVALID = -1,
+ RSI_COEX_Q_COMMON = 0,
+ RSI_COEX_Q_BT,
+ RSI_COEX_Q_WLAN
+};
+
+enum rsi_host_intf {
+ RSI_HOST_INTF_SDIO = 0,
+ RSI_HOST_INTF_USB
+};
+
+struct rsi_proto_ops {
+ int (*coex_send_pkt)(void *priv, struct sk_buff *skb, u8 hal_queue);
+ enum rsi_host_intf (*get_host_intf)(void *priv);
+ void (*set_bt_context)(void *priv, void *context);
+};
+
+struct rsi_mod_ops {
+ int (*attach)(void *priv, struct rsi_proto_ops *ops);
+ void (*detach)(void *priv);
+ int (*recv_pkt)(void *priv, const u8 *msg);
+};
+
+extern const struct rsi_mod_ops rsi_bt_ops;
+#endif
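/*
 * Editorial sketch (not part of the diff): shape of a coex module that
 * would plug into the interface declared above. All "example_" names and
 * bodies are assumptions for illustration.
 */
static struct rsi_proto_ops *example_proto_ops;

static int example_bt_attach(void *priv, struct rsi_proto_ops *ops)
{
	/* later: ops->coex_send_pkt(priv, skb, RSI_BT_Q) */
	example_proto_ops = ops;
	return 0;
}

static void example_bt_detach(void *priv)
{
	example_proto_ops = NULL;
}

static int example_bt_recv_pkt(void *priv, const u8 *msg)
{
	return 0;	/* parse an inbound BT HAL message here */
}

static const struct rsi_mod_ops example_bt_ops = {
	.attach   = example_bt_attach,
	.detach   = example_bt_detach,
	.recv_pkt = example_bt_recv_pkt,
};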
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e2ab13687fb9..493e311bbe93 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
return false;
}
-/* Reset all TX qdiscs greater then index of a device. */
+/* Reset all TX qdiscs of a device with index greater than i. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
@@ -824,6 +824,16 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
*to_free = skb;
}
+static inline void __qdisc_drop_all(struct sk_buff *skb,
+ struct sk_buff **to_free)
+{
+ if (skb->prev)
+ skb->prev->next = *to_free;
+ else
+ skb->next = *to_free;
+ *to_free = skb;
+}
+
static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
struct qdisc_skb_head *qh,
struct sk_buff **to_free)
@@ -956,6 +966,15 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_DROP;
}
+static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ __qdisc_drop_all(skb, to_free);
+ qdisc_qstats_drop(sch);
+
+ return NET_XMIT_DROP;
+}
+
/* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how
long it will take to send a packet given its size.
*/
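/*
 * Editorial sketch (not part of the diff): how an enqueue path that builds
 * GSO segment lists (netem-style qdiscs) might use the new
 * qdisc_drop_all() to free a whole segment chain instead of one skb.
 * Illustrative only; the function name is an assumption.
 */
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			   struct sk_buff **to_free)
{
	if (unlikely(sch->q.qlen >= sch->limit))
		/* skb may head a segment list linked via skb->prev/next;
		 * chain every segment onto *to_free, not just this one.
		 */
		return qdisc_drop_all(skb, sch, to_free);

	return NET_XMIT_SUCCESS;
}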
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index e5c57d0a082d..687e7f80037d 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -62,8 +62,10 @@ struct sctp_auth_bytes {
/* Definition for a shared key, whether endpoint or association */
struct sctp_shared_key {
struct list_head key_list;
- __u16 key_id;
struct sctp_auth_bytes *key;
+ refcount_t refcnt;
+ __u16 key_id;
+ __u8 deactivated;
};
#define key_for_each(__key, __list_head) \
@@ -103,21 +105,22 @@ int sctp_auth_send_cid(enum sctp_cid chunk,
int sctp_auth_recv_cid(enum sctp_cid chunk,
const struct sctp_association *asoc);
void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
- struct sk_buff *skb,
- struct sctp_auth_chunk *auth, gfp_t gfp);
+ struct sk_buff *skb, struct sctp_auth_chunk *auth,
+ struct sctp_shared_key *ep_key, gfp_t gfp);
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key);
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key);
/* API Helpers */
int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id);
int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
struct sctp_hmacalgo *hmacs);
-int sctp_auth_set_key(struct sctp_endpoint *ep,
- struct sctp_association *asoc,
+int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc,
struct sctp_authkey *auth_key);
int sctp_auth_set_active_key(struct sctp_endpoint *ep,
- struct sctp_association *asoc,
- __u16 key_id);
+ struct sctp_association *asoc, __u16 key_id);
int sctp_auth_del_key_id(struct sctp_endpoint *ep,
- struct sctp_association *asoc,
- __u16 key_id);
+ struct sctp_association *asoc, __u16 key_id);
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+ struct sctp_association *asoc, __u16 key_id);
#endif
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index b55c6a48a206..6640f84fe536 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -100,6 +100,7 @@ enum sctp_verb {
SCTP_CMD_SET_SK_ERR, /* Set sk_err */
SCTP_CMD_ASSOC_CHANGE, /* generate and send assoc_change event */
SCTP_CMD_ADAPTATION_IND, /* generate and send adaptation event */
+ SCTP_CMD_PEER_NO_AUTH, /* generate and send authentication event */
SCTP_CMD_ASSOC_SHKEY, /* generate the association shared keys */
SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */
SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index f7ae6b0a21d0..72c5b8fc3232 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -180,14 +180,7 @@ struct sctp_transport *sctp_epaddr_lookup_transport(
/*
* sctp/proc.c
*/
-int sctp_snmp_proc_init(struct net *net);
-void sctp_snmp_proc_exit(struct net *net);
-int sctp_eps_proc_init(struct net *net);
-void sctp_eps_proc_exit(struct net *net);
-int sctp_assocs_proc_init(struct net *net);
-void sctp_assocs_proc_exit(struct net *net);
-int sctp_remaddr_proc_init(struct net *net);
-void sctp_remaddr_proc_exit(struct net *net);
+int __net_init sctp_proc_init(struct net *net);
/*
* sctp/offload.c
@@ -318,7 +311,6 @@ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0)
{.label= #name, .counter= &sctp_dbg_objcnt_## name}
void sctp_dbg_objcnt_init(struct net *);
-void sctp_dbg_objcnt_exit(struct net *);
#else
@@ -326,7 +318,6 @@ void sctp_dbg_objcnt_exit(struct net *);
#define SCTP_DBG_OBJCNT_DEC(name)
static inline void sctp_dbg_objcnt_init(struct net *net) { return; }
-static inline void sctp_dbg_objcnt_exit(struct net *net) { return; }
#endif /* CONFIG_SCTP_DBG_OBJCOUNT */
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2883c43c5258..2d0e782c9055 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -263,7 +263,8 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
__u32 new_cum_tsn, size_t nstreams,
struct sctp_fwdtsn_skip *skiplist);
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc);
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+ __u16 key_id);
struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc,
__u16 stream_num, __be16 *stream_list,
bool out, bool in);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 03e92dda1813..c63249ea34c3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -577,8 +577,12 @@ struct sctp_chunk {
/* This points to the sk_buff containing the actual data. */
struct sk_buff *skb;
- /* In case of GSO packets, this will store the head one */
- struct sk_buff *head_skb;
+ union {
+ /* In case of GSO packets, this will store the head one */
+ struct sk_buff *head_skb;
+ /* In case of auth enabled, this will point to the shkey */
+ struct sctp_shared_key *shkey;
+ };
/* These are the SCTP headers by reverse order in a packet.
* Note that some of these may happen more than once. In that
@@ -1337,12 +1341,12 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr,
struct sctp_transport **);
-int sctp_endpoint_is_peeled_off(struct sctp_endpoint *,
- const union sctp_addr *);
+bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
+ const union sctp_addr *paddr);
struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *,
struct net *, const union sctp_addr *);
-int sctp_has_association(struct net *net, const union sctp_addr *laddr,
- const union sctp_addr *paddr);
+bool sctp_has_association(struct net *net, const union sctp_addr *laddr,
+ const union sctp_addr *paddr);
int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
@@ -1995,6 +1999,7 @@ struct sctp_association {
* The currently generated association shared key (secret)
*/
struct sctp_auth_bytes *asoc_shared_key;
+ struct sctp_shared_key *shkey;
/* SCTP AUTH: hmac id of the first peer requested algorithm
* that we support.
@@ -2112,6 +2117,9 @@ struct sctp_cmsgs {
struct sctp_initmsg *init;
struct sctp_sndrcvinfo *srinfo;
struct sctp_sndinfo *sinfo;
+ struct sctp_prinfo *prinfo;
+ struct sctp_authinfo *authinfo;
+ struct msghdr *addrs_msg;
};
/* Structure for tracking memory objects */
diff --git a/include/net/sock.h b/include/net/sock.h
index 3aa7b7d6e6c7..709311132d4c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -417,6 +417,7 @@ struct sock {
struct page_frag sk_frag;
netdev_features_t sk_route_caps;
netdev_features_t sk_route_nocaps;
+ netdev_features_t sk_route_forced_caps;
int sk_gso_type;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
@@ -1137,6 +1138,7 @@ struct proto {
int proto_register(struct proto *prot, int alloc_slab);
void proto_unregister(struct proto *prot);
+int sock_load_diag_module(int family, int protocol);
#ifdef SOCK_REFCNT_DEBUG
static inline void sk_refcnt_debug_inc(struct sock *sk)
@@ -1862,15 +1864,6 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
sk->sk_route_caps &= ~flags;
}
-static inline bool sk_check_csum_caps(struct sock *sk)
-{
- return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
- (sk->sk_family == PF_INET &&
- (sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
- (sk->sk_family == PF_INET6 &&
- (sk->sk_route_caps & NETIF_F_IPV6_CSUM));
-}
-
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, char *to,
int copy, int offset)
@@ -2149,6 +2142,10 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr, unsigned int *sg_size,
+ int first_coalesce);
+
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e3fc667f9ac2..9c9b3768b350 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
const struct tcphdr *th);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req, bool fastopen);
+ struct request_sock *req, bool fastopen,
+ bool *lost_race);
int tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
@@ -510,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
#endif
/* tcp_output.c */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -980,8 +979,8 @@ struct tcp_congestion_ops {
u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* suggest number of segments for each skb to transmit (optional) */
- u32 (*tso_segs_goal)(struct sock *sk);
+ /* override sysctl_tcp_min_tso_segs */
+ u32 (*min_tso_segs)(struct sock *sk);
/* returns the multiplier used in tcp_sndbuf_expand (optional) */
u32 (*sndbuf_expand)(struct sock *sk);
/* call when packets are delivered to update cwnd and pacing rate,
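/*
 * Editorial sketch (not part of the diff): a congestion-control module
 * using the renamed hook. Where tso_segs_goal() returned a complete
 * per-skb segment goal, min_tso_segs() only overrides the
 * sysctl_tcp_min_tso_segs floor; the autosizing itself stays in core TCP.
 * The "example" names are assumptions.
 */
static u32 example_min_tso_segs(struct sock *sk)
{
	return 2;	/* never build skbs smaller than two segments */
}

static struct tcp_congestion_ops example_cc_ops __read_mostly = {
	.name		= "example",
	.min_tso_segs	= example_min_tso_segs,
	/* mandatory ssthresh/cong_avoid/undo_cwnd hooks elided */
};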
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index 50e78a74d0df..2875e169d744 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -32,21 +32,21 @@ enum {
#define TCP_STATE_MASK 0xF
-#define TCP_ACTION_FIN (1 << 7)
+#define TCP_ACTION_FIN (1 << TCP_CLOSE)
enum {
- TCPF_ESTABLISHED = (1 << 1),
- TCPF_SYN_SENT = (1 << 2),
- TCPF_SYN_RECV = (1 << 3),
- TCPF_FIN_WAIT1 = (1 << 4),
- TCPF_FIN_WAIT2 = (1 << 5),
- TCPF_TIME_WAIT = (1 << 6),
- TCPF_CLOSE = (1 << 7),
- TCPF_CLOSE_WAIT = (1 << 8),
- TCPF_LAST_ACK = (1 << 9),
- TCPF_LISTEN = (1 << 10),
- TCPF_CLOSING = (1 << 11),
- TCPF_NEW_SYN_RECV = (1 << 12),
+ TCPF_ESTABLISHED = (1 << TCP_ESTABLISHED),
+ TCPF_SYN_SENT = (1 << TCP_SYN_SENT),
+ TCPF_SYN_RECV = (1 << TCP_SYN_RECV),
+ TCPF_FIN_WAIT1 = (1 << TCP_FIN_WAIT1),
+ TCPF_FIN_WAIT2 = (1 << TCP_FIN_WAIT2),
+ TCPF_TIME_WAIT = (1 << TCP_TIME_WAIT),
+ TCPF_CLOSE = (1 << TCP_CLOSE),
+ TCPF_CLOSE_WAIT = (1 << TCP_CLOSE_WAIT),
+ TCPF_LAST_ACK = (1 << TCP_LAST_ACK),
+ TCPF_LISTEN = (1 << TCP_LISTEN),
+ TCPF_CLOSING = (1 << TCP_CLOSING),
+ TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
};
#endif /* _LINUX_TCP_STATES_H */
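/*
 * Editorial note as code (not part of the diff): deriving each TCPF_* flag
 * from its TCP_* state number keeps the two enums in lockstep, so bitmask
 * membership tests like the sketch below cannot silently drift if a state
 * is ever inserted or renumbered.
 */
static inline bool example_sk_in_closing_states(const struct sock *sk)
{
	return (1 << sk->sk_state) &
	       (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_LAST_ACK);
}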
diff --git a/include/net/tls.h b/include/net/tls.h
index 4913430ab807..437a746300bf 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -40,6 +40,7 @@
#include <linux/socket.h>
#include <linux/tcp.h>
#include <net/tcp.h>
+#include <net/strparser.h>
#include <uapi/linux/tls.h>
@@ -58,8 +59,18 @@
struct tls_sw_context {
struct crypto_aead *aead_send;
+ struct crypto_aead *aead_recv;
struct crypto_wait async_wait;
+ /* Receive context */
+ struct strparser strp;
+ void (*saved_data_ready)(struct sock *sk);
+ unsigned int (*sk_poll)(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait);
+ struct sk_buff *recv_pkt;
+ u8 control;
+ bool decrypted;
+
/* Sending context */
char aad_space[TLS_AAD_SPACE_SIZE];
@@ -81,23 +92,32 @@ enum {
TLS_PENDING_CLOSED_RECORD
};
+struct cipher_context {
+ u16 prepend_size;
+ u16 tag_size;
+ u16 overhead_size;
+ u16 iv_size;
+ char *iv;
+ u16 rec_seq_size;
+ char *rec_seq;
+};
+
struct tls_context {
union {
struct tls_crypto_info crypto_send;
struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128;
};
+ union {
+ struct tls_crypto_info crypto_recv;
+ struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128;
+ };
void *priv_ctx;
- u8 tx_conf:2;
+ u8 conf:2;
- u16 prepend_size;
- u16 tag_size;
- u16 overhead_size;
- u16 iv_size;
- char *iv;
- u16 rec_seq_size;
- char *rec_seq;
+ struct cipher_context tx;
+ struct cipher_context rx;
struct scatterlist *partially_sent_record;
u16 partially_sent_offset;
@@ -124,12 +144,19 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
unsigned int optlen);
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx);
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_sw_close(struct sock *sk, long timeout);
-void tls_sw_free_tx_resources(struct sock *sk);
+void tls_sw_free_resources(struct sock *sk);
+int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int nonblock, int flags, int *addr_len);
+unsigned int tls_sw_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait);
+ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags);
void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
void tls_icsk_clean_acked(struct sock *sk);
@@ -170,9 +197,9 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
return tls_ctx->pending_open_record_frags;
}
-static inline void tls_err_abort(struct sock *sk)
+static inline void tls_err_abort(struct sock *sk, int err)
{
- sk->sk_err = EBADMSG;
+ sk->sk_err = err;
sk->sk_error_report(sk);
}
@@ -190,10 +217,10 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
}
static inline void tls_advance_record_sn(struct sock *sk,
- struct tls_context *ctx)
+ struct cipher_context *ctx)
{
if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size))
- tls_err_abort(sk);
+ tls_err_abort(sk, EBADMSG);
tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
ctx->iv_size);
}
@@ -203,9 +230,9 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
size_t plaintext_len,
unsigned char record_type)
{
- size_t pkt_len, iv_size = ctx->iv_size;
+ size_t pkt_len, iv_size = ctx->tx.iv_size;
- pkt_len = plaintext_len + iv_size + ctx->tag_size;
+ pkt_len = plaintext_len + iv_size + ctx->tx.tag_size;
/* we cover nonce explicit here as well, so buf should be of
* size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
@@ -217,7 +244,7 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
buf[3] = pkt_len >> 8;
buf[4] = pkt_len & 0xFF;
memcpy(buf + TLS_NONCE_OFFSET,
- ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
+ ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
}
static inline void tls_make_aad(char *buf,
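/*
 * Editorial sketch (not part of the diff): with the per-direction state
 * split into struct cipher_context, record bookkeeping now names the
 * direction explicitly. The caller below is an assumption for
 * illustration.
 */
static void example_tls_tx_record_done(struct sock *sk,
				       struct tls_context *tls_ctx)
{
	/* Bump the TX sequence/IV; an RX path would pass &tls_ctx->rx */
	tls_advance_record_sn(sk, &tls_ctx->tx);
}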
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 81bdbf97319b..9185e45b997f 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -64,6 +64,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
UDP_SKB_CB(skb)->cscov = cscov;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_valid = 0;
}
return 0;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7d2077665c0b..aa027ba1d032 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
static inline void xfrm_sk_free_policy(struct sock *sk) {}
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{
- return 1;
-}
+{
+ return 1;
+}
static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
{
if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
(ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
- ipv6_addr_any((struct in6_addr *)saddr) ||
+ ipv6_addr_any((struct in6_addr *)saddr) ||
ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
return 1;
return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
return -ENOPROTOOPT;
-}
+}
static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 73b2387e3f74..6eb174753acf 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1537,10 +1537,6 @@ struct ib_xrcd {
struct mutex tgt_qp_mutex;
struct list_head tgt_qp_list;
- /*
- * Implementation details of the RDMA core, don't use in drivers:
- */
- struct rdma_restrack_entry res;
};
struct ib_ah {
@@ -2126,8 +2122,8 @@ struct ib_device {
* net device of device @device at port @port_num or NULL if such
* a net device doesn't exist. The vendor driver should call dev_hold
* on this net device. The HW vendor's device driver must guarantee
- * that this function returns NULL before the net device reaches
- * NETDEV_UNREGISTER_FINAL state.
+ * that this function returns NULL before the net device has finished
+ * NETDEV_UNREGISTER state.
*/
struct net_device *(*get_netdev)(struct ib_device *device,
u8 port_num);
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index c2d81167c858..2cdf8dcf4bdc 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -29,10 +29,6 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_QP,
/**
- * @RDMA_RESTRACK_XRCD: XRC domain (XRCD)
- */
- RDMA_RESTRACK_XRCD,
- /**
* @RDMA_RESTRACK_MAX: Last entry, used for array declarations
*/
RDMA_RESTRACK_MAX
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 6da44079aa58..38287d9d23a1 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -276,10 +276,7 @@ struct uverbs_object_tree_def {
*/
struct uverbs_ptr_attr {
- union {
- u64 data;
- void __user *ptr;
- };
+ u64 data;
u16 len;
/* Combination of bits from enum UVERBS_ATTR_F_XXXX */
u16 flags;
@@ -351,38 +348,60 @@ static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr
}
static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
- size_t idx, const void *from)
+ size_t idx, const void *from, size_t size)
{
const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
u16 flags;
+ size_t min_size;
if (IS_ERR(attr))
return PTR_ERR(attr);
+ min_size = min_t(size_t, attr->ptr_attr.len, size);
+ if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
+ return -EFAULT;
+
flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT;
- return (!copy_to_user(attr->ptr_attr.ptr, from, attr->ptr_attr.len) &&
- !put_user(flags, &attr->uattr->flags)) ? 0 : -EFAULT;
+ if (put_user(flags, &attr->uattr->flags))
+ return -EFAULT;
+
+ return 0;
}
-static inline int _uverbs_copy_from(void *to, size_t to_size,
+static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
+{
+ return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
+}
+
+static inline int _uverbs_copy_from(void *to,
const struct uverbs_attr_bundle *attrs_bundle,
- size_t idx)
+ size_t idx,
+ size_t size)
{
const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
if (IS_ERR(attr))
return PTR_ERR(attr);
- if (to_size <= sizeof(((struct ib_uverbs_attr *)0)->data))
+ /*
+ * Validation ensures attr->ptr_attr.len >= size. If the caller is
+ * using UVERBS_ATTR_SPEC_F_MIN_SZ then it must call copy_from with
+ * the right size.
+ */
+ if (unlikely(size < attr->ptr_attr.len))
+ return -EINVAL;
+
+ if (uverbs_attr_ptr_is_inline(attr))
memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len);
- else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len))
+ else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data),
+ attr->ptr_attr.len))
return -EFAULT;
return 0;
}
#define uverbs_copy_from(to, attrs_bundle, idx) \
- _uverbs_copy_from(to, sizeof(*(to)), attrs_bundle, idx)
+ _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to))
/* =================================================
* Definitions -> Specs infrastructure
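/*
 * Editorial sketch (not part of the diff): the union of a kernel value and
 * a user pointer is gone; .data now holds the payload itself when it fits
 * in 8 bytes, otherwise a user pointer consumed via u64_to_user_ptr().
 * The helper below is an assumption for illustration.
 */
static int example_read_u32_attr(const struct uverbs_attr_bundle *bundle,
				 size_t idx, u32 *val)
{
	/* sizeof(*val) <= sizeof(u64), so uverbs_attr_ptr_is_inline() is
	 * true and the copy is served from the inline .data word without
	 * touching user memory.
	 */
	return uverbs_copy_from(val, bundle, idx);
}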
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index d8d4a902a88d..2280b2351739 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -68,6 +68,9 @@ struct scsi_cmnd {
struct list_head list; /* scsi_cmnd participates in queue lists */
struct list_head eh_entry; /* entry for the host eh_cmd_q */
struct delayed_work abort_work;
+
+ struct rcu_head rcu;
+
int eh_eflags; /* Used by error handler */
/*
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 1a1df0d21ee3..a8b7bf879ced 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -571,8 +571,6 @@ struct Scsi_Host {
struct blk_mq_tag_set tag_set;
};
- struct rcu_head rcu;
-
atomic_t host_busy; /* commands actually active on low-level */
atomic_t host_blocked;
diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h
index c2d1b15da136..a91f25151a5b 100644
--- a/include/soc/arc/mcip.h
+++ b/include/soc/arc/mcip.h
@@ -15,6 +15,7 @@
#define ARC_REG_MCIP_BCR 0x0d0
#define ARC_REG_MCIP_IDU_BCR 0x0D5
+#define ARC_REG_GFRC_BUILD 0x0D6
#define ARC_REG_MCIP_CMD 0x600
#define ARC_REG_MCIP_WDATA 0x601
#define ARC_REG_MCIP_READBACK 0x602
@@ -36,10 +37,14 @@ struct mcip_cmd {
#define CMD_SEMA_RELEASE 0x12
#define CMD_DEBUG_SET_MASK 0x34
+#define CMD_DEBUG_READ_MASK 0x35
#define CMD_DEBUG_SET_SELECT 0x36
+#define CMD_DEBUG_READ_SELECT 0x37
#define CMD_GFRC_READ_LO 0x42
#define CMD_GFRC_READ_HI 0x43
+#define CMD_GFRC_SET_CORE 0x47
+#define CMD_GFRC_READ_CORE 0x48
#define CMD_IDU_ENABLE 0x71
#define CMD_IDU_DISABLE 0x72
diff --git a/include/sound/ac97/regs.h b/include/sound/ac97/regs.h
index 4bb86d379bd5..9a4fa0c3264a 100644
--- a/include/sound/ac97/regs.h
+++ b/include/sound/ac97/regs.h
@@ -31,7 +31,7 @@
#define AC97_HEADPHONE 0x04 /* Headphone Volume (optional) */
#define AC97_MASTER_MONO 0x06 /* Master Volume Mono (optional) */
#define AC97_MASTER_TONE 0x08 /* Master Tone (Bass & Treble) (optional) */
-#define AC97_PC_BEEP 0x0a /* PC Beep Volume (optinal) */
+#define AC97_PC_BEEP 0x0a /* PC Beep Volume (optional) */
#define AC97_PHONE 0x0c /* Phone Volume (optional) */
#define AC97_MIC 0x0e /* MIC Volume */
#define AC97_LINE 0x10 /* Line In Volume */
diff --git a/include/trace/events/mmc.h b/include/trace/events/mmc.h
index 200f731be557..7b706ff21335 100644
--- a/include/trace/events/mmc.h
+++ b/include/trace/events/mmc.h
@@ -86,8 +86,8 @@ TRACE_EVENT(mmc_request_start,
__entry->stop_flags, __entry->stop_retries,
__entry->sbc_opcode, __entry->sbc_arg,
__entry->sbc_flags, __entry->sbc_retries,
- __entry->blocks, __entry->blk_addr,
- __entry->blksz, __entry->data_flags, __entry->tag,
+ __entry->blocks, __entry->blksz,
+ __entry->blk_addr, __entry->data_flags, __entry->tag,
__entry->can_retune, __entry->doing_retune,
__entry->retune_now, __entry->need_retune,
__entry->hold_retune, __entry->retune_period)
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h
index b8adf05c534e..7dd8f34c37df 100644
--- a/include/trace/events/xen.h
+++ b/include/trace/events/xen.h
@@ -368,7 +368,7 @@ TRACE_EVENT(xen_mmu_flush_tlb,
TP_printk("%s", "")
);
-TRACE_EVENT(xen_mmu_flush_tlb_single,
+TRACE_EVENT(xen_mmu_flush_tlb_one_user,
TP_PROTO(unsigned long addr),
TP_ARGS(addr),
TP_STRUCT__entry(
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 85dc965afd89..99c902e460c2 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -102,13 +102,13 @@ typedef struct siginfo {
short _addr_lsb; /* LSB of the reported address */
/* used when si_code=SEGV_BNDERR */
struct {
- short _dummy_bnd;
+ void *_dummy_bnd;
void __user *_lower;
void __user *_upper;
} _addr_bnd;
/* used when si_code=SEGV_PKUERR */
struct {
- short _dummy_pkey;
+ void *_dummy_pkey;
__u32 _pkey;
} _addr_pkey;
};
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 91a31ffed828..9a781f0611df 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -63,6 +63,7 @@ struct drm_virtgpu_execbuffer {
};
#define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
+#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
struct drm_virtgpu_getparam {
__u64 param;
diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 5cb360be2a11..894d8d2f713d 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -196,8 +196,6 @@ struct batadv_bla_claim_dst {
__be16 group; /* group id */
};
-#pragma pack()
-
/**
* struct batadv_ogm_packet - ogm (routing protocol) packet
* @packet_type: batman-adv packet type, part of the general header
@@ -222,9 +220,6 @@ struct batadv_ogm_packet {
__u8 reserved;
__u8 tq;
__be16 tvlv_len;
- /* __packed is not needed as the struct size is divisible by 4,
- * and the largest data type in this struct has a size of 4.
- */
};
#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
@@ -249,9 +244,6 @@ struct batadv_ogm2_packet {
__u8 orig[ETH_ALEN];
__be16 tvlv_len;
__be32 throughput;
- /* __packed is not needed as the struct size is divisible by 4,
- * and the largest data type in this struct has a size of 4.
- */
};
#define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
@@ -405,7 +397,6 @@ struct batadv_icmp_packet_rr {
* misalignment of the payload after the ethernet header. It may also lead to
* leakage of information when the padding is not initialized before sending.
*/
-#pragma pack(2)
/**
* struct batadv_unicast_packet - unicast packet for network payload
@@ -533,8 +524,6 @@ struct batadv_coded_packet {
__be16 coded_len;
};
-#pragma pack()
-
/**
* struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
* @packet_type: batman-adv packet type, part of the general header
@@ -641,4 +630,6 @@ struct batadv_tvlv_mcast_data {
__u8 reserved[3];
};
+#pragma pack()
+
#endif /* _UAPI_LINUX_BATADV_PACKET_H_ */
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index ae00c99cbed0..324a0e1143e7 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: MIT */
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
@@ -92,6 +92,53 @@ enum batadv_tt_client_flags {
};
/**
+ * enum batadv_mcast_flags_priv - Private, own multicast flags
+ *
+ * These are internal, multicast-related flags. Currently they describe
+ * certain multicast-related attributes of the segment this originator
+ * bridges into the mesh.
+ *
+ * Those attributes are used to determine the public multicast flags this
+ * originator is going to announce via TT.
+ *
+ * For netlink, if BATADV_MCAST_FLAGS_BRIDGED is unset then all querier
+ * related flags are undefined.
+ */
+enum batadv_mcast_flags_priv {
+ /**
+ * @BATADV_MCAST_FLAGS_BRIDGED: There is a bridge on top of the mesh
+ * interface.
+ */
+ BATADV_MCAST_FLAGS_BRIDGED = (1 << 0),
+
+ /**
+ * @BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS: Whether an IGMP querier
+ * exists in the mesh
+ */
+ BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS = (1 << 1),
+
+ /**
+ * @BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS: Whether an MLD querier
+ * exists in the mesh
+ */
+ BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS = (1 << 2),
+
+ /**
+ * @BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING: If an IGMP querier
+ * exists, whether it is potentially shadowing multicast listeners
+ * (i.e. querier is behind our own bridge segment)
+ */
+ BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING = (1 << 3),
+
+ /**
+ * @BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING: If an MLD querier
+ * exists, whether it is potentially shadowing multicast listeners
+ * (i.e. querier is behind our own bridge segment)
+ */
+ BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING = (1 << 4),
+};
+
+/**
* enum batadv_nl_attrs - batman-adv netlink attributes
*/
enum batadv_nl_attrs {
@@ -272,6 +319,31 @@ enum batadv_nl_attrs {
*/
BATADV_ATTR_BLA_CRC,
+ /**
+ * @BATADV_ATTR_DAT_CACHE_IP4ADDRESS: Client IPv4 address
+ */
+ BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+
+ /**
+ * @BATADV_ATTR_DAT_CACHE_HWADDRESS: Client MAC address
+ */
+ BATADV_ATTR_DAT_CACHE_HWADDRESS,
+
+ /**
+ * @BATADV_ATTR_DAT_CACHE_VID: VLAN ID
+ */
+ BATADV_ATTR_DAT_CACHE_VID,
+
+ /**
+ * @BATADV_ATTR_MCAST_FLAGS: Per originator multicast flags
+ */
+ BATADV_ATTR_MCAST_FLAGS,
+
+ /**
+ * @BATADV_ATTR_MCAST_FLAGS_PRIV: Private, own multicast flags
+ */
+ BATADV_ATTR_MCAST_FLAGS_PRIV,
+
/* add attributes above here, update the policy in netlink.c */
/**
@@ -361,6 +433,16 @@ enum batadv_nl_commands {
*/
BATADV_CMD_GET_BLA_BACKBONE,
+ /**
+ * @BATADV_CMD_GET_DAT_CACHE: Query list of DAT cache entries
+ */
+ BATADV_CMD_GET_DAT_CACHE,
+
+ /**
+ * @BATADV_CMD_GET_MCAST_FLAGS: Query list of multicast flags
+ */
+ BATADV_CMD_GET_MCAST_FLAGS,
+
/* add new commands above here */
/**
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 20d1490d6377..3c50e07ee833 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -131,7 +131,7 @@ enum {
#define BLKTRACE_BDEV_SIZE 32
/*
- * User setup structure passed with BLKTRACESTART
+ * User setup structure passed with BLKTRACESETUP
*/
struct blk_user_trace_setup {
char name[BLKTRACE_BDEV_SIZE]; /* output */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index db6bdc375126..18b7c510c511 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -133,6 +133,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
};
enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -231,6 +233,28 @@ enum bpf_attach_type {
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+enum bpf_stack_build_id_status {
+ /* user space needs an empty entry to identify the end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fall back to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -696,6 +720,15 @@ union bpf_attr {
* int bpf_override_return(pt_regs, rc)
* @pt_regs: pointer to struct pt_regs
* @rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ * Redirect msg to a sock in map, using key to look up the
+ * sock in the map.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_PASS
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -757,7 +790,11 @@ union bpf_attr {
FN(perf_prog_read_value), \
FN(getsockopt), \
FN(override_return), \
- FN(sock_ops_cb_flags_set),
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -800,6 +837,7 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
@@ -919,6 +957,14 @@ enum sk_action {
SK_PASS,
};
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ void *data;
+ void *data_end;
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {
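/*
 * Editorial sketch (not part of the diff): a minimal BPF_PROG_TYPE_SK_MSG
 * verdict program, attached with BPF_SK_MSG_VERDICT, built against the
 * helpers added above. SEC() and struct bpf_map_def come from the usual
 * libbpf helper headers; map layout and names are assumptions.
 */
struct bpf_map_def SEC("maps") example_sock_map = {
	.type        = BPF_MAP_TYPE_SOCKMAP,
	.key_size    = sizeof(int),
	.value_size  = sizeof(int),
	.max_entries = 2,
};

SEC("sk_msg")
int example_msg_verdict(struct sk_msg_md *msg)
{
	int key = 0;

	bpf_msg_apply_bytes(msg, 1024);	/* verdict covers the first 1 KiB */
	return bpf_msg_redirect_map(msg, &example_sock_map, key, 0);
}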
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
index 8f95303f9d80..eb1b9d21250c 100644
--- a/include/uapi/linux/bpf_perf_event.h
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -13,6 +13,7 @@
struct bpf_perf_event_data {
bpf_user_pt_regs_t regs;
__u64 sample_period;
+ __u64 addr;
};
#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h
index 5f3c5a918f00..b4112f0b6dd3 100644
--- a/include/uapi/linux/dvb/dmx.h
+++ b/include/uapi/linux/dvb/dmx.h
@@ -212,6 +212,32 @@ struct dmx_stc {
};
/**
+ * enum dmx_buffer_flags - DMX memory-mapped buffer flags
+ *
+ * @DMX_BUFFER_FLAG_HAD_CRC32_DISCARD:
+ * Indicates that the Kernel discarded one or more frames due to wrong
+ * CRC32 checksum.
+ * @DMX_BUFFER_FLAG_TEI:
+ * Indicates that the Kernel has detected a Transport Error indicator
+ * (TEI) on a filtered pid.
+ * @DMX_BUFFER_PKT_COUNTER_MISMATCH:
+ * Indicates that the Kernel has detected a packet counter mismatch
+ * on a filtered pid.
+ * @DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED:
+ * Indicates that the Kernel has detected one or more frame discontinuities.
+ * @DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR:
+ * Received at least one packet with a frame discontinuity indicator.
+ */
+
+enum dmx_buffer_flags {
+ DMX_BUFFER_FLAG_HAD_CRC32_DISCARD = 1 << 0,
+ DMX_BUFFER_FLAG_TEI = 1 << 1,
+ DMX_BUFFER_PKT_COUNTER_MISMATCH = 1 << 2,
+ DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED = 1 << 3,
+ DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR = 1 << 4,
+};
+
+/**
* struct dmx_buffer - dmx buffer info
*
* @index: id number of the buffer
@@ -220,15 +246,24 @@ struct dmx_stc {
* offset from the start of the device memory for this plane,
* (or a "cookie" that should be passed to mmap() as offset)
* @length: size in bytes of the buffer
+ * @flags: bit array of buffer flags as defined by &enum dmx_buffer_flags.
+ * Filled only at &DMX_DQBUF.
+ * @count: monotonic counter for filled buffers. Helps to identify
+ * data stream losses. Filled only at &DMX_DQBUF.
*
* Contains data exchanged by application and driver using one of the streaming
* I/O methods.
+ *
+ * Note that, for &DMX_QBUF, only @index should be filled.
+ * On &DMX_DQBUF calls, all fields will be filled by the Kernel.
*/
struct dmx_buffer {
__u32 index;
__u32 bytesused;
__u32 offset;
__u32 length;
+ __u32 flags;
+ __u32 count;
};
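A userspace sketch of the queue/dequeue contract described above ("fd" is an assumed demux device already set up for memory-mapped I/O):

	struct dmx_buffer buf = { .index = 0 };	/* only @index for DMX_QBUF */

	if (ioctl(fd, DMX_QBUF, &buf) == 0 &&
	    ioctl(fd, DMX_DQBUF, &buf) == 0) {
		/* on DMX_DQBUF return, all fields are filled by the Kernel */
		if (buf.flags & DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED)
			fprintf(stderr, "discontinuity, counter=%u\n",
				buf.count);
	}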
/**
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 44a0b675a6bc..4ca65b56084f 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -217,10 +217,14 @@ struct ethtool_value {
__u32 data;
};
+#define PFC_STORM_PREVENTION_AUTO 0xffff
+#define PFC_STORM_PREVENTION_DISABLE 0
+
enum tunable_id {
ETHTOOL_ID_UNSPEC,
ETHTOOL_RX_COPYBREAK,
ETHTOOL_TX_COPYBREAK,
+ ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
/*
* Add your fresh new tunable attribute above and remember to update
* tunable_strings[] in net/core/ethtool.c
@@ -914,12 +918,15 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
* @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW
* @data: Command-dependent value
* @fs: Flow classification rule
+ * @rss_context: RSS context to be affected
* @rule_cnt: Number of rules to be affected
* @rule_locs: Array of used rule locations
*
* For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating
* the fields included in the flow hash, e.g. %RXH_IP_SRC. The following
- * structure fields must not be used.
+ * structure fields must not be used, except that if @flow_type includes
+ * the %FLOW_RSS flag, then @rss_context determines which RSS context to
+ * act on.
*
* For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues
* on return.
@@ -931,7 +938,9 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
* set in @data then special location values should not be used.
*
* For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
- * existing rule on entry and @fs contains the rule on return.
+ * existing rule on entry and @fs contains the rule on return; if
+ * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is
+ * filled with the RSS context ID associated with the rule.
*
* For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the
* user buffer for @rule_locs on entry. On return, @data is the size
@@ -942,7 +951,11 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
* For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
* @fs.@location either specifies the location to use or is a special
* location value with %RX_CLS_LOC_SPECIAL flag set. On return,
- * @fs.@location is the actual rule location.
+ * @fs.@location is the actual rule location. If @fs.@flow_type
+ * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to
+ * use for flow spreading traffic which matches this rule. The value
+ * from the rxfh indirection table will be added to @fs.@ring_cookie
+ * to choose which ring to deliver to.
*
* For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
* existing rule on entry.
@@ -963,7 +976,10 @@ struct ethtool_rxnfc {
__u32 flow_type;
__u64 data;
struct ethtool_rx_flow_spec fs;
- __u32 rule_cnt;
+ union {
+ __u32 rule_cnt;
+ __u32 rss_context;
+ };
__u32 rule_locs[0];
};
@@ -990,7 +1006,11 @@ struct ethtool_rxfh_indir {
/**
* struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
* @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
- * @rss_context: RSS context identifier.
+ * @rss_context: RSS context identifier. Context 0 is the default for normal
+ * traffic; other contexts can be referenced as the destination for RX flow
+ * classification rules. %ETH_RXFH_CONTEXT_ALLOC is used with command
+ * %ETHTOOL_SRSSH to allocate a new RSS context; on return this field will
+ * contain the ID of the newly allocated context.
* @indir_size: On entry, the array size of the user buffer for the
* indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
* %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH,
@@ -1009,7 +1029,8 @@ struct ethtool_rxfh_indir {
* size should be returned. For %ETHTOOL_SRSSH, an @indir_size of
* %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
* and a @indir_size of zero means the indir table should be reset to default
- * values. An hfunc of zero means that hash function setting is not requested.
+ * values (if @rss_context == 0) or that the RSS context should be deleted.
+ * An hfunc of zero means that hash function setting is not requested.
*/
struct ethtool_rxfh {
__u32 cmd;
@@ -1021,6 +1042,7 @@ struct ethtool_rxfh {
__u32 rsvd32;
__u32 rss_config[0];
};
+#define ETH_RXFH_CONTEXT_ALLOC 0xffffffff
#define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff
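A sketch of the new RSS-context flow: allocate a context, then steer a flow into it (send_ioctl() stands in for the usual ifreq/SIOCETHTOOL plumbing, and the rule's match fields are omitted):

	struct ethtool_rxfh rxfh = {
		.cmd		= ETHTOOL_SRSSH,
		.rss_context	= ETH_RXFH_CONTEXT_ALLOC,
		.indir_size	= ETH_RXFH_INDIR_NO_CHANGE,
	};
	struct ethtool_rxnfc nfc = {
		.cmd		= ETHTOOL_SRXCLSRLINS,
		.fs.flow_type	= TCP_V4_FLOW | FLOW_RSS,
		.fs.location	= RX_CLS_LOC_ANY,
	};

	send_ioctl(&rxfh);	/* rxfh.rss_context now holds the new ID */
	nfc.rss_context = rxfh.rss_context;
	send_ioctl(&nfc);	/* matching traffic spreads via that context */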
/**
@@ -1635,6 +1657,8 @@ static inline int ethtool_validate_duplex(__u8 duplex)
/* Flag to enable additional fields in struct ethtool_rx_flow_spec */
#define FLOW_EXT 0x80000000
#define FLOW_MAC_EXT 0x40000000
+/* Flag to enable RSS spreading of traffic matching rule (nfc only) */
+#define FLOW_RSS 0x20000000
/* L3-L4 network traffic flow hash options */
#define RXH_L2DA (1 << 1)
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9b5a0..232df14e1287 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,7 +23,7 @@ struct fib_rule_hdr {
__u8 tos;
__u8 table;
- __u8 res1; /* reserved */
+ __u8 res1; /* reserved */
__u8 res2; /* reserved */
__u8 action;
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
__u32 end;
};
+struct fib_rule_port_range {
+ __u16 start;
+ __u16 end;
+};
+
enum {
FRA_UNSPEC,
FRA_DST, /* destination address */
@@ -58,6 +63,10 @@ enum {
FRA_PAD,
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
FRA_UID_RANGE, /* UID range */
+ FRA_PROTOCOL, /* Originator of the rule */
+ FRA_IP_PROTO, /* ip proto */
+ FRA_SPORT_RANGE, /* sport */
+ FRA_DPORT_RANGE, /* dport */
__FRA_MAX
};
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index f8cb5760ea4f..3a45b4ad71a3 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -23,7 +23,6 @@
#define _UAPI_LINUX_IF_ETHER_H
#include <linux/types.h>
-#include <linux/libc-compat.h>
/*
* IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
@@ -31,6 +30,7 @@
*/
#define ETH_ALEN 6 /* Octets in one ethernet addr */
+#define ETH_TLEN 2 /* Octets in ethernet type field */
#define ETH_HLEN 14 /* Total octets in header. */
#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */
#define ETH_DATA_LEN 1500 /* Max. octets in payload */
@@ -89,6 +89,7 @@
#define ETH_P_AOE 0x88A2 /* ATA over Ethernet */
#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
#define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */
+#define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */
#define ETH_P_TIPC 0x88CA /* TIPC */
#define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */
#define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */
@@ -151,6 +152,11 @@
* This is an Ethernet frame header.
*/
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR 1
+#endif
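Usage sketch: a libc (or application) that provides its own struct ethhdr can now suppress the kernel definition before including this header:

	#define __UAPI_DEF_ETHHDR 0	/* use libc's struct ethhdr instead */
	#include <linux/if_ether.h>	/* still provides ETH_ALEN, ETH_P_*, ... */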
+
#if __UAPI_DEF_ETHHDR
struct ethhdr {
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6d9447700e18..68699f654118 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -941,4 +941,43 @@ enum {
IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
};
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+
+enum {
+ IFLA_RMNET_UNSPEC,
+ IFLA_RMNET_MUX_ID,
+ IFLA_RMNET_FLAGS,
+ __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0fb5ef939732..7b26d4b0b052 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -761,6 +761,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
+#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
/*
* Extension capability list.
@@ -934,6 +935,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_AIS_MIGRATION 150
#define KVM_CAP_PPC_GET_CPU_CHAR 151
#define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index fc29efaa918c..8254c937c9f4 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -264,10 +264,4 @@
#endif /* __GLIBC__ */
-/* Definitions for if_ether.h */
-/* allow libcs like musl to deactivate this, glibc does not implement this. */
-#ifndef __UAPI_DEF_ETHHDR
-#define __UAPI_DEF_ETHHDR 1
-#endif
-
#endif /* _UAPI_LIBC_COMPAT_H */
diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h
new file mode 100644
index 000000000000..4c292ecbb748
--- /dev/null
+++ b/include/uapi/linux/ncsi.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __UAPI_NCSI_NETLINK_H__
+#define __UAPI_NCSI_NETLINK_H__
+
+/**
+ * enum ncsi_nl_commands - supported NCSI commands
+ *
+ * @NCSI_CMD_UNSPEC: unspecified command to catch errors
+ * @NCSI_CMD_PKG_INFO: list package and channel attributes. Requires
+ * NCSI_ATTR_IFINDEX. If NCSI_ATTR_PACKAGE_ID is specified, returns the
+ * specific package and its channels; otherwise a dump request returns
+ * all packages and their associated channels.
+ * @NCSI_CMD_SET_INTERFACE: set preferred package and channel combination.
+ * Requires NCSI_ATTR_IFINDEX, the preferred NCSI_ATTR_PACKAGE_ID and,
+ * optionally, the preferred NCSI_ATTR_CHANNEL_ID.
+ * @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination.
+ * Requires NCSI_ATTR_IFINDEX.
+ * @NCSI_CMD_MAX: highest command number
+ */
+enum ncsi_nl_commands {
+ NCSI_CMD_UNSPEC,
+ NCSI_CMD_PKG_INFO,
+ NCSI_CMD_SET_INTERFACE,
+ NCSI_CMD_CLEAR_INTERFACE,
+
+ __NCSI_CMD_AFTER_LAST,
+ NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1
+};
+
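A userspace sketch of an NCSI_CMD_PKG_INFO dump via libnl-genl (the generic netlink family name "NCSI" and the omitted error handling are assumptions):

	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "NCSI");
	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
		    NLM_F_DUMP, NCSI_CMD_PKG_INFO, 0);
	nla_put_u32(msg, NCSI_ATTR_IFINDEX, if_nametoindex("eth0"));
	nl_send_auto(sk, msg);	/* reply carries NCSI_ATTR_PACKAGE_LIST nests */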
+/**
+ * enum ncsi_nl_attrs - General NCSI netlink attributes
+ *
+ * @NCSI_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_ATTR_IFINDEX: ifindex of network device using NCSI
+ * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes
+ * @NCSI_ATTR_PACKAGE_ID: package ID
+ * @NCSI_ATTR_CHANNEL_ID: channel ID
+ * @NCSI_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_attrs {
+ NCSI_ATTR_UNSPEC,
+ NCSI_ATTR_IFINDEX,
+ NCSI_ATTR_PACKAGE_LIST,
+ NCSI_ATTR_PACKAGE_ID,
+ NCSI_ATTR_CHANNEL_ID,
+
+ __NCSI_ATTR_AFTER_LAST,
+ NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_pkg_attrs - NCSI netlink package-specific attributes
+ *
+ * @NCSI_PKG_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_PKG_ATTR: nested array of package attributes
+ * @NCSI_PKG_ATTR_ID: package ID
+ * @NCSI_PKG_ATTR_FORCED: flag signifying a package has been set as preferred
+ * @NCSI_PKG_ATTR_CHANNEL_LIST: nested array of NCSI_CHANNEL_ATTR attributes
+ * @NCSI_PKG_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_pkg_attrs {
+ NCSI_PKG_ATTR_UNSPEC,
+ NCSI_PKG_ATTR,
+ NCSI_PKG_ATTR_ID,
+ NCSI_PKG_ATTR_FORCED,
+ NCSI_PKG_ATTR_CHANNEL_LIST,
+
+ __NCSI_PKG_ATTR_AFTER_LAST,
+ NCSI_PKG_ATTR_MAX = __NCSI_PKG_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_channel_attrs - NCSI netlink channel-specific attributes
+ *
+ * @NCSI_CHANNEL_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_CHANNEL_ATTR: nested array of channel attributes
+ * @NCSI_CHANNEL_ATTR_ID: channel ID
+ * @NCSI_CHANNEL_ATTR_VERSION_MAJOR: channel major version number
+ * @NCSI_CHANNEL_ATTR_VERSION_MINOR: channel minor version number
+ * @NCSI_CHANNEL_ATTR_VERSION_STR: channel version string
+ * @NCSI_CHANNEL_ATTR_LINK_STATE: channel link state flags
+ * @NCSI_CHANNEL_ATTR_ACTIVE: channels with this flag are in
+ * NCSI_CHANNEL_ACTIVE state
+ * @NCSI_CHANNEL_ATTR_FORCED: flag signifying a channel has been set as
+ * preferred
+ * @NCSI_CHANNEL_ATTR_VLAN_LIST: nested array of NCSI_CHANNEL_ATTR_VLAN_IDs
+ * @NCSI_CHANNEL_ATTR_VLAN_ID: VLAN ID being filtered on this channel
+ * @NCSI_CHANNEL_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_channel_attrs {
+ NCSI_CHANNEL_ATTR_UNSPEC,
+ NCSI_CHANNEL_ATTR,
+ NCSI_CHANNEL_ATTR_ID,
+ NCSI_CHANNEL_ATTR_VERSION_MAJOR,
+ NCSI_CHANNEL_ATTR_VERSION_MINOR,
+ NCSI_CHANNEL_ATTR_VERSION_STR,
+ NCSI_CHANNEL_ATTR_LINK_STATE,
+ NCSI_CHANNEL_ATTR_ACTIVE,
+ NCSI_CHANNEL_ATTR_FORCED,
+ NCSI_CHANNEL_ATTR_VLAN_LIST,
+ NCSI_CHANNEL_ATTR_VLAN_ID,
+
+ __NCSI_CHANNEL_ATTR_AFTER_LAST,
+ NCSI_CHANNEL_ATTR_MAX = __NCSI_CHANNEL_ATTR_AFTER_LAST - 1
+};
+
+#endif /* __UAPI_NCSI_NETLINK_H__ */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c587a61c32bf..c13c84304be3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -992,6 +992,32 @@
*
* @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
*
+ * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
+ * drivers that do not define separate commands for authentication and
+ * association, but rely on user space for the authentication to happen.
+ * This interface acts both as the event request (driver to user space)
+ * to trigger the authentication and as the command response (user space
+ * to driver) to indicate the authentication status.
+ *
+ * User space uses the %NL80211_CMD_CONNECT command to the host driver to
+ * trigger a connection. The host driver selects a BSS and further uses
+ * this interface to offload only the authentication part to the user
+ * space. Authentication frames are passed between the driver and user
+ * space through the %NL80211_CMD_FRAME interface. Host driver proceeds
+ * further with the association after getting successful authentication
+ * status. User space indicates the authentication status through
+ * %NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH
+ * command interface.
+ *
+ * On an authentication failure, the host driver reports the status to
+ * user space through the connect result, since user space initiated
+ * the connection with the connect request.
+ *
+ * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notifies a change in a
+ * station's HT or VHT opmode, using any of &NL80211_ATTR_SMPS_MODE,
+ * &NL80211_ATTR_CHANNEL_WIDTH and &NL80211_ATTR_NSS attributes, together
+ * with the station's address (specified in &NL80211_ATTR_MAC).
+ *
* @NL80211_CMD_MAX: highest used command number
* @__NL80211_CMD_AFTER_LAST: internal use
*/
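A sketch of both directions of this exchange from a supplicant's point of view (libnl message construction is abbreviated; nl80211_id and WLAN_STATUS_SUCCESS are assumed from elsewhere):

	/* 1) advertise support inside the NL80211_CMD_CONNECT request */
	nla_put_flag(msg, NL80211_ATTR_EXTERNAL_AUTH_SUPPORT);

	/* 2) after the driver's NL80211_CMD_EXTERNAL_AUTH event, user space
	 * authenticates (e.g. SAE) and reports the outcome:
	 */
	genlmsg_put(resp, NL_AUTO_PORT, NL_AUTO_SEQ, nl80211_id, 0, 0,
		    NL80211_CMD_EXTERNAL_AUTH, 0);
	nla_put_u16(resp, NL80211_ATTR_STATUS_CODE, WLAN_STATUS_SUCCESS);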
@@ -1198,6 +1224,10 @@ enum nl80211_commands {
NL80211_CMD_RELOAD_REGDB,
+ NL80211_CMD_EXTERNAL_AUTH,
+
+ NL80211_CMD_STA_OPMODE_CHANGED,
+
/* add new commands above here */
/* used to define NL80211_CMD_MAX below */
@@ -2153,6 +2183,19 @@ enum nl80211_commands {
* @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
* @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
*
+ * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external
+ * authentication operation (u32 attribute with an
+ * &enum nl80211_external_auth_action value). This is used with the
+ * &NL80211_CMD_EXTERNAL_AUTH request event.
+ * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
+ * space supports external authentication. This attribute shall be used
+ * only with %NL80211_CMD_CONNECT request. The driver may offload
+ * authentication processing to user space if this capability is indicated
+ * in NL80211_CMD_CONNECT requests from the user space.
+ *
+ * @NL80211_ATTR_NSS: Station's new/updated RX_NSS value, notified using this
+ * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
+ *
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2579,6 +2622,12 @@ enum nl80211_attrs {
NL80211_ATTR_PMKR0_NAME,
NL80211_ATTR_PORT_AUTHORIZED,
+ NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
+
+ NL80211_ATTR_NSS,
+ NL80211_ATTR_ACK_SIGNAL,
+
/* add attributes here, update the policy in nl80211.c */
__NL80211_ATTR_AFTER_LAST,
@@ -2899,6 +2948,7 @@ enum nl80211_sta_bss_param {
* @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames
* received from the station (u64, usec)
* @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame (u8, dBm)
* @__NL80211_STA_INFO_AFTER_LAST: internal
* @NL80211_STA_INFO_MAX: highest possible station info attribute
*/
@@ -2937,6 +2987,7 @@ enum nl80211_sta_info {
NL80211_STA_INFO_TID_STATS,
NL80211_STA_INFO_RX_DURATION,
NL80211_STA_INFO_PAD,
+ NL80211_STA_INFO_ACK_SIGNAL,
/* keep last */
__NL80211_STA_INFO_AFTER_LAST,
@@ -4945,6 +4996,9 @@ enum nl80211_feature_flags {
* probe request tx deferral and suppression
* @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
* value in %NL80211_ATTR_USE_MFP.
+ * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
+ * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
+ * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
*
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4972,6 +5026,9 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
NL80211_EXT_FEATURE_MFP_OPTIONAL,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
@@ -5032,6 +5089,10 @@ enum nl80211_timeout_reason {
* of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN
* requests.
*
+ * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and
+ * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are mutually exclusive, i.e., only
+ * one of them can be used in the request.
+ *
* @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority
* @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning
* @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured
@@ -5059,7 +5120,20 @@ enum nl80211_timeout_reason {
* and suppression (if it has received a broadcast Probe Response frame,
* Beacon frame or FILS Discovery frame from an AP that the STA considers
* a suitable candidate for (re-)association - suitable in terms of
- * SSID and/or RSSI
+ * SSID and/or RSSI).
+ * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to
+ * accomplish the scan. Thus, this flag asks the driver to perform the
+ * scan request with a shorter span/duration; how this is accomplished is
+ * specific to the driver implementation. Scan accuracy may be reduced
+ * with this flag.
+ * @NL80211_SCAN_FLAG_LOW_POWER: This flag asks the driver to minimize the
+ * power consumed by the scan; drivers can resort to their specific means
+ * of optimizing power use. Scan accuracy may be reduced with this flag.
+ * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here refers to the extent of
+ * the scan results obtained. Thus the HIGH_ACCURACY flag aims for the
+ * most complete scan results possible, hinting the driver to use the best
+ * possible scan configuration to improve accuracy. Latency and power
+ * use may increase with this flag.
*/
enum nl80211_scan_flags {
NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0,
@@ -5070,6 +5144,9 @@ enum nl80211_scan_flags {
NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5,
NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6,
NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7,
+ NL80211_SCAN_FLAG_LOW_SPAN = 1<<8,
+ NL80211_SCAN_FLAG_LOW_POWER = 1<<9,
+ NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10,
};
/**
@@ -5469,4 +5546,15 @@ enum nl80211_nan_match_attributes {
NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
};
+/**
+ * nl80211_external_auth_action - Action to perform with an external
+ * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION.
+ * @NL80211_EXTERNAL_AUTH_START: Start the authentication.
+ * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication.
+ */
+enum nl80211_external_auth_action {
+ NL80211_EXTERNAL_AUTH_START,
+ NL80211_EXTERNAL_AUTH_ABORT,
+};
+
#endif /* __LINUX_NL80211_H */
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 46c506615f4a..be05e66c167b 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -475,6 +475,7 @@ enum {
enum {
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
};
/* Match-all classifier */
@@ -555,7 +556,8 @@ enum {
#define TCF_EM_VLAN 6
#define TCF_EM_CANID 7
#define TCF_EM_IPSET 8
-#define TCF_EM_MAX 8
+#define TCF_EM_IPT 9
+#define TCF_EM_MAX 9
enum {
TCF_EM_PROG_TC
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
index 3d77fe91239a..9008f31c7eb6 100644
--- a/include/uapi/linux/psp-sev.h
+++ b/include/uapi/linux/psp-sev.h
@@ -42,7 +42,7 @@ typedef enum {
SEV_RET_INVALID_PLATFORM_STATE,
SEV_RET_INVALID_GUEST_STATE,
SEV_RET_INAVLID_CONFIG,
- SEV_RET_INVALID_len,
+ SEV_RET_INVALID_LEN,
SEV_RET_ALREADY_OWNED,
SEV_RET_INVALID_CERTIFICATE,
SEV_RET_POLICY_FAILURE,
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index e46d82b91166..d5a1b8a492b9 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -69,8 +69,8 @@ struct ptrace_peeksiginfo_args {
#define PTRACE_SECCOMP_GET_METADATA 0x420d
struct seccomp_metadata {
- unsigned long filter_off; /* Input: which filter */
- unsigned int flags; /* Output: filter's flags */
+ __u64 filter_off; /* Input: which filter */
+ __u64 flags; /* Output: filter's flags */
};
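A tracer-side sketch of the call this struct serves; the ptrace() addr argument carries the struct size, and "pid" is an assumed stopped tracee:

	struct seccomp_metadata md = { .filter_off = 0 };	/* first filter */

	if (ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md) == 0)
		printf("filter 0 flags: %llu\n",
		       (unsigned long long)md.flags);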
/* Read signals from a shared (process wide) queue */
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index e71d4491f225..a66b213de3d7 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -103,6 +103,8 @@
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
#define RDS_CMSG_RXPATH_LATENCY 11
+#define RDS_CMSG_ZCOPY_COOKIE 12
+#define RDS_CMSG_ZCOPY_COMPLETION 13
#define RDS_INFO_FIRST 10000
#define RDS_INFO_COUNTERS 10000
@@ -316,6 +318,12 @@ struct rds_rdma_notify {
#define RDS_RDMA_DROPPED 3
#define RDS_RDMA_OTHER_ERROR 4
+#define RDS_MAX_ZCOOKIES 8
+struct rds_zcopy_cookies {
+ __u32 num;
+ __u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
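A sketch of the zerocopy round trip these additions enable (cmsg buffer setup and the error-queue recvmsg() are abbreviated; "fd", "cookie" and "comp_cmsg" are assumed):

	/* tag the zerocopy send with an application cookie */
	cmsg->cmsg_level = SOL_RDS;
	cmsg->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
	memcpy(CMSG_DATA(cmsg), &cookie, sizeof(__u32));
	sendmsg(fd, &msg, 0);

	/* later, an RDS_CMSG_ZCOPY_COMPLETION notification carries the
	 * reaped cookies:
	 */
	struct rds_zcopy_cookies *done =
		(struct rds_zcopy_cookies *)CMSG_DATA(comp_cmsg);
	for (__u32 i = 0; i < done->num; i++)
		printf("send buffer %u may be reused\n", done->cookies[i]);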
/*
* Common set of flags for all RDMA related structs
*/
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 4c4db14786bd..afd4346386e0 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -99,6 +99,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_RECVRCVINFO 32
#define SCTP_RECVNXTINFO 33
#define SCTP_DEFAULT_SNDINFO 34
+#define SCTP_AUTH_DEACTIVATE_KEY 35
/* Internal Socket Options. Some of the sctp library functions are
* implemented using these socket options.
@@ -260,6 +261,31 @@ struct sctp_nxtinfo {
sctp_assoc_t nxt_assoc_id;
};
+/* 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ -------------------
+ * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo
+ */
+struct sctp_prinfo {
+ __u16 pr_policy;
+ __u32 pr_value;
+};
+
+/* 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ -------------------
+ * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo
+ */
+struct sctp_authinfo {
+ __u16 auth_keynumber;
+};
+
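A sendmsg() sketch following the table above (SCTP_PR_SCTP_TTL is the existing PR-SCTP policy constant; socket and iovec setup are omitted):

	char cbuf[CMSG_SPACE(sizeof(struct sctp_prinfo))] = {};
	struct msghdr mh = { .msg_control = cbuf,
			     .msg_controllen = sizeof(cbuf) };
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mh);
	struct sctp_prinfo *info;

	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_PRINFO;
	cmsg->cmsg_len = CMSG_LEN(sizeof(*info));
	info = (struct sctp_prinfo *)CMSG_DATA(cmsg);
	info->pr_policy = SCTP_PR_SCTP_TTL;	/* abandon after a timeout */
	info->pr_value = 3000;			/* 3000 ms */
	sendmsg(fd, &mh, 0);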
/*
* sinfo_flags: 16 bits (unsigned integer)
*
@@ -271,6 +297,8 @@ enum sctp_sinfo_flags {
SCTP_ADDR_OVER = (1 << 1), /* Override the primary destination. */
SCTP_ABORT = (1 << 2), /* Send an ABORT message to the peer. */
SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */
+ /* 2 bits here have been used by SCTP_PR_SCTP_MASK */
+ SCTP_SENDALL = (1 << 6),
SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */
SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */
};
@@ -293,6 +321,14 @@ typedef enum sctp_cmsg_type {
#define SCTP_RCVINFO SCTP_RCVINFO
SCTP_NXTINFO, /* 5.3.6 SCTP Next Receive Information Structure */
#define SCTP_NXTINFO SCTP_NXTINFO
+ SCTP_PRINFO, /* 5.3.7 SCTP PR-SCTP Information Structure */
+#define SCTP_PRINFO SCTP_PRINFO
+ SCTP_AUTHINFO, /* 5.3.8 SCTP AUTH Information Structure */
+#define SCTP_AUTHINFO SCTP_AUTHINFO
+ SCTP_DSTADDRV4, /* 5.3.9 SCTP Destination IPv4 Address Structure */
+#define SCTP_DSTADDRV4 SCTP_DSTADDRV4
+ SCTP_DSTADDRV6, /* 5.3.10 SCTP Destination IPv6 Address Structure */
+#define SCTP_DSTADDRV6 SCTP_DSTADDRV6
} sctp_cmsg_t;
/*
@@ -482,7 +518,12 @@ struct sctp_authkey_event {
sctp_assoc_t auth_assoc_id;
};
-enum { SCTP_AUTH_NEWKEY = 0, };
+enum {
+ SCTP_AUTH_NEW_KEY,
+#define SCTP_AUTH_NEWKEY SCTP_AUTH_NEW_KEY /* kept for backward compatibility */
+ SCTP_AUTH_FREE_KEY,
+ SCTP_AUTH_NO_AUTH,
+};
/*
* 6.1.9. SCTP_SENDER_DRY_EVENT
diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h
new file mode 100644
index 000000000000..49a65530992c
--- /dev/null
+++ b/include/uapi/linux/tc_ematch/tc_em_ipt.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_TC_EM_IPT_H
+#define __LINUX_TC_EM_IPT_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+enum {
+ TCA_EM_IPT_UNSPEC,
+ TCA_EM_IPT_HOOK,
+ TCA_EM_IPT_MATCH_NAME,
+ TCA_EM_IPT_MATCH_REVISION,
+ TCA_EM_IPT_NFPROTO,
+ TCA_EM_IPT_MATCH_DATA,
+ __TCA_EM_IPT_MAX
+};
+
+#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1)
+
+#endif
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b4a4f64635fa..560374c978f9 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -241,6 +241,9 @@ enum {
TCP_NLA_MIN_RTT, /* minimum RTT */
TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */
TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
+ TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */
+ TCP_NLA_CA_STATE, /* ca_state of socket */
+ TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */
};
diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 14bacc7e6cef..4ac9f1f02b06 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -61,50 +61,6 @@ struct tipc_name_seq {
__u32 upper;
};
-/* TIPC Address Size, Offset, Mask specification for Z.C.N
- */
-#define TIPC_NODE_BITS 12
-#define TIPC_CLUSTER_BITS 12
-#define TIPC_ZONE_BITS 8
-
-#define TIPC_NODE_OFFSET 0
-#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS
-#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
-
-#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1)
-#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1)
-#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1)
-
-#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
-#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
-#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
-
-#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
-
-static inline __u32 tipc_addr(unsigned int zone,
- unsigned int cluster,
- unsigned int node)
-{
- return (zone << TIPC_ZONE_OFFSET) |
- (cluster << TIPC_CLUSTER_OFFSET) |
- node;
-}
-
-static inline unsigned int tipc_zone(__u32 addr)
-{
- return addr >> TIPC_ZONE_OFFSET;
-}
-
-static inline unsigned int tipc_cluster(__u32 addr)
-{
- return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
-}
-
-static inline unsigned int tipc_node(__u32 addr)
-{
- return addr & TIPC_NODE_MASK;
-}
-
/*
* Application-accessible port name types
*/
@@ -117,9 +73,10 @@ static inline unsigned int tipc_node(__u32 addr)
/*
* Publication scopes when binding port names and port name sequences
*/
-#define TIPC_ZONE_SCOPE 1
-#define TIPC_CLUSTER_SCOPE 2
-#define TIPC_NODE_SCOPE 3
+enum tipc_scope {
+ TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */
+ TIPC_NODE_SCOPE = 3
+};
/*
* Limiting values for messages
@@ -243,7 +200,7 @@ struct sockaddr_tipc {
struct tipc_group_req {
__u32 type; /* group id */
__u32 instance; /* member id */
- __u32 scope; /* zone/cluster/node */
+ __u32 scope; /* cluster/node */
__u32 flags;
};
@@ -268,4 +225,53 @@ struct tipc_sioc_ln_req {
__u32 bearer_id;
char linkname[TIPC_MAX_LINK_NAME];
};
+
+
+/* The macros and functions below are deprecated:
+ */
+
+#define TIPC_ZONE_SCOPE 1
+
+#define TIPC_NODE_BITS 12
+#define TIPC_CLUSTER_BITS 12
+#define TIPC_ZONE_BITS 8
+
+#define TIPC_NODE_OFFSET 0
+#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS
+#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
+
+#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1)
+#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1)
+#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1)
+
+#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
+#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
+#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
+
+#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
+
+static inline __u32 tipc_addr(unsigned int zone,
+ unsigned int cluster,
+ unsigned int node)
+{
+ return (zone << TIPC_ZONE_OFFSET) |
+ (cluster << TIPC_CLUSTER_OFFSET) |
+ node;
+}
+
+static inline unsigned int tipc_zone(__u32 addr)
+{
+ return addr >> TIPC_ZONE_OFFSET;
+}
+
+static inline unsigned int tipc_cluster(__u32 addr)
+{
+ return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
+}
+
+static inline unsigned int tipc_node(__u32 addr)
+{
+ return addr & TIPC_NODE_MASK;
+}
+
#endif
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 469aa67a5ecb..0affb682e5e3 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -114,6 +114,13 @@ enum {
TIPC_NLA_SOCK_REF, /* u32 */
TIPC_NLA_SOCK_CON, /* nest */
TIPC_NLA_SOCK_HAS_PUBL, /* flag */
+ TIPC_NLA_SOCK_STAT, /* nest */
+ TIPC_NLA_SOCK_TYPE, /* u32 */
+ TIPC_NLA_SOCK_INO, /* u32 */
+ TIPC_NLA_SOCK_UID, /* u32 */
+ TIPC_NLA_SOCK_TIPC_STATE, /* u32 */
+ TIPC_NLA_SOCK_COOKIE, /* u64 */
+ TIPC_NLA_SOCK_PAD, /* flag */
__TIPC_NLA_SOCK_MAX,
TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1
@@ -162,6 +169,8 @@ enum {
TIPC_NLA_NET_UNSPEC,
TIPC_NLA_NET_ID, /* u32 */
TIPC_NLA_NET_ADDR, /* u32 */
+ TIPC_NLA_NET_NODEID, /* u64 */
+ TIPC_NLA_NET_NODEID_W1, /* u64 */
__TIPC_NLA_NET_MAX,
TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1
@@ -238,6 +247,18 @@ enum {
TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1
};
+/* Nest, socket statistics info */
+enum {
+ TIPC_NLA_SOCK_STAT_RCVQ, /* u32 */
+ TIPC_NLA_SOCK_STAT_SENDQ, /* u32 */
+ TIPC_NLA_SOCK_STAT_LINK_CONG, /* flag */
+ TIPC_NLA_SOCK_STAT_CONN_CONG, /* flag */
+ TIPC_NLA_SOCK_STAT_DROP, /* u32 */
+
+ __TIPC_NLA_SOCK_STAT_MAX,
+ TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1
+};
+
/* Nest, link properties. Valid for link, media and bearer */
enum {
TIPC_NLA_PROP_UNSPEC,
diff --git a/include/uapi/linux/tipc_sockets_diag.h b/include/uapi/linux/tipc_sockets_diag.h
new file mode 100644
index 000000000000..7678cf2f0dcc
--- /dev/null
+++ b/include/uapi/linux/tipc_sockets_diag.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* AF_TIPC sock_diag interface for querying open sockets */
+
+#ifndef _UAPI__TIPC_SOCKETS_DIAG_H__
+#define _UAPI__TIPC_SOCKETS_DIAG_H__
+
+#include <linux/types.h>
+#include <linux/sock_diag.h>
+
+/* Request */
+struct tipc_sock_diag_req {
+ __u8 sdiag_family; /* must be AF_TIPC */
+ __u8 sdiag_protocol; /* must be 0 */
+ __u16 pad; /* must be 0 */
+ __u32 tidiag_states; /* socket states to query */
+};
+#endif /* _UAPI__TIPC_SOCKETS_DIAG_H__ */
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 293b2cdad88d..c6633e97eca4 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -38,6 +38,7 @@
/* TLS socket options */
#define TLS_TX 1 /* Set transmit parameters */
+#define TLS_RX 2 /* Set receive parameters */
/* Supported versions */
#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF)
@@ -59,6 +60,7 @@
#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8
#define TLS_SET_RECORD_TYPE 1
+#define TLS_GET_RECORD_TYPE 2
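Enabling the new direction mirrors the existing TLS_TX flow; a sketch (key material from the handshake is omitted, and SOL_TLS may need defining as 282 if the libc headers lack it):

	struct tls12_crypto_info_aes_gcm_128 crypto_info = {
		.info.version		= TLS_1_2_VERSION,
		.info.cipher_type	= TLS_CIPHER_AES_GCM_128,
		/* .iv, .key, .salt, .rec_seq filled from the handshake */
	};

	setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls"));
	setsockopt(sock, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info));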
struct tls_crypto_info {
__u16 version;
diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h
index 4b0b0b756f3e..0af83d80fb3e 100644
--- a/include/uapi/misc/ocxl.h
+++ b/include/uapi/misc/ocxl.h
@@ -32,6 +32,22 @@ struct ocxl_ioctl_attach {
__u64 reserved3;
};
+struct ocxl_ioctl_metadata {
+ __u16 version; // struct version, always backwards compatible
+
+ // Version 0 fields
+ __u8 afu_version_major;
+ __u8 afu_version_minor;
+ __u32 pasid; // PASID assigned to the current context
+
+ __u64 pp_mmio_size; // Per PASID MMIO size
+ __u64 global_mmio_size;
+
+ // End version 0 fields
+
+ __u64 reserved[13]; // Total of 16*u64
+};
+
struct ocxl_ioctl_irq_fd {
__u64 irq_offset;
__s32 eventfd;
@@ -45,5 +61,6 @@ struct ocxl_ioctl_irq_fd {
#define OCXL_IOCTL_IRQ_ALLOC _IOR(OCXL_MAGIC, 0x11, __u64)
#define OCXL_IOCTL_IRQ_FREE _IOW(OCXL_MAGIC, 0x12, __u64)
#define OCXL_IOCTL_IRQ_SET_FD _IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd)
+#define OCXL_IOCTL_GET_METADATA _IOR(OCXL_MAGIC, 0x14, struct ocxl_ioctl_metadata)
#endif /* _UAPI_MISC_OCXL_H */
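A userspace sketch of the new query ("fd" is an assumed open AFU context; @version tells the caller which fields the kernel filled):

	struct ocxl_ioctl_metadata md;

	if (ioctl(fd, OCXL_IOCTL_GET_METADATA, &md) == 0)
		printf("AFU v%u.%u, PASID %u, per-PASID MMIO %llu bytes\n",
		       md.afu_version_major, md.afu_version_minor, md.pasid,
		       (unsigned long long)md.pp_mmio_size);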
diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h
index 03557b5f9aa6..46de0885e800 100644
--- a/include/uapi/rdma/rdma_user_ioctl.h
+++ b/include/uapi/rdma/rdma_user_ioctl.h
@@ -65,7 +65,7 @@ struct ib_uverbs_attr {
__u16 len; /* only for pointers */
__u16 flags; /* combination of UVERBS_ATTR_F_XXXX */
__u16 reserved;
- __u64 data; /* ptr to command, inline data or idr/fd */
+ __aligned_u64 data; /* ptr to command, inline data or idr/fd */
};
struct ib_uverbs_ioctl_hdr {
@@ -73,7 +73,7 @@ struct ib_uverbs_ioctl_hdr {
__u16 object_id;
__u16 method_id;
__u16 num_attrs;
- __u64 reserved;
+ __aligned_u64 reserved;
struct ib_uverbs_attr attrs[0];
};
diff --git a/init/main.c b/init/main.c
index a8100b954839..969eaf140ef0 100644
--- a/init/main.c
+++ b/init/main.c
@@ -89,6 +89,7 @@
#include <linux/io.h>
#include <linux/cache.h>
#include <linux/rodata_test.h>
+#include <linux/jump_label.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused)
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
ftrace_free_init_mem();
+ jump_label_invalidate_init();
free_initmem();
mark_readonly();
system_state = SYSTEM_RUNNING;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b1f66480135b..14750e7c5ee4 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array)
{
int i;
- for (i = 0; i < array->map.max_entries; i++)
+ for (i = 0; i < array->map.max_entries; i++) {
free_percpu(array->pptrs[i]);
+ cond_resched();
+ }
}
static int bpf_array_alloc_percpu(struct bpf_array *array)
@@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
return -ENOMEM;
}
array->pptrs[i] = ptr;
+ cond_resched();
}
return 0;
@@ -73,11 +76,11 @@ static int array_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
- int numa_node = bpf_map_attr_numa_node(attr);
+ int ret, numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
+ u64 cost, array_size, mask64;
struct bpf_array *array;
- u64 array_size, mask64;
elem_size = round_up(attr->value_size, 8);
@@ -109,8 +112,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
- if (array_size >= U32_MAX - PAGE_SIZE)
+ cost = array_size;
+ if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
+ if (percpu) {
+ cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
+ if (cost >= U32_MAX - PAGE_SIZE)
+ return ERR_PTR(-ENOMEM);
+ }
+ cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ ret = bpf_map_precharge_memlock(cost);
+ if (ret < 0)
+ return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
@@ -121,20 +135,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
+ array->map.pages = cost;
array->elem_size = elem_size;
- if (!percpu)
- goto out;
-
- array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
-
- if (array_size >= U32_MAX - PAGE_SIZE ||
- bpf_array_alloc_percpu(array)) {
+ if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
-out:
- array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
return &array->map;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 29ca9208dcfa..d315b393abdd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
* so always copy 'cnt' prog_ids to the user.
* In a rare race the user will see zero prog_ids
*/
- ids = kcalloc(cnt, sizeof(u32), GFP_USER);
+ ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
if (!ids)
return -ENOMEM;
rcu_read_lock();
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index fbfdada6caee..a4bb0b34375a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data)
static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
int map_id)
{
- gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
int numa, err;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 81e2f6995adb..bf6da59ae0d0 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -178,6 +178,9 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
+ /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
+ * extensions.
+ */
if (strchr(dentry->d_name.name, '.'))
return ERR_PTR(-EPERM);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 7b469d10d0e9..b4b5b81e7251 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map)
struct lpm_trie_node __rcu **slot;
struct lpm_trie_node *node;
- raw_spin_lock(&trie->lock);
+ /* Wait for outstanding programs to complete
+ * update/lookup/delete/get_next_key and free the trie.
+ */
+ synchronize_rcu();
/* Always start at the root and walk down to a node that has no
* children. Then free that node, nullify its reference in the parent
@@ -566,10 +569,9 @@ static void trie_free(struct bpf_map *map)
slot = &trie->root;
for (;;) {
- node = rcu_dereference_protected(*slot,
- lockdep_is_held(&trie->lock));
+ node = rcu_dereference_protected(*slot, 1);
if (!node)
- goto unlock;
+ goto out;
if (rcu_access_pointer(node->child[0])) {
slot = &node->child[0];
@@ -587,8 +589,8 @@ static void trie_free(struct bpf_map *map)
}
}
-unlock:
- raw_spin_unlock(&trie->lock);
+out:
+ kfree(trie);
}
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 48c33417d13c..69c5bccabd22 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -38,6 +38,7 @@
#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <linux/list.h>
+#include <linux/mm.h>
#include <net/strparser.h>
#include <net/tcp.h>
@@ -47,6 +48,7 @@
struct bpf_stab {
struct bpf_map map;
struct sock **sock_map;
+ struct bpf_prog *bpf_tx_msg;
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
};
@@ -62,8 +64,7 @@ struct smap_psock_map_entry {
struct smap_psock {
struct rcu_head rcu;
- /* refcnt is used inside sk_callback_lock */
- u32 refcnt;
+ refcount_t refcnt;
/* datapath variables */
struct sk_buff_head rxqueue;
@@ -74,7 +75,16 @@ struct smap_psock {
int save_off;
struct sk_buff *save_skb;
+ /* datapath variables for tx_msg ULP */
+ struct sock *sk_redir;
+ int apply_bytes;
+ int cork_bytes;
+ int sg_size;
+ int eval;
+ struct sk_msg_buff *cork;
+
struct strparser strp;
+ struct bpf_prog *bpf_tx_msg;
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
struct list_head maps;
@@ -92,6 +102,11 @@ struct smap_psock {
void (*save_write_space)(struct sock *sk);
};
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
return rcu_dereference_sk_user_data(sk);
@@ -116,27 +131,41 @@ static int bpf_tcp_init(struct sock *sk)
psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot;
+
+ if (psock->bpf_tx_msg) {
+ tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
+ tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
+ }
+
sk->sk_prot = &tcp_bpf_proto;
rcu_read_unlock();
return 0;
}
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+
static void bpf_tcp_release(struct sock *sk)
{
struct smap_psock *psock;
rcu_read_lock();
psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ goto out;
- if (likely(psock)) {
- sk->sk_prot = psock->sk_proto;
- psock->sk_proto = NULL;
+ if (psock->cork) {
+ free_start_sg(psock->sock, psock->cork);
+ kfree(psock->cork);
+ psock->cork = NULL;
}
+
+ sk->sk_prot = psock->sk_proto;
+ psock->sk_proto = NULL;
+out:
rcu_read_unlock();
}
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-
static void bpf_tcp_close(struct sock *sk, long timeout)
{
void (*close_fun)(struct sock *sk, long timeout);
@@ -175,6 +204,7 @@ enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
__SK_REDIRECT,
+ __SK_NONE,
};
static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
@@ -186,10 +216,621 @@ static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
.release = bpf_tcp_release,
};
+static int memcopy_from_iter(struct sock *sk,
+ struct sk_msg_buff *md,
+ struct iov_iter *from, int bytes)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_curr, rc = -ENOSPC;
+
+ do {
+ int copy;
+ char *to;
+
+ if (md->sg_copybreak >= sg[i].length) {
+ md->sg_copybreak = 0;
+
+ if (++i == MAX_SKB_FRAGS)
+ i = 0;
+
+ if (i == md->sg_end)
+ break;
+ }
+
+ copy = sg[i].length - md->sg_copybreak;
+ to = sg_virt(&sg[i]) + md->sg_copybreak;
+ md->sg_copybreak += copy;
+
+ if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
+ rc = copy_from_iter_nocache(to, copy, from);
+ else
+ rc = copy_from_iter(to, copy, from);
+
+ if (rc != copy) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ bytes -= copy;
+ if (!bytes)
+ break;
+
+ md->sg_copybreak = 0;
+ if (++i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != md->sg_end);
+out:
+ md->sg_curr = i;
+ return rc;
+}
+
+static int bpf_tcp_push(struct sock *sk, int apply_bytes,
+ struct sk_msg_buff *md,
+ int flags, bool uncharge)
+{
+ bool apply = apply_bytes;
+ struct scatterlist *sg;
+ int offset, ret = 0;
+ struct page *p;
+ size_t size;
+
+ while (1) {
+ sg = md->sg_data + md->sg_start;
+ size = (apply && apply_bytes < sg->length) ?
+ apply_bytes : sg->length;
+ offset = sg->offset;
+
+ tcp_rate_check_app_limited(sk);
+ p = sg_page(sg);
+retry:
+ ret = do_tcp_sendpages(sk, p, offset, size, flags);
+ if (ret != size) {
+ if (ret > 0) {
+ if (apply)
+ apply_bytes -= ret;
+ size -= ret;
+ offset += ret;
+ if (uncharge)
+ sk_mem_uncharge(sk, ret);
+ goto retry;
+ }
+
+ sg->length = size;
+ sg->offset = offset;
+ return ret;
+ }
+
+ if (apply)
+ apply_bytes -= ret;
+ sg->offset += ret;
+ sg->length -= ret;
+ if (uncharge)
+ sk_mem_uncharge(sk, ret);
+
+ if (!sg->length) {
+ put_page(p);
+ md->sg_start++;
+ if (md->sg_start == MAX_SKB_FRAGS)
+ md->sg_start = 0;
+ memset(sg, 0, sizeof(*sg));
+
+ if (md->sg_start == md->sg_end)
+ break;
+ }
+
+ if (apply && !apply_bytes)
+ break;
+ }
+ return 0;
+}
+
+static inline void bpf_compute_data_pointers_sg(struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data + md->sg_start;
+
+ if (md->sg_copy[md->sg_start]) {
+ md->data = md->data_end = 0;
+ } else {
+ md->data = sg_virt(sg);
+ md->data_end = md->data + sg->length;
+ }
+}
+
+static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_start;
+
+ do {
+ int uncharge = (bytes < sg[i].length) ? bytes : sg[i].length;
+
+ sk_mem_uncharge(sk, uncharge);
+ bytes -= uncharge;
+ if (!bytes)
+ break;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != md->sg_end);
+}
+
+static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_start, free;
+
+ while (bytes && sg[i].length) {
+ free = sg[i].length;
+ if (bytes < free) {
+ sg[i].length -= bytes;
+ sg[i].offset += bytes;
+ sk_mem_uncharge(sk, bytes);
+ break;
+ }
+
+ sk_mem_uncharge(sk, sg[i].length);
+ put_page(sg_page(&sg[i]));
+ bytes -= sg[i].length;
+ sg[i].length = 0;
+ sg[i].page_link = 0;
+ sg[i].offset = 0;
+ i++;
+
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ }
+}
+
+static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = start, free = 0;
+
+ while (sg[i].length) {
+ free += sg[i].length;
+ sk_mem_uncharge(sk, sg[i].length);
+ put_page(sg_page(&sg[i]));
+ sg[i].length = 0;
+ sg[i].page_link = 0;
+ sg[i].offset = 0;
+ i++;
+
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ }
+
+ return free;
+}
+
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+ int free = free_sg(sk, md->sg_start, md);
+
+ md->sg_start = md->sg_end;
+ return free;
+}
+
+static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+ return free_sg(sk, md->sg_curr, md);
+}
+
+static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
+{
+ return ((_rc == SK_PASS) ?
+ (md->map ? __SK_REDIRECT : __SK_PASS) :
+ __SK_DROP);
+}
+
+static unsigned int smap_do_tx_msg(struct sock *sk,
+ struct smap_psock *psock,
+ struct sk_msg_buff *md)
+{
+ struct bpf_prog *prog;
+ unsigned int rc, _rc;
+
+ preempt_disable();
+ rcu_read_lock();
+
+ /* If the policy was removed mid-send then default to 'accept' */
+ prog = READ_ONCE(psock->bpf_tx_msg);
+ if (unlikely(!prog)) {
+ _rc = SK_PASS;
+ goto verdict;
+ }
+
+ bpf_compute_data_pointers_sg(md);
+ rc = (*prog->bpf_func)(md, prog->insnsi);
+ psock->apply_bytes = md->apply_bytes;
+
+ /* Moving return codes from UAPI namespace into internal namespace */
+ _rc = bpf_map_msg_verdict(rc, md);
+
+ /* The psock holds a refcount on the sock but not on the map, and
+ * because we must drop the RCU read lock here, it's possible the map
+ * is removed between here and when we need it to execute the sock
+ * redirect. So do the map lookup now for future use.
+ */
+ if (_rc == __SK_REDIRECT) {
+ if (psock->sk_redir)
+ sock_put(psock->sk_redir);
+ psock->sk_redir = do_msg_redirect_map(md);
+ if (!psock->sk_redir) {
+ _rc = __SK_DROP;
+ goto verdict;
+ }
+ sock_hold(psock->sk_redir);
+ }
+verdict:
+ rcu_read_unlock();
+ preempt_enable();
+
+ return _rc;
+}
+
+static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
+ struct sk_msg_buff *md,
+ int flags)
+{
+ struct smap_psock *psock;
+ struct scatterlist *sg;
+ int i, err, free = 0;
+
+ sg = md->sg_data;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ goto out_rcu;
+
+ if (!refcount_inc_not_zero(&psock->refcnt))
+ goto out_rcu;
+
+ rcu_read_unlock();
+ lock_sock(sk);
+ err = bpf_tcp_push(sk, send, md, flags, false);
+ release_sock(sk);
+ smap_release_sock(psock, sk);
+ if (unlikely(err))
+ goto out;
+ return 0;
+out_rcu:
+ rcu_read_unlock();
+out:
+ i = md->sg_start;
+ while (sg[i].length) {
+ free += sg[i].length;
+ put_page(sg_page(&sg[i]));
+ sg[i].length = 0;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ }
+ return free;
+}
+
+static inline void bpf_md_init(struct smap_psock *psock)
+{
+ if (!psock->apply_bytes) {
+ psock->eval = __SK_NONE;
+ if (psock->sk_redir) {
+ sock_put(psock->sk_redir);
+ psock->sk_redir = NULL;
+ }
+ }
+}
+
+static void apply_bytes_dec(struct smap_psock *psock, int i)
+{
+ if (psock->apply_bytes) {
+ if (psock->apply_bytes < i)
+ psock->apply_bytes = 0;
+ else
+ psock->apply_bytes -= i;
+ }
+}
+
+static int bpf_exec_tx_verdict(struct smap_psock *psock,
+ struct sk_msg_buff *m,
+ struct sock *sk,
+ int *copied, int flags)
+{
+ bool cork = false, enospc = (m->sg_start == m->sg_end);
+ struct sock *redir;
+ int err = 0;
+ int send;
+
+more_data:
+ if (psock->eval == __SK_NONE)
+ psock->eval = smap_do_tx_msg(sk, psock, m);
+
+ if (m->cork_bytes &&
+ m->cork_bytes > psock->sg_size && !enospc) {
+ psock->cork_bytes = m->cork_bytes - psock->sg_size;
+ if (!psock->cork) {
+ psock->cork = kcalloc(1,
+ sizeof(struct sk_msg_buff),
+ GFP_ATOMIC | __GFP_NOWARN);
+
+ if (!psock->cork) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ }
+ memcpy(psock->cork, m, sizeof(*m));
+ goto out_err;
+ }
+
+ send = psock->sg_size;
+ if (psock->apply_bytes && psock->apply_bytes < send)
+ send = psock->apply_bytes;
+
+ switch (psock->eval) {
+ case __SK_PASS:
+ err = bpf_tcp_push(sk, send, m, flags, true);
+ if (unlikely(err)) {
+ *copied -= free_start_sg(sk, m);
+ break;
+ }
+
+ apply_bytes_dec(psock, send);
+ psock->sg_size -= send;
+ break;
+ case __SK_REDIRECT:
+ redir = psock->sk_redir;
+ apply_bytes_dec(psock, send);
+
+ if (psock->cork) {
+ cork = true;
+ psock->cork = NULL;
+ }
+
+ return_mem_sg(sk, send, m);
+ release_sock(sk);
+
+ err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
+ lock_sock(sk);
+
+ if (cork) {
+ free_start_sg(sk, m);
+ kfree(m);
+ m = NULL;
+ }
+ if (unlikely(err))
+ *copied -= err;
+ else
+ psock->sg_size -= send;
+ break;
+ case __SK_DROP:
+ default:
+ free_bytes_sg(sk, send, m);
+ apply_bytes_dec(psock, send);
+ *copied -= send;
+ psock->sg_size -= send;
+ err = -EACCES;
+ break;
+ }
+
+ if (likely(!err)) {
+ bpf_md_init(psock);
+ if (m &&
+ m->sg_data[m->sg_start].page_link &&
+ m->sg_data[m->sg_start].length)
+ goto more_data;
+ }
+
+out_err:
+ return err;
+}
+
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
+ struct sk_msg_buff md = {0};
+ unsigned int sg_copy = 0;
+ struct smap_psock *psock;
+ int copied = 0, err = 0;
+ struct scatterlist *sg;
+ long timeo;
+
+ /* It's possible a sock event or the user removed the psock _but_ the
+ * ops have not been reprogrammed yet, so we get here. In this case fall
+ * back to tcp_sendmsg. Note this only works because we _only_ ever
+ * allow a single ULP; there is no hierarchy here.
+ */
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ return tcp_sendmsg(sk, msg, size);
+ }
+
+ /* Increment the psock refcnt to ensure it's not released while sending
+ * a message. Required because sk lookup and bpf programs are used in
+ * separate rcu critical sections. It's OK if we lose the map entry
+ * but we can't lose the sock reference.
+ */
+ if (!refcount_inc_not_zero(&psock->refcnt)) {
+ rcu_read_unlock();
+ return tcp_sendmsg(sk, msg, size);
+ }
+
+ sg = md.sg_data;
+ sg_init_table(sg, MAX_SKB_FRAGS);
+ rcu_read_unlock();
+
+ lock_sock(sk);
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+ while (msg_data_left(msg)) {
+ struct sk_msg_buff *m;
+ bool enospc = false;
+ int copy;
+
+ if (sk->sk_err) {
+ err = sk->sk_err;
+ goto out_err;
+ }
+
+ copy = msg_data_left(msg);
+ if (!sk_stream_memory_free(sk))
+ goto wait_for_sndbuf;
+
+ m = psock->cork_bytes ? psock->cork : &md;
+ m->sg_curr = m->sg_copybreak ? m->sg_curr : m->sg_end;
+ err = sk_alloc_sg(sk, copy, m->sg_data,
+ m->sg_start, &m->sg_end, &sg_copy,
+ m->sg_end - 1);
+ if (err) {
+ if (err != -ENOSPC)
+ goto wait_for_memory;
+ enospc = true;
+ copy = sg_copy;
+ }
+
+ err = memcopy_from_iter(sk, m, &msg->msg_iter, copy);
+ if (err < 0) {
+ free_curr_sg(sk, m);
+ goto out_err;
+ }
+
+ psock->sg_size += copy;
+ copied += copy;
+ sg_copy = 0;
+
+ /* When bytes are being corked, skip running the BPF program and
+ * applying the verdict unless there is no more buffer space. In
+ * the ENOSPC case simply run the BPF program with the currently
+ * accumulated data. We don't have much choice at this point:
+ * we could try extending the page frags or chaining complex
+ * frags, but even in these cases we will _eventually_ hit an
+ * OOM scenario. More complex recovery schemes may be
+ * implemented in the future, but BPF programs must handle
+ * the case where apply_cork requests are not honored. The
+ * canonical method to verify this is to check data length.
+ */
+ if (psock->cork_bytes) {
+ if (copy > psock->cork_bytes)
+ psock->cork_bytes = 0;
+ else
+ psock->cork_bytes -= copy;
+
+ if (psock->cork_bytes && !enospc)
+ goto out_cork;
+
+ /* All cork bytes accounted for, re-run filter */
+ psock->eval = __SK_NONE;
+ psock->cork_bytes = 0;
+ }
+
+ err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
+ if (unlikely(err < 0))
+ goto out_err;
+ continue;
+wait_for_sndbuf:
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_err;
+ }
+out_err:
+ if (err < 0)
+ err = sk_stream_error(sk, msg->msg_flags, err);
+out_cork:
+ release_sock(sk);
+ smap_release_sock(psock, sk);
+ return copied ? copied : err;
+}
+
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ struct sk_msg_buff md = {0}, *m = NULL;
+ int err = 0, copied = 0;
+ struct smap_psock *psock;
+ struct scatterlist *sg;
+ bool enospc = false;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ goto accept;
+
+ if (!refcount_inc_not_zero(&psock->refcnt))
+ goto accept;
+ rcu_read_unlock();
+
+ lock_sock(sk);
+
+ if (psock->cork_bytes)
+ m = psock->cork;
+ else
+ m = &md;
+
+ /* Catch case where ring is full and sendpage is stalled. */
+ if (unlikely(m->sg_end == m->sg_start &&
+ m->sg_data[m->sg_end].length))
+ goto out_err;
+
+ psock->sg_size += size;
+ sg = &m->sg_data[m->sg_end];
+ sg_set_page(sg, page, size, offset);
+ get_page(page);
+ m->sg_copy[m->sg_end] = true;
+ sk_mem_charge(sk, size);
+ m->sg_end++;
+ copied = size;
+
+ if (m->sg_end == MAX_SKB_FRAGS)
+ m->sg_end = 0;
+
+ if (m->sg_end == m->sg_start)
+ enospc = true;
+
+ if (psock->cork_bytes) {
+ if (size > psock->cork_bytes)
+ psock->cork_bytes = 0;
+ else
+ psock->cork_bytes -= size;
+
+ if (psock->cork_bytes && !enospc)
+ goto out_err;
+
+ /* All cork bytes accounted for; re-run the filter */
+ psock->eval = __SK_NONE;
+ psock->cork_bytes = 0;
+ }
+
+ err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
+out_err:
+ release_sock(sk);
+ smap_release_sock(psock, sk);
+ return copied ? copied : err;
+accept:
+ rcu_read_unlock();
+ return tcp_sendpage(sk, page, offset, size, flags);
+}
+
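
The full-ring test above relies on sg_start == sg_end being ambiguous between empty and full; the slot's length disambiguates. A tiny userspace model of the same idea (illustrative only, invented names):

#include <stdio.h>

#define RING_SIZE 4	/* stand-in for MAX_SKB_FRAGS */

struct ring {
	unsigned int start, end;
	unsigned int len[RING_SIZE];	/* 0 means the slot is free */
};

/* start == end could mean empty or full; like the code above, use
 * the occupancy of the end slot to tell the two apart. */
static int ring_full(const struct ring *r)
{
	return r->start == r->end && r->len[r->end];
}

static int ring_push(struct ring *r, unsigned int len)
{
	if (ring_full(r))
		return -1;
	r->len[r->end] = len;
	r->end = (r->end + 1) % RING_SIZE;
	return 0;
}

int main(void)
{
	struct ring r = { 0 };
	int i;

	for (i = 0; i < 5; i++)
		printf("push %d -> %d\n", i, ring_push(&r, 100));
	return 0;	/* the 5th push fails: ring is full */
}
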
+static void bpf_tcp_msg_add(struct smap_psock *psock,
+ struct sock *sk,
+ struct bpf_prog *tx_msg)
+{
+ struct bpf_prog *orig_tx_msg;
+
+ orig_tx_msg = xchg(&psock->bpf_tx_msg, tx_msg);
+ if (orig_tx_msg)
+ bpf_prog_put(orig_tx_msg);
+}
+
static int bpf_tcp_ulp_register(void)
{
tcp_bpf_proto = tcp_prot;
tcp_bpf_proto.close = bpf_tcp_close;
+ /* Once BPF TX ULP is registered it is never unregistered. It
+ * will be in the ULP list for the lifetime of the system.
+ * Registering it more than once is not a problem.
+ */
return tcp_register_ulp(&bpf_tcp_ulp_ops);
}
@@ -373,15 +1014,13 @@ static void smap_destroy_psock(struct rcu_head *rcu)
static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
{
- psock->refcnt--;
- if (psock->refcnt)
- return;
-
- tcp_cleanup_ulp(sock);
- smap_stop_sock(psock, sock);
- clear_bit(SMAP_TX_RUNNING, &psock->state);
- rcu_assign_sk_user_data(sock, NULL);
- call_rcu_sched(&psock->rcu, smap_destroy_psock);
+ if (refcount_dec_and_test(&psock->refcnt)) {
+ tcp_cleanup_ulp(sock);
+ smap_stop_sock(psock, sock);
+ clear_bit(SMAP_TX_RUNNING, &psock->state);
+ rcu_assign_sk_user_data(sock, NULL);
+ call_rcu_sched(&psock->rcu, smap_destroy_psock);
+ }
}
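
The refcount_t conversion above follows the usual inc-not-zero / dec-and-test lifetime pattern. A rough userspace model with C11 atomics (this deliberately omits refcount_t's saturation and ordering guarantees):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int refcnt = 1;

/* Mirrors refcount_inc_not_zero(): refuse to take a reference once
 * the count has hit zero and teardown may already be running. */
static int get_ref(void)
{
	int old = atomic_load(&refcnt);

	while (old != 0)
		if (atomic_compare_exchange_weak(&refcnt, &old, old + 1))
			return 1;
	return 0;
}

/* Mirrors refcount_dec_and_test(): exactly one caller sees the
 * transition to zero and runs teardown. */
static void put_ref(void)
{
	if (atomic_fetch_sub(&refcnt, 1) == 1)
		printf("teardown\n");
}

int main(void)
{
	if (get_ref())
		put_ref();	/* back to 1 */
	put_ref();		/* hits zero: prints "teardown" */
	printf("late get_ref: %d\n", get_ref());	/* 0: too late */
	return 0;
}
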
static int smap_parse_func_strparser(struct strparser *strp,
@@ -415,7 +1054,6 @@ static int smap_parse_func_strparser(struct strparser *strp,
return rc;
}
-
static int smap_read_sock_done(struct strparser *strp, int err)
{
return err;
@@ -485,12 +1123,22 @@ static void smap_gc_work(struct work_struct *w)
bpf_prog_put(psock->bpf_parse);
if (psock->bpf_verdict)
bpf_prog_put(psock->bpf_verdict);
+ if (psock->bpf_tx_msg)
+ bpf_prog_put(psock->bpf_tx_msg);
+
+ if (psock->cork) {
+ free_start_sg(psock->sock, psock->cork);
+ kfree(psock->cork);
+ }
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
list_del(&e->list);
kfree(e);
}
+ if (psock->sk_redir)
+ sock_put(psock->sk_redir);
+
sock_put(psock->sock);
kfree(psock);
}
@@ -506,12 +1154,13 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
if (!psock)
return ERR_PTR(-ENOMEM);
+ psock->eval = __SK_NONE;
psock->sock = sock;
skb_queue_head_init(&psock->rxqueue);
INIT_WORK(&psock->tx_work, smap_tx_work);
INIT_WORK(&psock->gc_work, smap_gc_work);
INIT_LIST_HEAD(&psock->maps);
- psock->refcnt = 1;
+ refcount_set(&psock->refcnt, 1);
rcu_assign_sk_user_data(sock, psock);
sock_hold(sock);
@@ -521,8 +1170,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
struct bpf_stab *stab;
- int err = -EINVAL;
u64 cost;
+ int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -547,6 +1196,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+ err = -EINVAL;
if (cost >= U32_MAX - PAGE_SIZE)
goto free_stab;
@@ -713,10 +1363,11 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct smap_psock_map_entry *e = NULL;
- struct bpf_prog *verdict, *parse;
+ struct bpf_prog *verdict, *parse, *tx_msg;
struct sock *osock, *sock;
struct smap_psock *psock;
u32 i = *(u32 *)key;
+ bool new = false;
int err;
if (unlikely(flags > BPF_EXIST))
@@ -739,6 +1390,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
*/
verdict = READ_ONCE(stab->bpf_verdict);
parse = READ_ONCE(stab->bpf_parse);
+ tx_msg = READ_ONCE(stab->bpf_tx_msg);
if (parse && verdict) {
/* bpf prog refcnt may be zero if a concurrent attach operation
@@ -757,6 +1409,17 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
}
}
+ if (tx_msg) {
+ tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg);
+ if (IS_ERR(tx_msg)) {
+ if (verdict)
+ bpf_prog_put(verdict);
+ if (parse)
+ bpf_prog_put(parse);
+ return PTR_ERR(tx_msg);
+ }
+ }
+
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
@@ -771,7 +1434,14 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
err = -EBUSY;
goto out_progs;
}
- psock->refcnt++;
+ if (READ_ONCE(psock->bpf_tx_msg) && tx_msg) {
+ err = -EBUSY;
+ goto out_progs;
+ }
+ if (!refcount_inc_not_zero(&psock->refcnt)) {
+ err = -EAGAIN;
+ goto out_progs;
+ }
} else {
psock = smap_init_psock(sock, stab);
if (IS_ERR(psock)) {
@@ -779,11 +1449,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
goto out_progs;
}
- err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
- if (err)
- goto out_progs;
-
set_bit(SMAP_TX_RUNNING, &psock->state);
+ new = true;
}
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
@@ -796,6 +1463,14 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
/* 3. At this point we have a reference to a valid psock that is
* running. Attach any BPF programs needed.
*/
+ if (tx_msg)
+ bpf_tcp_msg_add(psock, sock, tx_msg);
+ if (new) {
+ err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
+ if (err)
+ goto out_free;
+ }
+
if (parse && verdict && !psock->strp_enabled) {
err = smap_init_sock(psock, sock);
if (err)
@@ -817,8 +1492,6 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
struct smap_psock *opsock = smap_psock_sk(osock);
write_lock_bh(&osock->sk_callback_lock);
- if (osock != sock && parse)
- smap_stop_sock(opsock, osock);
smap_list_remove(opsock, &stab->sock_map[i]);
smap_release_sock(opsock, osock);
write_unlock_bh(&osock->sk_callback_lock);
@@ -831,6 +1504,8 @@ out_progs:
bpf_prog_put(verdict);
if (parse)
bpf_prog_put(parse);
+ if (tx_msg)
+ bpf_prog_put(tx_msg);
write_unlock_bh(&sock->sk_callback_lock);
kfree(e);
return err;
@@ -845,6 +1520,9 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
return -EINVAL;
switch (type) {
+ case BPF_SK_MSG_VERDICT:
+ orig = xchg(&stab->bpf_tx_msg, prog);
+ break;
case BPF_SK_SKB_STREAM_PARSER:
orig = xchg(&stab->bpf_parse, prog);
break;
@@ -906,6 +1584,10 @@ static void sock_map_release(struct bpf_map *map, struct file *map_file)
orig = xchg(&stab->bpf_verdict, NULL);
if (orig)
bpf_prog_put(orig);
+
+ orig = xchg(&stab->bpf_tx_msg, NULL);
+ if (orig)
+ bpf_prog_put(orig);
}
const struct bpf_map_ops sock_map_ops = {
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b0ecf43f5894..57eeb1234b67 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -9,16 +9,19 @@
#include <linux/filter.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
+#include <linux/elf.h>
+#include <linux/pagemap.h>
#include "percpu_freelist.h"
-#define STACK_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define STACK_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \
+ BPF_F_STACK_BUILD_ID)
struct stack_map_bucket {
struct pcpu_freelist_node fnode;
u32 hash;
u32 nr;
- u64 ip[];
+ u64 data[];
};
struct bpf_stack_map {
@@ -29,6 +32,17 @@ struct bpf_stack_map {
struct stack_map_bucket *buckets[];
};
+static inline bool stack_map_use_build_id(struct bpf_map *map)
+{
+ return (map->map_flags & BPF_F_STACK_BUILD_ID);
+}
+
+static inline int stack_map_data_size(struct bpf_map *map)
+{
+ return stack_map_use_build_id(map) ?
+ sizeof(struct bpf_stack_build_id) : sizeof(u64);
+}
+
static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
@@ -68,8 +82,16 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- value_size < 8 || value_size % 8 ||
- value_size / 8 > sysctl_perf_event_max_stack)
+ value_size < 8 || value_size % 8)
+ return ERR_PTR(-EINVAL);
+
+ BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
+ if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
+ if (value_size % sizeof(struct bpf_stack_build_id) ||
+ value_size / sizeof(struct bpf_stack_build_id)
+ > sysctl_perf_event_max_stack)
+ return ERR_PTR(-EINVAL);
+ } else if (value_size / 8 > sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
/* hash table size must be power of 2 */
@@ -114,13 +136,184 @@ free_smap:
return ERR_PTR(err);
}
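
For callers, the sizing rule above means a build-id stackmap carries one struct bpf_stack_build_id record per frame instead of one u64 ip. A hypothetical userspace sketch of creating such a map (assumes uapi headers that already contain BPF_F_STACK_BUILD_ID and struct bpf_stack_build_id from this patch):

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_STACK_TRACE;
	attr.key_size = sizeof(__u32);
	/* one bpf_stack_build_id record per frame, up to the default
	 * sysctl_perf_event_max_stack of 127 frames */
	attr.value_size = 127 * sizeof(struct bpf_stack_build_id);
	attr.max_entries = 1024;
	attr.map_flags = BPF_F_STACK_BUILD_ID;

	fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (fd < 0)
		perror("BPF_MAP_CREATE");
	else
		printf("stackmap fd %d\n", fd);
	return 0;
}
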
+#define BPF_BUILD_ID 3
+/*
+ * Parse build id from the note segment. This logic can be shared between
+ * 32-bit and 64-bit systems, because Elf32_Nhdr and Elf64_Nhdr are
+ * identical.
+ */
+static inline int stack_map_parse_build_id(void *page_addr,
+ unsigned char *build_id,
+ void *note_start,
+ Elf32_Word note_size)
+{
+ Elf32_Word note_offs = 0, new_offs;
+
+ /* check for overflow */
+ if (note_start < page_addr || note_start + note_size < note_start)
+ return -EINVAL;
+
+ /* only support notes that fit in the first page */
+ if (note_start + note_size > page_addr + PAGE_SIZE)
+ return -EINVAL;
+
+ while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+ if (nhdr->n_type == BPF_BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU") &&
+ nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+ memcpy(build_id,
+ note_start + note_offs +
+ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
+ BPF_BUILD_ID_SIZE);
+ return 0;
+ }
+ new_offs = note_offs + sizeof(Elf32_Nhdr) +
+ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+ if (new_offs <= note_offs) /* overflow */
+ break;
+ note_offs = new_offs;
+ }
+ return -EINVAL;
+}
+
+/* Parse build ID from 32-bit ELF */
+static int stack_map_get_build_id_32(void *page_addr,
+ unsigned char *build_id)
+{
+ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
+ Elf32_Phdr *phdr;
+ int i;
+
+ /* only support a phdr table that fits in one page */
+ if (ehdr->e_phnum >
+ (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+ return -EINVAL;
+
+ phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
+
+ for (i = 0; i < ehdr->e_phnum; ++i)
+ if (phdr[i].p_type == PT_NOTE)
+ return stack_map_parse_build_id(page_addr, build_id,
+ page_addr + phdr[i].p_offset,
+ phdr[i].p_filesz);
+ return -EINVAL;
+}
+
+/* Parse build ID from 64-bit ELF */
+static int stack_map_get_build_id_64(void *page_addr,
+ unsigned char *build_id)
+{
+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
+ Elf64_Phdr *phdr;
+ int i;
+
+ /* only support a phdr table that fits in one page */
+ if (ehdr->e_phnum >
+ (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+ return -EINVAL;
+
+ phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
+
+ for (i = 0; i < ehdr->e_phnum; ++i)
+ if (phdr[i].p_type == PT_NOTE)
+ return stack_map_parse_build_id(page_addr, build_id,
+ page_addr + phdr[i].p_offset,
+ phdr[i].p_filesz);
+ return -EINVAL;
+}
+
+/* Parse build ID of ELF file mapped to vma */
+static int stack_map_get_build_id(struct vm_area_struct *vma,
+ unsigned char *build_id)
+{
+ Elf32_Ehdr *ehdr;
+ struct page *page;
+ void *page_addr;
+ int ret;
+
+ /* only works for page backed storage */
+ if (!vma->vm_file)
+ return -EINVAL;
+
+ page = find_get_page(vma->vm_file->f_mapping, 0);
+ if (!page)
+ return -EFAULT; /* page not mapped */
+
+ ret = -EINVAL;
+ page_addr = page_address(page);
+ ehdr = (Elf32_Ehdr *)page_addr;
+
+ /* compare magic \x7f "ELF" */
+ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+ goto out;
+
+ /* only support executable files and shared object files */
+ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
+ goto out;
+
+ if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
+ ret = stack_map_get_build_id_32(page_addr, build_id);
+ else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
+ ret = stack_map_get_build_id_64(page_addr, build_id);
+out:
+ put_page(page);
+ return ret;
+}
+
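
The same note walk can be exercised from userspace. A rough 64-bit-only analogue of the lookup above (error handling elided; assumes the program headers and note segment sit in the first page, as the kernel code also requires):

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/* Walk a note segment for NT_GNU_BUILD_ID, same shape as the kernel
 * loop above. Returns the id length, or -1 if not found. */
static int parse_notes(const char *p, size_t len, unsigned char *id)
{
	size_t off = 0;

	while (off + sizeof(Elf64_Nhdr) < len) {
		const Elf64_Nhdr *n = (const Elf64_Nhdr *)(p + off);
		size_t name = (n->n_namesz + 3) & ~3UL;
		size_t desc = (n->n_descsz + 3) & ~3UL;

		if (n->n_type == NT_GNU_BUILD_ID && n->n_namesz == 4 &&
		    !memcmp(p + off + sizeof(*n), "GNU", 4) &&
		    n->n_descsz <= 64) {
			memcpy(id, p + off + sizeof(*n) + name, n->n_descsz);
			return n->n_descsz;
		}
		off += sizeof(*n) + name + desc;
	}
	return -1;
}

int main(int argc, char **argv)
{
	unsigned char id[64];
	int i, n = -1, fd = open(argc > 1 ? argv[1] : "/bin/ls", O_RDONLY);
	const char *page;
	const Elf64_Ehdr *eh;
	const Elf64_Phdr *ph;

	if (fd < 0)
		return 1;
	page = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
	eh = (const Elf64_Ehdr *)page;
	ph = (const Elf64_Phdr *)(page + eh->e_phoff);

	for (i = 0; i < eh->e_phnum && n < 0; i++)
		if (ph[i].p_type == PT_NOTE)
			n = parse_notes(page + ph[i].p_offset,
					ph[i].p_filesz, id);
	for (i = 0; i < n; i++)
		printf("%02x", id[i]);
	if (n > 0)
		putchar('\n');
	return n > 0 ? 0 : 1;
}
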
+static void stack_map_get_build_id_offset(struct bpf_map *map,
+ struct stack_map_bucket *bucket,
+ u64 *ips, u32 trace_nr, bool user)
+{
+ int i;
+ struct vm_area_struct *vma;
+ struct bpf_stack_build_id *id_offs;
+
+ bucket->nr = trace_nr;
+ id_offs = (struct bpf_stack_build_id *)bucket->data;
+
+ /*
+ * We cannot do up_read() in nmi context, so build_id lookup is
+ * only supported for non-nmi events. If at some point, it is
+ * possible to run find_vma() without taking the semaphore, we
+ * would like to allow build_id lookup in nmi context.
+ *
+ * Same fallback is used for kernel stack (!user) on a stackmap
+ * with build_id.
+ */
+ if (!user || !current || !current->mm || in_nmi() ||
+ down_read_trylock(&current->mm->mmap_sem) == 0) {
+ /* cannot access current->mm, fall back to ips */
+ for (i = 0; i < trace_nr; i++) {
+ id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+ id_offs[i].ip = ips[i];
+ }
+ return;
+ }
+
+ for (i = 0; i < trace_nr; i++) {
+ vma = find_vma(current->mm, ips[i]);
+ if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
+ /* per entry fall back to ips */
+ id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+ id_offs[i].ip = ips[i];
+ continue;
+ }
+ id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
+ - vma->vm_start;
+ id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
+ }
+ up_read(&current->mm->mmap_sem);
+}
+
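
The offset arithmetic above converts an instruction pointer back into a file offset within the mapped object, which is what gets stored alongside the build id. As plain arithmetic (example values made up):

#include <stdio.h>

#define PAGE_SHIFT 12

/* A virtual address inside a file-backed mapping, converted back to
 * an offset within the mapped file (what lands in id_offs[i]). */
static unsigned long ip_to_file_offset(unsigned long ip,
				       unsigned long vm_start,
				       unsigned long vm_pgoff)
{
	return (vm_pgoff << PAGE_SHIFT) + ip - vm_start;
}

int main(void)
{
	/* made-up mapping: file offset 0x1000 mapped at 0x7f0000001000 */
	printf("0x%lx\n",
	       ip_to_file_offset(0x7f0000001234UL, 0x7f0000001000UL, 1));
	return 0;	/* prints 0x1234 */
}
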
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
- u32 max_depth = map->value_size / 8;
+ u32 max_depth = map->value_size / stack_map_data_size(map);
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
@@ -128,6 +321,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
bool user = flags & BPF_F_USER_STACK;
bool kernel = !user;
u64 *ips;
+ bool hash_matches;
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
@@ -156,24 +350,43 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
id = hash & (smap->n_buckets - 1);
bucket = READ_ONCE(smap->buckets[id]);
- if (bucket && bucket->hash == hash) {
- if (flags & BPF_F_FAST_STACK_CMP)
+ hash_matches = bucket && bucket->hash == hash;
+ /* fast cmp */
+ if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
+ return id;
+
+ if (stack_map_use_build_id(map)) {
+ /* for build_id+offset, pop a bucket before slow cmp */
+ new_bucket = (struct stack_map_bucket *)
+ pcpu_freelist_pop(&smap->freelist);
+ if (unlikely(!new_bucket))
+ return -ENOMEM;
+ stack_map_get_build_id_offset(map, new_bucket, ips,
+ trace_nr, user);
+ trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
+ if (hash_matches && bucket->nr == trace_nr &&
+ memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
+ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
return id;
- if (bucket->nr == trace_nr &&
- memcmp(bucket->ip, ips, trace_len) == 0)
+ }
+ if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
+ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
+ return -EEXIST;
+ }
+ } else {
+ if (hash_matches && bucket->nr == trace_nr &&
+ memcmp(bucket->data, ips, trace_len) == 0)
return id;
+ if (bucket && !(flags & BPF_F_REUSE_STACKID))
+ return -EEXIST;
+
+ new_bucket = (struct stack_map_bucket *)
+ pcpu_freelist_pop(&smap->freelist);
+ if (unlikely(!new_bucket))
+ return -ENOMEM;
+ memcpy(new_bucket->data, ips, trace_len);
}
- /* this call stack is not in the map, try to add it */
- if (bucket && !(flags & BPF_F_REUSE_STACKID))
- return -EEXIST;
-
- new_bucket = (struct stack_map_bucket *)
- pcpu_freelist_pop(&smap->freelist);
- if (unlikely(!new_bucket))
- return -ENOMEM;
-
- memcpy(new_bucket->ip, ips, trace_len);
new_bucket->hash = hash;
new_bucket->nr = trace_nr;
@@ -212,8 +425,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
if (!bucket)
return -ENOENT;
- trace_len = bucket->nr * sizeof(u64);
- memcpy(value, bucket->ip, trace_len);
+ trace_len = bucket->nr * stack_map_data_size(map);
+ memcpy(value, bucket->data, trace_len);
memset(value + trace_len, 0, map->value_size - trace_len);
old_bucket = xchg(&smap->buckets[id], bucket);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e24aa3241387..dd172ee16716 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1315,7 +1315,8 @@ static int bpf_obj_get(const union bpf_attr *attr)
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
-static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
+static int sockmap_get_from_fd(const union bpf_attr *attr,
+ int type, bool attach)
{
struct bpf_prog *prog = NULL;
int ufd = attr->target_fd;
@@ -1329,8 +1330,7 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
return PTR_ERR(map);
if (attach) {
- prog = bpf_prog_get_type(attr->attach_bpf_fd,
- BPF_PROG_TYPE_SK_SKB);
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
if (IS_ERR(prog)) {
fdput(f);
return PTR_ERR(prog);
@@ -1382,9 +1382,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
+ case BPF_SK_MSG_VERDICT:
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sockmap_get_from_fd(attr, true);
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
default:
return -EINVAL;
}
@@ -1437,9 +1439,11 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
+ case BPF_SK_MSG_VERDICT:
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sockmap_get_from_fd(attr, false);
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
default:
return -EINVAL;
}
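
With the new attach type, userspace attaches a BPF_PROG_TYPE_SK_MSG program to a sockmap via BPF_PROG_ATTACH. A hypothetical sketch (sockmap_fd and prog_fd come from earlier BPF_MAP_CREATE and BPF_PROG_LOAD calls not shown here):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* sockmap_fd: a BPF_MAP_TYPE_SOCKMAP map;
 * prog_fd: a loaded BPF_PROG_TYPE_SK_MSG program. */
static int attach_msg_verdict(int sockmap_fd, int prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = sockmap_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = BPF_SK_MSG_VERDICT;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}
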
@@ -1845,7 +1849,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
union bpf_attr attr = {};
int err;
- if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
+ if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM;
err = check_uarg_tail_zero(uattr, sizeof(attr), size);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5fb69a85d967..e9f7c20691c1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -508,10 +508,6 @@ err:
static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-#define CALLEE_SAVED_REGS 5
-static const int callee_saved[CALLEE_SAVED_REGS] = {
- BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
-};
static void __mark_reg_not_init(struct bpf_reg_state *reg);
@@ -1252,6 +1248,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
case BPF_PROG_TYPE_XDP:
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_SK_SKB:
+ case BPF_PROG_TYPE_SK_MSG:
if (meta)
return meta->pkt_access;
@@ -1356,6 +1353,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_CTX;
}
+static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
+{
+ const struct bpf_reg_state *reg = cur_regs(env) + regno;
+
+ return type_is_pkt_pointer(reg->type);
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -1416,10 +1420,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
}
static int check_ptr_alignment(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg,
- int off, int size)
+ const struct bpf_reg_state *reg, int off,
+ int size, bool strict_alignment_once)
{
- bool strict = env->strict_alignment;
+ bool strict = env->strict_alignment || strict_alignment_once;
const char *pointer_desc = "";
switch (reg->type) {
@@ -1576,9 +1580,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
-static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
- int bpf_size, enum bpf_access_type t,
- int value_regno)
+static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
+ int off, int bpf_size, enum bpf_access_type t,
+ int value_regno, bool strict_alignment_once)
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
@@ -1590,7 +1594,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return size;
/* alignment checks will add in reg->off themselves */
- err = check_ptr_alignment(env, reg, off, size);
+ err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
if (err)
return err;
@@ -1735,21 +1739,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
return -EACCES;
}
- if (is_ctx_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_XADD stores into R%d context is not allowed\n",
- insn->dst_reg);
+ if (is_ctx_reg(env, insn->dst_reg) ||
+ is_pkt_reg(env, insn->dst_reg)) {
+ verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
+ insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
+ "context" : "packet");
return -EACCES;
}
/* check whether atomic_add can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_READ, -1);
+ BPF_SIZE(insn->code), BPF_READ, -1, true);
if (err)
return err;
/* check whether atomic_add can write into the same memory */
return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_WRITE, -1);
+ BPF_SIZE(insn->code), BPF_WRITE, -1, true);
}
/* when register 'regno' is passed into function that will read 'access_size'
@@ -2066,7 +2072,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SOCKMAP:
if (func_id != BPF_FUNC_sk_redirect_map &&
func_id != BPF_FUNC_sock_map_update &&
- func_id != BPF_FUNC_map_delete_elem)
+ func_id != BPF_FUNC_map_delete_elem &&
+ func_id != BPF_FUNC_msg_redirect_map)
goto error;
break;
default:
@@ -2104,6 +2111,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
goto error;
break;
case BPF_FUNC_sk_redirect_map:
+ case BPF_FUNC_msg_redirect_map:
if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
goto error;
break;
@@ -2388,7 +2396,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
* is inferred from register state.
*/
for (i = 0; i < meta.access_size; i++) {
- err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
+ err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
+ BPF_WRITE, -1, false);
if (err)
return err;
}
@@ -4632,7 +4641,7 @@ static int do_check(struct bpf_verifier_env *env)
*/
err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ,
- insn->dst_reg);
+ insn->dst_reg, false);
if (err)
return err;
@@ -4684,7 +4693,7 @@ static int do_check(struct bpf_verifier_env *env)
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
- insn->src_reg);
+ insn->src_reg, false);
if (err)
return err;
@@ -4719,7 +4728,7 @@ static int do_check(struct bpf_verifier_env *env)
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
- -1);
+ -1, false);
if (err)
return err;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 8cda3bc3ae22..4bfb2908ec15 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3183,6 +3183,16 @@ static int cgroup_enable_threaded(struct cgroup *cgrp)
if (cgroup_is_threaded(cgrp))
return 0;
+ /*
+ * If @cgroup is populated or has domain controllers enabled, it
+ * can't be switched. While the below cgroup_can_be_thread_root()
+ * test can catch the same conditions, that's only when @parent is
+ * not mixable, so let's check it explicitly.
+ */
+ if (cgroup_is_populated(cgrp) ||
+ cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
+ return -EOPNOTSUPP;
+
/* we're joining the parent's domain, ensure its validity */
if (!cgroup_is_valid_domain(dom_cgrp) ||
!cgroup_can_be_thread_root(dom_cgrp))
diff --git a/kernel/compat.c b/kernel/compat.c
index 3247fe761f60..3f5fa8902e7d 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -488,25 +488,6 @@ get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat)
}
EXPORT_SYMBOL_GPL(get_compat_sigset);
-int
-put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
- unsigned int size)
-{
- /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */
-#ifdef __BIG_ENDIAN
- compat_sigset_t v;
- switch (_NSIG_WORDS) {
- case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
- case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2];
- case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1];
- case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0];
- }
- return copy_to_user(compat, &v, size) ? -EFAULT : 0;
-#else
- return copy_to_user(compat, set, size) ? -EFAULT : 0;
-#endif
-}
-
#ifdef CONFIG_NUMA
COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
compat_uptr_t __user *, pages32,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 96db9ae5d5af..4b838470fac4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2246,7 +2246,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
struct perf_event_context *task_ctx,
enum event_type_t event_type)
{
- enum event_type_t ctx_event_type = event_type & EVENT_ALL;
+ enum event_type_t ctx_event_type;
bool cpu_event = !!(event_type & EVENT_CPU);
/*
@@ -2256,6 +2256,8 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
if (event_type & EVENT_PINNED)
event_type |= EVENT_FLEXIBLE;
+ ctx_event_type = event_type & EVENT_ALL;
+
perf_pmu_disable(cpuctx->ctx.pmu);
if (task_ctx)
task_ctx_sched_out(cpuctx, task_ctx, event_type);
diff --git a/kernel/extable.c b/kernel/extable.c
index a17fdb63dc3e..6a5b61ebc66c 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
return e;
}
-static inline int init_kernel_text(unsigned long addr)
+int init_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_sinittext &&
addr < (unsigned long)_einittext)
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
index 21b0122cb39c..1d5632d8bbcc 100644
--- a/kernel/fail_function.c
+++ b/kernel/fail_function.c
@@ -14,6 +14,15 @@
static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs);
+static void fei_post_handler(struct kprobe *kp, struct pt_regs *regs,
+ unsigned long flags)
+{
+ /*
+ * A dummy post handler is required to prevent optimization, because
+ * jump optimization does not support overriding the execution path.
+ */
+}
+
struct fei_attr {
struct list_head list;
struct kprobe kp;
@@ -56,6 +65,7 @@ static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr)
return NULL;
}
attr->kp.pre_handler = fei_kprobe_handler;
+ attr->kp.post_handler = fei_post_handler;
attr->retval = adjust_error_retval(addr, 0);
INIT_LIST_HEAD(&attr->list);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index be8aa5b98666..e5d9d405ae4e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -592,7 +592,7 @@ static void check_mm(struct mm_struct *mm)
* is dropped: either by a lazy thread or by
* mmput. Free the page directory and the mm.
*/
-static void __mmdrop(struct mm_struct *mm)
+void __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
mm_free_pgd(mm);
@@ -603,18 +603,7 @@ static void __mmdrop(struct mm_struct *mm)
put_user_ns(mm->user_ns);
free_mm(mm);
}
-
-void mmdrop(struct mm_struct *mm)
-{
- /*
- * The implicit full barrier implied by atomic_dec_and_test() is
- * required by the membarrier system call before returning to
- * user-space, after storing to rq->curr.
- */
- if (unlikely(atomic_dec_and_test(&mm->mm_count)))
- __mmdrop(mm);
-}
-EXPORT_SYMBOL_GPL(mmdrop);
+EXPORT_SYMBOL_GPL(__mmdrop);
static void mmdrop_async_fn(struct work_struct *work)
{
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e6a9c36470ee..82b8b18ee1eb 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1726,25 +1726,14 @@ static int irq_domain_debug_show(struct seq_file *m, void *p)
irq_domain_debug_show_one(m, d, 0);
return 0;
}
-
-static int irq_domain_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, irq_domain_debug_show, inode->i_private);
-}
-
-static const struct file_operations dfs_domain_ops = {
- .open = irq_domain_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
static void debugfs_add_domain_dir(struct irq_domain *d)
{
if (!d->name || !domain_dir || d->debugfs_file)
return;
d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
- &dfs_domain_ops);
+ &irq_domain_debug_fops);
}
static void debugfs_remove_domain_dir(struct irq_domain *d)
@@ -1760,7 +1749,8 @@ void __init irq_domain_debugfs_init(struct dentry *root)
if (!domain_dir)
return;
- debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops);
+ debugfs_create_file("default", 0444, domain_dir, NULL,
+ &irq_domain_debug_fops);
mutex_lock(&irq_domain_mutex);
list_for_each_entry(d, &irq_domain_list, link)
debugfs_add_domain_dir(d);
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 5187dfe809ac..4c5770407031 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -16,6 +16,7 @@ struct cpumap {
unsigned int available;
unsigned int allocated;
unsigned int managed;
+ bool initialized;
bool online;
unsigned long alloc_map[IRQ_MATRIX_SIZE];
unsigned long managed_map[IRQ_MATRIX_SIZE];
@@ -81,9 +82,11 @@ void irq_matrix_online(struct irq_matrix *m)
BUG_ON(cm->online);
- bitmap_zero(cm->alloc_map, m->matrix_bits);
- cm->available = m->alloc_size - (cm->managed + m->systembits_inalloc);
- cm->allocated = 0;
+ if (!cm->initialized) {
+ cm->available = m->alloc_size;
+ cm->available -= cm->managed + m->systembits_inalloc;
+ cm->initialized = true;
+ }
m->global_available += cm->available;
cm->online = true;
m->online_maps++;
@@ -370,14 +373,16 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu,
if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end))
return;
- if (cm->online) {
- clear_bit(bit, cm->alloc_map);
- cm->allocated--;
+ clear_bit(bit, cm->alloc_map);
+ cm->allocated--;
+
+ if (cm->online)
m->total_allocated--;
- if (!managed) {
- cm->available++;
+
+ if (!managed) {
+ cm->available++;
+ if (cm->online)
m->global_available++;
- }
}
trace_irq_matrix_free(bit, cpu, m, cm);
}
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index b4517095db6a..e7214093dcd1 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -366,12 +366,16 @@ static void __jump_label_update(struct static_key *key,
{
for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
/*
- * entry->code set to 0 invalidates module init text sections
- * kernel_text_address() verifies we are not in core kernel
- * init code, see jump_label_invalidate_module_init().
+ * An entry->code of 0 indicates an entry which has been
+ * disabled because it was in an init text area.
*/
- if (entry->code && kernel_text_address(entry->code))
- arch_jump_label_transform(entry, jump_label_type(entry));
+ if (entry->code) {
+ if (kernel_text_address(entry->code))
+ arch_jump_label_transform(entry, jump_label_type(entry));
+ else
+ WARN_ONCE(1, "can't patch jump_label at %pS",
+ (void *)(unsigned long)entry->code);
+ }
}
}
@@ -417,6 +421,19 @@ void __init jump_label_init(void)
cpus_read_unlock();
}
+/* Disable any jump label entries in __init code */
+void __init jump_label_invalidate_init(void)
+{
+ struct jump_entry *iter_start = __start___jump_table;
+ struct jump_entry *iter_stop = __stop___jump_table;
+ struct jump_entry *iter;
+
+ for (iter = iter_start; iter < iter_stop; iter++) {
+ if (init_kernel_text(iter->code))
+ iter->code = 0;
+ }
+}
+
#ifdef CONFIG_MODULES
static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
@@ -633,6 +650,7 @@ static void jump_label_del_module(struct module *mod)
}
}
+/* Disable any jump label entries in module init code */
static void jump_label_invalidate_module_init(struct module *mod)
{
struct jump_entry *iter_start = mod->jump_entries;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index da2ccf142358..102160ff5c66 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -978,67 +978,90 @@ static int prepare_kprobe(struct kprobe *p)
}
/* Caller must lock kprobe_mutex */
-static void arm_kprobe_ftrace(struct kprobe *p)
+static int arm_kprobe_ftrace(struct kprobe *p)
{
- int ret;
+ int ret = 0;
ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
(unsigned long)p->addr, 0, 0);
- WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
- kprobe_ftrace_enabled++;
- if (kprobe_ftrace_enabled == 1) {
+ if (ret) {
+ pr_debug("Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ return ret;
+ }
+
+ if (kprobe_ftrace_enabled == 0) {
ret = register_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ if (ret) {
+ pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
+ goto err_ftrace;
+ }
}
+
+ kprobe_ftrace_enabled++;
+ return ret;
+
+err_ftrace:
+ /*
+ * Note: Since kprobe_ftrace_ops has IPMODIFY set, and ftrace requires a
+ * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
+ * empty filter_hash which would undesirably trace all functions.
+ */
+ ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
+ return ret;
}
/* Caller must lock kprobe_mutex */
-static void disarm_kprobe_ftrace(struct kprobe *p)
+static int disarm_kprobe_ftrace(struct kprobe *p)
{
- int ret;
+ int ret = 0;
- kprobe_ftrace_enabled--;
- if (kprobe_ftrace_enabled == 0) {
+ if (kprobe_ftrace_enabled == 1) {
ret = unregister_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
+ return ret;
}
+
+ kprobe_ftrace_enabled--;
+
ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
(unsigned long)p->addr, 1, 0);
WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ return ret;
}
#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
-#define arm_kprobe_ftrace(p) do {} while (0)
-#define disarm_kprobe_ftrace(p) do {} while (0)
+#define arm_kprobe_ftrace(p) (-ENODEV)
+#define disarm_kprobe_ftrace(p) (-ENODEV)
#endif
/* Arm a kprobe with text_mutex */
-static void arm_kprobe(struct kprobe *kp)
+static int arm_kprobe(struct kprobe *kp)
{
- if (unlikely(kprobe_ftrace(kp))) {
- arm_kprobe_ftrace(kp);
- return;
- }
+ if (unlikely(kprobe_ftrace(kp)))
+ return arm_kprobe_ftrace(kp);
+
cpus_read_lock();
mutex_lock(&text_mutex);
__arm_kprobe(kp);
mutex_unlock(&text_mutex);
cpus_read_unlock();
+
+ return 0;
}
/* Disarm a kprobe with text_mutex */
-static void disarm_kprobe(struct kprobe *kp, bool reopt)
+static int disarm_kprobe(struct kprobe *kp, bool reopt)
{
- if (unlikely(kprobe_ftrace(kp))) {
- disarm_kprobe_ftrace(kp);
- return;
- }
+ if (unlikely(kprobe_ftrace(kp)))
+ return disarm_kprobe_ftrace(kp);
cpus_read_lock();
mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
cpus_read_unlock();
+
+ return 0;
}
/*
@@ -1362,9 +1385,15 @@ out:
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED;
- if (!kprobes_all_disarmed)
+ if (!kprobes_all_disarmed) {
/* Arm the breakpoint again. */
- arm_kprobe(ap);
+ ret = arm_kprobe(ap);
+ if (ret) {
+ ap->flags |= KPROBE_FLAG_DISABLED;
+ list_del_rcu(&p->list);
+ synchronize_sched();
+ }
+ }
}
return ret;
}
@@ -1573,8 +1602,14 @@ int register_kprobe(struct kprobe *p)
hlist_add_head_rcu(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
- if (!kprobes_all_disarmed && !kprobe_disabled(p))
- arm_kprobe(p);
+ if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
+ ret = arm_kprobe(p);
+ if (ret) {
+ hlist_del_rcu(&p->hlist);
+ synchronize_sched();
+ goto out;
+ }
+ }
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
@@ -1608,11 +1643,12 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
static struct kprobe *__disable_kprobe(struct kprobe *p)
{
struct kprobe *orig_p;
+ int ret;
/* Get an original kprobe for return */
orig_p = __get_valid_kprobe(p);
if (unlikely(orig_p == NULL))
- return NULL;
+ return ERR_PTR(-EINVAL);
if (!kprobe_disabled(p)) {
/* Disable probe if it is a child probe */
@@ -1626,8 +1662,13 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
* should have already been disarmed, so
* skip unneed disarming process.
*/
- if (!kprobes_all_disarmed)
- disarm_kprobe(orig_p, true);
+ if (!kprobes_all_disarmed) {
+ ret = disarm_kprobe(orig_p, true);
+ if (ret) {
+ p->flags &= ~KPROBE_FLAG_DISABLED;
+ return ERR_PTR(ret);
+ }
+ }
orig_p->flags |= KPROBE_FLAG_DISABLED;
}
}
@@ -1644,8 +1685,8 @@ static int __unregister_kprobe_top(struct kprobe *p)
/* Disable kprobe. This will disarm it if needed. */
ap = __disable_kprobe(p);
- if (ap == NULL)
- return -EINVAL;
+ if (IS_ERR(ap))
+ return PTR_ERR(ap);
if (ap == p)
/*
@@ -2078,12 +2119,14 @@ static void kill_kprobe(struct kprobe *p)
int disable_kprobe(struct kprobe *kp)
{
int ret = 0;
+ struct kprobe *p;
mutex_lock(&kprobe_mutex);
/* Disable this kprobe */
- if (__disable_kprobe(kp) == NULL)
- ret = -EINVAL;
+ p = __disable_kprobe(kp);
+ if (IS_ERR(p))
+ ret = PTR_ERR(p);
mutex_unlock(&kprobe_mutex);
return ret;
@@ -2116,7 +2159,9 @@ int enable_kprobe(struct kprobe *kp)
if (!kprobes_all_disarmed && kprobe_disabled(p)) {
p->flags &= ~KPROBE_FLAG_DISABLED;
- arm_kprobe(p);
+ ret = arm_kprobe(p);
+ if (ret)
+ p->flags |= KPROBE_FLAG_DISABLED;
}
out:
mutex_unlock(&kprobe_mutex);
@@ -2407,11 +2452,12 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
.release = seq_release,
};
-static void arm_all_kprobes(void)
+static int arm_all_kprobes(void)
{
struct hlist_head *head;
struct kprobe *p;
- unsigned int i;
+ unsigned int i, total = 0, errors = 0;
+ int err, ret = 0;
mutex_lock(&kprobe_mutex);
@@ -2428,46 +2474,74 @@ static void arm_all_kprobes(void)
/* Arming kprobes doesn't optimize kprobe itself */
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
- if (!kprobe_disabled(p))
- arm_kprobe(p);
+ /* Arm all kprobes on a best-effort basis */
+ hlist_for_each_entry_rcu(p, head, hlist) {
+ if (!kprobe_disabled(p)) {
+ err = arm_kprobe(p);
+ if (err) {
+ errors++;
+ ret = err;
+ }
+ total++;
+ }
+ }
}
- printk(KERN_INFO "Kprobes globally enabled\n");
+ if (errors)
+ pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n",
+ errors, total);
+ else
+ pr_info("Kprobes globally enabled\n");
already_enabled:
mutex_unlock(&kprobe_mutex);
- return;
+ return ret;
}
-static void disarm_all_kprobes(void)
+static int disarm_all_kprobes(void)
{
struct hlist_head *head;
struct kprobe *p;
- unsigned int i;
+ unsigned int i, total = 0, errors = 0;
+ int err, ret = 0;
mutex_lock(&kprobe_mutex);
/* If kprobes are already disarmed, just return */
if (kprobes_all_disarmed) {
mutex_unlock(&kprobe_mutex);
- return;
+ return 0;
}
kprobes_all_disarmed = true;
- printk(KERN_INFO "Kprobes globally disabled\n");
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
+ /* Disarm all kprobes on a best-effort basis */
hlist_for_each_entry_rcu(p, head, hlist) {
- if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
- disarm_kprobe(p, false);
+ if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
+ err = disarm_kprobe(p, false);
+ if (err) {
+ errors++;
+ ret = err;
+ }
+ total++;
+ }
}
}
+
+ if (errors)
+ pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n",
+ errors, total);
+ else
+ pr_info("Kprobes globally disabled\n");
+
mutex_unlock(&kprobe_mutex);
/* Wait for disarming all kprobes by optimizer */
wait_for_kprobe_optimizer();
+
+ return ret;
}
/*
@@ -2494,6 +2568,7 @@ static ssize_t write_enabled_file_bool(struct file *file,
{
char buf[32];
size_t buf_size;
+ int ret = 0;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
@@ -2504,17 +2579,20 @@ static ssize_t write_enabled_file_bool(struct file *file,
case 'y':
case 'Y':
case '1':
- arm_all_kprobes();
+ ret = arm_all_kprobes();
break;
case 'n':
case 'N':
case '0':
- disarm_all_kprobes();
+ ret = disarm_all_kprobes();
break;
default:
return -EINVAL;
}
+ if (ret)
+ return ret;
+
return count;
}
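
The error propagation above is visible from userspace through the kprobes debugfs switch. A small usage sketch (assumes debugfs is mounted at /sys/kernel/debug):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/kprobes/enabled", O_WRONLY);

	if (fd < 0)
		return 1;
	/* with this change a failed arm/disarm propagates out of the
	 * write() instead of being silently swallowed */
	if (write(fd, "1", 1) != 1)
		perror("arming kprobes");
	close(fd);
	return 0;
}
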
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 38ece035039e..d880296245c5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -379,6 +379,14 @@ queue:
tail = encode_tail(smp_processor_id(), idx);
node += idx;
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
node->locked = 0;
node->next = NULL;
pv_init_node(node);
@@ -408,14 +416,15 @@ queue:
*/
if (old & _Q_TAIL_MASK) {
prev = decode_tail(old);
+
/*
- * The above xchg_tail() is also a load of @lock which
- * generates, through decode_tail(), a pointer. The address
- * dependency matches the RELEASE of xchg_tail() such that
- * the subsequent access to @prev happens after.
+ * We must ensure that the stores to @node are observed before
+ * the write to prev->next. The address dependency from
+ * xchg_tail is not sufficient to ensure this because the read
+ * component of xchg_tail is unordered with respect to the
+ * initialisation of @node.
*/
-
- WRITE_ONCE(prev->next, node);
+ smp_store_release(&prev->next, node);
pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(&node->locked);
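
The smp_store_release() above is the classic publish pattern: fully initialise the node, then make it reachable with a release store so any consumer that observes the pointer also observes the initialisation. A userspace model in C11 atomics (a sketch of the ordering, not of qspinlock itself):

#include <stdatomic.h>
#include <stddef.h>

struct node {
	int locked;
	_Atomic(struct node *) next;
};

/* Publisher: all plain stores to the node happen before the release
 * store that makes it reachable, mirroring smp_store_release(). */
static void publish(struct node *prev, struct node *n)
{
	n->locked = 0;
	atomic_store_explicit(&n->next, NULL, memory_order_relaxed);
	atomic_store_explicit(&prev->next, n, memory_order_release);
}

/* Consumer: the acquire load pairs with the release store above. */
static struct node *wait_for_next(struct node *prev)
{
	struct node *n;

	while (!(n = atomic_load_explicit(&prev->next,
					  memory_order_acquire)))
		;
	return n;
}
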
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 65cc0cb984e6..940633c63254 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1616,11 +1616,12 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
{
DEFINE_WAKE_Q(wake_q);
+ unsigned long flags;
bool postunlock;
- raw_spin_lock_irq(&lock->wait_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
- raw_spin_unlock_irq(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
if (postunlock)
rt_mutex_postunlock(&wake_q);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 4849be5f9b3c..895e6b76b25e 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -275,8 +275,15 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap)
return (res->start + resource_size(res)) >> PAGE_SHIFT;
}
+static unsigned long pfn_next(unsigned long pfn)
+{
+ if (pfn % 1024 == 0)
+ cond_resched();
+ return pfn + 1;
+}
+
#define for_each_device_pfn(pfn, map) \
- for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
+ for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
static void devm_memremap_pages_release(void *data)
{
@@ -337,10 +344,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
resource_size_t align_start, align_size, align_end;
struct vmem_altmap *altmap = pgmap->altmap_valid ?
&pgmap->altmap : NULL;
+ struct resource *res = &pgmap->res;
unsigned long pfn, pgoff, order;
pgprot_t pgprot = PAGE_KERNEL;
- int error, nid, is_ram, i = 0;
- struct resource *res = &pgmap->res;
+ int error, nid, is_ram;
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -409,8 +416,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
list_del(&page->lru);
page->pgmap = pgmap;
percpu_ref_get(pgmap->ref);
- if (!(++i % 1024))
- cond_resched();
}
devm_add_action(dev, devm_memremap_pages_release, pgmap);
@@ -422,7 +427,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
err_pfn_remap:
err_radix:
pgmap_radix_release(res, pgoff);
- devres_free(pgmap);
return ERR_PTR(error);
}
EXPORT_SYMBOL(devm_memremap_pages);
diff --git a/kernel/module.c b/kernel/module.c
index ad2d420024f6..e42764acedb4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4228,7 +4228,7 @@ static int modules_open(struct inode *inode, struct file *file)
m->private = kallsyms_show_value() ? NULL : (void *)8ul;
}
- return 0;
+ return err;
}
static const struct file_operations proc_modules_operations = {
diff --git a/kernel/panic.c b/kernel/panic.c
index 2cfef408fec9..4b794f1d8561 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -640,7 +640,7 @@ device_initcall(register_warn_debugfs);
*/
__visible void __stack_chk_fail(void)
{
- panic("stack-protector: Kernel stack is corrupted in: %p\n",
+ panic("stack-protector: Kernel stack is corrupted in: %pB\n",
__builtin_return_address(0));
}
EXPORT_SYMBOL(__stack_chk_fail);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index fc1123583fa6..f274fbef821d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2397,7 +2397,7 @@ skip:
if (console_lock_spinning_disable_and_check()) {
printk_safe_exit_irqrestore(flags);
- return;
+ goto out;
}
printk_safe_exit_irqrestore(flags);
@@ -2430,6 +2430,7 @@ skip:
if (retry && console_trylock())
goto again;
+out:
if (wake_klogd)
wake_up_klogd();
}
diff --git a/kernel/relay.c b/kernel/relay.c
index c3029402f15c..c955b10c973c 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -163,7 +163,7 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan)
{
struct rchan_buf *buf;
- if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
+ if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *))
return NULL;
buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bf724c1952ea..c94895bc5a2c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2601,19 +2601,31 @@ static inline void finish_task(struct task_struct *prev)
#endif
}
-static inline void finish_lock_switch(struct rq *rq)
+static inline void
+prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
{
+ /*
+ * The runqueue lock will be released by the next
+ * task (which is an invalid locking op but in the case
+ * of the scheduler it's an obvious special-case), so we
+ * do an early lockdep release here:
+ */
+ rq_unpin_lock(rq, rf);
+ spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
#ifdef CONFIG_DEBUG_SPINLOCK
/* this is a valid case when another task releases the spinlock */
- rq->lock.owner = current;
+ rq->lock.owner = next;
#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq)
+{
/*
* If we are tracking spinlock dependencies then we have to
* fix up the runqueue lock - which gets 'carried over' from
* prev into current:
*/
spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-
raw_spin_unlock_irq(&rq->lock);
}
@@ -2844,14 +2856,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
- /*
- * Since the runqueue lock will be released by the next
- * task (which is an invalid locking op but in the case
- * of the scheduler it's an obvious special-case), so we
- * do an early lockdep release here:
- */
- rq_unpin_lock(rq, rf);
- spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+ prepare_lock_switch(rq, next, rf);
/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);
@@ -6678,13 +6683,18 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
parent_quota = parent_b->hierarchical_quota;
/*
- * Ensure max(child_quota) <= parent_quota, inherit when no
+ * Ensure max(child_quota) <= parent_quota. On cgroup2,
+ * always take the min. On cgroup1, only inherit when no
* limit is set:
*/
- if (quota == RUNTIME_INF)
- quota = parent_quota;
- else if (parent_quota != RUNTIME_INF && quota > parent_quota)
- return -EINVAL;
+ if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
+ quota = min(quota, parent_quota);
+ } else {
+ if (quota == RUNTIME_INF)
+ quota = parent_quota;
+ else if (parent_quota != RUNTIME_INF && quota > parent_quota)
+ return -EINVAL;
+ }
}
cfs_b->hierarchical_quota = quota;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index dd062a1c8cf0..7936f548e071 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -19,8 +19,6 @@
#include "sched.h"
-#define SUGOV_KTHREAD_PRIORITY 50
-
struct sugov_tunables {
struct gov_attr_set attr_set;
unsigned int rate_limit_us;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9bb0e0c412ec..9df09782025c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1153,6 +1153,7 @@ static void update_curr_dl(struct rq *rq)
struct sched_dl_entity *dl_se = &curr->dl;
u64 delta_exec, scaled_delta_exec;
int cpu = cpu_of(rq);
+ u64 now;
if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
@@ -1165,7 +1166,8 @@ static void update_curr_dl(struct rq *rq)
* natural solution, but the full ramifications of this
* approach need further study.
*/
- delta_exec = rq_clock_task(rq) - curr->se.exec_start;
+ now = rq_clock_task(rq);
+ delta_exec = now - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0)) {
if (unlikely(dl_se->dl_yielded))
goto throttle;
@@ -1178,7 +1180,7 @@ static void update_curr_dl(struct rq *rq)
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
- curr->se.exec_start = rq_clock_task(rq);
+ curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
sched_rt_avg_update(rq, delta_exec);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 663b2355a3aa..aad49451584e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -950,12 +950,13 @@ static void update_curr_rt(struct rq *rq)
{
struct task_struct *curr = rq->curr;
struct sched_rt_entity *rt_se = &curr->rt;
- u64 now = rq_clock_task(rq);
u64 delta_exec;
+ u64 now;
if (curr->sched_class != &rt_sched_class)
return;
+ now = rq_clock_task(rq);
delta_exec = now - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
return;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 940fa408a288..dc77548167ef 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1076,14 +1076,16 @@ long seccomp_get_metadata(struct task_struct *task,
size = min_t(unsigned long, size, sizeof(kmd));
- if (copy_from_user(&kmd, data, size))
+ if (size < sizeof(kmd.filter_off))
+ return -EINVAL;
+
+ if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
return -EFAULT;
filter = get_nth_filter(task, kmd.filter_off);
if (IS_ERR(filter))
return PTR_ERR(filter);
- memset(&kmd, 0, sizeof(kmd));
if (filter->log)
kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
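
Userspace reaches this path through PTRACE_SECCOMP_GET_METADATA. A hypothetical sketch of a tracer using it (the tracee must already be ptrace-attached; only filter_off is copied in from userspace now, and a buffer smaller than filter_off fails with -EINVAL):

#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>

#ifndef PTRACE_SECCOMP_GET_METADATA
#define PTRACE_SECCOMP_GET_METADATA 0x420d
#endif

struct seccomp_metadata {
	unsigned long long filter_off;
	unsigned long long flags;
};

int main(int argc, char **argv)
{
	struct seccomp_metadata md = { .filter_off = 0 };
	long pid = argc > 1 ? atol(argv[1]) : 0;

	if (ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md) < 0)
		perror("PTRACE_SECCOMP_GET_METADATA");
	else
		printf("filter 0 flags: %llu\n", md.flags);
	return 0;
}
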
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 48150ab42de9..4a4fd567fb26 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1894,6 +1894,12 @@ int timers_dead_cpu(unsigned int cpu)
raw_spin_lock_irq(&new_base->lock);
raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+ /*
+ * The current CPUs base clock might be stale. Update it
+ * before moving the timers over.
+ */
+ forward_timer_base(new_base);
+
BUG_ON(old_base->running_timer);
for (i = 0; i < WHEEL_SIZE; i++)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fc2838ac8b78..7f9691c86b6e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -661,7 +661,41 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
.arg3_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
+static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_perf_event_output_proto_tp;
+ case BPF_FUNC_get_stackid:
+ return &bpf_get_stackid_proto_tp;
+ default:
+ return tracing_func_proto(func_id);
+ }
+}
+
+static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
+ return false;
+ if (type != BPF_READ)
+ return false;
+ if (off % size != 0)
+ return false;
+
+ BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
+ return true;
+}
+
+const struct bpf_verifier_ops tracepoint_verifier_ops = {
+ .get_func_proto = tp_prog_func_proto,
+ .is_valid_access = tp_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops tracepoint_prog_ops = {
+};
+
+BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
struct bpf_perf_event_value *, buf, u32, size)
{
int err = -EINVAL;
@@ -678,8 +712,8 @@ clear:
return err;
}
-static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
- .func = bpf_perf_prog_read_value_tp,
+static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
+ .func = bpf_perf_prog_read_value,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
@@ -687,7 +721,7 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
.arg3_type = ARG_CONST_SIZE,
};
-static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -695,39 +729,16 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
case BPF_FUNC_perf_prog_read_value:
- return &bpf_perf_prog_read_value_proto_tp;
+ return &bpf_perf_prog_read_value_proto;
default:
return tracing_func_proto(func_id);
}
}
-static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
-{
- if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
- return false;
- if (type != BPF_READ)
- return false;
- if (off % size != 0)
- return false;
-
- BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
- return true;
-}
-
-const struct bpf_verifier_ops tracepoint_verifier_ops = {
- .get_func_proto = tp_prog_func_proto,
- .is_valid_access = tp_prog_is_valid_access,
-};
-
-const struct bpf_prog_ops tracepoint_prog_ops = {
-};
-
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
- const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
- sample_period);
+ const int size_u64 = sizeof(u64);
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
@@ -738,8 +749,13 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
switch (off) {
case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
- bpf_ctx_record_field_size(info, size_sp);
- if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+ return false;
+ break;
+ case bpf_ctx_range(struct bpf_perf_event_data, addr):
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
return false;
break;
default:
@@ -766,6 +782,14 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct perf_sample_data, period, 8,
target_size));
break;
+ case offsetof(struct bpf_perf_event_data, addr):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ data), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_perf_event_data_kern, data));
+ *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct perf_sample_data, addr, 8,
+ target_size));
+ break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
regs), si->dst_reg, si->src_reg,
@@ -779,7 +803,7 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
}
const struct bpf_verifier_ops perf_event_verifier_ops = {
- .get_func_proto = tp_prog_func_proto,
+ .get_func_proto = pe_prog_func_proto,
.is_valid_access = pe_prog_is_valid_access,
.convert_ctx_access = pe_prog_convert_ctx_access,
};
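
Programs of type BPF_PROG_TYPE_PERF_EVENT can now read ctx->addr alongside ctx->sample_period. A hypothetical sketch of such a program (built with clang -target bpf; loading boilerplate omitted; assumes a uapi bpf_perf_event.h that already carries the addr field from this patch):

#include <linux/types.h>
#include <linux/bpf_perf_event.h>

/* loaded as BPF_PROG_TYPE_PERF_EVENT and attached to a perf event */
int count_sampled_addrs(struct bpf_perf_event_data *ctx)
{
	__u64 addr = ctx->addr;			/* new in this patch */
	__u64 period = ctx->sample_period;	/* already supported */

	/* a real program would key a map by addr here */
	return addr != 0 && period != 0;
}
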
@@ -872,6 +896,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
return -EINVAL;
if (copy_from_user(&query, uquery, sizeof(query)))
return -EFAULT;
+ if (query.ids_len > BPF_TRACE_MAX_PROGS)
+ return -E2BIG;
mutex_lock(&bpf_event_mutex);
ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
diff --git a/kernel/user.c b/kernel/user.c
index 9a20acce460d..36288d840675 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -101,6 +101,7 @@ struct user_struct root_user = {
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
.uid = GLOBAL_ROOT_UID,
+ .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
};
/*
@@ -191,6 +192,8 @@ struct user_struct *alloc_uid(kuid_t uid)
new->uid = uid;
atomic_set(&new->__count, 1);
+ ratelimit_state_init(&new->ratelimit, HZ, 100);
+ ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
/*
* Before adding this, check whether we raced
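alloc_uid() now arms a per-user ratelimit of 100 events per HZ jiffies (one second), with RATELIMIT_MSG_ON_RELEASE deferring the suppression notice until the state is released. A rough userspace model of the windowed counting, assuming one-second granularity and simplified bookkeeping:

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    struct ratelimit {
        time_t begin;    /* start of the current window */
        int interval;    /* window length in seconds (HZ jiffies == 1s) */
        int burst;       /* events allowed per window */
        int count;       /* events counted so far in this window */
    };

    static bool ratelimit_allow(struct ratelimit *rs)
    {
        time_t now = time(NULL);

        if (now - rs->begin >= rs->interval) {  /* window over: reset */
            rs->begin = now;
            rs->count = 0;
        }
        return rs->count++ < rs->burst;
    }

    int main(void)
    {
        struct ratelimit rs = { .begin = time(NULL), .interval = 1, .burst = 100 };
        int allowed = 0;

        for (int i = 0; i < 1000; i++)
            if (ratelimit_allow(&rs))
                allowed++;
        printf("allowed %d of 1000\n", allowed);  /* ~100 within one window */
        return 0;
    }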
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 017044c26233..6ec6ba65127b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3018,14 +3018,6 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork)
return ret;
}
-/*
- * See cancel_delayed_work()
- */
-bool cancel_work(struct work_struct *work)
-{
- return __cancel_work(work, false);
-}
-
/**
* cancel_delayed_work - cancel a delayed work
* @dwork: delayed_work to cancel
@@ -4180,6 +4172,22 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
EXPORT_SYMBOL_GPL(workqueue_set_max_active);
/**
+ * current_work - retrieve %current task's work struct
+ *
+ * Determine if %current task is a workqueue worker and what it's working on.
+ * Useful to find out the context that the %current task is running in.
+ *
+ * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
+ */
+struct work_struct *current_work(void)
+{
+ struct worker *worker = current_wq_worker();
+
+ return worker ? worker->current_work : NULL;
+}
+EXPORT_SYMBOL(current_work);
+
+/**
* current_is_workqueue_rescuer - is %current workqueue rescuer?
*
* Determine whether %current is a workqueue rescuer. Can be used from
@@ -5321,7 +5329,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
ret = device_register(&wq_dev->dev);
if (ret) {
- kfree(wq_dev);
+ put_device(&wq_dev->dev);
wq->wq_dev = NULL;
return ret;
}
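The sysfs registration fix above swaps kfree() for put_device(): device_register() initializes the embedded kobject even when it fails, so from that point the reference count, not the caller, owns the memory. A kernel-style sketch of the pattern with hypothetical names (`my_wrapper`, `my_release`):

    #include <linux/device.h>
    #include <linux/slab.h>

    struct my_wrapper {
        struct device dev;
    };

    static void my_release(struct device *dev)
    {
        kfree(container_of(dev, struct my_wrapper, dev));
    }

    static int my_register_one(void)
    {
        struct my_wrapper *w = kzalloc(sizeof(*w), GFP_KERNEL);
        int ret;

        if (!w)
            return -ENOMEM;
        w->dev.release = my_release;
        dev_set_name(&w->dev, "mydev0");

        ret = device_register(&w->dev);
        if (ret) {
            put_device(&w->dev);    /* frees w via my_release() */
            return ret;
        }
        return 0;
    }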
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6088408ef26c..64155e310a9f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1642,6 +1642,7 @@ config DMA_API_DEBUG
menuconfig RUNTIME_TESTING_MENU
bool "Runtime Testing"
+ def_bool y
if RUNTIME_TESTING_MENU
diff --git a/lib/btree.c b/lib/btree.c
index f93a945274af..590facba2c50 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -3,7 +3,7 @@
*
* As should be obvious for Linux kernel code, license is GPLv2
*
- * Copyright (c) 2007-2008 Joern Engel <[email protected]>
+ * Copyright (c) 2007-2008 Joern Engel <[email protected]>
* Bits and pieces stolen from Peter Zijlstra's code, which is
* Copyright 2007, Red Hat Inc. Peter Zijlstra
* GPLv2
@@ -76,6 +76,8 @@ struct btree_geo btree_geo128 = {
};
EXPORT_SYMBOL_GPL(btree_geo128);
+#define MAX_KEYLEN (2 * LONG_PER_U64)
+
static struct kmem_cache *btree_cachep;
void *btree_alloc(gfp_t gfp_mask, void *pool_data)
@@ -313,7 +315,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
{
int i, height;
unsigned long *node, *oldnode;
- unsigned long *retry_key = NULL, key[geo->keylen];
+ unsigned long *retry_key = NULL, key[MAX_KEYLEN];
if (keyzero(geo, __key))
return NULL;
@@ -639,8 +641,8 @@ EXPORT_SYMBOL_GPL(btree_remove);
int btree_merge(struct btree_head *target, struct btree_head *victim,
struct btree_geo *geo, gfp_t gfp)
{
- unsigned long key[geo->keylen];
- unsigned long dup[geo->keylen];
+ unsigned long key[MAX_KEYLEN];
+ unsigned long dup[MAX_KEYLEN];
void *val;
int err;
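The btree change replaces variable-length arrays with a fixed worst-case bound: the widest geometry is btree_geo128, whose keys occupy 2 * LONG_PER_U64 longs, so a MAX_KEYLEN array always fits. The arithmetic, modeled standalone under the assumption of a 64-bit target:

    #include <stdio.h>

    #define BITS_PER_LONG   64                    /* assumed 64-bit target */
    #define LONG_PER_U64    (64 / BITS_PER_LONG)
    #define MAX_KEYLEN      (2 * LONG_PER_U64)

    int main(void)
    {
        /* geo32 and geo64 need 1 long, geo128 needs 2: all <= MAX_KEYLEN */
        printf("MAX_KEYLEN=%d longs (%zu bytes)\n",
               MAX_KEYLEN, MAX_KEYLEN * sizeof(long));
        return 0;
    }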
diff --git a/lib/bug.c b/lib/bug.c
index c1b0fad31b10..1077366f496b 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -150,6 +150,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
return BUG_TRAP_TYPE_NONE;
bug = find_bug(bugaddr);
+ if (!bug)
+ return BUG_TRAP_TYPE_NONE;
file = NULL;
line = 0;
@@ -191,7 +193,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
if (file)
pr_crit("kernel BUG at %s:%u!\n", file, line);
else
- pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n",
+ pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
(void *)bugaddr);
return BUG_TRAP_TYPE_BUG;
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 1b34d210452c..7f5cdc1e6b29 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1491,12 +1491,12 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
if (unlikely(virt == NULL))
return;
- entry = dma_entry_alloc();
- if (!entry)
+ /* handle vmalloc and linear addresses */
+ if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
return;
- /* handle vmalloc and linear addresses */
- if (!is_vmalloc_addr(virt) && !virt_to_page(virt))
+ entry = dma_entry_alloc();
+ if (!entry)
return;
entry->type = dma_debug_coherent;
@@ -1528,7 +1528,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
};
/* handle vmalloc and linear addresses */
- if (!is_vmalloc_addr(virt) && !virt_to_page(virt))
+ if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
return;
if (is_vmalloc_addr(virt))
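The dma-debug hunk reorders the two tests so the tracking entry is only allocated once the address is known to be usable; in the old order every early return leaked one entry. The same check-before-allocate shape in a standalone sketch (`track_alloc` and `addr_valid` are illustrative stand-ins):

    #include <stdbool.h>
    #include <stdlib.h>

    struct entry { int dummy; };

    static struct entry *track_alloc(const void *virt,
                                     bool (*addr_valid)(const void *))
    {
        struct entry *e;

        if (!addr_valid(virt))      /* check cheap preconditions first ... */
            return NULL;

        e = malloc(sizeof(*e));     /* ... allocate only when it will be used */
        if (!e)
            return NULL;
        e->dummy = 0;
        return e;
    }

    static bool nonnull_valid(const void *p) { return p != NULL; }

    int main(void)
    {
        struct entry *e = track_alloc("x", nonnull_valid);
        free(e);
        return 0;
    }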
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index 40b1f92f2214..c9e8e21cb334 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -84,6 +84,10 @@ again:
return page_address(page);
}
+/*
+ * NOTE: this function must never look at the dma_addr argument, because we want
+ * to be able to use it as a helper for iommu implementations as well.
+ */
void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
@@ -152,5 +156,6 @@ const struct dma_map_ops dma_direct_ops = {
.map_sg = dma_direct_map_sg,
.dma_supported = dma_direct_supported,
.mapping_error = dma_direct_mapping_error,
+ .is_phys = 1,
};
EXPORT_SYMBOL(dma_direct_ops);
diff --git a/lib/idr.c b/lib/idr.c
index c98d77fcf393..823b813f08f8 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -36,8 +36,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
{
struct radix_tree_iter iter;
void __rcu **slot;
- int base = idr->idr_base;
- int id = *nextid;
+ unsigned int base = idr->idr_base;
+ unsigned int id = *nextid;
if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
return -EINVAL;
@@ -204,10 +204,11 @@ int idr_for_each(const struct idr *idr,
radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
int ret;
+ unsigned long id = iter.index + base;
- if (WARN_ON_ONCE(iter.index > INT_MAX))
+ if (WARN_ON_ONCE(id > INT_MAX))
break;
- ret = fn(iter.index + base, rcu_dereference_raw(*slot), data);
+ ret = fn(id, rcu_dereference_raw(*slot), data);
if (ret)
return ret;
}
@@ -230,8 +231,8 @@ void *idr_get_next(struct idr *idr, int *nextid)
{
struct radix_tree_iter iter;
void __rcu **slot;
- int base = idr->idr_base;
- int id = *nextid;
+ unsigned long base = idr->idr_base;
+ unsigned long id = *nextid;
id = (id < base) ? 0 : id - base;
slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
@@ -431,7 +432,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
bitmap = this_cpu_xchg(ida_bitmap, NULL);
if (!bitmap)
return -EAGAIN;
- memset(bitmap, 0, sizeof(*bitmap));
bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
rcu_assign_pointer(*slot, bitmap);
}
@@ -464,7 +464,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
bitmap = this_cpu_xchg(ida_bitmap, NULL);
if (!bitmap)
return -EAGAIN;
- memset(bitmap, 0, sizeof(*bitmap));
__set_bit(bit, bitmap->bitmap);
radix_tree_iter_replace(root, &iter, slot, bitmap);
}
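The idr changes widen the id arithmetic: with plain int, `iter.index + base` can wrap past INT_MAX before the WARN_ON_ONCE check runs, so the overflow goes unnoticed. A standalone illustration of why the sum is computed in an unsigned long first:

    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long index = INT_MAX;   /* radix-tree slot index */
        unsigned long base = 16;         /* idr base offset */

        unsigned long wide = index + base;   /* exact: 2147483663 */
        int wrapped = (int)wide;             /* implementation-defined,
                                                typically negative */

        /* the fixed code checks the wide value, so the overflow is caught */
        printf("wide=%lu over_int_max=%d wrapped=%d\n",
               wide, wide > INT_MAX, wrapped);
        return 0;
    }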
diff --git a/lib/ioremap.c b/lib/ioremap.c
index b808a390e4c3..54e5bbaa3200 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
if (ioremap_pmd_enabled() &&
((next - addr) == PMD_SIZE) &&
- IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
+ IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
+ pmd_free_pte_page(pmd)) {
if (pmd_set_huge(pmd, phys_addr + addr, prot))
continue;
}
@@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
if (ioremap_pud_enabled() &&
((next - addr) == PUD_SIZE) &&
- IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
+ IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
+ pud_free_pmd_page(pud)) {
if (pud_set_huge(pud, phys_addr + addr, prot))
continue;
}
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9fe6ec8fda28..15ea216a67ce 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -25,6 +25,7 @@
#include <linux/uuid.h>
#include <linux/ctype.h>
#include <net/sock.h>
+#include <net/netlink.h>
#include <net/net_namespace.h>
@@ -32,11 +33,13 @@ u64 uevent_seqnum;
#ifdef CONFIG_UEVENT_HELPER
char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
#endif
-#ifdef CONFIG_NET
+
struct uevent_sock {
struct list_head list;
struct sock *sk;
};
+
+#ifdef CONFIG_NET
static LIST_HEAD(uevent_sock_list);
#endif
@@ -602,12 +605,88 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
EXPORT_SYMBOL_GPL(add_uevent_var);
#if defined(CONFIG_NET)
+static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
+ struct netlink_ext_ack *extack)
+{
+ /* u64 to chars: 2^64 - 1 = 21 chars */
+ char buf[sizeof("SEQNUM=") + 21];
+ struct sk_buff *skbc;
+ int ret;
+
+ /* bump and prepare sequence number */
+ ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", ++uevent_seqnum);
+ if (ret < 0 || (size_t)ret >= sizeof(buf))
+ return -ENOMEM;
+ ret++;
+
+ /* verify message does not overflow */
+ if ((skb->len + ret) > UEVENT_BUFFER_SIZE) {
+ NL_SET_ERR_MSG(extack, "uevent message too big");
+ return -EINVAL;
+ }
+
+ /* copy skb and extend to accommodate sequence number */
+ skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL);
+ if (!skbc)
+ return -ENOMEM;
+
+ /* append sequence number */
+ skb_put_data(skbc, buf, ret);
+
+ /* remove msg header */
+ skb_pull(skbc, NLMSG_HDRLEN);
+
+ /* set portid 0 to inform userspace message comes from kernel */
+ NETLINK_CB(skbc).portid = 0;
+ NETLINK_CB(skbc).dst_group = 1;
+
+ ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL);
+ /* ENOBUFS should be handled in userspace */
+ if (ret == -ENOBUFS || ret == -ESRCH)
+ ret = 0;
+
+ return ret;
+}
+
+static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net;
+ int ret;
+
+ if (!nlmsg_data(nlh))
+ return -EINVAL;
+
+ /*
+ * Verify that we are allowed to send messages to the target
+ * network namespace. The caller must have CAP_SYS_ADMIN in the
+ * owning user namespace of the target network namespace.
+ */
+ net = sock_net(NETLINK_CB(skb).sk);
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) {
+ NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability");
+ return -EPERM;
+ }
+
+ mutex_lock(&uevent_sock_mutex);
+ ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack);
+ mutex_unlock(&uevent_sock_mutex);
+
+ return ret;
+}
+
+static void uevent_net_rcv(struct sk_buff *skb)
+{
+ netlink_rcv_skb(skb, &uevent_net_rcv_skb);
+}
+
static int uevent_net_init(struct net *net)
{
struct uevent_sock *ue_sk;
struct netlink_kernel_cfg cfg = {
.groups = 1,
- .flags = NL_CFG_F_NONROOT_RECV,
+ .input = uevent_net_rcv,
+ .flags = NL_CFG_F_NONROOT_RECV
};
ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
@@ -621,6 +700,9 @@ static int uevent_net_init(struct net *net)
kfree(ue_sk);
return -ENODEV;
}
+
+ net->uevent_sock = ue_sk;
+
mutex_lock(&uevent_sock_mutex);
list_add_tail(&ue_sk->list, &uevent_sock_list);
mutex_unlock(&uevent_sock_mutex);
@@ -629,17 +711,9 @@ static int uevent_net_init(struct net *net)
static void uevent_net_exit(struct net *net)
{
- struct uevent_sock *ue_sk;
+ struct uevent_sock *ue_sk = net->uevent_sock;
mutex_lock(&uevent_sock_mutex);
- list_for_each_entry(ue_sk, &uevent_sock_list, list) {
- if (sock_net(ue_sk->sk) == net)
- goto found;
- }
- mutex_unlock(&uevent_sock_mutex);
- return;
-
-found:
list_del(&ue_sk->list);
mutex_unlock(&uevent_sock_mutex);
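The buffer sizing in uevent_net_broadcast() works because sizeof("SEQNUM=") already counts the terminating NUL and a u64 needs at most 20 decimal digits (2^64 - 1 = 18446744073709551615); the code then does ret++ so the terminator is copied into the skb along with the text. A standalone check of the worst case:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char buf[sizeof("SEQNUM=") + 21];   /* 8 + 21 = 29 bytes */
        uint64_t seq = UINT64_MAX;          /* widest possible value */

        int ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu",
                           (unsigned long long)seq);

        /* ret excludes the NUL, so even the widest value passes the
         * `ret >= sizeof(buf)` overflow check with a byte to spare */
        printf("len=%d used=%zu cap=%zu\n", ret, strlen(buf) + 1, sizeof(buf));
        return 0;
    }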
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 30e7dd88148b..9f96fa7bc000 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
* This function normally doesn't block and can be called from any context
* but it may block if @confirm_kill is specified and @ref is in the
* process of switching to atomic mode by percpu_ref_switch_to_atomic().
+ *
+ * There are no implied RCU grace periods between kill and release.
*/
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
percpu_ref_func_t *confirm_kill)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 0a7ae3288a24..8e00138d593f 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2125,7 +2125,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
preempt_enable();
if (!this_cpu_read(ida_bitmap)) {
- struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+ struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
if (!bitmap)
return 0;
if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 3825c30aaa36..47de025b6245 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -506,8 +506,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
if (!key ||
(ht->p.obj_cmpfn ?
ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
- rhashtable_compare(&arg, rht_obj(ht, head))))
+ rhashtable_compare(&arg, rht_obj(ht, head)))) {
+ pprev = &head->next;
continue;
+ }
if (!ht->rhlist)
return rht_obj(ht, head);
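The rhashtable fix keeps `pprev` pointing at the link that leads to the current node; without the added advance on a mismatch, a later unlink through `pprev` splices out the wrong element. A minimal standalone version of the same traversal pattern:

    #include <stdio.h>
    #include <stdlib.h>

    struct node { int key; struct node *next; };

    static void unlink_key(struct node **head, int key)
    {
        struct node **pprev = head;
        struct node *n;

        for (n = *head; n; n = n->next) {
            if (n->key != key) {
                pprev = &n->next;   /* the advance the fix adds */
                continue;
            }
            *pprev = n->next;       /* unlink the match */
            free(n);
            return;
        }
    }

    int main(void)
    {
        struct node *c = malloc(sizeof(*c)), *b = malloc(sizeof(*b));
        struct node *a = malloc(sizeof(*a)), *head = a;

        a->key = 1; a->next = b;
        b->key = 2; b->next = c;
        c->key = 3; c->next = NULL;

        unlink_key(&head, 2);
        for (struct node *n = head; n; n = n->next)
            printf("%d ", n->key);  /* prints: 1 3 */
        printf("\n");
        return 0;
    }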
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index b4e22345963f..b2badf6b23cd 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -24,10 +24,11 @@
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/highmem.h>
+#include <linux/sched.h>
/* General test specific settings */
#define MAX_SUBTESTS 3
-#define MAX_TESTRUNS 10000
+#define MAX_TESTRUNS 1000
#define MAX_DATA 128
#define MAX_INSNS 512
#define MAX_K 0xffffFFFF
@@ -5466,7 +5467,7 @@ static struct bpf_test tests[] = {
{
"BPF_MAXINSNS: Jump, gap, jump, ...",
{ },
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86)
CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
#else
CLASSIC | FLAG_NO_DATA,
@@ -6573,6 +6574,93 @@ static bool exclude_test(int test_id)
return test_id < test_range[0] || test_id > test_range[1];
}
+static __init struct sk_buff *build_test_skb(void)
+{
+ u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
+ struct sk_buff *skb[2];
+ struct page *page[2];
+ int i, data_size = 8;
+
+ for (i = 0; i < 2; i++) {
+ page[i] = alloc_page(GFP_KERNEL);
+ if (!page[i]) {
+ if (i == 0)
+ goto err_page0;
+ else
+ goto err_page1;
+ }
+
+ /* this will set skb[i]->head_frag */
+ skb[i] = dev_alloc_skb(headroom + data_size);
+ if (!skb[i]) {
+ if (i == 0)
+ goto err_skb0;
+ else
+ goto err_skb1;
+ }
+
+ skb_reserve(skb[i], headroom);
+ skb_put(skb[i], data_size);
+ skb[i]->protocol = htons(ETH_P_IP);
+ skb_reset_network_header(skb[i]);
+ skb_set_mac_header(skb[i], -ETH_HLEN);
+
+ skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
+ // skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
+ }
+
+ /* setup shinfo */
+ skb_shinfo(skb[0])->gso_size = 1448;
+ skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
+ skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb[0])->gso_segs = 0;
+ skb_shinfo(skb[0])->frag_list = skb[1];
+
+ /* adjust skb[0]'s len */
+ skb[0]->len += skb[1]->len;
+ skb[0]->data_len += skb[1]->data_len;
+ skb[0]->truesize += skb[1]->truesize;
+
+ return skb[0];
+
+err_skb1:
+ __free_page(page[1]);
+err_page1:
+ kfree_skb(skb[0]);
+err_skb0:
+ __free_page(page[0]);
+err_page0:
+ return NULL;
+}
+
+static __init int test_skb_segment(void)
+{
+ netdev_features_t features;
+ struct sk_buff *skb, *segs;
+ int ret = -1;
+
+ features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM;
+ features |= NETIF_F_RXCSUM;
+ skb = build_test_skb();
+ if (!skb) {
+ pr_info("%s: failed to build_test_skb", __func__);
+ goto done;
+ }
+
+ segs = skb_segment(skb, features);
+ if (segs) {
+ kfree_skb_list(segs);
+ ret = 0;
+ pr_info("%s: success in skb_segment!", __func__);
+ } else {
+ pr_info("%s: failed in skb_segment!", __func__);
+ }
+ kfree_skb(skb);
+done:
+ return ret;
+}
+
static __init int test_bpf(void)
{
int i, err_cnt = 0, pass_cnt = 0;
@@ -6582,6 +6670,7 @@ static __init int test_bpf(void)
struct bpf_prog *fp;
int err;
+ cond_resched();
if (exclude_test(i))
continue;
@@ -6630,9 +6719,11 @@ static int __init test_bpf_init(void)
return ret;
ret = test_bpf();
-
destroy_bpf_tests();
- return ret;
+ if (ret)
+ return ret;
+
+ return test_skb_segment();
}
static void __exit test_bpf_exit(void)
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index e372b97eee13..0e5b7a61460b 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1141,7 +1141,7 @@ static struct kmod_test_device *register_test_dev_kmod(void)
mutex_lock(&reg_dev_mutex);
/* int should suffice for number of devices, test for wrap */
- if (unlikely(num_test_devs + 1) < 0) {
+ if (num_test_devs + 1 == INT_MAX) {
pr_err("reached limit of number of test devices\n");
goto out;
}
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 76d3667fdea2..f4000c137dbe 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -79,6 +79,21 @@ struct thread_data {
struct test_obj *objs;
};
+static u32 my_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct test_obj_rhl *obj = data;
+
+ return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE;
+}
+
+static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct test_obj_rhl *test_obj = obj;
+ const struct test_obj_val *val = arg->key;
+
+ return test_obj->value.id - val->id;
+}
+
static struct rhashtable_params test_rht_params = {
.head_offset = offsetof(struct test_obj, node),
.key_offset = offsetof(struct test_obj, value),
@@ -87,6 +102,17 @@ static struct rhashtable_params test_rht_params = {
.nulls_base = (3U << RHT_BASE_SHIFT),
};
+static struct rhashtable_params test_rht_params_dup = {
+ .head_offset = offsetof(struct test_obj_rhl, list_node),
+ .key_offset = offsetof(struct test_obj_rhl, value),
+ .key_len = sizeof(struct test_obj_val),
+ .hashfn = jhash,
+ .obj_hashfn = my_hashfn,
+ .obj_cmpfn = my_cmpfn,
+ .nelem_hint = 128,
+ .automatic_shrinking = false,
+};
+
static struct semaphore prestart_sem;
static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0);
@@ -465,6 +491,112 @@ static int __init test_rhashtable_max(struct test_obj *array,
return err;
}
+static unsigned int __init print_ht(struct rhltable *rhlt)
+{
+ struct rhashtable *ht;
+ const struct bucket_table *tbl;
+ char buff[512] = "";
+ unsigned int i, cnt = 0;
+
+ ht = &rhlt->ht;
+ tbl = rht_dereference(ht->tbl, ht);
+ for (i = 0; i < tbl->size; i++) {
+ struct rhash_head *pos, *next;
+ struct test_obj_rhl *p;
+
+ pos = rht_dereference(tbl->buckets[i], ht);
+ next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
+
+ if (!rht_is_a_nulls(pos)) {
+ sprintf(buff, "%s\nbucket[%d] -> ", buff, i);
+ }
+
+ while (!rht_is_a_nulls(pos)) {
+ struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead);
+ sprintf(buff, "%s[[", buff);
+ do {
+ pos = &list->rhead;
+ list = rht_dereference(list->next, ht);
+ p = rht_obj(ht, pos);
+
+ sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid,
+ list? ", " : " ");
+ cnt++;
+ } while (list);
+
+ pos = next,
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL;
+
+ sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? " -> " : "");
+ }
+ }
+ printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff);
+
+ return cnt;
+}
+
+static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects,
+ int cnt, bool slow)
+{
+ struct rhltable rhlt;
+ unsigned int i, ret;
+ const char *key;
+ int err = 0;
+
+ err = rhltable_init(&rhlt, &test_rht_params_dup);
+ if (WARN_ON(err))
+ return err;
+
+ for (i = 0; i < cnt; i++) {
+ rhl_test_objects[i].value.tid = i;
+ key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead);
+ key += test_rht_params_dup.key_offset;
+
+ if (slow) {
+ err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key,
+ &rhl_test_objects[i].list_node.rhead));
+ if (err == -EAGAIN)
+ err = 0;
+ } else
+ err = rhltable_insert(&rhlt,
+ &rhl_test_objects[i].list_node,
+ test_rht_params_dup);
+ if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast"))
+ goto skip_print;
+ }
+
+ ret = print_ht(&rhlt);
+ WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast");
+
+skip_print:
+ rhltable_destroy(&rhlt);
+
+ return 0;
+}
+
+static int __init test_insert_duplicates_run(void)
+{
+ struct test_obj_rhl rhl_test_objects[3] = {};
+
+ pr_info("test inserting duplicates\n");
+
+ /* two different values that map to same bucket */
+ rhl_test_objects[0].value.id = 1;
+ rhl_test_objects[1].value.id = 21;
+
+ /* and another duplicate with same as [0] value
+ * which will be second on the bucket list */
+ rhl_test_objects[2].value.id = rhl_test_objects[0].value.id;
+
+ test_insert_dup(rhl_test_objects, 2, false);
+ test_insert_dup(rhl_test_objects, 3, false);
+ test_insert_dup(rhl_test_objects, 2, true);
+ test_insert_dup(rhl_test_objects, 3, true);
+
+ return 0;
+}
+
static int thread_lookup_test(struct thread_data *tdata)
{
unsigned int entries = tdata->entries;
@@ -613,6 +745,8 @@ static int __init test_rht_init(void)
do_div(total_time, runs);
pr_info("Average test time: %llu\n", total_time);
+ test_insert_duplicates_run();
+
if (!tcount)
return 0;
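my_hashfn in the test above deliberately pins every object to one of ten buckets by hashing only `id % 10` (the RHT_HASH_RESERVED_SPACE shift is omitted in this model), which is how ids 1 and 21 end up on the same chain while id 2 does not. A standalone check of the collision:

    #include <stdio.h>

    int main(void)
    {
        int ids[] = { 1, 21, 2 };   /* the values the test inserts */

        for (int i = 0; i < 3; i++)
            printf("id=%d bucket=%d\n", ids[i], ids[i] % 10);
        /* id=1 and id=21 share bucket 1; id=2 lands in bucket 2 */
        return 0;
    }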
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 77ee6ced11b1..d7a708f82559 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1849,7 +1849,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
{
const int default_width = 2 * sizeof(void *);
- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'x') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
diff --git a/mm/gup.c b/mm/gup.c
index 1b46e6e74881..6afae32571ca 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -516,7 +516,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
}
if (ret & VM_FAULT_RETRY) {
- if (nonblocking)
+ if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
*nonblocking = 0;
return -EBUSY;
}
@@ -890,7 +890,10 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
break;
}
if (*locked) {
- /* VM_FAULT_RETRY didn't trigger */
+ /*
+ * VM_FAULT_RETRY didn't trigger or it was a
+ * FOLL_NOWAIT.
+ */
if (!pages_done)
pages_done = ret;
break;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 87ab9b8f56b5..5a68730eebd6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
VM_BUG_ON_PAGE(!PageCompound(page), page);
- if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+ if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+ true)) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ alloc:
}
if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
- huge_gfp, &memcg, true))) {
+ huge_gfp | __GFP_NORETRY, &memcg, true))) {
put_page(new_page);
split_huge_pmd(vma, vmf->pmd, vmf->address);
if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
list_for_each_safe(pos, next, &list) {
page = list_entry((void *)pos, struct page, mapping);
- lock_page(page);
+ if (!trylock_page(page))
+ goto next;
/* split_huge_page() removes page from list on success */
if (!split_huge_page(page))
split++;
unlock_page(page);
+next:
put_page(page);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c204e3d132b..976bbc5646fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
#include <linux/bootmem.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
+#include <linux/mmdebug.h>
#include <linux/sched/signal.h>
#include <linux/rmap.h>
#include <linux/string_helpers.h>
@@ -1583,7 +1584,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
page = NULL;
} else {
h->surplus_huge_pages++;
- h->nr_huge_pages_node[page_to_nid(page)]++;
+ h->surplus_huge_pages_node[page_to_nid(page)]++;
}
out_unlock:
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
struct resv_map *resv_map;
long gbl_reserve;
+ /* This should never happen */
+ if (from > to) {
+ VM_WARN(1, "%s called with a negative range\n", __func__);
+ return -EINVAL;
+ }
+
/*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b7e2268dfc9a..e42568284e06 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
goto out;
}
- VM_BUG_ON_PAGE(PageCompound(page), page);
+ /* TODO: teach khugepaged to collapse THP mapped with pte */
+ if (PageCompound(page)) {
+ result = SCAN_PAGE_COMPOUND;
+ goto out;
+ }
+
VM_BUG_ON_PAGE(!PageAnon(page), page);
/*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
goto out_nolock;
}
- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ /* Do not oom kill for khugepaged charges */
+ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+ &memcg, true))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out_nolock;
}
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
goto out;
}
- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ /* Do not oom kill for khugepaged charges */
+ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+ &memcg, true))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out;
}
diff --git a/mm/memblock.c b/mm/memblock.c
index 5a9ca2a1751b..48376bd33274 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
*out_nid = r->nid;
}
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
- unsigned long max_pfn)
-{
- struct memblock_type *type = &memblock.memory;
- unsigned int right = type->cnt;
- unsigned int mid, left = 0;
- phys_addr_t addr = PFN_PHYS(pfn + 1);
-
- do {
- mid = (right + left) / 2;
-
- if (addr < type->regions[mid].base)
- right = mid;
- else if (addr >= (type->regions[mid].base +
- type->regions[mid].size))
- left = mid + 1;
- else {
- /* addr is within the region, so pfn + 1 is valid */
- return min(pfn + 1, max_pfn);
- }
- } while (left < right);
-
- if (right == type->cnt)
- return max_pfn;
- else
- return min(PHYS_PFN(type->regions[right].base), max_pfn);
-}
-
/**
* memblock_set_node - set node ID on memblock regions
* @base: base of area to set node ID for
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4b80ccee4535..8291b75f42c8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1139,8 +1139,6 @@ int memory_failure(unsigned long pfn, int flags)
return 0;
}
- arch_unmap_kpfn(pfn);
-
orig_head = hpage = compound_head(p);
num_poisoned_pages_inc();
diff --git a/mm/memory.c b/mm/memory.c
index dd8de96f5547..5fcfc24904d1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -80,7 +80,7 @@
#include "internal.h"
-#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
#endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d879f1d8a44a..32cba0332787 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
case MPOL_INTERLEAVE:
return !!nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
+ /* a's ->flags is the same as b's */
+ if (a->flags & MPOL_F_LOCAL)
+ return true;
return a->v.preferred_node == b->v.preferred_node;
default:
BUG();
diff --git a/mm/mlock.c b/mm/mlock.c
index 79398200e423..74e5a6547c3d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -64,6 +64,12 @@ void clear_page_mlock(struct page *page)
mod_zone_page_state(page_zone(page), NR_MLOCK,
-hpage_nr_pages(page));
count_vm_event(UNEVICTABLE_PGCLEARED);
+ /*
+ * The previous TestClearPageMlocked() corresponds to the smp_mb()
+ * in __pagevec_lru_add_fn().
+ *
+ * See __pagevec_lru_add_fn for more explanation.
+ */
if (!isolate_lru_page(page)) {
putback_lru_page(page);
} else {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 81e18ceef579..1741dd23e7c1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,6 +46,7 @@
#include <linux/stop_machine.h>
#include <linux/sort.h>
#include <linux/pfn.h>
+#include <xen/xen.h>
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
@@ -347,6 +348,9 @@ static inline bool update_defer_init(pg_data_t *pgdat,
/* Always populate low zones for address-constrained allocations */
if (zone_end < pgdat_end_pfn(pgdat))
return true;
+ /* Xen PV domains need page structures early */
+ if (xen_pv_domain())
+ return true;
(*nr_initialised)++;
if ((*nr_initialised > pgdat->static_init_pgcnt) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
@@ -1906,7 +1910,9 @@ static int move_freepages(struct zone *zone,
* Remove at a later date when no bug reports exist related to
* grouping pages by mobility
*/
- VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
+ VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
+ pfn_valid(page_to_pfn(end_page)) &&
+ page_zone(start_page) != page_zone(end_page));
#endif
if (num_movable)
@@ -3590,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
return false;
/* this guy won't enter reclaim */
- if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+ if (current->flags & PF_MEMALLOC)
return false;
/* We're only interested __GFP_FS allocations for now */
@@ -5350,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (context != MEMMAP_EARLY)
goto not_early;
- if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
- /*
- * Skip to the pfn preceding the next valid one (or
- * end_pfn), such that we hit a valid pfn (or end_pfn)
- * on our next iteration of the loop.
- */
- pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+ if (!early_pfn_valid(pfn))
continue;
- }
if (!early_pfn_in_nid(pfn, nid))
continue;
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d2a76642c4ae..38de70ab1a0d 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -34,7 +34,7 @@
#include <linux/log2.h>
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
- int page_start, int page_end)
+ int page_start, int page_end, gfp_t gfp)
{
return 0;
}
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
/* nada */
}
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
struct pcpu_chunk *chunk;
struct page *pages;
int i;
- chunk = pcpu_alloc_chunk();
+ chunk = pcpu_alloc_chunk(gfp);
if (!chunk)
return NULL;
- pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+ pages = alloc_pages(gfp, order_base_2(nr_pages));
if (!pages) {
pcpu_free_chunk(chunk);
return NULL;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9158e5a81391..d8078de912de 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
lockdep_assert_held(&pcpu_alloc_mutex);
if (!pages)
- pages = pcpu_mem_zalloc(pages_size);
+ pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
return pages;
}
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
* @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
* @page_start: page index of the first page to be allocated
* @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
*
* Allocate pages [@page_start,@page_end) into @pages for all units.
* The allocation is for @chunk. Percpu core doesn't care about the
* content of @pages and will pass it verbatim to pcpu_map_pages().
*/
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
- struct page **pages, int page_start, int page_end)
+ struct page **pages, int page_start, int page_end,
+ gfp_t gfp)
{
- const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
unsigned int cpu, tcpu;
int i;
+ gfp |= __GFP_HIGHMEM;
+
for_each_possible_cpu(cpu) {
for (i = page_start; i < page_end; i++) {
struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
* @chunk: chunk of interest
* @page_start: the start page
* @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
*
* For each cpu, populate and map pages [@page_start,@page_end) into
* @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
* pcpu_alloc_mutex, does GFP_KERNEL allocation.
*/
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
- int page_start, int page_end)
+ int page_start, int page_end, gfp_t gfp)
{
struct page **pages;
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
if (!pages)
return -ENOMEM;
- if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+ if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
return -ENOMEM;
if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
pcpu_free_pages(chunk, pages, page_start, page_end);
}
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
struct pcpu_chunk *chunk;
struct vm_struct **vms;
- chunk = pcpu_alloc_chunk();
+ chunk = pcpu_alloc_chunk(gfp);
if (!chunk)
return NULL;
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..9297098519a6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/kmemleak.h>
+#include <linux/sched.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
/**
* pcpu_mem_zalloc - allocate memory
* @size: bytes to allocate
+ * @gfp: allocation flags
*
* Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
*
* RETURNS:
* Pointer to the allocated area on success, NULL on failure.
*/
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
{
if (WARN_ON_ONCE(!slab_is_available()))
return NULL;
if (size <= PAGE_SIZE)
- return kzalloc(size, GFP_KERNEL);
+ return kzalloc(size, gfp);
else
- return vzalloc(size);
+ return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
}
/**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
return chunk;
}
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
{
struct pcpu_chunk *chunk;
int region_bits;
- chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+ chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
if (!chunk)
return NULL;
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
region_bits = pcpu_chunk_map_bits(chunk);
chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
- sizeof(chunk->alloc_map[0]));
+ sizeof(chunk->alloc_map[0]), gfp);
if (!chunk->alloc_map)
goto alloc_map_fail;
chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
- sizeof(chunk->bound_map[0]));
+ sizeof(chunk->bound_map[0]), gfp);
if (!chunk->bound_map)
goto bound_map_fail;
chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
- sizeof(chunk->md_blocks[0]));
+ sizeof(chunk->md_blocks[0]), gfp);
if (!chunk->md_blocks)
goto md_blocks_fail;
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
* pcpu_addr_to_page - translate address to physical address
* pcpu_verify_alloc_info - check alloc_info is acceptable during init
*/
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+ int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+ int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
static struct page *pcpu_addr_to_page(void *addr);
static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
gfp_t gfp)
{
+ /* whitelisted flags that can be passed to the backing allocators */
+ gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
bool do_warn = !(gfp & __GFP_NOWARN);
static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
return NULL;
}
- if (!is_atomic)
- mutex_lock(&pcpu_alloc_mutex);
+ if (!is_atomic) {
+ /*
+ * pcpu_balance_workfn() allocates memory under this mutex,
+ * and it may wait for memory reclaim. Allow current task
+ * to become OOM victim, in case of memory pressure.
+ */
+ if (gfp & __GFP_NOFAIL)
+ mutex_lock(&pcpu_alloc_mutex);
+ else if (mutex_lock_killable(&pcpu_alloc_mutex))
+ return NULL;
+ }
spin_lock_irqsave(&pcpu_lock, flags);
@@ -1421,7 +1434,7 @@ restart:
}
if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
- chunk = pcpu_create_chunk();
+ chunk = pcpu_create_chunk(pcpu_gfp);
if (!chunk) {
err = "failed to allocate new chunk";
goto fail;
@@ -1450,7 +1463,7 @@ area_found:
page_start, page_end) {
WARN_ON(chunk->immutable);
- ret = pcpu_populate_chunk(chunk, rs, re);
+ ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
spin_lock_irqsave(&pcpu_lock, flags);
if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
* pcpu_balance_workfn - manage the amount of free chunks and populated pages
* @work: unused
*
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
*/
static void pcpu_balance_workfn(struct work_struct *work)
{
+ /* gfp flags passed to underlying allocators */
+ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
LIST_HEAD(to_free);
struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
spin_unlock_irq(&pcpu_lock);
}
pcpu_destroy_chunk(chunk);
+ cond_resched();
}
/*
@@ -1645,7 +1666,7 @@ retry_pop:
chunk->nr_pages) {
int nr = min(re - rs, nr_to_pop);
- ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+ ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
if (!ret) {
nr_to_pop -= nr;
spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
if (nr_to_pop) {
/* ran out of chunks to populate, create a new one and retry */
- chunk = pcpu_create_chunk();
+ chunk = pcpu_create_chunk(gfp);
if (chunk) {
spin_lock_irq(&pcpu_lock);
pcpu_chunk_relocate(chunk, -1);
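pcpu_alloc() above forwards only a whitelisted subset of the caller's gfp flags to the backing allocators and masks everything else off. A standalone model of the masking; the flag bit values here are invented for the demo and are not the kernel's GFP constants:

    #include <stdio.h>

    #define F_KERNEL   0x01u
    #define F_NORETRY  0x02u
    #define F_NOWARN   0x04u
    #define F_NOFAIL   0x08u   /* not whitelisted: must not reach the backend */

    int main(void)
    {
        unsigned int caller = F_KERNEL | F_NOFAIL | F_NOWARN;
        unsigned int forwarded = caller & (F_KERNEL | F_NORETRY | F_NOWARN);

        printf("caller=%#x forwarded=%#x\n", caller, forwarded);
        /* forwarded=0x5: F_NOFAIL was stripped before reaching the backend */
        return 0;
    }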
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688b75ee..b85919243399 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -493,36 +493,45 @@ next:
info = list_entry(pos, struct shmem_inode_info, shrinklist);
inode = &info->vfs_inode;
- if (nr_to_split && split >= nr_to_split) {
- iput(inode);
- continue;
- }
+ if (nr_to_split && split >= nr_to_split)
+ goto leave;
- page = find_lock_page(inode->i_mapping,
+ page = find_get_page(inode->i_mapping,
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
if (!page)
goto drop;
+ /* No huge page at the end of the file: nothing to split */
if (!PageTransHuge(page)) {
- unlock_page(page);
put_page(page);
goto drop;
}
+ /*
+ * Leave the inode on the list if we failed to lock
+ * the page at this time.
+ *
+ * Waiting for the lock may lead to deadlock in the
+ * reclaim path.
+ */
+ if (!trylock_page(page)) {
+ put_page(page);
+ goto leave;
+ }
+
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
- if (ret) {
- /* split failed: leave it on the list */
- iput(inode);
- continue;
- }
+ /* If split failed leave the inode on the list */
+ if (ret)
+ goto leave;
split++;
drop:
list_del_init(&info->shrinklist);
removed++;
+leave:
iput(inode);
}
diff --git a/mm/swap.c b/mm/swap.c
index 567a7b96e41d..0f17330dd0e5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -446,30 +446,6 @@ void lru_cache_add(struct page *page)
}
/**
- * add_page_to_unevictable_list - add a page to the unevictable list
- * @page: the page to be added to the unevictable list
- *
- * Add page directly to its zone's unevictable list. To avoid races with
- * tasks that might be making the page evictable, through eg. munlock,
- * munmap or exit, while it's not on the lru, we want to add the page
- * while it's locked or otherwise "invisible" to other tasks. This is
- * difficult to do when using the pagevec cache, so bypass that.
- */
-void add_page_to_unevictable_list(struct page *page)
-{
- struct pglist_data *pgdat = page_pgdat(page);
- struct lruvec *lruvec;
-
- spin_lock_irq(&pgdat->lru_lock);
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
- ClearPageActive(page);
- SetPageUnevictable(page);
- SetPageLRU(page);
- add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
- spin_unlock_irq(&pgdat->lru_lock);
-}
-
-/**
* lru_cache_add_active_or_unevictable
* @page: the page to be added to LRU
* @vma: vma in which page is mapped for determining reclaimability
@@ -484,13 +460,9 @@ void lru_cache_add_active_or_unevictable(struct page *page,
{
VM_BUG_ON_PAGE(PageLRU(page), page);
- if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+ if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
SetPageActive(page);
- lru_cache_add(page);
- return;
- }
-
- if (!TestSetPageMlocked(page)) {
+ else if (!TestSetPageMlocked(page)) {
/*
* We use the irq-unsafe __mod_zone_page_stat because this
* counter is not modified from interrupt context, and the pte
@@ -500,7 +472,7 @@ void lru_cache_add_active_or_unevictable(struct page *page,
hpage_nr_pages(page));
count_vm_event(UNEVICTABLE_PGMLOCKED);
}
- add_page_to_unevictable_list(page);
+ lru_cache_add(page);
}
/*
@@ -886,15 +858,55 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
void *arg)
{
- int file = page_is_file_cache(page);
- int active = PageActive(page);
- enum lru_list lru = page_lru(page);
+ enum lru_list lru;
+ int was_unevictable = TestClearPageUnevictable(page);
VM_BUG_ON_PAGE(PageLRU(page), page);
SetPageLRU(page);
+ /*
+ * Page becomes evictable in two ways:
+ * 1) Within LRU lock [munlock_vma_pages() and __munlock_pagevec()].
+ * 2) Before acquiring LRU lock to put the page to correct LRU and then
+ * a) do PageLRU check with lock [check_move_unevictable_pages]
+ * b) do PageLRU check before lock [clear_page_mlock]
+ *
+ * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need
+ * following strict ordering:
+ *
+ * #0: __pagevec_lru_add_fn #1: clear_page_mlock
+ *
+ * SetPageLRU() TestClearPageMlocked()
+ * smp_mb() // explicit ordering // above provides strict
+ * // ordering
+ * PageMlocked() PageLRU()
+ *
+ *
+ * if '#1' does not observe setting of PG_lru by '#0' and fails
+ * isolation, the explicit barrier will make sure that page_evictable
+ * check will put the page in correct LRU. Without smp_mb(), SetPageLRU
+ * can be reordered after PageMlocked check and can make '#1' to fail
+ * the isolation of the page whose Mlocked bit is cleared (#0 is also
+ * looking at the same page) and the evictable page will be stranded
+ * in an unevictable LRU.
+ */
+ smp_mb();
+
+ if (page_evictable(page)) {
+ lru = page_lru(page);
+ update_page_reclaim_stat(lruvec, page_is_file_cache(page),
+ PageActive(page));
+ if (was_unevictable)
+ count_vm_event(UNEVICTABLE_PGRESCUED);
+ } else {
+ lru = LRU_UNEVICTABLE;
+ ClearPageActive(page);
+ SetPageUnevictable(page);
+ if (!was_unevictable)
+ count_vm_event(UNEVICTABLE_PGCULLED);
+ }
+
add_page_to_lru_list(page, lruvec, lru);
- update_page_reclaim_stat(lruvec, file, active);
trace_mm_lru_insertion(page, lru);
}
@@ -913,7 +925,7 @@ EXPORT_SYMBOL(__pagevec_lru_add);
* @pvec: Where the resulting entries are placed
* @mapping: The address_space to search
* @start: The starting entry index
- * @nr_pages: The maximum number of pages
+ * @nr_entries: The maximum number of pages
* @indices: The cache indices corresponding to the entries in @pvec
*
* pagevec_lookup_entries() will search for and return a group of up
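The ordering comment added to __pagevec_lru_add_fn() above describes a store-buffering pattern: each side publishes its own flag, issues a full barrier, then reads the other side's flag, so at least one side must observe the other's update. A userspace analogue using C11 fences; the names are illustrative, and a single run demonstrates the pattern rather than serving as a litmus-test harness:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    atomic_int lru, mlocked, seen_mlocked, seen_lru;

    static void *adder(void *arg)   /* models __pagevec_lru_add_fn() */
    {
        (void)arg;
        atomic_store_explicit(&lru, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);   /* the smp_mb() */
        atomic_store(&seen_mlocked,
                     atomic_load_explicit(&mlocked, memory_order_relaxed));
        return NULL;
    }

    static void *clearer(void *arg) /* models clear_page_mlock() */
    {
        (void)arg;
        atomic_store_explicit(&mlocked, 0, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);   /* TestClearPageMlocked() */
        atomic_store(&seen_lru,
                     atomic_load_explicit(&lru, memory_order_relaxed));
        return NULL;
    }

    int main(void)
    {
        pthread_t a, b;

        atomic_store(&mlocked, 1);
        pthread_create(&a, NULL, adder, NULL);
        pthread_create(&b, NULL, clearer, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        /* With both fences, seen_mlocked==1 and seen_lru==0 cannot
         * both hold: at least one side sees the other's write. */
        printf("seen_mlocked=%d seen_lru=%d\n",
               atomic_load(&seen_mlocked), atomic_load(&seen_lru));
        return 0;
    }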
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 673942094328..ebff729cc956 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1943,11 +1943,15 @@ void *vmalloc_exec(unsigned long size)
}
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
-#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
-#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
-#define GFP_VMALLOC32 GFP_KERNEL
+/*
+ * 64b systems should always have either DMA or DMA32 zones. For others
+ * GFP_DMA32 should do the right thing and use the normal zone.
+ */
+#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
#endif
/**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 444749669187..cd5dc3faaa57 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -769,64 +769,7 @@ int remove_mapping(struct address_space *mapping, struct page *page)
*/
void putback_lru_page(struct page *page)
{
- bool is_unevictable;
- int was_unevictable = PageUnevictable(page);
-
- VM_BUG_ON_PAGE(PageLRU(page), page);
-
-redo:
- ClearPageUnevictable(page);
-
- if (page_evictable(page)) {
- /*
- * For evictable pages, we can use the cache.
- * In event of a race, worst case is we end up with an
- * unevictable page on [in]active list.
- * We know how to handle that.
- */
- is_unevictable = false;
- lru_cache_add(page);
- } else {
- /*
- * Put unevictable pages directly on zone's unevictable
- * list.
- */
- is_unevictable = true;
- add_page_to_unevictable_list(page);
- /*
- * When racing with an mlock or AS_UNEVICTABLE clearing
- * (page is unlocked) make sure that if the other thread
- * does not observe our setting of PG_lru and fails
- * isolation/check_move_unevictable_pages,
- * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
- * the page back to the evictable list.
- *
- * The other side is TestClearPageMlocked() or shmem_lock().
- */
- smp_mb();
- }
-
- /*
- * page's status can change while we move it among lru. If an evictable
- * page is on unevictable list, it never be freed. To avoid that,
- * check after we added it to the list, again.
- */
- if (is_unevictable && page_evictable(page)) {
- if (!isolate_lru_page(page)) {
- put_page(page);
- goto redo;
- }
- /* This means someone else dropped this page from LRU
- * So, it will be freed or putback to LRU again. There is
- * nothing to do here.
- */
- }
-
- if (was_unevictable && !is_unevictable)
- count_vm_event(UNEVICTABLE_PGRESCUED);
- else if (!was_unevictable && is_unevictable)
- count_vm_event(UNEVICTABLE_PGCULLED);
-
+ lru_cache_add(page);
put_page(page); /* drop ref from isolate */
}
@@ -1837,6 +1780,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
/*
+ * If dirty pages are scanned that are not queued for IO, it
+ * implies that flushers are not doing their job. This can
+ * happen when memory pressure pushes dirty pages to the end of
+ * the LRU before the dirty limits are breached and the dirty
+ * data has expired. It can also happen when the proportion of
+ * dirty pages grows not through writes but through memory
+ * pressure reclaiming all the clean cache. And in some cases,
+ * the flushers simply cannot keep up with the allocation
+ * rate. Nudge the flusher threads in case they are asleep.
+ */
+ if (stat.nr_unqueued_dirty == nr_taken)
+ wakeup_flusher_threads(WB_REASON_VMSCAN);
+
+ /*
* Legacy memcg will stall in page writeback so avoid forcibly
* stalling here.
*/
@@ -1848,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
set_bit(PGDAT_CONGESTED, &pgdat->flags);
- /*
- * If dirty pages are scanned that are not queued for IO, it
- * implies that flushers are not doing their job. This can
- * happen when memory pressure pushes dirty pages to the end of
- * the LRU before the dirty limits are breached and the dirty
- * data has expired. It can also happen when the proportion of
- * dirty pages grows not through writes but through memory
- * pressure reclaiming all the clean cache. And in some cases,
- * the flushers simply cannot keep up with the allocation
- * rate. Nudge the flusher threads in case they are asleep, but
- * also allow kswapd to start writing pages during reclaim.
- */
- if (stat.nr_unqueued_dirty == nr_taken) {
- wakeup_flusher_threads(WB_REASON_VMSCAN);
+ /* Allow kswapd to start writing pages during reclaim. */
+ if (stat.nr_unqueued_dirty == nr_taken)
set_bit(PGDAT_DIRTY, &pgdat->flags);
- }
/*
* If kswapd scans pages marked for immediate
diff --git a/mm/zpool.c b/mm/zpool.c
index f8cb83e7699b..01a771e304fa 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -360,7 +360,7 @@ u64 zpool_get_total_size(struct zpool *zpool)
/**
* zpool_evictable() - Test if zpool is potentially evictable
- * @pool The zpool to test
+ * @zpool: The zpool to test
*
* Zpool is only potentially evictable when it's created with struct
* zpool_ops.evict and its driver implements struct zpool_driver.shrink.
diff --git a/mm/zswap.c b/mm/zswap.c
index c004aa4fd3f4..61a5c41972db 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1007,6 +1007,12 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
u8 *src, *dst;
struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
+ /* THP isn't supported */
+ if (PageTransHuge(page)) {
+ ret = -EINVAL;
+ goto reject;
+ }
+
if (!zswap_enabled || !tree) {
ret = -ENODEV;
goto reject;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 64aa9f755e1d..45c9bf5ff3a0 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -48,8 +48,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
* original position later
*/
skb_push(skb, offset);
- skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
- skb->vlan_tci);
+ skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto,
+ skb->vlan_tci, skb->mac_len);
if (!skb)
return false;
skb_pull(skb, offset + VLAN_HLEN);
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index a662ccc166df..a627a5db2125 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -148,8 +148,8 @@ int __net_init vlan_proc_init(struct net *net)
if (!vn->proc_vlan_dir)
goto err;
- vn->proc_vlan_conf = proc_create(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
- vn->proc_vlan_dir, &vlan_fops);
+ vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600,
+ vn->proc_vlan_dir, &vlan_fops);
if (!vn->proc_vlan_conf)
goto err;
return 0;
@@ -172,7 +172,7 @@ int vlan_proc_add_dev(struct net_device *vlandev)
if (!strcmp(vlandev->name, name_conf))
return -EINVAL;
vlan->dent =
- proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
+ proc_create_data(vlandev->name, S_IFREG | 0600,
vn->proc_vlan_dir, &vlandev_fops, vlandev);
if (!vlan->dent)
return -ENOBUFS;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index f3a4efcf1456..3aa5a93ad107 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -160,7 +160,8 @@ static void req_done(struct virtqueue *vq)
spin_unlock_irqrestore(&chan->lock, flags);
/* Wakeup if anyone waiting for VirtIO ring space. */
wake_up(chan->vc_wq);
- p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
+ if (len)
+ p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index a3bf9d519193..7214aea14cb3 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -257,22 +257,22 @@ int __init atalk_proc_init(void)
if (!atalk_proc_dir)
goto out;
- p = proc_create("interface", S_IRUGO, atalk_proc_dir,
+ p = proc_create("interface", 0444, atalk_proc_dir,
&atalk_seq_interface_fops);
if (!p)
goto out_interface;
- p = proc_create("route", S_IRUGO, atalk_proc_dir,
+ p = proc_create("route", 0444, atalk_proc_dir,
&atalk_seq_route_fops);
if (!p)
goto out_route;
- p = proc_create("socket", S_IRUGO, atalk_proc_dir,
+ p = proc_create("socket", 0444, atalk_proc_dir,
&atalk_seq_socket_fops);
if (!p)
goto out_socket;
- p = proc_create("arp", S_IRUGO, atalk_proc_dir, &atalk_seq_arp_fops);
+ p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops);
if (!p)
goto out_arp;
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5d2fed9f5710..39b94ca5f65d 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -96,12 +96,12 @@ static ssize_t show_link_rate(struct device *cdev,
return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
}
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
-static DEVICE_ATTR(atmindex, S_IRUGO, show_atmindex, NULL);
-static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
-static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(atmaddress, 0444, show_atmaddress, NULL);
+static DEVICE_ATTR(atmindex, 0444, show_atmindex, NULL);
+static DEVICE_ATTR(carrier, 0444, show_carrier, NULL);
+static DEVICE_ATTR(type, 0444, show_type, NULL);
+static DEVICE_ATTR(link_rate, 0444, show_link_rate, NULL);
static struct device_attribute *atm_attrs[] = {
&dev_attr_atmaddress,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d4f6029d5109..f07dbc632222 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -893,7 +893,7 @@ static int __init atm_clip_init(void)
{
struct proc_dir_entry *p;
- p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops);
+ p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/arp\n");
atm_clip_exit_noproc();
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09a1f056712a..01d5d20a6eb1 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1042,7 +1042,7 @@ static int __init lane_module_init(void)
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *p;
- p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops);
+ p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/lec\n");
return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index edc48edc95c1..55410c00c7e2 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -474,7 +474,7 @@ int __init atm_proc_init(void)
for (e = atm_proc_ents; e->name; e++) {
struct proc_dir_entry *dirent;
- dirent = proc_create(e->name, S_IRUGO,
+ dirent = proc_create(e->name, 0444,
atm_proc_root, e->proc_fops);
if (!dirent)
goto err_out_remove;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c8319ed48485..2b41366fcad2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1989,10 +1989,10 @@ static int __init ax25_init(void)
dev_add_pack(&ax25_packet_type);
register_netdevice_notifier(&ax25_dev_notifier);
- proc_create("ax25_route", S_IRUGO, init_net.proc_net,
+ proc_create("ax25_route", 0444, init_net.proc_net,
&ax25_route_fops);
- proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops);
- proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops);
+ proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops);
+ proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops);
out:
return rc;
}
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c44f6515be5e..e4e2e02b7380 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 022f6e77307b..b97ba6fb8353 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 80c72c7d3cad..ea309ad06175 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 029221615ba3..534b790c3753 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 79e326383726..be09a9883825 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -157,7 +157,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
* Return: 0 on success, a negative error code otherwise.
*/
static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
- int max_if_num)
+ unsigned int max_if_num)
{
void *data_ptr;
size_t old_size;
@@ -201,7 +201,8 @@ unlock:
*/
static void
batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
+ unsigned int max_if_num,
+ unsigned int del_if_num)
{
size_t chunk_size;
size_t if_offset;
@@ -239,7 +240,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
*/
static void
batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
+ unsigned int max_if_num,
+ unsigned int del_if_num)
{
size_t if_offset;
void *data_ptr;
@@ -276,7 +278,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
* Return: 0 on success, a negative error code otherwise.
*/
static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
+ unsigned int max_if_num,
+ unsigned int del_if_num)
{
spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
@@ -311,7 +314,8 @@ static struct batadv_orig_node *
batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
{
struct batadv_orig_node *orig_node;
- int size, hash_added;
+ int hash_added;
+ size_t size;
orig_node = batadv_orig_hash_find(bat_priv, addr);
if (orig_node)
@@ -893,7 +897,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
u32 i;
size_t word_index;
u8 *w;
- int if_num;
+ unsigned int if_num;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1023,7 +1027,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
struct batadv_neigh_node *tmp_neigh_node = NULL;
struct batadv_neigh_node *router = NULL;
struct batadv_orig_node *orig_node_tmp;
- int if_num;
+ unsigned int if_num;
u8 sum_orig, sum_neigh;
u8 *neigh_addr;
u8 tq_avg;
@@ -1182,7 +1186,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
u8 total_count;
u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
- int if_num;
+ unsigned int if_num;
unsigned int tq_asym_penalty, inv_asym_penalty;
unsigned int combined_tq;
unsigned int tq_iface_penalty;
@@ -1702,9 +1706,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
if (is_my_orig) {
unsigned long *word;
- int offset;
+ size_t offset;
s32 bit_pos;
- s16 if_num;
+ unsigned int if_num;
u8 *weight;
orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
@@ -2729,7 +2733,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
struct batadv_gw_node *curr_gw;
- int ret = -EINVAL;
+ int ret = 0;
void *hdr;
router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 9dc0dd5c83df..317cafd302cf 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 27e165ac9302..ec93337ee259 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
@@ -928,7 +928,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
struct batadv_gw_node *curr_gw;
- int ret = -EINVAL;
+ int ret = 0;
void *hdr;
router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index a17ab68bbce8..ec4a2a569750 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index a83478c46597..28687493599f 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 5e39d0588a48..e8c7b7fd290d 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index ba59b77c605d..2948b41b06d4 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 6a4c14ccc3c6..ed36c5e79fde 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bdc1ef06e05b..a296a4d851f5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ca9d0753dd6b..48f683289531 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index fad47853ad3c..a2de5a44bd41 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
@@ -2161,22 +2161,25 @@ batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_bla_claim *claim;
int idx = 0;
+ int ret = 0;
rcu_read_lock();
hlist_for_each_entry_rcu(claim, head, hash_entry) {
if (idx++ < *idx_skip)
continue;
- if (batadv_bla_claim_dump_entry(msg, portid, seq,
- primary_if, claim)) {
+
+ ret = batadv_bla_claim_dump_entry(msg, portid, seq,
+ primary_if, claim);
+ if (ret) {
*idx_skip = idx - 1;
goto unlock;
}
}
- *idx_skip = idx;
+ *idx_skip = 0;
unlock:
rcu_read_unlock();
- return 0;
+ return ret;
}
/**
@@ -2391,22 +2394,25 @@ batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_bla_backbone_gw *backbone_gw;
int idx = 0;
+ int ret = 0;
rcu_read_lock();
hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
if (idx++ < *idx_skip)
continue;
- if (batadv_bla_backbone_dump_entry(msg, portid, seq,
- primary_if, backbone_gw)) {
+
+ ret = batadv_bla_backbone_dump_entry(msg, portid, seq,
+ primary_if, backbone_gw);
+ if (ret) {
*idx_skip = idx - 1;
goto unlock;
}
}
- *idx_skip = idx;
+ *idx_skip = 0;
unlock:
rcu_read_unlock();
- return 0;
+ return ret;
}
/**
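
The two hunks above fix the bucket-dump helpers so the error code from the per-entry dump is propagated to the caller, and so the skip counter is reset to zero once a bucket has been fully emitted — the next bucket is then walked from its first entry instead of skipping entries it never contained. A minimal userspace sketch of that resume contract, with a plain array standing in for a hash bucket and a byte budget standing in for the netlink message (all names hypothetical):

#include <stdio.h>

/* Hypothetical stand-in: emit() fails once the (imaginary)
 * message buffer is out of space. */
static int emit(int value, int *budget)
{
	(void)value;
	if (*budget <= 0)
		return -1;		/* message full */
	(*budget)--;
	return 0;
}

/* Dump one bucket, resuming at *idx_skip; on failure remember where
 * to resume, on success reset *idx_skip for the next bucket. */
static int dump_bucket(const int *bucket, int len, int *idx_skip, int *budget)
{
	int idx, ret;

	for (idx = 0; idx < len; idx++) {
		if (idx < *idx_skip)
			continue;

		ret = emit(bucket[idx], budget);
		if (ret) {
			*idx_skip = idx;	/* resume here next call */
			return ret;
		}
	}
	*idx_skip = 0;				/* bucket done: restart at 0 */
	return 0;
}

int main(void)
{
	int bucket[] = {1, 2, 3, 4};
	int skip = 0, budget = 2;

	while (dump_bucket(bucket, 4, &skip, &budget))
		budget = 2;			/* new message, retry from skip */
	printf("dumped all entries\n");
	return 0;
}
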
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index b27571abcd2f..71f95a3e4d3f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 21d1189957a7..4229b01ac7b5 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 90a08d35c501..37b069698b04 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 9703c791ffc5..a60bacf7120b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -43,13 +44,19 @@
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "send.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -393,7 +400,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
batadv_arp_hw_src(skb, hdr_size), &ip_src,
batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
- if (hdr_size == 0)
+ if (hdr_size < sizeof(struct batadv_unicast_packet))
return;
unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
@@ -495,7 +502,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
* the one with the lowest address
*/
if (tmp_max == max && max_orig_node &&
- batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
+ batadv_compare_eth(candidate->orig, max_orig_node->orig))
goto out;
ret = true;
@@ -852,6 +859,151 @@ out:
#endif
/**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ * netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_dat_entry *dat_entry)
+{
+ int msecs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+ if (!hdr)
+ return -ENOBUFS;
+
+ msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+ if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+ dat_entry->ip) ||
+ nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+ dat_entry->mac_addr) ||
+ nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ * a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, int *idx_skip)
+{
+ struct batadv_dat_entry *dat_entry;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_dat_cache_dump_entry(msg, portid, seq,
+ dat_entry)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_hashtable *hash;
+ struct batadv_priv *bat_priv;
+ int bucket = cb->args[0];
+ struct hlist_head *head;
+ int idx = cb->args[1];
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ hash = bat_priv->dat.hash;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_dat_cache_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq, head,
+ &idx))
+ break;
+
+ bucket++;
+ idx = 0;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+
+ ret = msg->len;
+
+out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
+
+/**
 * batadv_arp_get_type() - parse an ARP packet and get the type
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to analyse
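
batadv_dat_cache_dump() above persists its position in the two generic-netlink callback slots (cb->args[0] holds the hash bucket, cb->args[1] the index inside it), so the kernel can invoke it repeatedly until the whole table has been sent. A small sketch of that two-level cursor over a hypothetical table (sizes and names invented):

#include <stdio.h>

#define NUM_BUCKETS 3

/* Hypothetical hash table: buckets of entries, -1 terminates. */
static const int buckets[NUM_BUCKETS][4] = {
	{10, 11, -1}, {20, -1}, {30, 31, 32, -1},
};

/* One "dump pass": emit at most budget entries starting from the
 * saved (bucket, idx) cursor; return 0 once the table is finished. */
static int dump_pass(long *cursor_bucket, long *cursor_idx, int budget)
{
	long bucket = *cursor_bucket;
	long idx = *cursor_idx;

	while (bucket < NUM_BUCKETS) {
		while (buckets[bucket][idx] != -1) {
			if (budget-- == 0)
				goto save;	/* message full */
			printf("entry %d\n", buckets[bucket][idx]);
			idx++;
		}
		bucket++;
		idx = 0;			/* next bucket from entry 0 */
	}
save:
	*cursor_bucket = bucket;
	*cursor_idx = idx;
	return bucket < NUM_BUCKETS;
}

int main(void)
{
	long b = 0, i = 0;

	while (dump_pass(&b, &i, 2))
		;				/* simulate repeated dump calls */
	return 0;
}
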
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 12897eb46268..a04596028337 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -28,6 +28,7 @@
#include "originator.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
int batadv_dat_init(struct batadv_priv *bat_priv);
void batadv_dat_free(struct batadv_priv *bat_priv);
int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb);
/**
* batadv_dat_inc_counter() - increment the correct DAT packet counter
@@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
{
}
+static inline int
+batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
u8 subtype)
{
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 22dde42fd80e..0fddc17106bd 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <[email protected]>
*
@@ -288,7 +288,8 @@ batadv_frag_merge_packets(struct hlist_head *chain)
/* Move the existing MAC header to just before the payload. (Override
* the fragment header.)
*/
- skb_pull_rcsum(skb_out, hdr_size);
+ skb_pull(skb_out, hdr_size);
+ skb_out->ip_summed = CHECKSUM_NONE;
memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
skb_set_mac_header(skb_out, -ETH_HLEN);
skb_reset_network_header(skb_out);
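
The change above drops skb_pull_rcsum() in favour of a plain skb_pull() plus CHECKSUM_NONE: pull_rcsum would keep a CHECKSUM_COMPLETE value consistent for the removed header, but the following memmove() rewrites bytes without any checksum update, so the maintained sum is stale either way and the stack must recompute. A minimal sketch of why rearranging bytes invalidates a previously computed one's-complement checksum:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 16-bit one's-complement checksum, as used by IP/TCP/UDP. */
static uint16_t csum16(const uint8_t *p, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)(p[i] << 8 | p[i + 1]);
	if (len & 1)
		sum += (uint32_t)(p[len - 1] << 8);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t pkt[8] = {1, 2, 3, 4, 5, 6, 7, 8};
	uint16_t before = csum16(pkt, sizeof(pkt));

	/* Rearranging payload bytes (the memmove in the merge path)
	 * silently breaks any checksum computed earlier. */
	memmove(pkt, pkt + 2, 4);

	if (csum16(pkt, sizeof(pkt)) != before)
		printf("stale checksum: must recompute (CHECKSUM_NONE)\n");
	return 0;
}
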
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 138b22a1836a..944512e07782 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <[email protected]>
*
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 37fe9a644f22..c294f6fd43e0 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 981f58421a32..f0b86fcb2493 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b3e156af2256..936c107f3199 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index afebd9c7edf4..80afb2793687 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 5f186bff284a..c405d15befd6 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -763,6 +763,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
+ if (bat_priv->num_ifaces >= UINT_MAX) {
+ ret = -ENOSPC;
+ goto err_dev;
+ }
+
ret = netdev_master_upper_dev_link(hard_iface->net_dev,
soft_iface, NULL, NULL, NULL);
if (ret)
@@ -876,7 +881,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface);
/* nobody uses this interface anymore */
- if (!bat_priv->num_ifaces) {
+ if (bat_priv->num_ifaces == 0) {
batadv_gw_check_client_stop(bat_priv);
if (autodel == BATADV_IF_CLEANUP_AUTO)
@@ -912,7 +917,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
if (ret)
goto free_if;
- hard_iface->if_num = -1;
+ hard_iface->if_num = 0;
hard_iface->net_dev = net_dev;
hard_iface->soft_iface = NULL;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
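
With num_ifaces widened to unsigned int (see the types.h hunk further down), if_num 0 becomes the "not assigned" default, and the enable path above refuses another interface once the counter would wrap. A checked-increment sketch of that guard:

#include <limits.h>
#include <stdio.h>

/* Refuse to attach another interface once the counter would wrap. */
static int attach_iface(unsigned int *num_ifaces)
{
	if (*num_ifaces >= UINT_MAX)
		return -1;			/* -ENOSPC in the kernel */
	(*num_ifaces)++;
	return 0;
}

int main(void)
{
	unsigned int n = UINT_MAX - 1;

	printf("attach: %d\n", attach_iface(&n));	/* ok, n == UINT_MAX */
	printf("attach: %d\n", attach_iface(&n));	/* refused */
	return 0;
}
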
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index de5e9a374ece..d1c0f6189301 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 04d964358c98..7b49e4001778 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 4ce1b6d3ad5c..9490a7ca2ba6 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index e91f29c7c638..55c358ad3331 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -24,6 +24,7 @@
#include <linux/debugfs.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
+#include <linux/eventpoll.h>
#include <linux/export.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 84cddd01eeab..958be22beda9 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index dc9fa37ddd14..853773e45f79 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -22,6 +22,7 @@
#include <linux/compiler.h>
#include <linux/debugfs.h>
#include <linux/errno.h>
+#include <linux/eventpoll.h>
#include <linux/export.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 35e02b2b9e72..35f4f397ed57 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d31c8266e244..69c0d85bceb3 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index f7ba3f96d8f3..057a28a9fe88 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -25,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.0"
+#define BATADV_SOURCE_VERSION "2018.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -331,11 +331,13 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
*
* Return: true when x is a predecessor of y, false otherwise
*/
-#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
- typeof(y)_d2 = (y); \
- typeof(x)_dummy = (_d1 - _d2); \
- (void)(&_d1 == &_d2); \
- _dummy > batadv_smallest_signed_int(_dummy); })
+#define batadv_seq_before(x, y) ({ \
+ typeof(x)_d1 = (x); \
+ typeof(y)_d2 = (y); \
+ typeof(x)_dummy = (_d1 - _d2); \
+ (void)(&_d1 == &_d2); \
+ _dummy > batadv_smallest_signed_int(_dummy); \
+})
/**
* batadv_seq_after() - Checks if a sequence number x is a successor of y
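
The reformatted batadv_seq_before() macro keeps its original logic: compute x - y in the unsigned type and declare x "before" y when the difference lies strictly past the signed midpoint, which tolerates counter wraparound. The same serial-number comparison for a u16 counter, as a small runnable sketch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* x precedes y iff (x - y) mod 2^16 lies strictly past the signed
 * midpoint (0x8000), mirroring batadv_smallest_signed_int(). */
static int seq16_before(uint16_t x, uint16_t y)
{
	uint16_t d = (uint16_t)(x - y);

	return d > 0x8000u;
}

int main(void)
{
	assert(seq16_before(1, 2));	/* plain ordering */
	assert(seq16_before(65535, 2));	/* across the wrap */
	assert(!seq16_before(2, 65535));
	printf("serial-number comparison ok\n");
	return 0;
}
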
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index cbdeb47ec3f6..de3a055f7dd8 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -40,6 +40,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -52,14 +53,20 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/addrconf.h>
+#include <net/genetlink.h>
#include <net/if_inet6.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -102,7 +109,36 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
}
/**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+ static const u8 prefix[] = {0x01, 0x00, 0x5E};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+ static const u8 prefix[] = {0x33, 0x33};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
* batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -119,9 +155,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct net_device *bridge = batadv_mcast_get_bridge(dev);
struct netdev_hw_addr *mc_list_entry;
struct batadv_hw_addr *new;
@@ -129,6 +168,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
netif_addr_lock_bh(bridge ? bridge : dev);
netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+ if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+ continue;
+
+ if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+ continue;
+
new = kmalloc(sizeof(*new), GFP_ATOMIC);
if (!new) {
ret = -ENOMEM;
@@ -193,6 +238,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
/**
* batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: a bridge slave whose bridge to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -204,10 +250,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct br_ip_list *br_ip_entry, *tmp;
struct batadv_hw_addr *new;
u8 mcast_addr[ETH_ALEN];
@@ -221,6 +270,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
goto out;
list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+ if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+ continue;
+
+ if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+ continue;
+
batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
continue;
@@ -543,8 +598,8 @@ update:
bat_priv->mcast.enabled = true;
}
- return !(mcast_data.flags &
- (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
+ return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
+ mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
}
/**
@@ -568,11 +623,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
if (!batadv_mcast_mla_tvlv_update(bat_priv))
goto update;
- ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
- ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
@@ -1286,6 +1341,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
#endif
/**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 or error code.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv)
+{
+ u32 flags = bat_priv->mcast.flags;
+ u32 flags_priv = BATADV_NO_FLAGS;
+
+ if (bat_priv->mcast.bridged) {
+ flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+ if (bat_priv->mcast.querier_ipv4.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+ if (bat_priv->mcast.querier_ipv6.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+ if (bat_priv->mcast.querier_ipv4.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+ if (bat_priv->mcast.querier_ipv6.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) ||
+ nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_orig_node *orig_node)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ orig_node->orig)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capabilities)) {
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS,
+ orig_node->mcast_flags)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ * table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, long *idx_skip)
+{
+ struct batadv_orig_node *orig_node;
+ long idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capa_initialized))
+ continue;
+
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_mcast_flags_dump_entry(msg, portid, seq,
+ orig_node)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: current bucket to dump
+ * @idx: index in current bucket to the next entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ long bucket_tmp = *bucket;
+ struct hlist_head *head;
+ long idx_tmp = *idx;
+
+ while (bucket_tmp < hash->size) {
+ head = &hash->table[bucket_tmp];
+
+ if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
+ &idx_tmp))
+ break;
+
+ bucket_tmp++;
+ idx_tmp = 0;
+ }
+
+ *bucket = bucket_tmp;
+ *idx = idx_tmp;
+
+ return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ * callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+ struct batadv_hard_iface **primary_if)
+{
+ struct batadv_hard_iface *hard_iface = NULL;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ hard_iface = batadv_primary_if_get_selected(bat_priv);
+ if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (!ret && primary_if)
+ *primary_if = hard_iface;
+ else
+ batadv_hardif_put(hard_iface);
+
+ return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct batadv_priv *bat_priv;
+ long *bucket = &cb->args[0];
+ long *idx = &cb->args[1];
+ int ret;
+
+ ret = batadv_mcast_netlink_get_primary(cb, &primary_if);
+ if (ret)
+ return ret;
+
+ bat_priv = netdev_priv(primary_if->soft_iface);
+ ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+ bat_priv, bucket, idx);
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+
+/**
* batadv_mcast_free() - free the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
*/
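
batadv_mcast_addr_is_ipv4()/_is_ipv6() above classify listeners by the reserved multicast MAC prefixes: IPv4 groups map into 01:00:5e plus the low 23 bits of the group address, IPv6 groups into 33:33 plus the low 32 bits. A small sketch of the IPv4 mapping and the matching prefix test:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Map an IPv4 multicast group (host byte order) to its MAC address:
 * fixed 01:00:5e prefix plus the low 23 bits of the group. */
static void ipv4_mcast_mac(uint32_t group, uint8_t mac[6])
{
	mac[0] = 0x01;
	mac[1] = 0x00;
	mac[2] = 0x5e;
	mac[3] = (group >> 16) & 0x7f;	/* only 23 bits survive */
	mac[4] = (group >> 8) & 0xff;
	mac[5] = group & 0xff;
}

/* The prefix check used to classify such addresses. */
static int mac_is_ipv4_mcast(const uint8_t *mac)
{
	static const uint8_t prefix[] = {0x01, 0x00, 0x5e};

	return memcmp(prefix, mac, sizeof(prefix)) == 0;
}

int main(void)
{
	uint8_t mac[6];

	ipv4_mcast_mac(0xe0010203, mac);	/* 224.1.2.3 */
	printf("%02x:%02x:%02x:%02x:%02x:%02x -> %s\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5],
	       mac_is_ipv4_mcast(mac) ? "IPv4 multicast" : "other");
	return 0;
}
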
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3ac06337ab71..3b04ab13f0eb 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -21,6 +21,7 @@
#include "main.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv);
int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv);
+
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb);
+
void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
@@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
return 0;
}
+static inline int
+batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline int batadv_mcast_flags_dump(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
{
}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a823d3899bad..0d9459b69bdb 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
@@ -45,8 +45,10 @@
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "hard-interface.h"
+#include "multicast.h"
#include "originator.h"
#include "soft-interface.h"
#include "tp_meter.h"
@@ -64,39 +66,44 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
};
static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
- [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
- [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
- [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
- [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
- [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
- [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
- [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
- [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
- [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
- [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
- [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TQ] = { .type = NLA_U8 },
- [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
- [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
- [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
+ [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
+ [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
+ [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
+ [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
+ [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
+ [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
+ [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
+ [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
+ [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TQ] = { .type = NLA_U8 },
+ [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
+ [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
+ [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = { .type = NLA_U32 },
+ [BATADV_ATTR_DAT_CACHE_HWADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 },
};
/**
@@ -147,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
goto out;
#endif
+ if (batadv_mcast_mesh_info_put(msg, bat_priv))
+ goto out;
+
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
hard_iface = primary_if->net_dev;
@@ -604,6 +614,18 @@ static const struct genl_ops batadv_netlink_ops[] = {
.policy = batadv_netlink_policy,
.dumpit = batadv_bla_backbone_dump,
},
+ {
+ .cmd = BATADV_CMD_GET_DAT_CACHE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_dat_cache_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_MCAST_FLAGS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_mcast_flags_dump,
+ },
};
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 0e7e57b69b54..571d9a5ae7aa 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b48116bb24ef..c3578444f3cb 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index adaeafa4f71e..65c346812bc1 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 58a7d9274435..716e5b43acfa 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -1569,7 +1569,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
* Return: 0 on success or negative error number in case of failure
*/
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
- int max_if_num)
+ unsigned int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_algo_ops *bao = bat_priv->algo_ops;
@@ -1611,7 +1611,7 @@ err:
* Return: 0 on success or negative error number in case of failure
*/
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
- int max_if_num)
+ unsigned int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_hashtable *hash = bat_priv->orig_hash;
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 8e543a3cdc6c..3b3f59b881e1 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -73,9 +73,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
- int max_if_num);
+ unsigned int max_if_num);
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
- int max_if_num);
+ unsigned int max_if_num);
struct batadv_orig_node_vlan *
batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
unsigned short vid);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index b6891e8b741c..cc3ed93a6d51 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -759,6 +759,7 @@ free_skb:
/**
* batadv_reroute_unicast_packet() - update the unicast header for re-routing
* @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast packet to process
* @unicast_packet: the unicast header to be updated
* @dst_addr: the payload destination
* @vid: VLAN identifier
@@ -770,7 +771,7 @@ free_skb:
* Return: true if the packet header has been updated, false otherwise
*/
static bool
-batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_unicast_packet *unicast_packet,
u8 *dst_addr, unsigned short vid)
{
@@ -799,8 +800,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
}
/* update the packet header */
+ skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ether_addr_copy(unicast_packet->dest, orig_addr);
unicast_packet->ttvn = orig_ttvn;
+ skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ret = true;
out:
@@ -841,7 +844,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* the packet to
*/
if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
- if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
ethhdr->h_dest, vid))
batadv_dbg_ratelimited(BATADV_DBG_TT,
bat_priv,
@@ -887,7 +890,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* destination can possibly be updated and forwarded towards the new
* target host
*/
- if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
ethhdr->h_dest, vid)) {
batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
"Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
@@ -910,12 +913,14 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
if (!primary_if)
return false;
+ /* update the packet header */
+ skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
+ unicast_packet->ttvn = curr_ttvn;
+ skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
batadv_hardif_put(primary_if);
- unicast_packet->ttvn = curr_ttvn;
-
return true;
}
@@ -968,14 +973,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
int check, hdr_size = sizeof(*unicast_packet);
enum batadv_subtype subtype;
- struct ethhdr *ethhdr;
int ret = NET_RX_DROP;
bool is4addr, is_gw;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
- unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
- ethhdr = eth_hdr(skb);
-
is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
/* the caller function should have already pulled 2 bytes */
if (is4addr)
@@ -995,12 +996,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
goto free_skb;
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
/* If this is a unicast packet from another backbone gw,
* drop it.
*/
- orig_addr_gw = ethhdr->h_source;
+ orig_addr_gw = eth_hdr(skb)->h_source;
orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
if (orig_node_gw) {
is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
@@ -1015,6 +1018,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
}
if (is4addr) {
+ unicast_4addr_packet =
+ (struct batadv_unicast_4addr_packet *)skb->data;
subtype = unicast_4addr_packet->subtype;
batadv_dat_inc_counter(bat_priv, subtype);
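
The skb_postpull_rcsum()/skb_postpush_rcsum() pairs added above keep a CHECKSUM_COMPLETE value consistent while the unicast header is rewritten: subtract the header bytes from the running sum before modifying them, then add the new bytes back in. The same incremental update works for any one's-complement sum; a minimal userspace sketch (helper names hypothetical):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Unfolded 32-bit accumulator over 16-bit words (even length). */
static uint32_t acc(const uint8_t *p, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < len; i += 2)
		sum += (uint32_t)(p[i] << 8 | p[i + 1]);
	return sum;
}

static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint8_t pkt[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
	uint32_t sum = acc(pkt, sizeof(pkt));

	/* "postpull": drop the 4-byte header's contribution,
	 * rewrite it, then "postpush" the new bytes back in. */
	sum -= acc(pkt, 4);
	memset(pkt, 0xab, 4);
	sum += acc(pkt, 4);

	assert(fold(sum) == fold(acc(pkt, sizeof(pkt))));
	printf("incremental checksum update matches recompute\n");
	return 0;
}
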
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index a1289bc5f115..db54c2d9b8bf 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 2a5ab6f1076d..4a35f5c2f52b 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 1e8c79093623..64cce07b8fe6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 900c5ce21cd4..edeffcb9f3a2 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -459,13 +459,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
/* skb->dev & skb->pkt_type are set here */
skb->protocol = eth_type_trans(skb, soft_iface);
-
- /* should not be necessary anymore as we use skb_pull_rcsum()
- * TODO: please verify this and remove this TODO
- * -- Dec 21st 2009, Simon Wunderlich
- */
-
- /* skb->ip_summed = CHECKSUM_UNNECESSARY; */
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
batadv_inc_counter(bat_priv, BATADV_CNT_RX);
batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 075c5b5b2ce1..daf87f07fadd 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index c1578fa0b952..f2eef43bd2ec 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index bbeee61221fa..c1e3fb69952d 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8b576712d0c1..11520de96ccb 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index c8b8f2cb2c2b..68e600974759 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7550a9ccd695..0225616d5771 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 8d9e3abec2c8..01b6c8eafaf9 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 5ffcb45ac6ff..a637458205d1 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index a74df33f446d..ef5867f49824 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index bb1578410e0c..476b052ad982 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -167,7 +167,7 @@ struct batadv_hard_iface {
struct list_head list;
/** @if_num: identifier of the interface */
- s16 if_num;
+ unsigned int if_num;
/** @if_status: status of the interface for batman-adv */
char if_status;
@@ -1596,7 +1596,7 @@ struct batadv_priv {
atomic_t batman_queue_left;
/** @num_ifaces: number of interfaces assigned to this mesh interface */
- char num_ifaces;
+ unsigned int num_ifaces;
/** @mesh_obj: kobject for sysfs mesh subdirectory */
struct kobject *mesh_obj;
@@ -2186,15 +2186,16 @@ struct batadv_algo_orig_ops {
* orig_node due to a new hard-interface being added into the mesh
* (optional)
*/
- int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+ int (*add_if)(struct batadv_orig_node *orig_node,
+ unsigned int max_if_num);
/**
* @del_if: ask the routing algorithm to apply the needed changes to the
* orig_node due to a hard-interface being removed from the mesh
* (optional)
*/
- int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
- int del_if_num);
+ int (*del_if)(struct batadv_orig_node *orig_node,
+ unsigned int max_if_num, unsigned int del_if_num);
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/** @print: print the originator table (optional) */
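Note on the batman-adv hunks above: widening @if_num and @num_ifaces from s16/char to unsigned int (and threading the wider type through the add_if/del_if ops) avoids sign-dependent surprises, since plain char has implementation-defined signedness and makes a poor counter. A standalone sketch of the hazard, with illustrative values that are not batman-adv code:

    #include <stdio.h>

    int main(void)
    {
            signed char n = 127;    /* a char-sized counter at its limit */

            n++;                    /* commonly wraps to -128 */
            if (n < 0)
                    printf("counter went negative: %d\n", n);

            unsigned int m = 127;   /* the widened type keeps counting */
            m++;
            printf("unsigned counter: %u\n", m);
            return 0;
    }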
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3394e6791673..66c0781773df 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Slave connection state and connectable mode bit 38
* and scannable bit 21.
*/
- if (connectable && (!(hdev->le_states[4] & 0x01) ||
- !(hdev->le_states[2] & 0x40)))
+ if (connectable && (!(hdev->le_states[4] & 0x40) ||
+ !(hdev->le_states[2] & 0x20)))
return false;
}
@@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Master connection state and connectable mode bit 35 and
* scannable 19.
*/
- if (connectable && (!(hdev->le_states[4] & 0x10) ||
+ if (connectable && (!(hdev->le_states[4] & 0x08) ||
!(hdev->le_states[2] & 0x08)))
return false;
}
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 5f3074cb6b4d..5e44d842cc5d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -210,8 +210,8 @@ static ssize_t show_channel(struct device *tty_dev, struct device_attribute *att
return sprintf(buf, "%d\n", dev->channel);
}
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(channel, 0444, show_channel, NULL);
static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
struct rfcomm_dlc *dlc)
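The DEVICE_ATTR conversions just above (and the matching BRPORT_ATTR, bin_attribute and module_param changes in the bridge, can and ceph hunks below) replace symbolic permission macros with the plain octal modes checkpatch now prefers. A standalone check of the equivalences those one-for-one swaps rely on (S_IRUGO is kernel-only, so it is re-derived here):

    #include <sys/stat.h>
    #include <assert.h>

    /* kernel definition, reproduced for a self-contained check */
    #define S_IRUGO (S_IRUSR | S_IRGRP | S_IROTH)

    int main(void)
    {
            static_assert(S_IRUGO == 0444, "read-only for everyone");
            static_assert((S_IRUGO | S_IWUSR) == 0644, "owner rw, others r");
            static_assert(S_IWUSR == 0200, "owner write-only");
            return 0;
    }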
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 01117ae84f1d..a2ddae2f37d7 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2296,8 +2296,14 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
else
sec_level = authreq_to_seclevel(auth);
- if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK))
+ if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) {
+ /* If the link is already encrypted with sufficient security, we
+ * still need to refresh encryption as per Core Spec 5.0 Vol 3,
+ * Part H 2.4.6
+ */
+ smp_ltk_encrypt(conn, hcon->sec_level);
return 0;
+ }
if (sec_level > hcon->pending_sec_level)
hcon->pending_sec_level = sec_level;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 6bf06e756df2..26e1616b2c90 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
switch (event) {
case NETDEV_CHANGEMTU:
- dev_set_mtu(br->dev, br_min_mtu(br));
+ dev_set_mtu(br->dev, br_mtu(br));
break;
case NETDEV_CHANGEADDR:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 1285ca30ab0a..278fc999d355 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -224,7 +224,7 @@ static void br_get_stats64(struct net_device *dev,
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- if (new_mtu > br_min_mtu(br))
+ if (new_mtu > br_mtu(br))
return -EINVAL;
dev->mtu = new_mtu;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9ba4ed65c52b..87b2afd455c7 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -424,8 +424,18 @@ int br_del_bridge(struct net *net, const char *name)
return ret;
}
+static bool min_mtu(int a, int b)
+{
+ return a < b;
+}
+
+static bool max_mtu(int a, int b)
+{
+ return a > b;
+}
+
/* MTU of the bridge pseudo-device: ETH_DATA_LEN if there are no ports,
 * otherwise the minimum (or, with VLAN filtering, the maximum) of the ports
 */
-int br_min_mtu(const struct net_bridge *br)
+static int __br_mtu(const struct net_bridge *br, bool (compare_fn)(int, int))
{
const struct net_bridge_port *p;
int mtu = 0;
@@ -436,13 +446,21 @@ int br_min_mtu(const struct net_bridge *br)
mtu = ETH_DATA_LEN;
else {
list_for_each_entry(p, &br->port_list, list) {
- if (!mtu || p->dev->mtu < mtu)
+ if (!mtu || compare_fn(p->dev->mtu, mtu))
mtu = p->dev->mtu;
}
}
return mtu;
}
+int br_mtu(const struct net_bridge *br)
+{
+ if (br_vlan_enabled(br->dev))
+ return __br_mtu(br, max_mtu);
+ else
+ return __br_mtu(br, min_mtu);
+}
+
static void br_set_gso_limits(struct net_bridge *br)
{
unsigned int gso_max_size = GSO_MAX_SIZE;
@@ -594,7 +612,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (changed_addr)
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
- dev_set_mtu(br->dev, br_min_mtu(br));
+ dev_set_mtu(br->dev, br_mtu(br));
br_set_gso_limits(br);
kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -641,7 +659,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
*/
del_nbp(p);
- dev_set_mtu(br->dev, br_min_mtu(br));
+ dev_set_mtu(br->dev, br_mtu(br));
br_set_gso_limits(br);
spin_lock_bh(&br->lock);
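br_mtu() above folds the old minimum-only walk into __br_mtu(), which takes a comparator so one loop can yield either the smallest port MTU (the safe default) or the largest (used when VLAN filtering is enabled). A userspace sketch of the injected-comparator pattern, with a hypothetical port list:

    #include <stdio.h>
    #include <stdbool.h>

    static bool lt(int a, int b) { return a < b; }
    static bool gt(int a, int b) { return a > b; }

    /* walk once; let the caller decide what "better" means */
    static int pick_mtu(const int *mtu, int n, bool (*better)(int, int))
    {
            int best = 0;

            for (int i = 0; i < n; i++)
                    if (!best || better(mtu[i], best))
                            best = mtu[i];
            return best;
    }

    int main(void)
    {
            int ports[] = { 1500, 9000, 1492 };

            printf("min %d, max %d\n",
                   pick_mtu(ports, 3, lt), pick_mtu(ports, 3, gt));
            return 0;
    }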
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 27f1d4f2114a..9b16eaf33819 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -214,7 +214,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
- goto inhdr_error;
+ goto csum_error;
len = ntohs(iph->tot_len);
if (skb->len < len) {
@@ -236,6 +236,8 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
*/
return 0;
+csum_error:
+ __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8e13a64d8c99..048d5b51813b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -578,7 +578,7 @@ int br_del_bridge(struct net *net, const char *name);
int br_add_if(struct net_bridge *br, struct net_device *dev,
struct netlink_ext_ack *extack);
int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_min_mtu(const struct net_bridge *br);
+int br_mtu(const struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
netdev_features_t features);
void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index b1be0dcfba6b..0318a69888d4 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -893,7 +893,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
static struct bin_attribute bridge_forward = {
.attr = { .name = SYSFS_BRIDGE_FDB,
- .mode = S_IRUGO, },
+ .mode = 0444, },
.read = brforward_read,
};
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0254c35b2bf0..fd31ad83ec7b 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -44,7 +44,7 @@ static int store_##_name(struct net_bridge_port *p, unsigned long v) \
{ \
return store_flag(p, v, _mask); \
} \
-static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \
+static BRPORT_ATTR(_name, 0644, \
show_##_name, store_##_name)
static int store_flag(struct net_bridge_port *p, unsigned long v,
@@ -71,7 +71,7 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
return sprintf(buf, "%d\n", p->path_cost);
}
-static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(path_cost, 0644,
show_path_cost, br_stp_set_path_cost);
static ssize_t show_priority(struct net_bridge_port *p, char *buf)
@@ -79,91 +79,91 @@ static ssize_t show_priority(struct net_bridge_port *p, char *buf)
return sprintf(buf, "%d\n", p->priority);
}
-static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(priority, 0644,
show_priority, br_stp_set_port_priority);
static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
{
return br_show_bridge_id(buf, &p->designated_root);
}
-static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL);
+static BRPORT_ATTR(designated_root, 0444, show_designated_root, NULL);
static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf)
{
return br_show_bridge_id(buf, &p->designated_bridge);
}
-static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL);
+static BRPORT_ATTR(designated_bridge, 0444, show_designated_bridge, NULL);
static ssize_t show_designated_port(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->designated_port);
}
-static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL);
+static BRPORT_ATTR(designated_port, 0444, show_designated_port, NULL);
static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->designated_cost);
}
-static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL);
+static BRPORT_ATTR(designated_cost, 0444, show_designated_cost, NULL);
static ssize_t show_port_id(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "0x%x\n", p->port_id);
}
-static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL);
+static BRPORT_ATTR(port_id, 0444, show_port_id, NULL);
static ssize_t show_port_no(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "0x%x\n", p->port_no);
}
-static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL);
+static BRPORT_ATTR(port_no, 0444, show_port_no, NULL);
static ssize_t show_change_ack(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->topology_change_ack);
}
-static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL);
+static BRPORT_ATTR(change_ack, 0444, show_change_ack, NULL);
static ssize_t show_config_pending(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->config_pending);
}
-static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL);
+static BRPORT_ATTR(config_pending, 0444, show_config_pending, NULL);
static ssize_t show_port_state(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->state);
}
-static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL);
+static BRPORT_ATTR(state, 0444, show_port_state, NULL);
static ssize_t show_message_age_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer));
}
-static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL);
+static BRPORT_ATTR(message_age_timer, 0444, show_message_age_timer, NULL);
static ssize_t show_forward_delay_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer));
}
-static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL);
+static BRPORT_ATTR(forward_delay_timer, 0444, show_forward_delay_timer, NULL);
static ssize_t show_hold_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer));
}
-static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+static BRPORT_ATTR(hold_timer, 0444, show_hold_timer, NULL);
static int store_flush(struct net_bridge_port *p, unsigned long v)
{
br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry
return 0;
}
-static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+static BRPORT_ATTR(flush, 0200, NULL, store_flush);
static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
{
@@ -179,7 +179,7 @@ static int store_group_fwd_mask(struct net_bridge_port *p,
return 0;
}
-static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask,
store_group_fwd_mask);
BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
@@ -204,7 +204,7 @@ static int store_multicast_router(struct net_bridge_port *p,
{
return br_multicast_set_port_router(p, v);
}
-static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
+static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
store_multicast_router);
BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE);
@@ -255,6 +255,9 @@ static ssize_t brport_show(struct kobject *kobj,
struct brport_attribute *brport_attr = to_brport_attr(attr);
struct net_bridge_port *p = to_brport(kobj);
+ if (!brport_attr->show)
+ return -EINVAL;
+
return brport_attr->show(p, buf);
}
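The brport_show() guard matters for write-only attributes such as flush above, which register with a NULL show handler; without the check, reading that sysfs file would call through a NULL function pointer. The shape of the fix as a self-contained sketch:

    #include <stdio.h>
    #include <errno.h>

    typedef int (*show_fn)(char *buf);

    /* a write-only attribute leaves ->show NULL; reads must fail
     * cleanly rather than jump through a NULL pointer
     */
    static int attr_show(show_fn show, char *buf)
    {
            if (!show)
                    return -EINVAL;
            return show(buf);
    }

    int main(void)
    {
            char buf[16];

            printf("%d\n", attr_show(NULL, buf));   /* -EINVAL, no crash */
            return 0;
    }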
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 51935270c651..9896f4975353 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -168,6 +168,8 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
masterv = br_vlan_find(vg, vid);
if (WARN_ON(!masterv))
return NULL;
+ refcount_set(&masterv->refcnt, 1);
+ return masterv;
}
refcount_inc(&masterv->refcnt);
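The br_vlan_get_master() fix is needed because refcount_t refuses to increment from zero (that is treated as a potential use-after-free and warns), so a master VLAN found with a dropped refcount has to be re-armed with refcount_set(..., 1) and returned, instead of falling through to refcount_inc(). A toy model of that rule:

    #include <stdio.h>

    struct ref { int count; };

    /* refcount_t semantics in miniature: inc-from-zero is refused */
    static int ref_inc(struct ref *r)
    {
            if (r->count == 0)
                    return -1;      /* kernel refcount_inc() would WARN */
            r->count++;
            return 0;
    }

    int main(void)
    {
            struct ref master = { .count = 0 }; /* found already at zero */

            if (ref_inc(&master))
                    master.count = 1;           /* re-arm, as the hunk does */
            printf("count = %d\n", master.count);
            return 0;
    }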
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 279527f8b1fe..620e54f08296 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -172,32 +172,83 @@ ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par)
return true;
}
+static bool poolsize_invalid(const struct ebt_mac_wormhash *w)
+{
+ return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple));
+}
+
+static bool wormhash_offset_invalid(int off, unsigned int len)
+{
+ if (off == 0) /* not present */
+ return false;
+
+ if (off < (int)sizeof(struct ebt_among_info) ||
+ off % __alignof__(struct ebt_mac_wormhash))
+ return true;
+
+ off += sizeof(struct ebt_mac_wormhash);
+
+ return off > len;
+}
+
+static bool wormhash_sizes_valid(const struct ebt_mac_wormhash *wh, int a, int b)
+{
+ if (a == 0)
+ a = sizeof(struct ebt_among_info);
+
+ return ebt_mac_wormhash_size(wh) + a == b;
+}
+
static int ebt_among_mt_check(const struct xt_mtchk_param *par)
{
const struct ebt_among_info *info = par->matchinfo;
const struct ebt_entry_match *em =
container_of(par->matchinfo, const struct ebt_entry_match, data);
- int expected_length = sizeof(struct ebt_among_info);
+ unsigned int expected_length = sizeof(struct ebt_among_info);
const struct ebt_mac_wormhash *wh_dst, *wh_src;
int err;
+ if (expected_length > em->match_size)
+ return -EINVAL;
+
+ if (wormhash_offset_invalid(info->wh_dst_ofs, em->match_size) ||
+ wormhash_offset_invalid(info->wh_src_ofs, em->match_size))
+ return -EINVAL;
+
wh_dst = ebt_among_wh_dst(info);
- wh_src = ebt_among_wh_src(info);
+ if (poolsize_invalid(wh_dst))
+ return -EINVAL;
+
expected_length += ebt_mac_wormhash_size(wh_dst);
+ if (expected_length > em->match_size)
+ return -EINVAL;
+
+ wh_src = ebt_among_wh_src(info);
+ if (poolsize_invalid(wh_src))
+ return -EINVAL;
+
+ if (info->wh_src_ofs < info->wh_dst_ofs) {
+ if (!wormhash_sizes_valid(wh_src, info->wh_src_ofs, info->wh_dst_ofs))
+ return -EINVAL;
+ } else {
+ if (!wormhash_sizes_valid(wh_dst, info->wh_dst_ofs, info->wh_src_ofs))
+ return -EINVAL;
+ }
+
expected_length += ebt_mac_wormhash_size(wh_src);
if (em->match_size != EBT_ALIGN(expected_length)) {
- pr_info("wrong size: %d against expected %d, rounded to %zd\n",
- em->match_size, expected_length,
- EBT_ALIGN(expected_length));
+ pr_err_ratelimited("wrong size: %d against expected %d, rounded to %zd\n",
+ em->match_size, expected_length,
+ EBT_ALIGN(expected_length));
return -EINVAL;
}
if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) {
- pr_info("dst integrity fail: %x\n", -err);
+ pr_err_ratelimited("dst integrity fail: %x\n", -err);
return -EINVAL;
}
if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) {
- pr_info("src integrity fail: %x\n", -err);
+ pr_err_ratelimited("src integrity fail: %x\n", -err);
return -EINVAL;
}
return 0;
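The new ebt_among helpers treat wh_dst_ofs/wh_src_ofs as untrusted input: an offset must point past the fixed header, be aligned for the wormhash type, and leave room for the structure inside the match blob, all before it is turned into a pointer. The same validate-before-dereference pattern in a compilable sketch (struct names hypothetical):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdbool.h>

    struct hdr { uint32_t payload_ofs; };   /* hypothetical wire header */
    struct payload { uint32_t len; };

    /* reject an attacker-controlled offset before forming a pointer */
    static bool offset_ok(size_t off, size_t blob_len)
    {
            if (off < sizeof(struct hdr))           /* inside the header */
                    return false;
            if (off % _Alignof(struct payload))     /* misaligned */
                    return false;
            return off + sizeof(struct payload) <= blob_len;
    }

    static const struct payload *get_payload(const void *blob, size_t len)
    {
            const struct hdr *h = blob;

            if (!offset_ok(h->payload_ofs, len))
                    return NULL;
            return (const struct payload *)((const char *)blob + h->payload_ofs);
    }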
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 61a9f1be1263..165b9d678cf1 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -72,8 +72,8 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
/* Check for overflow. */
if (info->burst == 0 ||
user2credits(info->avg * info->burst) < user2credits(info->avg)) {
- pr_info("overflow, try lower: %u/%u\n",
- info->avg, info->burst);
+ pr_info_ratelimited("overflow, try lower: %u/%u\n",
+ info->avg, info->burst);
return -EINVAL;
}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 02c4b409d317..a94d23b0a9af 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1641,7 +1641,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
int off = ebt_compat_match_offset(match, m->match_size);
compat_uint_t msize = m->match_size - off;
- BUG_ON(off >= m->match_size);
+ if (WARN_ON(off >= m->match_size))
+ return -EINVAL;
if (copy_to_user(cm->u.name, match->name,
strlen(match->name) + 1) || put_user(msize, &cm->match_size))
@@ -1671,7 +1672,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
int off = xt_compat_target_offset(target);
compat_uint_t tsize = t->target_size - off;
- BUG_ON(off >= t->target_size);
+ if (WARN_ON(off >= t->target_size))
+ return -EINVAL;
if (copy_to_user(cm->u.name, target->name,
strlen(target->name) + 1) || put_user(tsize, &cm->match_size))
@@ -1902,7 +1904,8 @@ static int ebt_buf_add(struct ebt_entries_buf_state *state,
if (state->buf_kern_start == NULL)
goto count_only;
- BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len);
+ if (WARN_ON(state->buf_kern_offset + sz > state->buf_kern_len))
+ return -EINVAL;
memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz);
@@ -1915,7 +1918,8 @@ static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz)
{
char *b = state->buf_kern_start;
- BUG_ON(b && state->buf_kern_offset > state->buf_kern_len);
+ if (WARN_ON(b && state->buf_kern_offset > state->buf_kern_len))
+ return -EINVAL;
if (b != NULL && sz > 0)
memset(b + state->buf_kern_offset, 0, sz);
@@ -1992,8 +1996,10 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
pad = XT_ALIGN(size_kern) - size_kern;
if (pad > 0 && dst) {
- BUG_ON(state->buf_kern_len <= pad);
- BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad);
+ if (WARN_ON(state->buf_kern_len <= pad))
+ return -EINVAL;
+ if (WARN_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad))
+ return -EINVAL;
memset(dst + size_kern, 0, pad);
}
return off + match_size;
@@ -2043,7 +2049,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
if (ret < 0)
return ret;
- BUG_ON(ret < match32->match_size);
+ if (WARN_ON(ret < match32->match_size))
+ return -EINVAL;
growth += ret - match32->match_size;
growth += ebt_compat_entry_padsize();
@@ -2053,7 +2060,9 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
if (match_kern)
match_kern->match_size = ret;
- WARN_ON(type == EBT_COMPAT_TARGET && size_left);
+ if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+ return -EINVAL;
+
match32 = (struct compat_ebt_entry_mwt *) buf;
}
@@ -2109,6 +2118,19 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
*
* offsets are relative to the beginning of struct ebt_entry (i.e., 0).
*/
+ for (i = 0; i < 4 ; ++i) {
+ if (offsets[i] > *total)
+ return -EINVAL;
+
+ if (i < 3 && offsets[i] == *total)
+ return -EINVAL;
+
+ if (i == 0)
+ continue;
+ if (offsets[i-1] > offsets[i])
+ return -EINVAL;
+ }
+
for (i = 0, j = 1 ; j < 4 ; j++, i++) {
struct compat_ebt_entry_mwt *match32;
unsigned int size;
@@ -2140,7 +2162,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
startoff = state->buf_user_offset - startoff;
- BUG_ON(*total < startoff);
+ if (WARN_ON(*total < startoff))
+ return -EINVAL;
*total -= startoff;
return 0;
}
@@ -2267,7 +2290,8 @@ static int compat_do_replace(struct net *net, void __user *user,
state.buf_kern_len = size64;
ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
- BUG_ON(ret < 0); /* parses same data again */
+ if (WARN_ON(ret < 0))
+ goto out_unlock;
vfree(entries_tmp);
tmp.entries_size = size64;
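Two things happen throughout the ebtables compat path above: BUG_ON() on conditions reachable from malformed user input becomes WARN_ON() plus a graceful -EINVAL, and size_entry_mwt() now validates the user-supplied offsets[] before use. That check requires each offset to stay within *total and to be monotonically non-decreasing, with only the last one allowed to reach the end of the buffer; a standalone mirror of the new loop:

    #include <stdbool.h>

    /* same predicate as the offsets[] loop added to size_entry_mwt() */
    static bool offsets_sane(const unsigned int off[4], unsigned int total)
    {
            for (int i = 0; i < 4; i++) {
                    if (off[i] > total)
                            return false;
                    if (i < 3 && off[i] == total)
                            return false;
                    if (i > 0 && off[i - 1] > off[i])
                            return false;
            }
            return true;
    }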
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 6da324550eec..1684ba5b51eb 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -72,7 +72,7 @@ MODULE_AUTHOR("Urs Thuermann <[email protected]>, "
MODULE_ALIAS_NETPROTO(PF_CAN);
static int stats_timer __read_mostly = 1;
-module_param(stats_timer, int, S_IRUGO);
+module_param(stats_timer, int, 0444);
MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
static struct kmem_cache *rcv_cache __read_mostly;
diff --git a/net/can/gw.c b/net/can/gw.c
index 398dd0395ad9..faa3da88a127 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -72,7 +72,7 @@ MODULE_ALIAS(CAN_GW_NAME);
#define CGW_DEFAULT_HOPS 1
static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS;
-module_param(max_hops, uint, S_IRUGO);
+module_param(max_hops, uint, 0444);
MODULE_PARM_DESC(max_hops,
"maximum " CAN_GW_NAME " routing hops for CAN frames "
"(valid values: " __stringify(CGW_MIN_HOPS) "-"
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 1e492ef2a33d..4adf07826f4a 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -54,7 +54,7 @@ static const struct kernel_param_ops param_ops_supported_features = {
.get = param_get_supported_features,
};
module_param_cb(supported_features, &param_ops_supported_features, NULL,
- S_IRUGO);
+ 0444);
const char *ceph_msg_type_name(int type)
{
@@ -418,6 +418,7 @@ ceph_parse_options(char *options, const char *dev_name,
opt->flags |= CEPH_OPT_FSID;
break;
case Opt_name:
+ kfree(opt->name);
opt->name = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
@@ -427,6 +428,9 @@ ceph_parse_options(char *options, const char *dev_name,
}
break;
case Opt_secret:
+ ceph_crypto_key_destroy(opt->key);
+ kfree(opt->key);
+
opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
if (!opt->key) {
err = -ENOMEM;
@@ -437,6 +441,9 @@ ceph_parse_options(char *options, const char *dev_name,
goto out;
break;
case Opt_key:
+ ceph_crypto_key_destroy(opt->key);
+ kfree(opt->key);
+
opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
if (!opt->key) {
err = -ENOMEM;
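ceph_parse_options() can legitimately see the same option twice (e.g. "name=a,name=b" in a mount string), and each repeat previously leaked the earlier kstrndup()/key allocation. The added kfree()/ceph_crypto_key_destroy() calls release the old value first; kfree(NULL) is a no-op, so the first occurrence needs no special case. The shape of the fix in plain C:

    #include <stdlib.h>
    #include <string.h>

    struct opts { char *name; };

    /* a repeated option must release the earlier value first;
     * free(NULL), like kfree(NULL), is a harmless no-op
     */
    static int set_name(struct opts *o, const char *val)
    {
            free(o->name);
            o->name = strdup(val);
            return o->name ? 0 : -1;
    }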
diff --git a/net/core/dev.c b/net/core/dev.c
index dda9d7b9a840..e13807b5c84d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1571,6 +1571,25 @@ static void dev_disable_gro_hw(struct net_device *dev)
netdev_WARN(dev, "failed to disable GRO_HW!\n");
}
+const char *netdev_cmd_to_name(enum netdev_cmd cmd)
+{
+#define N(val) \
+ case NETDEV_##val: \
+ return "NETDEV_" __stringify(val);
+ switch (cmd) {
+ N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
+ N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
+ N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
+ N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
+ N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
+ N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
+ N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
+ };
+#undef N
+ return "UNKNOWN_NETDEV_EVENT";
+}
+EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
+
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
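netdev_cmd_to_name() above leans on a local helper macro: each N(val) expands to both the case label and the matching string literal, so the enum and its printable name cannot drift apart. The trick reduced to a standalone program (the kernel's __stringify does double expansion; single-level suffices here since the argument is a literal token):

    #include <stdio.h>

    #define __stringify(x) #x

    enum color { COLOR_RED, COLOR_GREEN, COLOR_BLUE };

    /* one macro emits the case label and the string, in lockstep */
    static const char *color_name(enum color c)
    {
    #define N(val) case COLOR_##val: return "COLOR_" __stringify(val);
            switch (c) {
            N(RED) N(GREEN) N(BLUE)
            }
    #undef N
            return "UNKNOWN_COLOR";
    }

    int main(void)
    {
            printf("%s\n", color_name(COLOR_GREEN)); /* COLOR_GREEN */
            return 0;
    }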
@@ -2378,12 +2397,15 @@ EXPORT_SYMBOL(netdev_set_num_tc);
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
+ bool disabling;
int rc;
+ disabling = txq < dev->real_num_tx_queues;
+
if (txq < 1 || txq > dev->num_tx_queues)
return -EINVAL;
@@ -2399,15 +2421,19 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->num_tc)
netif_setup_tc(dev, txq);
- if (txq < dev->real_num_tx_queues) {
+ dev->real_num_tx_queues = txq;
+
+ if (disabling) {
+ synchronize_net();
qdisc_reset_all_tx_gt(dev, txq);
#ifdef CONFIG_XPS
netif_reset_xps_queues_gt(dev, txq);
#endif
}
+ } else {
+ dev->real_num_tx_queues = txq;
}
- dev->real_num_tx_queues = txq;
return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);
@@ -3271,15 +3297,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
static void skb_update_prio(struct sk_buff *skb)
{
- struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+ const struct netprio_map *map;
+ const struct sock *sk;
+ unsigned int prioidx;
- if (!skb->priority && skb->sk && map) {
- unsigned int prioidx =
- sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
+ if (skb->priority)
+ return;
+ map = rcu_dereference_bh(skb->dev->priomap);
+ if (!map)
+ return;
+ sk = skb_to_full_sk(skb);
+ if (!sk)
+ return;
- if (prioidx < map->priomap_len)
- skb->priority = map->priomap[prioidx];
- }
+ prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
+
+ if (prioidx < map->priomap_len)
+ skb->priority = map->priomap[prioidx];
}
#else
#define skb_update_prio(skb)
@@ -4344,6 +4378,9 @@ int netdev_rx_handler_register(struct net_device *dev,
if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
+ if (dev->priv_flags & IFF_NO_RX_HANDLER)
+ return -EINVAL;
+
/* Note: rx_handler_data must be set before rx_handler */
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
@@ -6389,6 +6426,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
.linking = true,
.upper_info = upper_info,
};
+ struct net_device *master_dev;
int ret = 0;
ASSERT_RTNL();
@@ -6400,11 +6438,14 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (netdev_has_upper_dev(upper_dev, dev))
return -EBUSY;
- if (netdev_has_upper_dev(dev, upper_dev))
- return -EEXIST;
-
- if (master && netdev_master_upper_dev_get(dev))
- return -EBUSY;
+ if (!master) {
+ if (netdev_has_upper_dev(dev, upper_dev))
+ return -EEXIST;
+ } else {
+ master_dev = netdev_master_upper_dev_get(dev);
+ if (master_dev)
+ return master_dev == upper_dev ? -EEXIST : -EBUSY;
+ }
ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
@@ -7535,6 +7576,19 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ /* LRO/HW-GRO features cannot be combined with RX-FCS */
+ if (features & NETIF_F_RXFCS) {
+ if (features & NETIF_F_LRO) {
+ netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
@@ -7991,7 +8045,8 @@ int register_netdev(struct net_device *dev)
{
int err;
- rtnl_lock();
+ if (rtnl_lock_killable())
+ return -EINTR;
err = register_netdevice(dev);
rtnl_unlock();
return err;
@@ -8041,7 +8096,6 @@ static void netdev_wait_allrefs(struct net_device *dev)
rcu_barrier();
rtnl_lock();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
@@ -8113,10 +8167,6 @@ void netdev_run_todo(void)
= list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list);
- rtnl_lock();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- __rtnl_unlock();
-
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
pr_err("network todo '%s' but state %d\n",
dev->name, dev->reg_state);
@@ -8134,8 +8184,9 @@ void netdev_run_todo(void)
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
WARN_ON(dev->dn_ptr);
-
+#endif
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
@@ -8557,7 +8608,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
new_nsid = peernet2id_alloc(dev_net(dev), net);
/* If there is an ifindex conflict assign a new one */
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 0ab1af04296c..a04e1e88bf3a 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
if (colon)
*colon = 0;
- dev_load(net, ifr->ifr_name);
-
/*
* See which interface the caller is talking about.
*/
@@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
+ dev_load(net, ifr->ifr_name);
rcu_read_lock();
ret = dev_ifsioc_locked(net, ifr, cmd);
rcu_read_unlock();
@@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
return ret;
case SIOCETHTOOL:
+ dev_load(net, ifr->ifr_name);
rtnl_lock();
ret = dev_ethtool(net, ifr);
rtnl_unlock();
@@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSIFNAME:
+ dev_load(net, ifr->ifr_name);
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
rtnl_lock();
@@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
+ dev_load(net, ifr->ifr_name);
rtnl_lock();
ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
@@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
cmd == SIOCGHWTSTAMP ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
+ dev_load(net, ifr->ifr_name);
rtnl_lock();
ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 18d385ed8237..9236e421bd62 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1695,10 +1695,11 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
goto nla_put_failure;
if (table->resource_valid) {
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
- table->resource_id, DEVLINK_ATTR_PAD);
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
- table->resource_units, DEVLINK_ATTR_PAD);
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+ table->resource_id, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+ table->resource_units, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
}
if (devlink_dpipe_matches_put(table, skb))
goto nla_put_failure;
@@ -1797,7 +1798,7 @@ send_done:
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
@@ -1806,7 +1807,6 @@ send_done:
nla_put_failure:
err = -EMSGSIZE;
err_table_put:
-err_skb_send_alloc:
genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
@@ -2072,7 +2072,7 @@ static int devlink_dpipe_entries_fill(struct genl_info *info,
table->counters_enabled,
&dump_ctx);
if (err)
- goto err_entries_dump;
+ return err;
send_done:
nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
@@ -2080,16 +2080,10 @@ send_done:
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
return genlmsg_reply(dump_ctx.skb, info);
-
-err_entries_dump:
-err_skb_send_alloc:
- genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
- nlmsg_free(dump_ctx.skb);
- return err;
}
static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
@@ -2228,7 +2222,7 @@ send_done:
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
return genlmsg_reply(skb, info);
@@ -2236,7 +2230,6 @@ send_done:
nla_put_failure:
err = -EMSGSIZE;
err_table_put:
-err_skb_send_alloc:
genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
@@ -2332,12 +2325,38 @@ devlink_resource_validate_children(struct devlink_resource *resource)
list_for_each_entry(child_resource, &resource->resource_list, list)
parts_size += child_resource->size_new;
- if (parts_size > resource->size)
+ if (parts_size > resource->size_new)
size_valid = false;
out:
resource->size_valid = size_valid;
}
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+ u64 remainder;
+ int err = 0;
+
+ if (size > resource->size_params.size_max) {
+ NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params.size_min) {
+ NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params.size_granularity, &remainder);
+ if (remainder) {
+ NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
struct genl_info *info)
{
@@ -2356,12 +2375,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
if (!resource)
return -EINVAL;
- if (!resource->resource_ops->size_validate)
- return -EINVAL;
-
size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = resource->resource_ops->size_validate(devlink, size,
- info->extack);
+ err = devlink_resource_validate_size(resource, size, info->extack);
if (err)
return err;
@@ -2372,20 +2387,22 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
return 0;
}
-static void
+static int
devlink_resource_size_params_put(struct devlink_resource *resource,
struct sk_buff *skb)
{
struct devlink_resource_size_params *size_params;
- size_params = resource->size_params;
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
- size_params->size_granularity, DEVLINK_ATTR_PAD);
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
- size_params->size_max, DEVLINK_ATTR_PAD);
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
- size_params->size_min, DEVLINK_ATTR_PAD);
- nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit);
+ size_params = &resource->size_params;
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+ size_params->size_granularity, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+ size_params->size_max, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+ size_params->size_min, DEVLINK_ATTR_PAD) ||
+ nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
+ return -EMSGSIZE;
+ return 0;
}
static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
@@ -2409,10 +2426,12 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
resource->size_new, DEVLINK_ATTR_PAD);
if (resource->resource_ops && resource->resource_ops->occ_get)
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
- resource->resource_ops->occ_get(devlink),
- DEVLINK_ATTR_PAD);
- devlink_resource_size_params_put(resource, skb);
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+ resource->resource_ops->occ_get(devlink),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (devlink_resource_size_params_put(resource, skb))
+ goto nla_put_failure;
if (list_empty(&resource->resource_list))
goto out;
@@ -2717,22 +2736,22 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.doit = devlink_nl_cmd_dpipe_table_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.doit = devlink_nl_cmd_dpipe_entries_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.doit = devlink_nl_cmd_dpipe_headers_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
@@ -2752,8 +2771,8 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_RELOAD,
@@ -3147,17 +3166,19 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
*/
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
- struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_size_params *size_params,
const struct devlink_resource_ops *resource_ops)
{
struct devlink_resource *resource;
struct list_head *resource_list;
+ bool top_hierarchy;
int err = 0;
+ top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
mutex_lock(&devlink->lock);
resource = devlink_resource_find(devlink, NULL, resource_id);
if (resource) {
@@ -3194,7 +3215,8 @@ int devlink_resource_register(struct devlink *devlink,
resource->id = resource_id;
resource->resource_ops = resource_ops;
resource->size_valid = true;
- resource->size_params = size_params;
+ memcpy(&resource->size_params, size_params,
+ sizeof(resource->size_params));
INIT_LIST_HEAD(&resource->resource_list);
list_add_tail(&resource->list, resource_list);
out:
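The devlink fixes above stop discarding nla_put_*() return values: every put can fail with -EMSGSIZE once the message runs out of tailroom, and a partial attribute set must make the caller cancel the message. The chained-put shape, with a hypothetical emitter standing in for nla_put_u64_64bit():

    #include <errno.h>
    #include <stddef.h>

    struct msg { size_t used, cap; };

    /* hypothetical emitter with the nla_put_*() contract:
     * 0 on success, nonzero once the buffer is out of room
     */
    static int put_u64(struct msg *m, unsigned long long v)
    {
            if (m->used + sizeof(v) > m->cap)
                    return -EMSGSIZE;
            m->used += sizeof(v);
            return 0;
    }

    /* chain the puts and fail as a unit, as
     * devlink_resource_size_params_put() now does
     */
    static int fill(struct msg *m)
    {
            if (put_u64(m, 1) || put_u64(m, 64) || put_u64(m, 2))
                    return -EMSGSIZE;
            return 0;
    }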
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 554d36449231..64cef977484a 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -107,7 +107,7 @@ EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
#if IS_ENABLED(CONFIG_IPV6)
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr)
+ const struct in6_addr *saddr)
{
struct dst_cache_pcpu *idst;
@@ -117,7 +117,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
rt6_get_cookie((struct rt6_info *)dst));
- idst->in6_saddr = *addr;
+ idst->in6_saddr = *saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 494e6a5d7306..bb6e498c6e3d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -121,6 +121,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
[ETHTOOL_ID_UNSPEC] = "Unspec",
[ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
[ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
+ [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
};
static const char
@@ -1022,6 +1023,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
+ /* If FLOW_RSS was requested then user-space must be using the
+ * new definition, as FLOW_RSS is newer.
+ */
+ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
+ info_size = sizeof(info);
+ if (copy_from_user(&info, useraddr, info_size))
+ return -EFAULT;
+ }
+
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -1251,9 +1261,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context; check it's 0 as well */
+ if (rxfh.rss_context && !ops->get_rxfh_context)
+ return -EOPNOTSUPP;
rxfh.indir_size = dev_indir_size;
rxfh.key_size = dev_key_size;
@@ -1276,7 +1288,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
hkey = rss_config + indir_bytes;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ if (rxfh.rss_context)
+ ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
+ &dev_hfunc,
+ rxfh.rss_context);
+ else
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
if (ret)
goto out;
@@ -1306,6 +1323,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
u8 *hkey = NULL;
u8 *rss_config;
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+ bool delete = false;
if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
@@ -1319,9 +1337,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context; check it's 0 as well */
+ if (rxfh.rss_context && !ops->set_rxfh_context)
+ return -EOPNOTSUPP;
/* If either indir, hash key or function is valid, proceed further.
* Must request at least one change: indir size, hash key or function.
@@ -1346,7 +1366,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
- /* rxfh.indir_size == 0 means reset the indir table to default.
+ /* rxfh.indir_size == 0 means reset the indir table to default (master
+ * context) or delete the context (other RSS contexts).
* rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
*/
if (rxfh.indir_size &&
@@ -1359,9 +1380,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
} else if (rxfh.indir_size == 0) {
- indir = (u32 *)rss_config;
- for (i = 0; i < dev_indir_size; i++)
- indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ if (rxfh.rss_context == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ } else {
+ delete = true;
+ }
}
if (rxfh.key_size) {
@@ -1374,15 +1399,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ if (rxfh.rss_context)
+ ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
+ &rxfh.rss_context, delete);
+ else
+ ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
if (ret)
goto out;
- /* indicate whether rxfh was set to default */
- if (rxfh.indir_size == 0)
- dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
- else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
+ &rxfh.rss_context, sizeof(rxfh.rss_context)))
+ ret = -EFAULT;
+
+ if (!rxfh.rss_context) {
+ /* indicate whether rxfh was set to default */
+ if (rxfh.indir_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ }
out:
kfree(rss_config);
@@ -2277,6 +2312,11 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
tuna->type_id != ETHTOOL_TUNABLE_U32)
return -EINVAL;
break;
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ if (tuna->len != sizeof(u16) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U16)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
@@ -2520,11 +2560,14 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM };
+ int rc;
if (!dev->ethtool_ops->get_fecparam)
return -EOPNOTSUPP;
- dev->ethtool_ops->get_fecparam(dev, &fecparam);
+ rc = dev->ethtool_ops->get_fecparam(dev, &fecparam);
+ if (rc)
+ return rc;
if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
return -EFAULT;
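The ethtool changes let ETHTOOL_GRSSH/ETHTOOL_SRSSH address additional RSS contexts: a nonzero rxfh.rss_context is now routed to the new get_rxfh_context/set_rxfh_context driver ops instead of being rejected, with indir_size == 0 on a non-default context meaning "delete it". A driver-side stub with the op shapes as inferred from the call sites above (a sketch, not the authoritative ethtool_ops prototypes):

    /* inferred from ops->get_rxfh_context(dev, indir, hkey,
     *                                     &dev_hfunc, rxfh.rss_context)
     */
    static int my_get_rxfh_context(struct net_device *dev, u32 *indir,
                                   u8 *key, u8 *hfunc, u32 rss_context)
    {
            /* copy out this context's table, key and hash function */
            return 0;
    }

    /* inferred from ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
     *                                     &rxfh.rss_context, delete)
     */
    static int my_set_rxfh_context(struct net_device *dev, const u32 *indir,
                                   const u8 *key, const u8 hfunc,
                                   u32 *rss_context, bool delete)
    {
            /* a newly allocated context id may be written back through
             * *rss_context; delete tears the context down
             */
            return 0;
    }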
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..9d87ce868402 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
+ if (fib_rule_port_range_set(&rule->sport_range))
+ return false;
+ if (fib_rule_port_range_set(&rule->dport_range))
+ return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -51,6 +55,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+ r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
@@ -220,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}
+static int nla_get_port_range(struct nlattr *pattr,
+ struct fib_rule_port_range *port_range)
+{
+ const struct fib_rule_port_range *pr = nla_data(pattr);
+
+ if (!fib_rule_port_range_valid(pr))
+ return -EINVAL;
+
+ port_range->start = pr->start;
+ port_range->end = pr->end;
+
+ return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+ struct fib_rule_port_range *range)
+{
+ return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -424,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
!uid_eq(r->uid_range.end, rule->uid_range.end))
continue;
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -469,6 +505,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
: fib_default_rule_pref(ops);
+ rule->proto = tb[FRA_PROTOCOL] ?
+ nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -565,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->uid_range = fib_kuid_range_unset;
}
+ if (tb[FRA_IP_PROTO])
+ rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &rule->sport_range);
+ if (err)
+ goto errout_free;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &rule->dport_range);
+ if (err)
+ goto errout_free;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -630,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rule_port_range sprange = {0, 0};
+ struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
@@ -663,7 +721,25 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
range = fib_kuid_range_unset;
}
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &sprange);
+ if (err)
+ goto errout;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &dprange);
+ if (err)
+ goto errout;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
+ if (tb[FRA_PROTOCOL] &&
+ (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
+ continue;
+
if (frh->action && (frh->action != rule->action))
continue;
@@ -704,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
!uid_eq(rule->uid_range.end, range.end)))
continue;
+ if (tb[FRA_IP_PROTO] &&
+ (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+ continue;
+
+ if (fib_rule_port_range_set(&sprange) &&
+ !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+ continue;
+
+ if (fib_rule_port_range_set(&dprange) &&
+ !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -781,7 +869,11 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4) /* FRA_FWMASK */
+ nla_total_size_64bit(8) /* FRA_TUN_ID */
- + nla_total_size(sizeof(struct fib_kuid_range));
+ + nla_total_size(sizeof(struct fib_kuid_range))
+ + nla_total_size(1) /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_IP_PROTO */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -812,6 +904,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
+ if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
+ goto nla_put_failure;
+
if (rule->action == FR_ACT_GOTO &&
rcu_access_pointer(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
@@ -843,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
(uid_range_set(&rule->uid_range) &&
- nla_put_uid_range(skb, &rule->uid_range)))
+ nla_put_uid_range(skb, &rule->uid_range)) ||
+ (fib_rule_port_range_set(&rule->sport_range) &&
+ nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (fib_rule_port_range_set(&rule->dport_range) &&
+ nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
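fib_rules gains FRA_IP_PROTO, FRA_SPORT_RANGE and FRA_DPORT_RANGE, so policy rules can match on L4 protocol and port ranges (newer iproute2 exposes these as the ipproto/sport/dport selectors of "ip rule", assuming a matching userspace). nla_get_port_range() validates a range before copying it; a sketch of the implied predicate (the exact kernel check in fib_rule_port_range_valid() may differ in detail):

    #include <stdint.h>
    #include <stdbool.h>

    struct port_range { uint16_t start, end; };

    /* a set range must be non-empty and ordered */
    static bool port_range_valid(const struct port_range *r)
    {
            return r->start != 0 && r->start <= r->end;
    }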
diff --git a/net/core/filter.c b/net/core/filter.c
index 08ab4c65a998..00c711c5f1a2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1890,6 +1890,202 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+ struct bpf_map *, map, u32, key, u64, flags)
+{
+ /* If the user passes invalid input, drop the packet. */
+ if (unlikely(flags))
+ return SK_DROP;
+
+ msg->key = key;
+ msg->flags = flags;
+ msg->map = map;
+
+ return SK_PASS;
+}
+
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+ struct sock *sk = NULL;
+
+ if (msg->map) {
+ sk = __sock_map_lookup_elem(msg->map, msg->key);
+
+ msg->key = 0;
+ msg->map = NULL;
+ }
+
+ return sk;
+}
+
+static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+ .func = bpf_msg_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->apply_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
+ .func = bpf_msg_apply_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->cork_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+ .func = bpf_msg_cork_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_pull_data,
+ struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+ unsigned int len = 0, offset = 0, copy = 0;
+ struct scatterlist *sg = msg->sg_data;
+ int first_sg, last_sg, i, shift;
+ unsigned char *p, *to, *from;
+ int bytes = end - start;
+ struct page *page;
+
+ if (unlikely(flags || end <= start))
+ return -EINVAL;
+
+ /* First find the starting scatterlist element */
+ i = msg->sg_start;
+ do {
+ len = sg[i].length;
+ offset += len;
+ if (start < offset + len)
+ break;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != msg->sg_end);
+
+ if (unlikely(start >= offset + len))
+ return -EINVAL;
+
+ if (!msg->sg_copy[i] && bytes <= len)
+ goto out;
+
+ first_sg = i;
+
+ /* At this point we need to linearize multiple scatterlist
+ * elements or a single shared page. Either way we need to
+ * copy into a linear buffer exclusively owned by BPF. Then
+ * place the buffer in the scatterlist and fixup the original
+ * entries by removing the entries now in the linear buffer
+ * and shifting the remaining entries. For now we do not try
+ * to copy partial entries to avoid complexity of running out
+ * of sg_entry slots. The downside is reading a single byte
+ * will copy the entire sg entry.
+ */
+ do {
+ copy += sg[i].length;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ if (bytes < copy)
+ break;
+ } while (i != msg->sg_end);
+ last_sg = i;
+
+ if (unlikely(copy < end - start))
+ return -EINVAL;
+
+ page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
+ if (unlikely(!page))
+ return -ENOMEM;
+ p = page_address(page);
+ offset = 0;
+
+ i = first_sg;
+ do {
+ from = sg_virt(&sg[i]);
+ len = sg[i].length;
+ to = p + offset;
+
+ memcpy(to, from, len);
+ offset += len;
+ sg[i].length = 0;
+ put_page(sg_page(&sg[i]));
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != last_sg);
+
+ sg[first_sg].length = copy;
+ sg_set_page(&sg[first_sg], page, copy, 0);
+
+ /* To repair the sg ring we need to shift entries. If we only
+ * had a single entry though we can just replace it and
+ * be done. Otherwise walk the ring and shift the entries.
+ */
+ shift = last_sg - first_sg - 1;
+ if (!shift)
+ goto out;
+
+ i = first_sg + 1;
+ do {
+ int move_from;
+
+ if (i + shift >= MAX_SKB_FRAGS)
+ move_from = i + shift - MAX_SKB_FRAGS;
+ else
+ move_from = i + shift;
+
+ if (move_from == msg->sg_end)
+ break;
+
+ sg[i] = sg[move_from];
+ sg[move_from].length = 0;
+ sg[move_from].page_link = 0;
+ sg[move_from].offset = 0;
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (1);
+ msg->sg_end -= shift;
+ if (msg->sg_end < 0)
+ msg->sg_end += MAX_SKB_FRAGS;
+out:
+ msg->data = sg_virt(&sg[i]) + start - offset;
+ msg->data_end = msg->data + bytes;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+ .func = bpf_msg_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
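The new sk_msg helpers above round out the BPF_PROG_TYPE_SK_MSG plumbing: bpf_msg_apply_bytes()/bpf_msg_cork_bytes() scope a verdict to a byte range, and bpf_msg_pull_data() linearizes part of the scatterlist so the program can read it through msg->data/msg->data_end. A minimal SK_MSG program using the pull helper (BPF C; SEC() and the helper declarations come from the loader headers and are assumed):

    SEC("sk_msg")
    int msg_prog(struct sk_msg_md *msg)
    {
            /* make the first 8 bytes contiguous and verifier-visible */
            if (bpf_msg_pull_data(msg, 0, 8, 0))
                    return SK_DROP;
            if (msg->data + 8 > msg->data_end)
                    return SK_DROP;
            return SK_PASS;
    }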
@@ -2087,6 +2283,10 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
@@ -2096,19 +2296,21 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* SKB_GSO_TCPV4 needs to be changed into
* SKB_GSO_TCPV6.
*/
- if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
- skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ if (shinfo->gso_type & SKB_GSO_TCPV4) {
+ shinfo->gso_type &= ~SKB_GSO_TCPV4;
+ shinfo->gso_type |= SKB_GSO_TCPV6;
}
/* Due to IPv6 header, MSS needs to be downgraded. */
- skb_shinfo(skb)->gso_size -= len_diff;
+ skb_decrease_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IPV6);
@@ -2123,6 +2325,10 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
@@ -2132,19 +2338,21 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* SKB_GSO_TCPV6 needs to be changed into
* SKB_GSO_TCPV4.
*/
- if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
- skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ if (shinfo->gso_type & SKB_GSO_TCPV6) {
+ shinfo->gso_type &= ~SKB_GSO_TCPV6;
+ shinfo->gso_type |= SKB_GSO_TCPV4;
}
/* Due to IPv4 header, MSS can be upgraded. */
- skb_shinfo(skb)->gso_size += len_diff;
+ skb_increase_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IP);
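
Both conversions now go through skb_decrease_gso_size()/skb_increase_gso_size()
instead of open-coded arithmetic on gso_size, so the special GSO_BY_FRAGS
marker value can never be silently corrupted. A hedged sketch of those helpers;
the in-tree versions live in include/linux/skbuff.h and may differ in detail:

    static inline void skb_increase_gso_size(struct skb_shared_info *shinfo,
                                             u16 increment)
    {
        /* gso_size == GSO_BY_FRAGS means "segment along the frag_list";
         * adding to the marker would destroy it.
         */
        if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
            return;
        shinfo->gso_size += increment;
    }

    static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
                                             u16 decrement)
    {
        if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
            return;
        shinfo->gso_size -= decrement;
    }
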
@@ -2243,6 +2451,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
@@ -2252,11 +2464,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* Due to header grow, MSS needs to be downgraded. */
- skb_shinfo(skb)->gso_size -= len_diff;
+ skb_decrease_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
return 0;
@@ -2267,6 +2481,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
@@ -2276,11 +2494,13 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* Due to header shrink, MSS can be upgraded. */
- skb_shinfo(skb)->gso_size += len_diff;
+ skb_increase_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
return 0;
@@ -2831,7 +3051,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
- func == bpf_xdp_adjust_meta)
+ func == bpf_xdp_adjust_meta ||
+ func == bpf_msg_pull_data)
return true;
return false;
@@ -2991,7 +3212,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
@@ -3025,6 +3246,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
if (flags & BPF_F_ZERO_CSUM_TX)
info->key.tun_flags &= ~TUNNEL_CSUM;
+ if (flags & BPF_F_SEQ_NUMBER)
+ info->key.tun_flags |= TUNNEL_SEQ;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
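
With BPF_F_SEQ_NUMBER accepted here, a tc-BPF program can request TUNNEL_SEQ on
a collect-metadata tunnel so the fallback device stamps an incrementing GRE
sequence number into each packet. A minimal sketch, assuming
bpf_skb_set_tunnel_key() is declared by the usual BPF helper headers; the
tunnel id and address are placeholders:

    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>

    int set_tunnel(struct __sk_buff *skb)
    {
        struct bpf_tunnel_key key = {};

        key.tunnel_id = 42;            /* placeholder */
        key.remote_ipv4 = 0xac100164;  /* 172.16.1.100 */

        /* BPF_F_SEQ_NUMBER translates to TUNNEL_SEQ in tun_flags. */
        if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
                                   BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER))
            return TC_ACT_SHOT;
        return TC_ACT_OK;
    }
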
@@ -3381,17 +3604,13 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
struct sock *sk = bpf_sock->sk;
int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
- if (!sk_fullsock(sk))
+ if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
return -EINVAL;
-#ifdef CONFIG_INET
if (val)
tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
-#else
- return -EINVAL;
-#endif
}
static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
@@ -3593,6 +3812,22 @@ static const struct bpf_func_proto *
}
}
+static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_msg_redirect_map:
+ return &bpf_msg_redirect_map_proto;
+ case BPF_FUNC_msg_apply_bytes:
+ return &bpf_msg_apply_bytes_proto;
+ case BPF_FUNC_msg_cork_bytes:
+ return &bpf_msg_cork_bytes_proto;
+ case BPF_FUNC_msg_pull_data:
+ return &bpf_msg_pull_data_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -3982,6 +4217,32 @@ static bool sk_skb_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, info);
}
+static bool sk_msg_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ if (type == BPF_WRITE)
+ return false;
+
+ switch (off) {
+ case offsetof(struct sk_msg_md, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ if (off < 0 || off >= sizeof(struct sk_msg_md))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u64))
+ return false;
+
+ return true;
+}
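
sk_msg_md thus follows the same verifier contract as skb-based programs: data
and data_end are packet pointers, accesses must be aligned full 8-byte loads,
and writes are rejected. A minimal SK_MSG sketch showing the bounds check the
verifier demands before any dereference:

    #include <linux/bpf.h>

    int msg_filter(struct sk_msg_md *msg)
    {
        void *data = (void *)(long)msg->data;
        void *data_end = (void *)(long)msg->data_end;

        /* Mandatory: prove to the verifier the access is in bounds. */
        if (data + 4 > data_end)
            return SK_DROP;

        return ((__u8 *)data)[0] == 0xff ? SK_DROP : SK_PASS;
    }
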
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -4780,6 +5041,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct sk_msg_md, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data));
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data_end));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
@@ -4870,6 +5154,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = {
const struct bpf_prog_ops sk_skb_prog_ops = {
};
+const struct bpf_verifier_ops sk_msg_verifier_ops = {
+ .get_func_proto = sk_msg_func_proto,
+ .is_valid_access = sk_msg_is_valid_access,
+ .convert_ctx_access = sk_msg_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_msg_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 559db9ea8d86..d29f09bc5ff9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
}
EXPORT_SYMBOL(__get_hash_from_flowi6);
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
- memset(keys, 0, sizeof(*keys));
-
- keys->addrs.v4addrs.src = fl4->saddr;
- keys->addrs.v4addrs.dst = fl4->daddr;
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys->ports.src = fl4->fl4_sport;
- keys->ports.dst = fl4->fl4_dport;
- keys->keyid.keyid = fl4->fl4_gre_key;
- keys->basic.ip_proto = fl4->flowi4_proto;
-
- return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 0a3f88f08727..98fd12721221 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,6 +66,7 @@ struct net_rate_estimator {
static void est_fetch_counters(struct net_rate_estimator *e,
struct gnet_stats_basic_packed *b)
{
+ memset(b, 0, sizeof(*b));
if (e->stats_lock)
spin_lock(e->stats_lock);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index e010bb800d7b..9737302907b1 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -315,12 +315,12 @@ static int __net_init dev_proc_net_init(struct net *net)
{
int rc = -ENOMEM;
- if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
+ if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops))
goto out;
- if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
+ if (!proc_create("softnet_stat", 0444, net->proc_net,
&softnet_seq_fops))
goto out_dev;
- if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
+ if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops))
goto out_softnet;
if (wext_proc_init(net))
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 60a5ad2c33ee..c476f0794132 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -431,7 +431,7 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr,
return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
-static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);
static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
@@ -854,10 +854,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
}
static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
- = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+ = __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);
static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
- = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+ = __ATTR(rps_flow_cnt, 0644,
show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */
@@ -1154,7 +1154,7 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
- = __ATTR(hold_time, S_IRUGO | S_IWUSR,
+ = __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
static ssize_t bql_show_inflight(struct netdev_queue *queue,
@@ -1166,7 +1166,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
- __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
+ __ATTR(inflight, 0444, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
@@ -1182,7 +1182,7 @@ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
} \
\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
- = __ATTR(NAME, S_IRUGO | S_IWUSR, \
+ = __ATTR(NAME, 0644, \
bql_show_ ## NAME, bql_set_ ## NAME)
BQL_ATTR(limit, limit);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3cad5f51afd3..b5796d17a302 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,7 +29,6 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -41,6 +40,13 @@ struct net init_net = {
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
+/*
+ * pernet_ops_rwsem protects pernet_list, net_generic_ids,
+ * init_net_initialized and the first_device pointer.
+ * This object is internal to the net namespace code; please
+ * don't use it outside.
+ */
+DECLARE_RWSEM(pernet_ops_rwsem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -65,11 +71,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
struct net_generic *ng, *old_ng;
- BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id < MIN_PERNET_OPS_ID);
old_ng = rcu_dereference_protected(net->gen,
- lockdep_is_held(&net_mutex));
+ lockdep_is_held(&pernet_ops_rwsem));
if (old_ng->s.len > id) {
old_ng->ptr[id] = data;
return 0;
@@ -286,7 +291,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
*/
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
- /* Must be called with net_mutex held */
+ /* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
@@ -297,12 +302,16 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->user_ns = user_ns;
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
+ mutex_init(&net->ipv4.ra_mutex);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
if (error < 0)
goto out_undo;
}
+ rtnl_lock();
+ list_add_tail_rcu(&net->list, &net_namespace_list);
+ rtnl_unlock();
out:
return error;
@@ -354,7 +363,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;
static struct net *net_alloc(void)
@@ -408,32 +417,27 @@ struct net *copy_net_ns(unsigned long flags,
net = net_alloc();
if (!net) {
- dec_net_namespaces(ucounts);
- return ERR_PTR(-ENOMEM);
+ rv = -ENOMEM;
+ goto dec_ucounts;
}
-
+ refcount_set(&net->passive, 1);
+ net->ucounts = ucounts;
get_user_ns(user_ns);
- rv = mutex_lock_killable(&net_mutex);
- if (rv < 0) {
- net_free(net);
- dec_net_namespaces(ucounts);
- put_user_ns(user_ns);
- return ERR_PTR(rv);
- }
+ rv = down_read_killable(&pernet_ops_rwsem);
+ if (rv < 0)
+ goto put_userns;
- net->ucounts = ucounts;
rv = setup_net(net, user_ns);
- if (rv == 0) {
- rtnl_lock();
- list_add_tail_rcu(&net->list, &net_namespace_list);
- rtnl_unlock();
- }
- mutex_unlock(&net_mutex);
+
+ up_read(&pernet_ops_rwsem);
+
if (rv < 0) {
- dec_net_namespaces(ucounts);
+put_userns:
put_user_ns(user_ns);
net_drop_ns(net);
+dec_ucounts:
+ dec_net_namespaces(ucounts);
return ERR_PTR(rv);
}
return net;
@@ -466,26 +470,23 @@ static void unhash_nsid(struct net *net, struct net *last)
spin_unlock_bh(&net->nsid_lock);
}
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
struct net *net, *tmp, *last;
- struct list_head net_kill_list;
+ struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
/* Atomically snapshot the list of namespaces to cleanup */
- spin_lock_irq(&cleanup_list_lock);
- list_replace_init(&cleanup_list, &net_kill_list);
- spin_unlock_irq(&cleanup_list_lock);
+ net_kill_list = llist_del_all(&cleanup_list);
- mutex_lock(&net_mutex);
+ down_read(&pernet_ops_rwsem);
/* Don't let anyone else find us. */
rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list)
+ llist_for_each_entry(net, net_kill_list, cleanup_list)
list_del_rcu(&net->list);
/* Cache last net. After we unlock rtnl, no one new net
* added to net_namespace_list can assign nsid pointer
@@ -500,7 +501,7 @@ static void cleanup_net(struct work_struct *work)
last = list_last_entry(&net_namespace_list, struct net, list);
rtnl_unlock();
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ llist_for_each_entry(net, net_kill_list, cleanup_list) {
unhash_nsid(net, last);
list_add_tail(&net->exit_list, &net_exit_list);
}
@@ -520,7 +521,7 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list);
- mutex_unlock(&net_mutex);
+ up_read(&pernet_ops_rwsem);
/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
@@ -547,8 +548,8 @@ static void cleanup_net(struct work_struct *work)
*/
void net_ns_barrier(void)
{
- mutex_lock(&net_mutex);
- mutex_unlock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);
@@ -557,13 +558,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
void __put_net(struct net *net)
{
/* Cleanup the network namespace in process context */
- unsigned long flags;
-
- spin_lock_irqsave(&cleanup_list_lock, flags);
- list_add(&net->cleanup_list, &cleanup_list);
- spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
- queue_work(netns_wq, &net_cleanup_work);
+ if (llist_add(&net->cleanup_list, &cleanup_list))
+ queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
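
__put_net() now relies on the lock-free llist batching idiom: llist_add()
returns true only when the list was previously empty, so exactly one work item
is queued per batch, and the worker drains the whole batch with
llist_del_all(). The pattern in isolation, as a hedged sketch; my_item, my_work
and process() are hypothetical names:

    struct my_item {
        struct llist_node node;
    };

    static LLIST_HEAD(pending);
    static void my_worker(struct work_struct *work);
    static DECLARE_WORK(my_work, my_worker);

    static void defer(struct my_item *item)
    {
        /* Only the producer that finds the list empty queues the work. */
        if (llist_add(&item->node, &pending))
            queue_work(system_wq, &my_work);
    }

    static void my_worker(struct work_struct *work)
    {
        struct llist_node *batch = llist_del_all(&pending);
        struct my_item *item;

        llist_for_each_entry(item, batch, node)
            process(item);    /* hypothetical per-item handler */
    }
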
@@ -861,7 +857,7 @@ static int __init net_ns_init(void)
#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
- SLAB_PANIC, NULL);
+ SLAB_PANIC|SLAB_ACCOUNT, NULL);
/* Create workqueue for cleanup */
netns_wq = create_singlethread_workqueue("netns");
@@ -875,17 +871,12 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
-
- rtnl_lock();
- list_add_tail_rcu(&init_net.list, &net_namespace_list);
- rtnl_unlock();
-
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
register_pernet_subsys(&net_ns_ops);
@@ -996,7 +987,6 @@ again:
static void unregister_pernet_operations(struct pernet_operations *ops)
{
-
__unregister_pernet_operations(ops);
rcu_barrier();
if (ops->id)
@@ -1025,9 +1015,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
error = register_pernet_operations(first_device, ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1043,9 +1033,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
*/
void unregister_pernet_subsys(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -1071,11 +1061,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
int register_pernet_device(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
error = register_pernet_operations(&pernet_list, ops);
if (!error && (first_device == &pernet_list))
first_device = &ops->list;
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1091,11 +1081,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b8ab5c829511..7e4ede34cc52 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- size_t copy = min_t(size_t, count, 1023);
- char tb[copy + 1];
- if (copy_from_user(tb, user_buffer, copy))
- return -EFAULT;
- tb[copy] = 0;
- pr_debug("%s,%lu buffer -:%s:-\n",
- name, (unsigned long)count, tb);
+ size_t copy = min_t(size_t, count + 1, 1024);
+ char *tp = strndup_user(user_buffer, copy);
+
+ if (IS_ERR(tp))
+ return PTR_ERR(tp);
+
+ pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp);
+ kfree(tp);
}
if (!strcmp(name, "min_pkt_size")) {
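
The rewrite replaces a variable-length on-stack buffer with strndup_user(),
which copies a NUL-terminated string from userspace into a kmalloc'd buffer
and returns an ERR_PTR() on failure, leaving the kfree() to the caller. The
contract in isolation, as a hedged sketch around a hypothetical wrapper:

    static int log_user_string(const char __user *ubuf, size_t count)
    {
        /* Copy at most 1023 characters plus the terminating NUL. */
        char *s = strndup_user(ubuf, min_t(size_t, count + 1, 1024));

        if (IS_ERR(s))
            return PTR_ERR(s);
        pr_debug("user wrote -:%s:-\n", s);
        kfree(s);
        return 0;
    }
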
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc290413a49d..2d3949789cef 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -75,6 +75,12 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_killable(void)
+{
+ return mutex_lock_killable(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_killable);
+
static struct sk_buff *defer_kfree_skb_list;
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
@@ -453,12 +459,12 @@ static void rtnl_lock_unregistering_all(void)
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
- /* Close the race with cleanup_net() */
- mutex_lock(&net_mutex);
+ /* Close the race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 09bd89c90a71..b5c75d4fcf37 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -77,8 +77,8 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
unsigned long max_pg, num_pg, new_pg, old_pg;
struct user_struct *user;
@@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
return 0;
}
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
{
if (mmp->user) {
atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
free_uid(mmp->user);
}
}
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
{
@@ -3458,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
+{
+ skb_frag_t head_frag;
+ struct page *page;
+
+ page = virt_to_head_page(frag_skb->head);
+ head_frag.page.p = page;
+ head_frag.page_offset = frag_skb->data -
+ (unsigned char *)page_address(page);
+ head_frag.size = skb_headlen(frag_skb);
+ return head_frag;
+}
+
/**
* skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment
@@ -3662,15 +3677,19 @@ normal:
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(list_skb));
-
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
+ if (!skb_headlen(list_skb)) {
+ BUG_ON(!nfrags);
+ } else {
+ BUG_ON(!list_skb->head_frag);
- BUG_ON(!nfrags);
-
+ /* to make room for head_frag. */
+ i--;
+ frag--;
+ }
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
skb_zerocopy_clone(nskb, frag_skb,
GFP_ATOMIC))
@@ -3687,7 +3706,7 @@ normal:
goto err;
}
- *nskb_frag = *frag;
+ *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
__skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
@@ -4179,7 +4198,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ sk->sk_error_report(sk);
return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);
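
Error-queue skbs now wake the socket via sk_error_report(), i.e. the event
surfaces to userspace as POLLERR rather than as readable data. A userspace
sketch of the consuming side (error handling elided):

    #include <poll.h>
    #include <sys/socket.h>

    static void drain_errqueue(int fd)
    {
        struct pollfd pfd = { .fd = fd };    /* POLLERR is always reported */
        char ctrl[512], data[256];
        struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
        struct msghdr msg = {
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = ctrl, .msg_controllen = sizeof(ctrl),
        };

        if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLERR))
            recvmsg(fd, &msg, MSG_ERRQUEUE);
    }
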
@@ -4891,7 +4910,7 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet);
*
* The MAC/L2 or network (IP, IPv6) headers are not accounted for.
*/
-unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
unsigned int thlen = 0;
@@ -4904,7 +4923,7 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
- } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
+ } else if (unlikely(skb_is_gso_sctp(skb))) {
thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
@@ -4913,7 +4932,40 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
*/
return thlen + shinfo->gso_size;
}
-EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+
+/**
+ * skb_gso_network_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_network_seglen is used to determine the real size of the
+ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
+ *
+ * The MAC/L2 header is not accounted for.
+ */
+static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) -
+ skb_network_header(skb);
+
+ return hdr_len + skb_gso_transport_seglen(skb);
+}
+
+/**
+ * skb_gso_mac_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_mac_seglen is used to determine the real size of the
+ * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
+ * headers (TCP/UDP).
+ */
+static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+ return hdr_len + skb_gso_transport_seglen(skb);
+}
/**
* skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
@@ -4955,19 +5007,20 @@ static inline bool skb_gso_size_check(const struct sk_buff *skb,
}
/**
- * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
*
* @skb: GSO skb
* @mtu: MTU to validate against
*
- * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
- * once split.
+ * skb_gso_validate_network_len validates if a given skb will fit a
+ * wanted MTU once split. It considers L3 headers, L4 headers, and the
+ * payload.
*/
-bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
+bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
{
return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
}
-EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
+EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
/**
* skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
@@ -4986,13 +5039,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
+ int mac_len;
+
if (skb_cow(skb, skb_headroom(skb)) < 0) {
kfree_skb(skb);
return NULL;
}
- memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
- 2 * ETH_ALEN);
+ mac_len = skb->data - skb_mac_header(skb);
+ memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+ mac_len - VLAN_HLEN - ETH_TLEN);
skb->mac_header += VLAN_HLEN;
return skb;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 04e5e27c9b81..6444525f610c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1049,16 +1049,18 @@ set_rcvbuf:
break;
case SO_ZEROCOPY:
- if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ } else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
- else if (sk->sk_protocol != IPPROTO_TCP)
- ret = -ENOTSUPP;
- else if (sk->sk_state != TCP_CLOSE)
- ret = -EBUSY;
- else if (val < 0 || val > 1)
- ret = -EINVAL;
- else
- sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
+ if (!ret) {
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
break;
default:
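
The restructured check admits PF_RDS sockets alongside TCP while still
restricting the value to 0 or 1; note the TCP_CLOSE test is gone, so TCP
sockets may toggle the flag after establishment. From userspace the knob is
unchanged; a minimal sketch, assuming a libc that defines SO_ZEROCOPY:

    #include <sys/socket.h>

    static int enable_zerocopy(int fd)
    {
        int one = 1;

        return setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
    }
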
@@ -1774,7 +1776,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
u32 max_segs = 1;
sk_dst_set(sk, dst);
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
@@ -2235,6 +2237,67 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
EXPORT_SYMBOL(sk_page_frag_refill);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
+ int first_coalesce)
+{
+ int sg_curr = *sg_curr_index, use = 0, rc = 0;
+ unsigned int size = *sg_curr_size;
+ struct page_frag *pfrag;
+ struct scatterlist *sge;
+
+ len -= size;
+ pfrag = sk_page_frag(sk);
+
+ while (len > 0) {
+ unsigned int orig_offset;
+
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ use = min_t(int, len, pfrag->size - pfrag->offset);
+
+ if (!sk_wmem_schedule(sk, use)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sk_mem_charge(sk, use);
+ size += use;
+ orig_offset = pfrag->offset;
+ pfrag->offset += use;
+
+ sge = sg + sg_curr - 1;
+ if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
+ sg->offset + sg->length == orig_offset) {
+ sg->length += use;
+ } else {
+ sge = sg + sg_curr;
+ sg_unmark_end(sge);
+ sg_set_page(sge, pfrag->page, use, orig_offset);
+ get_page(pfrag->page);
+ sg_curr++;
+
+ if (sg_curr == MAX_SKB_FRAGS)
+ sg_curr = 0;
+
+ if (sg_curr == sg_start) {
+ rc = -ENOSPC;
+ break;
+ }
+ }
+
+ len -= use;
+ }
+out:
+ *sg_curr_size = size;
+ *sg_curr_index = sg_curr;
+ return rc;
+}
+EXPORT_SYMBOL(sk_alloc_sg);
+
static void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
@@ -3262,6 +3325,27 @@ void proto_unregister(struct proto *prot)
}
EXPORT_SYMBOL(proto_unregister);
+int sock_load_diag_module(int family, int protocol)
+{
+ if (!protocol) {
+ if (!sock_is_registered(family))
+ return -ENOENT;
+
+ return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family);
+ }
+
+#ifdef CONFIG_INET
+ if (family == AF_INET &&
+ !rcu_access_pointer(inet_protos[protocol]))
+ return -ENOENT;
+#endif
+
+ return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family, protocol);
+}
+EXPORT_SYMBOL(sock_load_diag_module);
+
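
The request_module() strings built here are matched by aliases the individual
diag modules export. As a hedged example, the AF_INET handler advertises
itself roughly like this (the macro lives in include/linux/net.h):

    MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
                                   2 /* AF_INET */);
    /* expands to approximately MODULE_ALIAS("net-pf-16-proto-4-type-2") */
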
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(proto_list_mutex)
@@ -3370,7 +3454,7 @@ static const struct file_operations proto_seq_fops = {
static __net_init int proto_init_net(struct net *net)
{
- if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
+ if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 146b50e30659..c37b5be7c5e4 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -220,8 +220,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
if (sock_diag_handlers[req->sdiag_family] == NULL)
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, req->sdiag_family);
+ sock_load_diag_module(req->sdiag_family, 0);
mutex_lock(&sock_diag_table_mutex);
hndl = sock_diag_handlers[req->sdiag_family];
@@ -247,8 +246,7 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
case TCPDIAG_GETSOCK:
case DCCPDIAG_GETSOCK:
if (inet_rcv_compat == NULL)
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ sock_load_diag_module(AF_INET, 0);
mutex_lock(&sock_diag_table_mutex);
if (inet_rcv_compat != NULL)
@@ -281,14 +279,12 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET6);
+ sock_load_diag_module(AF_INET6, 0);
break;
}
return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f2d0462611c3..b3b609f0eeb5 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
+int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
+EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
+
#ifdef CONFIG_RPS
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
+ {
+ .procname = "fb_tunnels_only_for_init_net",
+ .data = &sysctl_fb_tunnels_only_for_init_net,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 15bdc002d90c..84cd4e3fd01b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -794,6 +794,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (skb == NULL)
goto out_release;
+ if (sk->sk_state == DCCP_CLOSED) {
+ rc = -ENOTCONN;
+ goto out_discard;
+ }
+
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc != 0)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 45cb5bea884b..32751602767f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1336,6 +1336,12 @@ static int dn_setsockopt(struct socket *sock, int level, int optname, char __use
lock_sock(sk);
err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
release_sock(sk);
+#ifdef CONFIG_NETFILTER
+ /* we need to exclude all possible ENOPROTOOPTs except the default case */
+ if (err == -ENOPROTOOPT && optname != DSO_LINKINFO &&
+ optname != DSO_STREAM && optname != DSO_SEQPACKET)
+ err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
+#endif
return err;
}
@@ -1443,15 +1449,6 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
break;
- default:
-#ifdef CONFIG_NETFILTER
- return nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
-#endif
- case DSO_LINKINFO:
- case DSO_STREAM:
- case DSO_SEQPACKET:
- return -ENOPROTOOPT;
-
case DSO_MAXWINDOW:
if (optlen != sizeof(unsigned long))
return -EINVAL;
@@ -1499,6 +1496,12 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
return -EINVAL;
scp->info_loc = u.info;
break;
+
+ case DSO_LINKINFO:
+ case DSO_STREAM:
+ case DSO_SEQPACKET:
+ default:
+ return -ENOPROTOOPT;
}
return 0;
@@ -1512,6 +1515,20 @@ static int dn_getsockopt(struct socket *sock, int level, int optname, char __use
lock_sock(sk);
err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
release_sock(sk);
+#ifdef CONFIG_NETFILTER
+ if (err == -ENOPROTOOPT && optname != DSO_STREAM &&
+ optname != DSO_SEQPACKET && optname != DSO_CONACCEPT &&
+ optname != DSO_CONREJECT) {
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
+ if (err >= 0)
+ err = put_user(len, optlen);
+ }
+#endif
return err;
}
@@ -1577,26 +1594,6 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
r_data = &link;
break;
- default:
-#ifdef CONFIG_NETFILTER
- {
- int ret, len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- ret = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
- if (ret >= 0)
- ret = put_user(len, optlen);
- return ret;
- }
-#endif
- case DSO_STREAM:
- case DSO_SEQPACKET:
- case DSO_CONACCEPT:
- case DSO_CONREJECT:
- return -ENOPROTOOPT;
-
case DSO_MAXWINDOW:
if (r_len > sizeof(unsigned long))
r_len = sizeof(unsigned long);
@@ -1628,6 +1625,13 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
r_len = sizeof(unsigned char);
r_data = &scp->info_rem;
break;
+
+ case DSO_STREAM:
+ case DSO_SEQPACKET:
+ case DSO_CONACCEPT:
+ case DSO_CONREJECT:
+ default:
+ return -ENOPROTOOPT;
}
if (r_data) {
@@ -2379,7 +2383,7 @@ static int __init decnet_init(void)
dev_add_pack(&dn_dix_packet_type);
register_netdevice_notifier(&dn_dev_notifier);
- proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops);
+ proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops);
dn_register_sysctl();
out:
return rc;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c9f5e1ebb9c8..c03b046478c3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1424,7 +1424,7 @@ void __init dn_dev_init(void)
rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
NULL, dn_nl_dump_ifaddr, 0);
- proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
+ proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops);
#ifdef CONFIG_SYSCTL
{
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 6e37d9e6345e..13156165afa3 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -608,7 +608,7 @@ static const struct file_operations dn_neigh_seq_fops = {
void __init dn_neigh_init(void)
{
neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
- proc_create("decnet_neigh", S_IRUGO, init_net.proc_net,
+ proc_create("decnet_neigh", 0444, init_net.proc_net,
&dn_neigh_seq_fops);
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ef20b8e31669..eca0cc6b761f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1918,7 +1918,7 @@ void __init dn_route_init(void)
dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
- proc_create("decnet_cache", S_IRUGO, init_net.proc_net,
+ proc_create("decnet_cache", 0444, init_net.proc_net,
&dn_rt_cache_seq_fops);
#ifdef CONFIG_DECNET_ROUTER
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index e1d4d898a007..8396705deffc 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -38,7 +38,7 @@ MODULE_AUTHOR("Wang Lei");
MODULE_LICENSE("GPL");
unsigned int dns_resolver_debug;
-module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug, dns_resolver_debug, uint, 0644);
MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
const struct cred *dns_resolver_cache;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6a9d0f50fbee..e63c554e0623 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
@@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
+/* Determine if we should defer delivery of the skb until we have an
+ * rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However, in a DSA
+ * switch the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ unsigned int type;
+
+ if (skb_headroom(skb) < ETH_HLEN)
+ return false;
+
+ __skb_push(skb, ETH_HLEN);
+
+ type = ptp_classify_raw(skb);
+
+ __skb_pull(skb, ETH_HLEN);
+
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ if (likely(ds->ops->port_rxtstamp))
+ return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+ return false;
+}
+
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *unused)
{
@@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
s->rx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ if (dsa_skb_defer_rx_timestamp(p, skb))
+ return 0;
+
netif_receive_skb(skb);
return 0;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index cb54b81d0bd9..42a7b85b84e1 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -194,7 +194,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
ds->ports[i].dn = cd->port_dn[i];
ds->ports[i].cpu_dp = dst->cpu_dp;
- if (dsa_is_user_port(ds, i))
+ if (!dsa_is_user_port(ds, i))
continue;
ret = dsa_slave_create(&ds->ports[i]);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 00589147f042..90e6df0351eb 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
count += ops->get_sset_count(dev, sset);
if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, cpu_dp->index);
return count;
}
@@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
* constructed earlier
*/
ds->ops->get_strings(ds, port, ndata);
- count = ds->ops->get_sset_count(ds);
+ count = ds->ops->get_sset_count(ds, port);
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f52307296de4..18561af7a8f1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
#include "dsa_priv.h"
@@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+ int port = p->dp->index;
+
+ /* Pass through to switch driver if it supports timestamping */
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ if (ds->ops->port_hwtstamp_get)
+ return ds->ops->port_hwtstamp_get(ds, port, ifr);
+ break;
+ case SIOCSHWTSTAMP:
+ if (ds->ops->port_hwtstamp_set)
+ return ds->ops->port_hwtstamp_set(ds, port, ifr);
+ break;
+ }
+
if (!dev->phydev)
return -ENODEV;
@@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
return NETDEV_TX_OK;
}
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ struct sk_buff *clone;
+ unsigned int type;
+
+ type = ptp_classify_raw(skb);
+ if (type == PTP_CLASS_NONE)
+ return;
+
+ if (!ds->ops->port_txtstamp)
+ return;
+
+ clone = skb_clone_sk(skb);
+ if (!clone)
+ return;
+
+ if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+ return;
+
+ kfree_skb(clone);
+}
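
The clone handed to port_txtstamp() follows a strict ownership rule visible
above: a true return transfers the clone to the switch driver, which must
later complete or free it; on false the core frees it immediately. A hedged
driver-side stub; my_ds_priv and its deferral queue are hypothetical:

    static bool my_port_txtstamp(struct dsa_switch *ds, int port,
                                 struct sk_buff *clone, unsigned int type)
    {
        struct my_ds_priv *priv = ds->priv;

        if (type != PTP_CLASS_V2_L2)
            return false;    /* core frees the clone */

        /* Defer: a worker will read the hardware timestamp, attach it
         * to the clone and complete it, e.g. with
         * skb_complete_tx_timestamp().
         */
        skb_queue_tail(&priv->tx_tstamp_queue, clone);
        schedule_work(&priv->tx_tstamp_work);
        return true;         /* driver now owns the clone */
    }
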
+
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
s->tx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ /* Identify PTP protocol packets, clone them, and pass them to the
+ * switch driver.
+ */
+ dsa_skb_tx_timestamp(p, skb);
+
/* Transmit function may have to reallocate the original SKB,
* in which case it must have freed it. Only free it here on error.
*/
@@ -559,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
count = 4;
if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, dp->index);
return count;
}
@@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
+static int dsa_slave_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *ts)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+
+ if (!ds->ops->get_ts_info)
+ return -EOPNOTSUPP;
+
+ return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
@@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
+ .get_ts_info = dsa_slave_get_ts_info,
};
/* legacy way, bypassing the bridge *****************************************/
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 974765b7d92a..275449b0d633 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
@@ -206,9 +207,13 @@ static inline void lowpan_netlink_fini(void)
static int lowpan_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct wpan_dev *wpan_dev;
- if (wdev->type != ARPHRD_IEEE802154)
+ if (ndev->type != ARPHRD_IEEE802154)
+ return NOTIFY_DONE;
+ wpan_dev = ndev->ieee802154_ptr;
+ if (!wpan_dev)
return NOTIFY_DONE;
switch (event) {
@@ -217,8 +222,8 @@ static int lowpan_device_event(struct notifier_block *unused,
* also delete possible lowpan interfaces which belongs
* to the wpan interface.
*/
- if (wdev->ieee802154_ptr->lowpan_dev)
- lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
+ if (wpan_dev->lowpan_dev)
+ lowpan_dellink(wpan_dev->lowpan_dev, NULL);
break;
default:
return NOTIFY_DONE;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7e8c..80dad301361d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.
+config IP_MROUTE_COMMON
+ bool
+ depends on IP_MROUTE || IPV6_MROUTE
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
+ select IP_MROUTE_COMMON
help
This is used if you want your machine to act as a router for IP
packets that have several destination addresses. It is needed on the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 47a0a6649a9d..a07b7dd06def 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f28f06c91ead..be4c595edccb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1434,7 +1434,7 @@ static const struct file_operations arp_seq_fops = {
static int __net_init arp_net_init(struct net *net)
{
- if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops))
+ if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops))
return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a62ad4..737d11bc8838 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tos && (r->tos != fl4->flowi4_tos))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ return 0;
+
return 1;
}
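
The new L4 matches lean on two small predicates from include/net/fib_rules.h;
a hedged sketch of what they check (the in-tree versions may differ in
detail):

    static inline bool
    fib_rule_port_range_set(const struct fib_rule_port_range *r)
    {
        return r->start != 0 && r->end != 0;
    }

    static inline bool
    fib_rule_port_inrange(const struct fib_rule_port_range *r, __be16 port)
    {
        return ntohs(port) >= r->start && ntohs(port) <= r->end;
    }
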
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
#endif
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect++;
+
rule4->src_len = frh->src_len;
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+
+ if (net->ipv4.fib_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect--;
errout:
return err;
}
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
goto fail;
net->ipv4.rules_ops = ops;
net->ipv4.fib_has_custom_rules = false;
+ net->ipv4.fib_rules_require_fldissect = 0;
return 0;
fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c586597da20d..e7c602c600ac 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -646,6 +646,11 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
fi->fib_nh, cfg, extack))
return 1;
}
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ if (cfg->fc_flow &&
+ cfg->fc_flow != fi->fib_nh->nh_tclassid)
+ return 1;
+#endif
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
(!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
return 0;
@@ -1760,14 +1765,12 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
- bool oif_check;
-
- oif_check = (fl4->flowi4_oif == 0 ||
- fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+ if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+ goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1 && oif_check) {
- int h = fib_multipath_hash(res->fi, fl4, skb);
+ if (res->fi->fib_nhs > 1) {
+ int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
@@ -1775,10 +1778,10 @@ void fib_select_path(struct net *net, struct fib_result *res,
#endif
if (!res->prefixlen &&
res->table->tb_num_default > 1 &&
- res->type == RTN_UNICAST && oif_check)
+ res->type == RTN_UNICAST)
fib_select_default(fl4, res);
+check_saddr:
if (!fl4->saddr)
fl4->saddr = FIB_RES_PREFSRC(net, *res);
}
-EXPORT_SYMBOL_GPL(fib_select_path);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5530cd6fdbc7..fac0b73e24d1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,6 +50,7 @@
#define VERSION "0.409"
+#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
*/
static const int sync_pages = 128;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
static inline struct tnode *tn_info(struct key_vector *kv)
{
@@ -2721,14 +2722,14 @@ static const struct file_operations fib_route_fops = {
int __net_init fib_proc_init(struct net *net)
{
- if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
+ if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops))
goto out1;
- if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
+ if (!proc_create("fib_triestat", 0444, net->proc_net,
&fib_triestat_fops))
goto out2;
- if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
+ if (!proc_create("route", 0444, net->proc_net, &fib_route_fops))
goto out3;
return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f2402581fef1..b26a81a7de42 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2993,10 +2993,10 @@ static int __net_init igmp_net_init(struct net *net)
struct proc_dir_entry *pde;
int err;
- pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops);
+ pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops);
if (!pde)
goto out_igmp;
- pde = proc_create("mcfilter", S_IRUGO, net->proc_net,
+ pde = proc_create("mcfilter", 0444, net->proc_net,
&igmp_mcf_seq_fops);
if (!pde)
goto out_mcfilter;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a383f299ce24..4e5bc4b2f14e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -53,8 +53,7 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
if (!inet_diag_table[proto])
- request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET, proto);
+ sock_load_diag_module(AF_INET, proto);
mutex_lock(&inet_diag_table_mutex);
if (!inet_diag_table[proto])
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 26a3d0315728..e8ec28999f5c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -119,6 +119,9 @@ out:
static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
{
+ if (!hlist_unhashed(&q->list_evictor))
+ return false;
+
return q->net->low_thresh == 0 ||
frag_mem_limit(q->net) >= q->net->low_thresh;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 914d56928578..1f04bd91fc2e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -6,6 +6,7 @@
* Authors: Andrey V. Savochkin <[email protected]>
*/
+#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -51,7 +52,7 @@
* daddr: unchangeable
*/
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
void inet_peer_base_init(struct inet_peer_base *bp)
{
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 2dd21c3281a1..b54b948b0596 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -55,7 +55,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 45d97e9b2759..a8772a978224 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -522,6 +522,7 @@ err_free_skb:
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
@@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
goto err_free_rt;
- flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = tun_info->key.tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -970,9 +973,6 @@ static void __gre_tunnel_init(struct net_device *dev)
t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
- dev->mtu = ETH_DATA_LEN - t_hlen - 4;
-
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -1290,8 +1290,6 @@ static int erspan_tunnel_init(struct net_device *dev)
erspan_hdr_len(tunnel->erspan_ver);
t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
- dev->mtu = ETH_DATA_LEN - t_hlen - 4;
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
@@ -1322,6 +1320,12 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
+bool is_gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 57fc13c6ab2b..7582713dd18f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
+ for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
struct sock *sk = ra->sk;
/* If socket is bound to an interface, only report
@@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
*/
if (sk && inet_sk(sk)->inet_num == protocol &&
(!sk->sk_bound_dev_if ||
- sk->sk_bound_dev_if == dev->ifindex) &&
- net_eq(sock_net(sk), net)) {
+ sk->sk_bound_dev_if == dev->ifindex)) {
if (ip_is_fragment(ip_hdr(skb))) {
if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
return true;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e8e675be60ec..66340ab750e6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -248,7 +248,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
/* common case: seglen is <= mtu
*/
- if (skb_gso_validate_mtu(skb, mtu))
+ if (skb_gso_validate_network_len(skb, mtu))
return ip_finish_output2(net, sk, skb);
/* Slowpath - GSO segment length exceeds the egress MTU.
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 008be04ac1cc..5ad2d8ed3a3f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
return -EINVAL;
- ipc->oif = src_info->ipi6_ifindex;
+ if (src_info->ipi6_ifindex)
+ ipc->oif = src_info->ipi6_ifindex;
ipc->addr = src_info->ipi6_addr.s6_addr32[3];
continue;
}
@@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
return -EINVAL;
info = (struct in_pktinfo *)CMSG_DATA(cmsg);
- ipc->oif = info->ipi_ifindex;
+ if (info->ipi_ifindex)
+ ipc->oif = info->ipi_ifindex;
ipc->addr = info->ipi_spec_dst.s_addr;
break;
}
@@ -320,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
return 0;
}
-
-/* Special input handler for packets caught by router alert option.
- They are selected only by protocol field, and then processed likely
- local ones; but only if someone wants them! Otherwise, router
- not running rsvpd will kill RSVP.
-
- It is user level problem, what it will make with them.
- I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
- but receiver should be enough clever f.e. to forward mtrace requests,
- sent to multicast group to reach destination designated router.
- */
-struct ip_ra_chain __rcu *ip_ra_chain;
-
-
static void ip_ra_destroy_rcu(struct rcu_head *head)
{
struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
@@ -347,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on,
{
struct ip_ra_chain *ra, *new_ra;
struct ip_ra_chain __rcu **rap;
+ struct net *net = sock_net(sk);
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
- for (rap = &ip_ra_chain;
- (ra = rtnl_dereference(*rap)) != NULL;
+ mutex_lock(&net->ipv4.ra_mutex);
+ for (rap = &net->ipv4.ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
+ mutex_unlock(&net->ipv4.ra_mutex);
kfree(new_ra);
return -EADDRINUSE;
}
/* don't let ip_call_ra_chain() use sk again */
ra->sk = NULL;
RCU_INIT_POINTER(*rap, ra->next);
+ mutex_unlock(&net->ipv4.ra_mutex);
if (ra->destructor)
ra->destructor(sk);
@@ -377,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return 0;
}
}
- if (!new_ra)
+ if (!new_ra) {
+ mutex_unlock(&net->ipv4.ra_mutex);
return -ENOBUFS;
+ }
new_ra->sk = sk;
new_ra->destructor = destructor;
RCU_INIT_POINTER(new_ra->next, ra);
rcu_assign_pointer(*rap, new_ra);
sock_hold(sk);
+ mutex_unlock(&net->ipv4.ra_mutex);
return 0;
}
@@ -584,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname)
case MCAST_LEAVE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_UNBLOCK_SOURCE:
- case IP_ROUTER_ALERT:
return true;
}
return false;
@@ -637,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
/* If optlen==0, it is equivalent to val == 0 */
+ if (optname == IP_ROUTER_ALERT)
+ return ip_ra_control(sk, val ? 1 : 0, NULL);
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
@@ -1147,9 +1144,6 @@ mc_msf_out:
goto e_inval;
inet->mc_all = val;
break;
- case IP_ROUTER_ALERT:
- err = ip_ra_control(sk, val ? 1 : 0, NULL);
- break;
case IP_FREEBIND:
if (optlen < 1)
@@ -1567,10 +1561,7 @@ int ip_getsockopt(struct sock *sk, int level,
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
- err = nf_getsockopt(sk, PF_INET, optname, optval,
- &len);
- release_sock(sk);
+ err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
if (err >= 0)
err = put_user(len, optlen);
return err;
@@ -1602,9 +1593,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
- release_sock(sk);
if (err >= 0)
err = put_user(len, optlen);
return err;
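
Note: the ip_ra_control() rework above swaps the RTNL dependency for a per-namespace ra_mutex: writers serialize on the mutex, while readers in ip_call_ra_chain() keep walking the list under RCU. A userspace sketch of that writer pattern, with a pthread mutex and a GCC release store standing in for the kernel primitives:

#include <pthread.h>
#include <stdlib.h>

struct ra_node {
	struct ra_node *next;
	int sk;
};

static pthread_mutex_t ra_mutex = PTHREAD_MUTEX_INITIALIZER; /* net->ipv4.ra_mutex role */
static struct ra_node *ra_chain;

static int ra_add(int sk)
{
	struct ra_node *n = malloc(sizeof(*n));

	if (!n)
		return -1;
	n->sk = sk;
	pthread_mutex_lock(&ra_mutex);
	n->next = ra_chain;
	/* publish like rcu_assign_pointer(): fields first, then release-store */
	__atomic_store_n(&ra_chain, n, __ATOMIC_RELEASE);
	pthread_mutex_unlock(&ra_mutex);
	return 0;
}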
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d786a8441bce..5fcb17cb426b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -290,22 +290,6 @@ failed:
return ERR_PTR(err);
}
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
- fl4->flowi4_mark = mark;
-}
-
static int ip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+ iph->saddr, tunnel->parms.o_key,
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -363,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct net_device *dev;
int t_hlen;
- BUG_ON(!itn->fb_tunnel_dev);
- dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+ dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
if (IS_ERR(dev))
return ERR_CAST(dev);
@@ -581,8 +564,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -710,16 +693,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- if (tunnel->fwmark) {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
- }
- else {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- skb->mark);
- }
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
@@ -845,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
struct net *net = t->net;
struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
- BUG_ON(!itn->fb_tunnel_dev);
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == itn->fb_tunnel_dev) {
@@ -870,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
p->o_key = 0;
}
- t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+ t = ip_tunnel_find(itn, p, itn->type);
if (cmd == SIOCADDTUNNEL) {
if (!t) {
@@ -1014,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct ip_tunnel_parm parms;
unsigned int i;
+ itn->rtnl_link_ops = ops;
for (i = 0; i < IP_TNL_HASH_SIZE; i++)
INIT_HLIST_HEAD(&itn->tunnels[i]);
- if (!ops) {
+ if (!ops || !net_has_fallback_tunnels(net)) {
+ struct ip_tunnel_net *it_init_net;
+
+ it_init_net = net_generic(&init_net, ip_tnl_net_id);
+ itn->type = it_init_net->type;
itn->fb_tunnel_dev = NULL;
return 0;
}
@@ -1035,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ itn->type = itn->fb_tunnel_dev->type;
}
rtnl_unlock();
@@ -1042,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
+ struct list_head *head,
struct rtnl_link_ops *ops)
{
- struct net *net = dev_net(itn->fb_tunnel_dev);
struct net_device *dev, *aux;
int h;
@@ -1077,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(itn, &list, ops);
+ ip_tunnel_destroy(net, itn, &list, ops);
}
unregister_netdevice_many(&list);
rtnl_unlock();
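
Note: the removed static init_tunnel_flow() reappears at its call sites as ip_tunnel_init_flow(); presumably the definition moved to a shared header unchanged, i.e. roughly:

static inline void ip_tunnel_init_flow(struct flowi4 *fl4, int proto,
				       __be32 daddr, __be32 saddr,
				       __be32 key, __u8 tos, int oif,
				       __u32 mark)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
	fl4->flowi4_mark = mark;
}

The ip_tunnel_xmit() hunk also collapses the old fwmark/skb->mark branches into a single call that passes tunnel->fwmark.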
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f75802ad960f..43f620feb1c4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1369,7 +1369,7 @@ static int __init ip_auto_config(void)
unsigned int i;
#ifdef CONFIG_PROC_FS
- proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops);
+ proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
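
Note: the S_IRUGO to 0444 conversion is mechanical; the octal literal spells out the same mode bits. A C11 compile-time check of the equivalences used in this series (0444 here, 0600 in ipt_CLUSTERIP.c below):

#include <assert.h>
#include <sys/stat.h>

int main(void)
{
	static_assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444, "S_IRUGO");
	static_assert((S_IWUSR | S_IRUSR) == 0600, "owner read/write");
	return 0;
}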
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b05689bbba31..2fb4de3f7f66 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -28,9 +28,9 @@
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -52,7 +52,6 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
-#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
* In this case data path is free of exclusive locks at all.
*/
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
#define ipmr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv4.mr_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv4.mr_tables)
+ return NULL;
+ return ret;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv4.mrt;
+ return NULL;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
}
static const struct rhashtable_params ipmr_rht_params = {
- .head_offset = offsetof(struct mfc_cache, mnode),
+ .head_offset = offsetof(struct mr_mfc, mnode),
.key_offset = offsetof(struct mfc_cache, cmparg),
.key_len = sizeof(struct mfc_cache_cmp_arg),
.nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
.automatic_shrinking = true,
};
+static void ipmr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+ .rht_params = &ipmr_rht_params,
+ .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (mrt)
return mrt;
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return ERR_PTR(-ENOMEM);
- write_pnet(&mrt->net, net);
- mrt->id = id;
-
- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
- INIT_LIST_HEAD(&mrt->mfc_cache_list);
- INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
- mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
- return mrt;
+ return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+ ipmr_expire_process, ipmr_new_table_set);
}
static void ipmr_free_table(struct mr_table *mrt)
@@ -619,80 +644,22 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
}
#endif
-static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
- struct net *net,
- enum fib_event_type event_type,
- struct vif_device *vif,
- vifi_t vif_index, u32 tb_id)
-{
- struct vif_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .dev = vif->dev,
- .vif_index = vif_index,
- .vif_flags = vif->flags,
- .tb_id = tb_id,
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
static int call_ipmr_vif_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct vif_device *vif,
vifi_t vif_index, u32 tb_id)
{
- struct vif_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .dev = vif->dev,
- .vif_index = vif_index,
- .vif_flags = vif->flags,
- .tb_id = tb_id,
- };
-
- ASSERT_RTNL();
- net->ipv4.ipmr_seq++;
- return call_fib_notifiers(net, event_type, &info.info);
-}
-
-static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
- struct net *net,
- enum fib_event_type event_type,
- struct mfc_cache *mfc, u32 tb_id)
-{
- struct mfc_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .mfc = mfc,
- .tb_id = tb_id
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv4.ipmr_seq);
}
static int call_ipmr_mfc_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct mfc_cache *mfc, u32 tb_id)
{
- struct mfc_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .mfc = mfc,
- .tb_id = tb_id
- };
-
- ASSERT_RTNL();
- net->ipv4.ipmr_seq++;
- return call_fib_notifiers(net, event_type, &info.info);
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+ &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
}
/**
@@ -760,16 +727,15 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
- struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
- kmem_cache_free(mrt_cachep, c);
+ kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}
-void ipmr_cache_free(struct mfc_cache *c)
+static void ipmr_cache_free(struct mfc_cache *c)
{
- call_rcu(&c->rcu, ipmr_cache_free_rcu);
+ call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}
-EXPORT_SYMBOL(ipmr_cache_free);
/* Destroy an unresolved cache entry, killing queued skbs
* and reporting error to netlink readers.
@@ -782,7 +748,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
@@ -806,9 +772,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
static void ipmr_expire_process(struct timer_list *t)
{
struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
- unsigned long now;
+ struct mr_mfc *c, *next;
unsigned long expires;
- struct mfc_cache *c, *next;
+ unsigned long now;
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +796,8 @@ static void ipmr_expire_process(struct timer_list *t)
}
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
}
if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +808,7 @@ out:
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -944,6 +910,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit,
+ vifc->vifc_threshold,
+ vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+ (VIFF_TUNNEL | VIFF_REGISTER));
attr.orig_dev = dev;
if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +922,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
} else {
v->dev_parent_id.id_len = 0;
}
- v->rate_limit = vifc->vifc_rate_limit;
+
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
- v->flags = vifc->vifc_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
- v->link = dev_get_iflink(dev);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -988,33 +947,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- return c;
-
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
- int vifi)
-{
- struct mfc_cache_cmp_arg arg = {
- .mfc_mcastgrp = htonl(INADDR_ANY),
- .mfc_origin = htonl(INADDR_ANY)
- };
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
@@ -1025,25 +959,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = htonl(INADDR_ANY)
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
- goto skip;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
-
-skip:
- return ipmr_cache_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any(mrt, vifi, &arg);
}
/* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +974,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin,
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (parent == -1 || parent == c->mfc_parent)
- return c;
- return NULL;
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
/* Allocate a multicast cache entry */
@@ -1072,9 +984,10 @@ static struct mfc_cache *ipmr_cache_alloc(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c) {
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXVIFS;
- refcount_set(&c->mfc_un.res.refcount, 1);
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXVIFS;
+ c->_c.free = ipmr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
}
return c;
}
@@ -1084,8 +997,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c) {
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
}
return c;
}
@@ -1098,12 +1011,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct nlmsgerr *e;
/* Play the pending entries through our router */
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
@@ -1211,7 +1125,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
int err;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr) {
found = true;
@@ -1230,12 +1144,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
/* Fill in the new cache entry */
- c->mfc_parent = -1;
+ c->_c.mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
@@ -1248,15 +1163,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
- mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+ mod_timer(&mrt->ipmr_expire_timer,
+ c->_c.mfc_un.unres.expires);
}
/* See if we can append the packet */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
@@ -1264,7 +1180,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
skb->dev = dev;
skb->skb_iif = dev->ifindex;
}
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1286,11 +1202,11 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
rcu_read_unlock();
if (!c)
return -ENOENT;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+ list_del_rcu(&c->_c.list);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mr_cache_put(&c->_c);
return 0;
}
@@ -1299,6 +1215,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
struct mfcctl *mfc, int mrtsock, int parent)
{
struct mfc_cache *uc, *c;
+ struct mr_mfc *_uc;
bool found;
int ret;
@@ -1312,10 +1229,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
rcu_read_unlock();
if (c) {
write_lock_bh(&mrt_lock);
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
@@ -1333,28 +1250,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params);
if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c);
return ret;
}
- list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc_cache *)_uc;
if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
@@ -1377,7 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
struct net *net = read_pnet(&mrt->net);
- struct mfc_cache *c, *tmp;
+ struct mr_mfc *c, *tmp;
+ struct mfc_cache *cache;
LIST_HEAD(list);
int i;
@@ -1395,18 +1314,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
mrt->id);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
}
spin_unlock_bh(&mfc_unres_lock);
}
@@ -1420,7 +1341,7 @@ static void mrtsock_destruct(struct sock *sk)
struct net *net = sock_net(sk);
struct mr_table *mrt;
- ASSERT_RTNL();
+ rtnl_lock();
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1432,6 +1353,7 @@ static void mrtsock_destruct(struct sock *sk)
mroute_clean_tables(mrt, false);
}
}
+ rtnl_unlock();
}
/* Socket options and virtual interface manipulation. The whole
@@ -1496,8 +1418,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (sk != rcu_access_pointer(mrt->mroute_sk)) {
ret = -EACCES;
} else {
+ /* We need to unlock here because mrtsock_destruct takes
+ * care of rtnl itself and we can't change that due to
+ * the IP_ROUTER_ALERT setsockopt which runs without it.
+ */
+ rtnl_unlock();
ret = ip_ra_control(sk, 0, NULL);
- goto out_unlock;
+ goto out;
}
break;
case MRT_ADD_VIF:
@@ -1609,6 +1536,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
}
out_unlock:
rtnl_unlock();
+out:
return ret;
}
@@ -1698,9 +1626,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1700,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1926,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
- struct mfc_cache *cache, int local)
+ struct mfc_cache *c, int local)
{
int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
- vif = cache->mfc_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+ if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
}
@@ -2038,7 +1966,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto dont_forward;
}
- cache->mfc_un.res.wrong_if++;
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +1975,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
* large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
@@ -2061,33 +1990,33 @@ forward:
mrt->vif_table[vif].bytes_in += skb->len;
/* Forward the frame */
- if (cache->mfc_origin == htonl(INADDR_ANY) &&
- cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mfc_parent &&
+ true_vifi != c->_c.mfc_parent &&
ip_hdr(skb)->ttl >
- cache->mfc_un.res.ttls[cache->mfc_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mfc_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1;
- ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+ if ((c->mfc_origin != htonl(INADDR_ANY) ||
ct != true_vifi) &&
- ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+ ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, cache, psend);
+ skb2, c, psend);
}
psend = ct;
}
@@ -2099,9 +2028,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- cache, psend);
+ c, psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
return;
}
}
@@ -2299,62 +2228,6 @@ drop:
}
#endif
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
-
- if (c->mfc_flags & MFC_OFFLOAD)
- rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
- if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2285,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
read_lock(&mrt_lock);
- err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
@@ -2440,7 +2313,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2322,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
goto nla_put_failure;
- err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2462,6 +2335,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+ int flags)
+{
+ return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+ cmd, flags);
+}
+
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2490,7 +2371,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+ skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+ mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2634,62 +2516,8 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr_table *mrt;
- struct mfc_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_e = cb->args[1];
-
- rcu_read_lock();
- ipmr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = 0;
- s_e = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = 0;
- s_e = 0;
-next_table:
- t++;
- }
-done:
- rcu_read_unlock();
-
- cb->args[1] = e;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+ _ipmr_fill_mroute, &mfc_unres_lock);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2946,31 +2774,11 @@ out:
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
-{
- struct mr_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
-}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -2981,26 +2789,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2800,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2808,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const char *name = vif->dev ?
+ vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2823,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
- .next = ipmr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
.show = ipmr_vif_seq_show,
};
@@ -3041,7 +2831,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2841,8 @@ static const struct file_operations ipmr_vif_fops = {
.release = seq_release_net,
};
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
-{
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc;
-
- rcu_read_lock();
- it->cache = &mrt->mfc_cache_list;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- if (pos-- == 0)
- return mfc;
- rcu_read_unlock();
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
-
- it->cache = NULL;
- return NULL;
-}
-
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -3092,54 +2850,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc = v;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc_cache, list);
-
- if (it->cache == &mrt->mfc_unres_queue)
- goto end_of_list;
-
- /* exhausted cache_array, show unresolved */
- rcu_read_unlock();
- it->cache = &mrt->mfc_unres_queue;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_list)
- rcu_read_unlock();
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2862,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Group Origin Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
+ const struct mr_mfc_iter *it = seq->private;
const struct mr_table *mrt = it->mrt;
seq_printf(seq, "%08X %08X %-3hd",
(__force u32) mfc->mfc_mcastgrp,
(__force u32) mfc->mfc_origin,
- mfc->mfc_parent);
+ mfc->_c.mfc_parent);
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ n, mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -3185,15 +2896,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
@@ -3220,37 +2931,8 @@ static unsigned int ipmr_seq_read(struct net *net)
static int ipmr_dump(struct net *net, struct notifier_block *nb)
{
- struct mr_table *mrt;
- int err;
-
- err = ipmr_rules_dump(net, nb);
- if (err)
- return err;
-
- ipmr_for_each_table(mrt, net) {
- struct vif_device *v = &mrt->vif_table[0];
- struct mfc_cache *mfc;
- int vifi;
-
- /* Notifiy on table VIF entries */
- read_lock(&mrt_lock);
- for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
- if (!v->dev)
- continue;
-
- call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id);
- }
- read_unlock(&mrt_lock);
-
- /* Notify on table MFC entries */
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- call_ipmr_mfc_entry_notifier(nb, net,
- FIB_EVENT_ENTRY_ADD, mfc,
- mrt->id);
- }
-
- return 0;
+ return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
+ ipmr_mr_table_iter, &mrt_lock);
}
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
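
Note: the ipmr.c conversion plugs its family-specific pieces into the shared code through mr_table_ops. The struct itself is not shown in this diff; inferred from its use here (rhltable_init(), the rhltable_lookup() calls, and the (*,*) wildcard lookups in ipmr_base.c below), it presumably amounts to:

struct mr_table_ops {
	const struct rhashtable_params *rht_params;	/* family hash layout */
	void *cmparg_any;				/* (*,*) wildcard key */
};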
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644
index 000000000000..4fe97723b53f
--- /dev/null
+++ b/net/ipv4/ipmr_base.c
@@ -0,0 +1,365 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementations
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+ v->dev = NULL;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->rate_limit = rate_limit;
+ v->flags = flags;
+ v->threshold = threshold;
+ if (v->flags & get_iflink_mask)
+ v->link = dev_get_iflink(dev);
+ else
+ v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ struct mr_table *mrt;
+
+ mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+ if (!mrt)
+ return NULL;
+ mrt->id = id;
+ write_pnet(&mrt->net, net);
+
+ mrt->ops = *ops;
+ rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ INIT_LIST_HEAD(&mrt->mfc_cache_list);
+ INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+ timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+ mrt->mroute_reg_vif_num = -1;
+ table_set(mrt, net);
+ return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (parent == -1 || parent == c->mfc_parent)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+ *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c, *proxy;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ /* It's ok if the vifi is part of the static tree */
+ proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+ if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+ return c;
+ }
+
+ return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+ struct mr_table *mrt = iter->mrt;
+
+ for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = iter->mrt;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return mr_vif_seq_idx(net, iter, 0);
+
+ while (++iter->ct < mrt->maxvif) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ it->cache = &mrt->mfc_cache_list;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ if (pos-- == 0)
+ return mfc;
+ rcu_read_unlock();
+
+ spin_lock_bh(it->lock);
+ it->cache = &mrt->mfc_unres_queue;
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(it->lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *c = v;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return mr_mfc_seq_idx(net, seq->private, 0);
+
+ if (c->list.next != it->cache)
+ return list_entry(c->list.next, struct mr_mfc, list);
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ goto end_of_list;
+
+ /* exhausted cache_array, show unresolved */
+ rcu_read_unlock();
+ it->cache = &mrt->mfc_unres_queue;
+
+ spin_lock_bh(it->lock);
+ if (!list_empty(it->cache))
+ return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+ spin_unlock_bh(it->lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ unsigned long lastuse;
+ int ct;
+
+ /* If cache is unresolved, don't try to parse IIF and OIF */
+ if (c->mfc_parent >= MAXVIFS) {
+ rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+ return -ENOENT;
+ }
+
+ if (VIF_EXISTS(mrt, c->mfc_parent) &&
+ nla_put_u32(skb, RTA_IIF,
+ mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
+
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp_attr)
+ return -EMSGSIZE;
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+ struct vif_device *vif;
+
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (!nhp) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ vif = &mrt->vif_table[ct];
+ nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+
+ nla_nest_end(skb, mp_attr);
+
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+ RTA_PAD))
+ return -EMSGSIZE;
+
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+ struct mr_table *mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+ if (t < s_t)
+ goto next_table;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ if (e < s_e)
+ goto next_entry;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ goto done;
+next_entry:
+ e++;
+ }
+ e = 0;
+ s_e = 0;
+
+ spin_lock_bh(lock);
+ list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+ spin_unlock_bh(lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(lock);
+ e = 0;
+ s_e = 0;
+next_table:
+ t++;
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[1] = e;
+ cb->args[0] = t;
+
+ return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
+
+int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
+ int (*rules_dump)(struct net *net,
+ struct notifier_block *nb),
+ struct mr_table *(*mr_iter)(struct net *net,
+ struct mr_table *mrt),
+ rwlock_t *mrt_lock)
+{
+ struct mr_table *mrt;
+ int err;
+
+ err = rules_dump(net, nb);
+ if (err)
+ return err;
+
+ for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
+ struct vif_device *v = &mrt->vif_table[0];
+ struct mr_mfc *mfc;
+ int vifi;
+
+ /* Notify on table VIF entries */
+ read_lock(mrt_lock);
+ for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
+ if (!v->dev)
+ continue;
+
+ mr_call_vif_notifier(nb, net, family,
+ FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
+ }
+ read_unlock(mrt_lock);
+
+ /* Notify on table MFC entries */
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ mr_call_mfc_notifier(nb, net, family,
+ FIB_EVENT_ENTRY_ADD,
+ mfc, mrt->id);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(mr_dump);
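
Note: all of this common code works because struct mfc_cache now embeds the shared struct mr_mfc as its first member (_c), so pointers convert in both directions with a plain cast, as in ipmr_cache_free_rcu() and _ipmr_fill_mroute() above. A self-contained sketch of the idiom, with illustrative fields:

#include <stdio.h>

struct mr_mfc_sketch {
	int mfc_parent;
};

struct mfc_cache_sketch {
	struct mr_mfc_sketch _c;	/* must stay first for the casts to hold */
	unsigned int mfc_origin;
};

int main(void)
{
	struct mfc_cache_sketch c = { ._c.mfc_parent = -1, .mfc_origin = 1 };
	struct mr_mfc_sketch *common = &c._c;		/* upcast: take the member */
	struct mfc_cache_sketch *back =
		(struct mfc_cache_sketch *)common;	/* downcast: same address */

	printf("%d %u\n", back->_c.mfc_parent, back->mfc_origin);
	return 0;
}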
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 4ffe302f9b82..e3e420f3ba7b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -252,6 +252,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
}
if (table_base + v
!= arpt_next_entry(e)) {
+ if (unlikely(stackidx >= private->stacksize)) {
+ verdict = NF_DROP;
+ break;
+ }
jumpstack[stackidx++] = e;
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 9a71f3149507..e38395a8dcf2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -330,8 +330,13 @@ ipt_do_table(struct sk_buff *skb,
continue;
}
if (table_base + v != ipt_next_entry(e) &&
- !(e->ip.flags & IPT_F_GOTO))
+ !(e->ip.flags & IPT_F_GOTO)) {
+ if (unlikely(stackidx >= private->stacksize)) {
+ verdict = NF_DROP;
+ break;
+ }
jumpstack[stackidx++] = e;
+ }
e = get_entry(table_base, v);
continue;
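
Note: both hunks above (arp_tables.c and ip_tables.c) guard the jumpstack push: if a crafted ruleset nests deeper than the preallocated stacksize, the packet is dropped instead of writing past the array. The pattern, as a standalone sketch:

#include <stdbool.h>
#include <stddef.h>

struct rule;	/* opaque here */

static bool push_return_target(const struct rule **stack, size_t stacksize,
			       size_t *idx, const struct rule *e)
{
	if (*idx >= stacksize)	/* over-deep ruleset: caller drops the packet */
		return false;
	stack[(*idx)++] = e;
	return true;
}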
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 3a84a60f6b39..2c8d313ae216 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -107,12 +107,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
local_bh_disable();
if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
- list_del_rcu(&c->list);
- spin_unlock(&cn->lock);
- local_bh_enable();
-
- unregister_netdevice_notifier(&c->notifier);
-
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
@@ -120,6 +114,12 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
if (cn->procdir)
proc_remove(c->pde);
#endif
+ list_del_rcu(&c->list);
+ spin_unlock(&cn->lock);
+ local_bh_enable();
+
+ unregister_netdevice_notifier(&c->notifier);
+
return;
}
local_bh_enable();
@@ -154,8 +154,12 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
#endif
if (unlikely(!refcount_inc_not_zero(&c->refcount)))
c = NULL;
- else if (entry)
- refcount_inc(&c->entries);
+ else if (entry) {
+ if (unlikely(!refcount_inc_not_zero(&c->entries))) {
+ clusterip_config_put(c);
+ c = NULL;
+ }
+ }
}
rcu_read_unlock_bh();
@@ -228,7 +232,6 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
refcount_set(&c->refcount, 1);
- refcount_set(&c->entries, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
@@ -247,7 +250,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
- c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
+ c->pde = proc_create_data(buffer, 0600,
cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
@@ -259,8 +262,10 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
c->notifier.notifier_call = clusterip_netdev_event;
err = register_netdevice_notifier(&c->notifier);
- if (!err)
+ if (!err) {
+ refcount_set(&c->entries, 1);
return c;
+ }
#ifdef CONFIG_PROC_FS
proc_remove(c->pde);
@@ -269,7 +274,7 @@ err:
spin_lock_bh(&cn->lock);
list_del_rcu(&c->list);
spin_unlock_bh(&cn->lock);
- kfree(c);
+ clusterip_config_put(c);
return ERR_PTR(err);
}
@@ -492,12 +497,15 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return PTR_ERR(config);
}
}
- cipinfo->config = config;
ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0)
+ if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
+ clusterip_config_entry_put(par->net, config);
+ clusterip_config_put(config);
+ return ret;
+ }
if (!par->net->xt.clusterip_deprecated_warning) {
pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
@@ -505,6 +513,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
par->net->xt.clusterip_deprecated_warning = true;
}
+ cipinfo->config = config;
return ret;
}
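
Taken together, the CLUSTERIP hunks fix the config lifetime: c->entries is only initialized once register_netdevice_notifier() has succeeded, teardown removes the proc entry before unlinking, the error path frees via clusterip_config_put() instead of a bare kfree(), and lookups now take the entries count with refcount_inc_not_zero() so they cannot revive a config that is already being torn down. The acquire side, reduced to a sketch (struct cfg and cfg_put() are hypothetical):

	/* Two-counter lookup: object ref first, then the entries ref;
	 * back out cleanly if the entries count already hit zero.
	 */
	static struct cfg *cfg_find_get(struct cfg *c, bool want_entry)
	{
		if (!refcount_inc_not_zero(&c->refcount))
			return NULL;		/* object already dying */
		if (want_entry && !refcount_inc_not_zero(&c->entries)) {
			cfg_put(c);		/* drop the ref just taken */
			return NULL;
		}
		return c;
	}
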
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 270765236f5e..aaaf9a81fbc9 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -98,17 +98,15 @@ static int ecn_tg_check(const struct xt_tgchk_param *par)
const struct ipt_ECN_info *einfo = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
- if (einfo->operation & IPT_ECN_OP_MASK) {
- pr_info("unsupported ECN operation %x\n", einfo->operation);
+ if (einfo->operation & IPT_ECN_OP_MASK)
return -EINVAL;
- }
- if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
- pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
+
+ if (einfo->ip_ect & ~IPT_ECN_IP_MASK)
return -EINVAL;
- }
+
if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
(e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
- pr_info("cannot use TCP operations on a non-tcp rule\n");
+ pr_info_ratelimited("cannot use operation on non-tcp rule\n");
return -EINVAL;
}
return 0;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 8bd0d7b26632..e8bed3390e58 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -74,13 +74,13 @@ static int reject_tg_check(const struct xt_tgchk_param *par)
const struct ipt_entry *e = par->entryinfo;
if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
- pr_info("ECHOREPLY no longer supported.\n");
+ pr_info_ratelimited("ECHOREPLY no longer supported.\n");
return -EINVAL;
} else if (rejinfo->with == IPT_TCP_RESET) {
/* Must specify that it's a TCP packet */
if (e->ip.proto != IPPROTO_TCP ||
(e->ip.invflags & XT_INV_PROTO)) {
- pr_info("TCP_RESET invalid for non-tcp\n");
+ pr_info_ratelimited("TCP_RESET invalid for non-tcp\n");
return -EINVAL;
}
}
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 37fb9552e858..fd01f13c896a 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -105,14 +105,14 @@ static int rpfilter_check(const struct xt_mtchk_param *par)
const struct xt_rpfilter_info *info = par->matchinfo;
unsigned int options = ~XT_RPFILTER_OPTION_MASK;
if (info->flags & options) {
- pr_info("unknown options encountered");
+ pr_info_ratelimited("unknown options\n");
return -EINVAL;
}
if (strcmp(par->table, "mangle") != 0 &&
strcmp(par->table, "raw") != 0) {
- pr_info("match only valid in the \'raw\' "
- "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n",
+ par->table);
return -EINVAL;
}
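
The ipt_ECN, ipt_REJECT and ipt_rpfilter hunks are part of one sweep: checkentry error messages are reachable by any user who can load rules in a namespace, so plain pr_info() becomes pr_info_ratelimited() (or is dropped where the -EINVAL is self-explanatory) to keep a hostile ruleset from flooding the log. The replacement is signature-compatible; a minimal illustration (VALID_FLAGS is a hypothetical mask):

	static int example_check(unsigned int flags)
	{
		if (flags & ~VALID_FLAGS) {
			pr_info_ratelimited("unknown options\n");
			return -EINVAL;
		}
		return 0;
	}
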
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index 25d2975da156..0cd46bffa469 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -111,6 +111,7 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
default:
return -1;
}
+ csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
@@ -185,7 +186,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
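
skb_gso_validate_mtu() is replaced by skb_gso_validate_network_len(), which returns true when every segment that GSO would produce fits the given network-layer length; a GSO super-packet is therefore only "too big" if at least one resulting segment would be. The same substitution appears again below in xfrm4_output.c. In isolation, the DF-aware test looks roughly like:

	static bool exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
	{
		if (skb->len <= mtu)
			return false;
		if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
			return false;	/* all would-be segments fit */
		return true;
	}
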
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b8f0db54b197..05e47d777009 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1177,7 +1177,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = {
int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
{
struct proc_dir_entry *p;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
return -ENOMEM;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index dc5edc8f7564..adfb75340275 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -521,12 +521,12 @@ static const struct file_operations netstat_seq_fops = {
static __net_init int ip_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat", 0444, net->proc_net,
&sockstat_seq_fops))
goto out_sockstat;
- if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops))
+ if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops))
goto out_netstat;
- if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops))
+ if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops))
goto out_snmp;
return 0;
@@ -555,4 +555,3 @@ int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9b367fc48d7d..1b4d3355624a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout)
/*
* Raw sockets may have direct kernel references. Kill them.
*/
- rtnl_lock();
ip_ra_control(sk, 0, NULL);
- rtnl_unlock();
sk_common_release(sk);
}
@@ -1142,7 +1140,7 @@ static const struct file_operations raw_seq_fops = {
static __net_init int raw_init_net(struct net *net)
{
- if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops))
+ if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 49cc1c1df1ba..8322e479f299 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -128,10 +128,11 @@ static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
-static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
+static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+
/*
* Interface to generic destination cache.
*/
@@ -378,12 +379,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
{
struct proc_dir_entry *pde;
- pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
+ pde = proc_create("rt_cache", 0444, net->proc_net,
&rt_cache_seq_fops);
if (!pde)
goto err1;
- pde = proc_create("rt_cache", S_IRUGO,
+ pde = proc_create("rt_cache", 0444,
net->proc_net_stat, &rt_cpu_seq_fops);
if (!pde)
goto err2;
@@ -633,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
rt->rt_pmtu = fnhe->fnhe_pmtu;
+ rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
rt->dst.expires = fnhe->fnhe_expires;
if (fnhe->fnhe_gw) {
@@ -643,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
}
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
- u32 pmtu, unsigned long expires)
+ u32 pmtu, bool lock, unsigned long expires)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
@@ -680,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
- if (pmtu)
+ if (pmtu) {
fnhe->fnhe_pmtu = pmtu;
+ fnhe->fnhe_mtu_locked = lock;
+ }
fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -705,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
+ fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = expires;
/* Exception created; mark the cached routes for the nexthop
@@ -786,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, jiffies + ip_rt_gc_timeout);
+ 0, false,
+ jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -930,14 +936,23 @@ out_put_peer:
static int ip_error(struct sk_buff *skb)
{
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
struct rtable *rt = skb_rtable(skb);
+ struct net_device *dev = skb->dev;
+ struct in_device *in_dev;
struct inet_peer *peer;
unsigned long now;
struct net *net;
bool send;
int code;
+ if (netif_is_l3_master(skb->dev)) {
+ dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+ if (!dev)
+ goto out;
+ }
+
+ in_dev = __in_dev_get_rcu(dev);
+
/* IP on this device is disabled. */
if (!in_dev)
goto out;
@@ -999,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
struct fib_result res;
+ bool lock = false;
- if (dst_metric_locked(dst, RTAX_MTU))
+ if (ip_mtu_locked(dst))
return;
if (ipv4_mtu(dst) < mtu)
return;
- if (mtu < ip_rt_min_pmtu)
+ if (mtu < ip_rt_min_pmtu) {
+ lock = true;
mtu = ip_rt_min_pmtu;
+ }
if (rt->rt_pmtu == mtu &&
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -1017,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
- update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+ update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
jiffies + ip_rt_mtu_expires);
}
rcu_read_unlock();
@@ -1270,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = READ_ONCE(dst->dev->mtu);
- if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+ if (unlikely(ip_mtu_locked(dst))) {
if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
}
@@ -1383,7 +1401,7 @@ struct uncached_list {
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
-static void rt_add_uncached_list(struct rtable *rt)
+void rt_add_uncached_list(struct rtable *rt)
{
struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
@@ -1394,14 +1412,8 @@ static void rt_add_uncached_list(struct rtable *rt)
spin_unlock_bh(&ul->lock);
}
-static void ipv4_dst_destroy(struct dst_entry *dst)
+void rt_del_uncached_list(struct rtable *rt)
{
- struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
- struct rtable *rt = (struct rtable *) dst;
-
- if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
- kfree(p);
-
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;
@@ -1411,6 +1423,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
}
}
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
+ struct rtable *rt = (struct rtable *)dst;
+
+ if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
+ kfree(p);
+
+ rt_del_uncached_list(rt);
+}
+
void rt_flush_dev(struct net_device *dev)
{
struct net *net = dev_net(dev);
@@ -1506,9 +1529,9 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_is_input = 0;
rt->rt_iif = 0;
rt->rt_pmtu = 0;
+ rt->rt_mtu_locked = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
- rt->rt_table_id = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
@@ -1644,19 +1667,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_unlock_bh(&fnhe_lock);
}
-static void set_lwt_redirect(struct rtable *rth)
-{
- if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_output = rth->dst.output;
- rth->dst.output = lwtunnel_output;
- }
-
- if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_input = rth->dst.input;
- rth->dst.input = lwtunnel_input;
- }
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1739,15 +1749,13 @@ rt_cache:
}
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1763,44 +1771,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
+ const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
struct icmphdr _icmph;
- hash_keys->addrs.v4addrs.src = outer_iph->saddr;
- hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP))
- return;
+ goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- return;
+ goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- return;
+ goto out;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB)
- return;
+ goto out;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- return;
- hash_keys->addrs.v4addrs.src = inner_iph->saddr;
- hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ hash_keys->addrs.v4addrs.src = key_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
/* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
- struct net *net = fi->fib_net;
struct flow_keys hash_keys;
u32 mhash;
@@ -1824,13 +1833,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
+
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1846,17 +1862,17 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
return mhash >> 1;
}
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
@@ -1882,13 +1898,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
@@ -1947,6 +1964,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
@@ -1972,7 +1993,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
@@ -2014,8 +2035,6 @@ local_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res->type == RTN_UNREACHABLE) {
@@ -2244,8 +2263,6 @@ add:
return ERR_PTR(-ENOBUFS);
rth->rt_iif = orig_oif;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2267,7 +2284,7 @@ add:
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
return rth;
}
@@ -2529,6 +2546,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
rt->rt_pmtu = ort->rt_pmtu;
+ rt->rt_mtu_locked = ort->rt_mtu_locked;
rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
@@ -2631,6 +2649,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
if (rt->rt_pmtu && expires)
metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+ if (rt->rt_mtu_locked && expires)
+ metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
@@ -2775,7 +2795,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
- table_id = rt->rt_table_id;
+ table_id = res.table ? res.table->tb_id : 0;
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
if (!res.fi) {
@@ -2816,6 +2836,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
+static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
void __user *buffer,
@@ -2931,7 +2952,8 @@ static struct ctl_table ipv4_route_table[] = {
.data = &ip_rt_min_pmtu,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &ip_min_valid_pmtu,
},
{
.procname = "min_adv_mss",
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 93e172118a94..4b195bac8ac0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
return ret;
}
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {
- .procname = "udp_rmem_min",
- .data = &sysctl_udp_rmem_min,
- .maxlen = sizeof(sysctl_udp_rmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
- {
- .procname = "udp_wmem_min",
- .data = &sysctl_udp_wmem_min,
- .maxlen = sizeof(sysctl_udp_wmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
{ }
};
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
+ {
+ .procname = "udp_rmem_min",
+ .data = &init_net.ipv4.sysctl_udp_rmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+ {
+ .procname = "udp_wmem_min",
+ .data = &init_net.ipv4.sysctl_udp_wmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 48636aee23c3..0c31be306572 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);
@@ -897,7 +898,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
struct tcp_sock *tp = tcp_sk(sk);
u32 new_size_goal, size_goal;
- if (!large_allowed || !sk_can_gso(sk))
+ if (!large_allowed)
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
@@ -993,7 +994,9 @@ new_segment:
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
skb->len += copy;
skb->data_len += copy;
@@ -1062,8 +1065,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
- if (!(sk->sk_route_caps & NETIF_F_SG) ||
- !sk_check_csum_caps(sk))
+ if (!(sk->sk_route_caps & NETIF_F_SG))
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
@@ -1102,27 +1104,11 @@ static int linear_payload_sz(bool first_skb)
return 0;
}
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- int tmp = tp->mss_cache;
-
- if (sg) {
- if (zc)
- return 0;
-
- if (sk_can_gso(sk)) {
- tmp = linear_payload_sz(first_skb);
- } else {
- int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
- if (tmp >= pgbreak &&
- tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
- tmp = pgbreak;
- }
- }
-
- return tmp;
+ if (zc)
+ return 0;
+ return linear_payload_sz(first_skb);
}
void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1173,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool process_backlog = false;
- bool sg, zc = false;
+ bool zc = false;
long timeo;
flags = msg->msg_flags;
@@ -1205,7 +1191,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto out_err;
}
- zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
}
@@ -1268,18 +1254,12 @@ restart:
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
while (msg_data_left(msg)) {
int copy = 0;
- int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (skb) {
- if (skb->ip_summed == CHECKSUM_NONE)
- max = mss_now;
- copy = max - skb->len;
- }
+ if (skb)
+ copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
@@ -1297,22 +1277,17 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- linear = select_size(sk, sg, first_skb, zc);
+ linear = select_size(first_skb, zc);
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_memory;
process_backlog = true;
- /*
- * Check whether we can use HW checksum.
- */
- if (sk_check_csum_caps(sk))
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
copy = size_goal;
- max = size_goal;
/* All packets are restored as if they have
* already been sent. skb_mstamp isn't set to
@@ -1343,7 +1318,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1396,7 +1371,7 @@ new_segment:
goto out;
}
- if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+ if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
if (forced_push(tp)) {
@@ -3058,8 +3033,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
- 3 * nla_total_size(sizeof(u32)) +
- 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+ 5 * nla_total_size(sizeof(u32)) +
+ 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -3088,6 +3063,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+
+ nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+ nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
return stats;
}
@@ -3566,6 +3545,7 @@ int tcp_abort(struct sock *sk, int err)
bh_unlock_sock(sk);
local_bh_enable();
+ tcp_write_queue_purge(sk);
release_sock(sk);
return 0;
}
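
The tcp.c changes all follow from sk_route_forced_caps = NETIF_F_GSO: every TCP socket is now treated as GSO/SG-capable, so the CHECKSUM_NONE branches (select_size()'s sg logic, shrinking the per-skb budget from size_goal to mss_now, the csum-capability checks) disappear and every skb is CHECKSUM_PARTIAL. The per-iteration copy budget reduces to a single subtraction; with illustrative numbers, size_goal = 23168 and 20000 bytes already queued in the tail skb leaves copy = 3168 before a new segment is needed:

	/* Simplified copy budget after this patch (illustrative helper). */
	static int copy_budget(int size_goal, int queued_in_tail)
	{
		int copy = size_goal - queued_in_tail;

		return copy > 0 ? copy : 0;	/* 0: allocate a new skb */
	}
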
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c..158d105e76da 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
- tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:5,
+ unused:12,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
sk->sk_pacing_rate = rate;
}
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
{
- struct bbr *bbr = inet_csk_ca(sk);
-
- return bbr->tso_segs_goal;
+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bbr *bbr = inet_csk_ca(sk);
- u32 min_segs;
+ u32 segs, bytes;
+
+ /* Sort of tcp_tso_autosize() but ignoring
+ * driver provided sk_gso_max_size.
+ */
+ bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
- 0x7FU);
+ return min(segs, 0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
- cwnd += 3 * bbr->tso_segs_goal;
+ cwnd += 3 * bbr_tso_segs_goal(sk);
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
@@ -730,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
+ tcp_sk(sk)->snd_ssthresh =
+ bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -824,7 +827,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
- bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
@@ -834,7 +836,7 @@ static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
@@ -887,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
- return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
+ return tcp_sk(sk)->snd_ssthresh;
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
@@ -936,7 +938,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .tso_segs_goal = bbr_tso_segs_goal,
+ .min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
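
In tcp_bbr.c the cached 7-bit tso_segs_goal field is dropped; the goal is recomputed on demand from the pacing rate, and the congestion-ops hook narrows from tso_segs_goal to min_tso_segs. With illustrative inputs sk_pacing_rate = 3,000,000 B/s, sk_pacing_shift = 10 and mss_cache = 1448: bytes = 3000000 >> 10 = 2929, segs = max(2929 / 1448, 2) = 2, well under the 0x7F cap. The arithmetic, standalone (the GSO_MAX_SIZE clamp from the patch is omitted here for brevity):

	static unsigned int tso_segs_goal(unsigned long pacing_rate,
					  unsigned int pacing_shift,
					  unsigned int mss,
					  unsigned int min_segs)
	{
		unsigned long bytes = pacing_rate >> pacing_shift;
		unsigned int segs = bytes / mss;

		if (segs < min_segs)
			segs = min_segs;
		return segs < 0x7F ? segs : 0x7F;	/* same cap as the patch */
	}
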
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 7c843578f233..faddf4f9a707 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -6,7 +6,7 @@
* The algorithm is described in:
* "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
* for High-Speed Networks"
- * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
+ * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf
*
* Implemented from description in paper and ns-2 simulation.
* Copyright (C) 2007 Stephen Hemminger <[email protected]>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 575d3c1fb6e8..451ef3012636 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
int len;
int in_sack;
- if (!sk_can_gso(sk))
- goto fallback;
-
/* Normally R but no L won't result in plain S */
if (!dup_sack &&
(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
@@ -1971,11 +1968,6 @@ void tcp_enter_loss(struct sock *sk)
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
- *
- * In theory F-RTO can be used repeatedly during loss recovery.
- * In practice this interacts badly with broken middle-boxes that
- * falsely raise the receive window, which results in repeated
- * timeouts and stop-and-go behavior.
*/
tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
@@ -2631,18 +2623,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
tcp_try_undo_loss(sk, false))
return;
- /* The ACK (s)acks some never-retransmitted data meaning not all
- * the data packets before the timeout were lost. Therefore we
- * undo the congestion window and state. This is essentially
- * the operation in F-RTO (RFC5682 section 3.1 step 3.b). Since
- * a retransmitted skb is permanently marked, we can apply such an
- * operation even if F-RTO was not used.
- */
- if ((flag & FLAG_ORIG_SACK_ACKED) &&
- tcp_try_undo_loss(sk, tp->undo_marker))
- return;
-
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+ /* Step 3.b. A timeout is spurious if not all data are
+ * lost, i.e., never-retransmitted data are (s)acked.
+ */
+ if ((flag & FLAG_ORIG_SACK_ACKED) &&
+ tcp_try_undo_loss(sk, true))
+ return;
+
if (after(tp->snd_nxt, tp->high_seq)) {
if (flag & FLAG_DATA_SACKED || is_dupack)
tp->frto = 0; /* Step 3.a. loss was real */
@@ -4001,6 +3989,7 @@ void tcp_reset(struct sock *sk)
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
+ tcp_write_queue_purge(sk);
tcp_done(sk);
if (!sock_flag(sk, SOCK_DEAD))
@@ -5870,10 +5859,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
+ bool req_stolen;
+
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen))
goto discard;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8ad397e285e..9639334ebb7c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -561,16 +561,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct tcphdr *th = tcp_hdr(skb);
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v4_check(skb->len, saddr, daddr,
- csum_partial(th,
- th->doff << 2,
- skb->csum));
- }
+ th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
}
/* This routine computes an IPv4 TCP checksum. */
@@ -1672,6 +1665,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1694,10 +1688,20 @@ process:
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v4_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
@@ -2211,7 +2215,7 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
afinfo->seq_ops.next = tcp_seq_next;
afinfo->seq_ops.stop = tcp_seq_stop;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
rc = -ENOMEM;
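
The req_stolen out-parameter separates two failure modes of tcp_check_req(): a genuinely bad packet, and losing a race in which another CPU already promoted the request into a full socket. In the second case the packet is re-run through socket lookup rather than dropped, which matters when the handshake-completing ACK races with the first data segment. Schematically (lookup_socket() and deliver() are hypothetical; refcounting omitted):

	static int demux(struct sk_buff *skb, struct request_sock *req)
	{
		struct sock *sk, *nsk;
		bool stolen;

	lookup:
		sk = lookup_socket(skb);
		stolen = false;
		nsk = tcp_check_req(sk, skb, req, false, &stolen);
		if (!nsk) {
			if (stolen)
				goto lookup;	/* the full socket exists now */
			return -1;		/* bad packet: drop */
		}
		return deliver(nsk, skb);
	}
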
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..e7e36433cdb5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- bool fastopen)
+ bool fastopen, bool *req_stolen)
{
struct tcp_options_received tmp_opt;
struct sock *child;
@@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
+ *req_stolen = !own_req;
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e9f985e42405..383cac0ff0ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
- if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+ if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
tcp_skb_fragment_eor(skb, buff);
- if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
- /* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy_nocheck(skb->data + len,
- skb_put(buff, nsize),
- nsize, 0);
-
- skb_trim(skb, len);
-
- skb->csum = csum_block_sub(skb->csum, buff->csum, len);
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb_split(skb, buff, len);
- }
+ skb_split(skb, buff, len);
- buff->ip_summed = skb->ip_summed;
+ buff->ip_summed = CHECKSUM_PARTIAL;
buff->tstamp = skb->tstamp;
tcp_fragment_tstamp(skb, buff);
@@ -1715,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
@@ -1730,9 +1718,8 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
*/
segs = max_t(u32, bytes / mss_now, min_tso_segs);
- return min_t(u32, segs, sk->sk_gso_max_segs);
+ return segs;
}
-EXPORT_SYMBOL(tcp_tso_autosize);
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
@@ -1740,11 +1727,14 @@ EXPORT_SYMBOL(tcp_tso_autosize);
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ u32 min_tso, tso_segs;
+
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
- return tso_segs ? :
- tcp_tso_autosize(sk, mss_now,
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
/* Returns the portion of skb which can be sent right away */
@@ -1901,7 +1891,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
tcp_skb_fragment_eor(skb, buff);
- buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+ buff->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
@@ -2027,6 +2017,24 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
}
}
+static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+{
+ struct sk_buff *skb, *next;
+
+ skb = tcp_send_head(sk);
+ tcp_for_write_queue_from_safe(skb, next, sk) {
+ if (len <= skb->len)
+ break;
+
+ if (unlikely(TCP_SKB_CB(skb)->eor))
+ return false;
+
+ len -= skb->len;
+ }
+
+ return true;
+}
+
/* Create a new MTU probe if we are ready.
* MTU probe is regularly attempting to increase the path MTU by
* deliberately sending larger packets. This discovers routing
@@ -2099,6 +2107,9 @@ static int tcp_mtu_probe(struct sock *sk)
return 0;
}
+ if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
+ return -1;
+
/* We're allowed to probe. Build it now. */
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
@@ -2113,7 +2124,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
- nskb->ip_summed = skb->ip_summed;
+ nskb->ip_summed = CHECKSUM_PARTIAL;
tcp_insert_write_queue_before(nskb, skb, sk);
tcp_highest_sack_replace(sk, skb, nskb);
@@ -2121,19 +2132,16 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed) {
- skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- } else {
- __wsum csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, 0);
- nskb->csum = csum_block_add(nskb->csum, csum, len);
- }
+ skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
* Throw it away. */
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+ /* If this is the last SKB we copy and eor is set,
+ * we need to propagate it to the new skb.
+ */
+ TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
} else {
@@ -2141,9 +2149,6 @@ static int tcp_mtu_probe(struct sock *sk)
~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_partial(skb->data,
- skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
tcp_set_skb_tso_segs(skb, mss_now);
@@ -2721,12 +2726,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
}
tcp_highest_sack_replace(sk, next_skb, skb);
- if (next_skb->ip_summed == CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
/* Update sequence range on original skb. */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 71fc60f1b326..f7d944855f8e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -34,6 +34,7 @@ static void tcp_write_err(struct sock *sk)
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
sk->sk_error_report(sk);
+ tcp_write_queue_purge(sk);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bfaefe560b5c..f49e14cd3891 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
@@ -2024,6 +2018,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
err = udplite_checksum_init(skb, uh);
if (err)
return err;
+
+ if (UDP_SKB_CB(skb)->partial_cov) {
+ skb->csum = inet_compute_pseudo(skb, proto);
+ return 0;
+ }
}
/* Note, we are only interested in != 0 or == 0, thus the
@@ -2528,35 +2527,35 @@ int udp_abort(struct sock *sk, int err)
EXPORT_SYMBOL_GPL(udp_abort);
struct proto udp_prot = {
- .name = "UDP",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip4_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udp_destroy_sock,
- .setsockopt = udp_setsockopt,
- .getsockopt = udp_getsockopt,
- .sendmsg = udp_sendmsg,
- .recvmsg = udp_recvmsg,
- .sendpage = udp_sendpage,
- .release_cb = ip4_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v4_rehash,
- .get_port = udp_v4_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp_sock),
- .h.udp_table = &udp_table,
+ .name = "UDP",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip4_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udp_destroy_sock,
+ .setsockopt = udp_setsockopt,
+ .getsockopt = udp_getsockopt,
+ .sendmsg = udp_sendmsg,
+ .recvmsg = udp_recvmsg,
+ .sendpage = udp_sendpage,
+ .release_cb = ip4_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v4_rehash,
+ .get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udp_setsockopt,
- .compat_getsockopt = compat_udp_getsockopt,
+ .compat_setsockopt = compat_udp_setsockopt,
+ .compat_getsockopt = compat_udp_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
@@ -2674,7 +2673,7 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
afinfo->seq_ops.next = udp_seq_next;
afinfo->seq_ops.stop = udp_seq_stop;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
rc = -ENOMEM;
@@ -2825,6 +2824,26 @@ u32 udp_flow_hashrnd(void)
}
EXPORT_SYMBOL(udp_flow_hashrnd);
+static void __udp_sysctl_init(struct net *net)
+{
+ net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+ net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+ __udp_sysctl_init(net);
+ return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+ .init = udp_sysctl_init,
+};
+
void __init udp_init(void)
{
unsigned long limit;
@@ -2837,8 +2856,7 @@ void __init udp_init(void)
sysctl_udp_mem[1] = limit;
sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
- sysctl_udp_rmem_min = SK_MEM_QUANTUM;
- sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+ __udp_sysctl_init(&init_net);
/* 16 spinlocks per cpu */
udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2848,4 +2866,7 @@ void __init udp_init(void)
panic("UDP: failed to alloc udp_busylocks\n");
for (i = 0; i < (1U << udp_busylocks_log); i++)
spin_lock_init(udp_busylocks + i);
+
+ if (register_pernet_subsys(&udp_sysctl_ops))
+ panic("UDP: failed to init sysctl parameters.\n");
}
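
Because udp_rmem_min/udp_wmem_min are now per-netns fields rather than globals, struct proto can no longer hold a direct sysctl pointer for them; it records the field's offset within struct net (sysctl_rmem_offset / sysctl_wmem_offset) and core socket code resolves the value against the socket's own namespace, roughly:

	/* Sketch of the offset-based lookup (mirrors the sk_get_rmem0()
	 * helper this mechanism relies on; treat as illustrative).
	 */
	static int proto_rmem_min(const struct sock *sk, const struct proto *p)
	{
		if (p->sysctl_rmem_offset)
			return *(int *)((char *)sock_net(sk) +
					p->sysctl_rmem_offset);
		return *p->sysctl_rmem;	/* protocols still using a global */
	}
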
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 63faeee989a9..2a9764bd1719 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,7 +92,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
- eth_hdr(skb)->h_proto = skb->protocol;
+ if (skb->mac_len)
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 94b8702603bc..be980c195fc5 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,7 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
mtu = dst_mtu(skb_dst(skb));
if ((!skb_is_gso(skb) && skb->len > mtu) ||
- (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) {
+ (skb_is_gso(skb) &&
+ !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
skb->protocol = htons(ETH_P_IP);
if (skb->sk)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 05017e2c849c..d73a6d6652f6 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -100,8 +100,9 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
- xdst->u.rt.rt_table_id = rt->rt_table_id;
+ xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
+ rt_add_uncached_list(&xdst->u.rt);
return 0;
}
@@ -241,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
dst_destroy_metrics_generic(dst);
-
+ if (xdst->u.rt.rt_uncached_list)
+ rt_del_uncached_list(&xdst->u.rt);
xfrm_dst_destroy(xdst);
}
@@ -379,4 +381,3 @@ void __init xfrm4_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1846b97ee69..78cef00c9596 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -94,15 +94,6 @@
#include <linux/seq_file.h>
#include <linux/export.h>
-/* Set to 3 to get tracing... */
-#define ACONF_DEBUG 2
-
-#if ACONF_DEBUG >= 3
-#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#else
-#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
#define INFINITY_LIFE_TIME 0xFFFFFFFF
#define IPV6_MAX_STRLEN \
@@ -409,9 +400,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+ __func__);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -419,9 +409,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+ __func__, dev->name);
goto err_release;
}
@@ -984,7 +973,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
- ADBG("ipv6_add_addr: already assigned\n");
+ netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
@@ -1044,7 +1033,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa = kzalloc(sizeof(*ifa), gfp_flags);
if (!ifa) {
- ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -1459,6 +1447,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1836,22 +1839,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
- return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+ return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+ strict, IFA_F_TENTATIVE);
}
EXPORT_SYMBOL(ipv6_chk_addr);
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed-in dev argument. So the two cases for address checks are:
+ * 1. does the address exist in the L3 domain that dev is part of
+ * (skip_dev_check = true), or
+ *
+ * 2. does the address exist on the specific device
+ * (skip_dev_check = false)
+ */
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags)
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags)
{
unsigned int hash = inet6_addr_hash(net, addr);
+ const struct net_device *l3mdev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
rcu_read_lock();
+
+ l3mdev = l3mdev_master_dev_rcu(dev);
+ if (skip_dev_check)
+ dev = NULL;
+
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
+
+ if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+ continue;
+
/* Decouple optimistic from tentative for evaluation here.
* Ban optimistic addresses explicitly, when required.
*/
@@ -1968,6 +1991,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -2581,7 +2606,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG("addrconf: prefix option too short\n");
+ netdev_dbg(dev, "addrconf: prefix option too short\n");
return;
}
@@ -4244,7 +4269,7 @@ static const struct file_operations if6_fops = {
static int __net_init if6_proc_net_init(struct net *net)
{
- if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
+ if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
return -ENOMEM;
return 0;
}
@@ -4408,8 +4433,8 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched);
+ pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
@@ -4500,6 +4525,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -4573,6 +4601,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
@@ -4606,7 +4635,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
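
With skip_dev_check explicit, the two query modes the comment block above describes read directly from the call site; the anycast.c and datagram.c hunks below switch to the domain-wide form so VRF setups match addresses anywhere in the L3 domain. Wrapped for clarity (wrapper names are illustrative):

	/* Anywhere in the L3 domain that dev belongs to? */
	static int addr_in_l3_domain(struct net *net, const struct in6_addr *a,
				     const struct net_device *dev)
	{
		return ipv6_chk_addr_and_flags(net, a, dev, true, 0,
					       IFA_F_TENTATIVE);
	}

	/* On this specific device? */
	static int addr_on_device(struct net *net, const struct in6_addr *a,
				  const struct net_device *dev, int strict)
	{
		return ipv6_chk_addr_and_flags(net, a, dev, false, strict,
					       IFA_F_TENTATIVE);
	}
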
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..bbcabbba9bd8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
- if (ipv6_chk_addr(net, addr, NULL, 0))
+
+ if (ifindex)
+ dev = __dev_get_by_index(net, ifindex);
+
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -78,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
- } else
- dev = __dev_get_by_index(net, ifindex);
+ }
if (!dev) {
err = -ENODEV;
@@ -541,7 +544,7 @@ static const struct file_operations ac6_seq_fops = {
int __net_init ac6_proc_init(struct net *net)
{
- if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
+ if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
return -ENOMEM;
return 0;
@@ -552,4 +555,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
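
The anycast change swaps ipv6_chk_addr() for ipv6_chk_addr_and_flags() with IFA_F_TENTATIVE banned, so an address still undergoing DAD no longer blocks the join, and the device lookup moves ahead of the check so it can be scoped to the interface. A standalone sketch of the banned-flags semantics (struct and helper are stand-ins, not the kernel API):

#include <stdbool.h>
#include <stdio.h>

#define IFA_F_TENTATIVE 0x40

struct ifaddr { unsigned int flags; };

/* an address "exists" for the caller only if none of the banned
 * flags are set on it */
static bool addr_usable(const struct ifaddr *ifa, unsigned int banned)
{
	return !(ifa->flags & banned);
}

int main(void)
{
	struct ifaddr dad_pending = { .flags = IFA_F_TENTATIVE };
	struct ifaddr ready = { .flags = 0 };

	printf("%d %d\n", addr_usable(&dad_pending, IFA_F_TENTATIVE),
	       addr_usable(&ready, IFA_F_TENTATIVE)); /* 0 1 */
	return 0;
}
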
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fbf08ce3f5ab..88bc2ef7c7a8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -146,10 +146,12 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_addr *daddr;
+ struct in6_addr *daddr, old_daddr;
+ __be32 fl6_flowlabel = 0;
+ __be32 old_fl6_flowlabel;
+ __be16 old_dport;
int addr_type;
int err;
- __be32 fl6_flowlabel = 0;
if (usin->sin6_family == AF_INET) {
if (__ipv6_only_sock(sk))
@@ -238,9 +240,13 @@ ipv4_connected:
}
}
+ /* save the current peer information before updating it */
+ old_daddr = sk->sk_v6_daddr;
+ old_fl6_flowlabel = np->flow_label;
+ old_dport = inet->inet_dport;
+
sk->sk_v6_daddr = *daddr;
np->flow_label = fl6_flowlabel;
-
inet->inet_dport = usin->sin6_port;
/*
@@ -250,11 +256,12 @@ ipv4_connected:
err = ip6_datagram_dst_update(sk, true);
if (err) {
- /* Reset daddr and dport so that udp_v6_early_demux()
- * fails to find this socket
+ /* Restore the socket peer info, to keep it consistent with
+ * the old socket state
*/
- memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
- inet->inet_dport = 0;
+ sk->sk_v6_daddr = old_daddr;
+ np->flow_label = old_fl6_flowlabel;
+ inet->inet_dport = old_dport;
goto out;
}
@@ -801,8 +808,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
- !ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0) &&
+ !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
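
The __ip6_datagram_connect() change replaces the old "zero out daddr/dport on failure" with a full snapshot-and-restore, so a failed dst update leaves the socket exactly as it was, flow label included. The pattern as a standalone sketch (names are illustrative):

#include <stdio.h>

struct peer { const char *addr; unsigned short port; unsigned int flowlabel; };

static int dst_update(struct peer *p) { (void)p; return -1; /* simulate failure */ }

static int connect_peer(struct peer *cur, const struct peer *next)
{
	struct peer old = *cur;	/* save the current peer information */

	*cur = *next;		/* speculatively install the new peer */
	if (dst_update(cur) < 0) {
		*cur = old;	/* restore: state stays self-consistent */
		return -1;
	}
	return 0;
}

int main(void)
{
	struct peer cur = { "old", 1, 0 }, next = { "new", 2, 7 };

	connect_peer(&cur, &next);
	printf("%s %hu\n", cur.addr, cur.port); /* old 1: rolled back */
	return 0;
}
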
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24a6e52..df113c7b5fc8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
+ .lookup_data = skb,
.flags = FIB_LOOKUP_NOREF,
};
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, flags);
+ rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
@@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
@@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
@@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
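
fib6_rule_match() gains three L4 matches: IP protocol, source port range and destination port range, with a per-netns counter tracking how many installed rules need flow dissection. The port test as a standalone model (range semantics assumed inclusive, as in fib_rule_port_inrange()):

#include <stdbool.h>
#include <stdio.h>

struct port_range { unsigned short start, end; };

static bool range_set(const struct port_range *r) { return r->start || r->end; }

static bool in_range(const struct port_range *r, unsigned short port)
{
	return port >= r->start && port <= r->end;
}

/* a rule only rejects the flow if it set a range and the port is outside */
static bool rule_matches(const struct port_range *sport, unsigned short fl_sport)
{
	return !range_set(sport) || in_range(sport, fl_sport);
}

int main(void)
{
	struct port_range r = { 1000, 2000 };

	printf("%d %d\n", rule_matches(&r, 1500), rule_matches(&r, 80)); /* 1 0 */
	return 0;
}
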
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..d8c4b6374377 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+ skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
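
icmp6_send() now passes the netns (and room for pre-extracted flow keys) to rt6_multipath_hash(), so ICMPv6 errors hash onto the same nexthop as the flow that triggered them. A stand-in for the idea, with an illustrative mixing function rather than the kernel's jhash:

#include <stdint.h>
#include <stdio.h>

struct flow { uint32_t saddr, daddr; uint16_t sport, dport; uint8_t proto; };

/* derive a stable per-flow value; equal flows always pick the same path */
static uint32_t flow_hash(const struct flow *f)
{
	uint32_t h = f->saddr ^ f->daddr ^ f->proto;

	h ^= ((uint32_t)f->sport << 16) | f->dport;
	h *= 0x9e3779b1u; /* Fibonacci mixing constant */
	return h;
}

int main(void)
{
	struct flow f = { 0x0a000001, 0x0a000002, 1234, 80, 6 };

	printf("path %u\n", flow_hash(&f) % 2); /* choose one of two nexthops */
	return 0;
}
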
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index ec43d18b5ff9..547515e8450a 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -73,6 +73,11 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
err = udplite_checksum_init(skb, uh);
if (err)
return err;
+
+ if (UDP_SKB_CB(skb)->partial_cov) {
+ skb->csum = ip6_compute_pseudo(skb, proto);
+ return 0;
+ }
}
/* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
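
The udp6_csum_init() hunk makes UDP-Lite partial coverage short-circuit: once udplite_checksum_init() has set up partial coverage, skb->csum is seeded with the pseudo-header sum and the full-length validation below is skipped, since only the covered prefix is checksummed. For reference, the 16-bit ones' complement fold that such checksums reduce to, as a standalone snippet:

#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold(uint32_t sum)
{
	/* fold the carries back into the low 16 bits; twice is enough */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* e.g. fold a partial sum accumulated over the covered bytes only */
	printf("%#06x\n", (unsigned int)csum_fold(0x1f0f2u)); /* 0x0f0c */
	return 0;
}
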
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92b8d8c75eed..908b8e5b615a 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dab664ff503..c05c4e82a7ca 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -844,7 +844,7 @@ static const struct file_operations ip6fl_seq_fops = {
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+ if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
&ip6fl_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c353125546d..22e86557aca4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -126,7 +126,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
struct ip6_tnl *t, *cand = NULL;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int dev_type = (gre_proto == htons(ETH_P_TEB) ||
- gre_proto == htons(ETH_P_ERSPAN)) ?
+ gre_proto == htons(ETH_P_ERSPAN) ||
+ gre_proto == htons(ETH_P_ERSPAN2)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
@@ -236,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
return t;
dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
+ if (dev && dev->flags & IFF_UP)
return netdev_priv(dev);
return NULL;
@@ -695,9 +696,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
@@ -720,14 +718,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ : 0);
} else {
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
@@ -902,6 +906,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
truncate = true;
}
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto tx_err;
+
t->parms.o_flags &= ~TUNNEL_KEY;
IPCB(skb)->flags = 0;
@@ -944,6 +951,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
md->u.md2.dir,
get_hwid(&md->u.md2),
truncate, false);
+ } else {
+ goto tx_err;
}
} else {
switch (skb->protocol) {
@@ -1053,7 +1062,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -1469,6 +1478,8 @@ static int __net_init ip6gre_init_net(struct net *net)
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int err;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
@@ -1784,6 +1795,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
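
Two notable fixes in the ip6_gre.c hunks: ERSPAN v2 packets now take the Ethernet lookup path, and the output sequence number is bumped only at the point the GRE header is actually built with TUNNEL_SEQ set, instead of unconditionally before the collect-metadata branch is known. The seqno fix modeled standalone (the TUNNEL_SEQ value is illustrative):

#include <stdint.h>
#include <stdio.h>

#define TUNNEL_SEQ 0x1

struct tnl { uint32_t o_seqno; };

/* consume a sequence number only when the header actually carries one */
static uint32_t build_header(struct tnl *t, unsigned int flags)
{
	return (flags & TUNNEL_SEQ) ? t->o_seqno++ : 0;
}

int main(void)
{
	struct tnl t = { 0 };
	uint32_t s;

	build_header(&t, 0);		/* no SEQ flag: counter untouched */
	s = build_header(&t, TUNNEL_SEQ);
	printf("%u %u\n", s, t.o_seqno);	/* 0 1 */
	return 0;
}
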
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 997c7f19ad62..2c7f09c3c39e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
@@ -412,7 +412,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
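
ip6_pkt_too_big() switches to skb_gso_validate_network_len(), which asks whether one resegmented packet — network headers plus gso_size of payload — still fits the route MTU. The check reduced to a standalone model:

#include <stdbool.h>
#include <stdio.h>

static bool gso_segs_fit(unsigned int net_hdr_len, unsigned int gso_size,
			 unsigned int mtu)
{
	return net_hdr_len + gso_size <= mtu;
}

int main(void)
{
	printf("%d %d\n", gso_segs_fit(40, 1440, 1500),	/* 1: segments fit */
	       gso_segs_fit(40, 1472, 1500));		/* 0: would exceed MTU */
	return 0;
}
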
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4b15fe928278..df4c29f7d59f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
- NULL, 0, 0);
+ NULL, 0, skb2, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(laddr) ||
- likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE))) &&
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
- likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+ likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+ 0, IFA_F_TENTATIVE))))
ret = 1;
}
return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
- unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
@@ -1444,7 +1448,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -1982,14 +1986,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
{
struct net *net = dev_net(dev);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- struct ip6_tnl *nt, *t;
struct ip_tunnel_encap ipencap;
+ struct ip6_tnl *nt, *t;
+ int err;
nt = netdev_priv(dev);
if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
- int err = ip6_tnl_encap_setup(nt, &ipencap);
-
+ err = ip6_tnl_encap_setup(nt, &ipencap);
if (err < 0)
return err;
}
@@ -2005,7 +2009,11 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
return -EEXIST;
}
- return ip6_tnl_create2(dev);
+ err = ip6_tnl_create2(dev);
+ if (!err && tb[IFLA_MTU])
+ ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
+ return err;
}
static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -2201,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
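
Besides the stricter tentative-address checks, ip6_tnl_newlink() now honours IFLA_MTU, but only after ip6_tnl_create2() succeeds, and per-netns fallback device creation is gated on net_has_fallback_tunnels(). The newlink ordering as a sketch (names are stand-ins, not the kernel API):

#include <stdio.h>

struct dev { int mtu; };

static int create2(struct dev *d) { d->mtu = 1460; return 0; }

static int newlink(struct dev *d, const int *mtu_attr)
{
	int err = create2(d);

	if (!err && mtu_attr)
		d->mtu = *mtu_attr; /* apply IFLA_MTU only on success */
	return err;
}

int main(void)
{
	struct dev d;
	int mtu = 1400;

	newlink(&d, &mtu);
	printf("%d\n", d.mtu); /* 1400 */
	return 0;
}
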
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index fa3ae1cb50d3..60b771f49fb5 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (rt)
tdev = rt->dst.dev;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9f6cace9c817..298fd8b6ed17 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -32,11 +31,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
@@ -54,30 +51,12 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
{
- struct mr6_table *mrt;
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -268,17 +258,42 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
fib_rules_unregister(net->ipv6.mr6_rules_ops);
rtnl_unlock();
}
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
+}
+
+bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+ return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
+ rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
+}
+EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
@@ -297,114 +312,87 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
net->ipv6.mrt6 = NULL;
rtnl_unlock();
}
-#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
- struct mr6_table *mrt;
- unsigned int i;
-
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
+ return 0;
+}
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+#endif
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
-}
-
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
}
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
+ struct mr_table *mrt;
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -413,26 +401,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +412,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
@@ -465,7 +434,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
@@ -473,7 +442,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +454,14 @@ static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +475,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -597,15 +508,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +535,7 @@ static int pim6_rcv(struct sk_buff *skb)
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -658,7 +569,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -693,7 +604,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +647,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
@@ -769,21 +680,41 @@ failure:
}
#endif
-/*
- * Delete a VIF entry
- */
+static int call_ip6mr_vif_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ mifi_t vif_index, u32 tb_id)
+{
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv6.ipmr_seq);
+}
+
+static int call_ip6mr_mfc_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct mfc6_cache *mfc, u32 tb_id)
+{
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
+}
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+/* Delete a VIF entry */
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
+
+ if (VIF_EXISTS(mrt, vifi))
+ call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_VIF_DEL, v, vifi,
+ mrt->id);
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -802,7 +733,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
@@ -827,23 +758,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
@@ -862,13 +800,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +816,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
@@ -903,7 +841,8 @@ static void ipmr_expire_process(struct timer_list *t)
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -913,7 +852,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +864,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
@@ -980,21 +919,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -1006,78 +934,63 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
if (vifi + 1 > mrt->maxvif)
mrt->maxvif = vifi + 1;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
+ c->_c.free = ip6mr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
return c;
}
@@ -1086,8 +999,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
@@ -1095,7 +1008,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
@@ -1104,12 +1017,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +1043,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
@@ -1201,17 +1116,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1220,19 +1137,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
@@ -1253,10 +1167,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
@@ -1276,20 +1188,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1301,29 +1211,26 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL, c, mrt->id);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ mr_cache_put(&c->_c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1238,15 @@ static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
@@ -1349,21 +1256,63 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
+static unsigned int ip6mr_seq_read(struct net *net)
+{
+ ASSERT_RTNL();
+
+ return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+}
+
+static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+{
+ return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
+ ip6mr_mr_table_iter, &mrt_lock);
+}
+
static struct notifier_block ip6_mr_notifier = {
.notifier_call = ip6mr_device_event
};
-/*
- * Setup for IP multicast routing
- */
+static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
+ .family = RTNL_FAMILY_IP6MR,
+ .fib_seq_read = ip6mr_seq_read,
+ .fib_dump = ip6mr_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ip6mr_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ net->ipv6.ipmr_seq = 0;
+
+ ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ net->ipv6.ip6mr_notifier_ops = ops;
+
+ return 0;
+}
+static void __net_exit ip6mr_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
+ net->ipv6.ip6mr_notifier_ops = NULL;
+}
+
+/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
int err;
+ err = ip6mr_notifier_init(net);
+ if (err)
+ return err;
+
err = ip6mr_rules_init(net);
if (err < 0)
- goto fail;
+ goto ip6mr_rules_fail;
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
@@ -1381,7 +1330,8 @@ proc_cache_fail:
proc_vif_fail:
ip6mr_rules_exit(net);
#endif
-fail:
+ip6mr_rules_fail:
+ ip6mr_notifier_exit(net);
return err;
}
@@ -1392,6 +1342,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
ip6mr_rules_exit(net);
+ ip6mr_notifier_exit(net);
}
static struct pernet_operations ip6mr_net_ops = {
@@ -1452,14 +1403,14 @@ void ip6_mr_cleanup(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
@@ -1468,28 +1419,22 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
}
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
- }
-
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1504,31 +1449,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
@@ -1536,6 +1486,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
ip6mr_cache_resolve(net, mrt, uc, c);
ip6mr_cache_free(uc);
}
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1544,61 +1496,59 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL,
+ (struct mfc6_cache *)c,
+ mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
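
ip6mr_sk_init()/ip6mr_sk_done() convert mroute_sk to an RCU-managed pointer: writers publish or clear it under the lock, readers dereference it under rcu_read_lock(), and SOCK_RCU_FREE defers freeing past the grace period. A userspace model of the publish/unpublish protocol, with C11 atomics standing in for the kernel's RCU pointer helpers:

#include <stdatomic.h>
#include <stdio.h>

struct sock { int id; };

static _Atomic(struct sock *) mroute_sk;

static int sk_init(struct sock *sk)
{
	struct sock *expected = NULL;

	/* publish only if unset, mirroring the -EADDRINUSE branch */
	if (!atomic_compare_exchange_strong(&mroute_sk, &expected, sk))
		return -1;
	return 0;
}

static void sk_done(struct sock *sk)
{
	struct sock *expected = sk;

	/* unpublish; readers that already loaded the pointer stay safe
	 * because the object outlives them (SOCK_RCU_FREE in the kernel) */
	atomic_compare_exchange_strong(&mroute_sk, &expected, NULL);
}

int main(void)
{
	struct sock a = { 1 }, b = { 2 };

	printf("%d %d\n", sk_init(&a), sk_init(&b)); /* 0 -1 */
	sk_done(&a);
	return 0;
}
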
@@ -1616,7 +1566,7 @@ int ip6mr_sk_done(struct sock *sk)
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1624,9 +1574,13 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+ /* Note that mroute_sk had SOCK_RCU_FREE set,
+ * so the RCU grace period before sk freeing
+ * is guaranteed by sk_destruct()
+ */
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1644,9 +1598,9 @@ int ip6mr_sk_done(struct sock *sk)
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
@@ -1656,8 +1610,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
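
The hunks above convert mrt->mroute6_sk into an RCU-managed mrt->mroute_sk: the writer publishes with rcu_assign_pointer() under mrt_lock, RTNL-protected control paths read with rtnl_dereference(), and paths that only need a NULL test use rcu_access_pointer(). A rough userspace analog of the publish/read ordering using C11 atomics; this is a sketch of the memory-ordering idea, not the kernel API:

#include <stdatomic.h>
#include <stdio.h>

struct mroute_sock { int id; };

/* One slot shared between a writer and lockless readers. */
static _Atomic(struct mroute_sock *) mroute_sk;

/* Analog of rcu_assign_pointer(): the release store makes the
 * object's initialization visible before the pointer itself. */
static void publish(struct mroute_sock *sk)
{
	atomic_store_explicit(&mroute_sk, sk, memory_order_release);
}

/* Analog of rcu_dereference(): acquire pairs with the release. */
static struct mroute_sock *reader_get(void)
{
	return atomic_load_explicit(&mroute_sk, memory_order_acquire);
}

int main(void)
{
	static struct mroute_sock sk = { .id = 42 };
	struct mroute_sock *cur;

	publish(&sk);
	cur = reader_get();
	if (cur)			/* like the rcu_access_pointer() NULL test */
		printf("socket %d registered\n", cur->id);
	publish(NULL);			/* RCU_INIT_POINTER(..., NULL) analog */
	return 0;
}

Real RCU additionally defers the free until readers are done; in the patch that duty is delegated to sk_destruct() via the SOCK_RCU_FREE flag set at registration time, as the comment in ip6mr_sk_done() notes.
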
/*
* Socket options and virtual interface manipulation. The whole
@@ -1673,7 +1628,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1684,7 +1639,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1706,7 +1662,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
@@ -1741,7 +1698,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
@@ -1793,7 +1752,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
@@ -1824,7 +1783,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1872,10 +1831,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1888,8 +1847,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1906,19 +1865,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1946,10 +1905,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1962,8 +1921,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1980,19 +1939,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -2013,11 +1972,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
@@ -2087,46 +2046,50 @@ out_free:
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2135,52 +2098,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
@@ -2197,7 +2163,7 @@ int ip6_mr_input(struct sk_buff *skb)
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -2247,66 +2213,11 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -2367,15 +2278,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
@@ -2397,7 +2305,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2406,7 +2314,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2419,6 +2327,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
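
_ip6mr_fill_mroute() exists purely as a casting shim: the generic dump code traffics in struct mr_mfc, and the IPv6 side recovers its struct mfc6_cache by pointer cast, which is valid only because this refactor embeds mr_mfc as the first member (_c). A standalone illustration of the first-member-cast idiom (the field contents are hypothetical):

#include <stddef.h>
#include <stdio.h>
#include <assert.h>

struct mr_mfc {				/* protocol-independent core */
	unsigned int mfc_parent;
};

struct mfc6_cache {			/* IPv6 wrapper */
	struct mr_mfc _c;		/* must stay the first member */
	const char *mcast_group;	/* hypothetical v6-only field */
};

/* Generic code hands back the embedded core ... */
static void generic_visit(struct mr_mfc *c)
{
	/* ... and family code recovers the wrapper with a cast,
	 * valid only because _c sits at offset 0. */
	struct mfc6_cache *c6 = (struct mfc6_cache *)c;

	printf("parent=%u group=%s\n", c->mfc_parent, c6->mcast_group);
}

int main(void)
{
	static_assert(offsetof(struct mfc6_cache, _c) == 0,
		      "cast idiom requires _c at offset 0");
	struct mfc6_cache entry = { ._c.mfc_parent = 3,
				    .mcast_group = "ff0e::1" };

	generic_visit(&entry._c);
	return 0;
}
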
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2440,14 +2356,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2482,7 +2398,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2532,65 +2448,6 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d78d41fc4b1a..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1367,10 +1367,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
- err = nf_getsockopt(sk, PF_INET6, optname, optval,
- &len);
- release_sock(sk);
+ err = nf_getsockopt(sk, PF_INET6, optname, optval, &len);
if (err >= 0)
err = put_user(len, optlen);
}
@@ -1409,10 +1406,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
- err = compat_nf_getsockopt(sk, PF_INET6,
- optname, optval, &len);
- release_sock(sk);
+ err = compat_nf_getsockopt(sk, PF_INET6, optname, optval, &len);
if (err >= 0)
err = put_user(len, optlen);
}
@@ -1421,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9b9d2ff01b35..793159d77d8a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct inet6_dev *idev = NULL;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
@@ -2921,9 +2921,9 @@ static int __net_init igmp6_proc_init(struct net *net)
int err;
err = -ENOMEM;
- if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
+ if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
goto out;
- if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+ if (!proc_create("mcfilter6", 0444, net->proc_net,
&igmp6_mcf_seq_fops))
goto out_proc_net_igmp6;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f61a5b613b52..9de4dfb126ba 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
}
if (!dev->addr_len)
- inc_opt = 0;
+ inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_ADVERTISEMENT);
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
- dev, 1,
+ dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
@@ -1554,7 +1554,8 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
*(opt++) = (rd_len >> 3);
opt += 6;
- memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
+ skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
+ rd_len - 8);
}
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d95ceca7ff8f..531d6957af36 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -21,18 +21,19 @@
int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sock *sk = sk_to_full_sk(skb->sk);
unsigned int hh_len;
struct dst_entry *dst;
struct flowi6 fl6 = {
- .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+ .flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
.flowi6_mark = skb->mark,
- .flowi6_uid = sock_net_uid(net, skb->sk),
+ .flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
.saddr = iph->saddr,
};
int err;
- dst = ip6_route_output(net, skb->sk, &fl6);
+ dst = ip6_route_output(net, sk, &fl6);
err = dst->error;
if (err) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -50,7 +51,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
skb_dst_set(skb, NULL);
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
skb_dst_set(skb, dst);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index af4c917e0836..62358b93bbac 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -352,6 +352,10 @@ ip6t_do_table(struct sk_buff *skb,
}
if (table_base + v != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO)) {
+ if (unlikely(stackidx >= private->stacksize)) {
+ verdict = NF_DROP;
+ break;
+ }
jumpstack[stackidx++] = e;
}
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index fa51a205918d..38dea8ff680f 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -85,14 +85,14 @@ static int reject_tg6_check(const struct xt_tgchk_param *par)
const struct ip6t_entry *e = par->entryinfo;
if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
- pr_info("ECHOREPLY is not supported.\n");
+ pr_info_ratelimited("ECHOREPLY is not supported\n");
return -EINVAL;
} else if (rejinfo->with == IP6T_TCP_RESET) {
/* Must specify that it's a TCP packet */
if (!(e->ipv6.flags & IP6T_F_PROTO) ||
e->ipv6.proto != IPPROTO_TCP ||
(e->ipv6.invflags & XT_INV_PROTO)) {
- pr_info("TCP_RESET illegal for non-tcp\n");
+ pr_info_ratelimited("TCP_RESET illegal for non-tcp\n");
return -EINVAL;
}
}
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index b12e61b7b16c..d12f511929f5 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -48,12 +48,8 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
}
fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- if ((flags & XT_RPFILTER_LOOSE) == 0) {
- fl6.flowi6_oif = dev->ifindex;
- lookup_flags |= RT6_LOOKUP_F_IFACE;
- }
- rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error)
goto out;
@@ -103,14 +99,14 @@ static int rpfilter_check(const struct xt_mtchk_param *par)
unsigned int options = ~XT_RPFILTER_OPTION_MASK;
if (info->flags & options) {
- pr_info("unknown options encountered");
+ pr_info_ratelimited("unknown options\n");
return -EINVAL;
}
if (strcmp(par->table, "mangle") != 0 &&
strcmp(par->table, "raw") != 0) {
- pr_info("match only valid in the \'raw\' "
- "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n",
+ par->table);
return -EINVAL;
}
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
index 9642164107ce..33719d5560c8 100644
--- a/net/ipv6/netfilter/ip6t_srh.c
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -122,12 +122,14 @@ static int srh_mt6_check(const struct xt_mtchk_param *par)
const struct ip6t_srh *srhinfo = par->matchinfo;
if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
- pr_err("unknown srh match flags %X\n", srhinfo->mt_flags);
+ pr_info_ratelimited("unknown srh match flags %X\n",
+ srhinfo->mt_flags);
return -EINVAL;
}
if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
- pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags);
+ pr_info_ratelimited("unknown srh invflags %X\n",
+ srhinfo->mt_invflags);
return -EINVAL;
}
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index d346705d6ee6..207cb35569b1 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -178,7 +178,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if (skb->len <= mtu)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
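
skb_gso_validate_network_len() changes the question asked of GSO packets: not "does the aggregate fit?" but "would every resegmented packet fit at network-layer length?". A toy model of that decision, assuming gso_size is the per-segment payload and hdr_len the headers replicated onto each segment:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified packet descriptor. */
struct pkt {
	unsigned int len;	/* total network-layer length */
	unsigned int gso_size;	/* 0 = not GSO; else payload per segment */
	unsigned int hdr_len;	/* headers replicated on every segment */
};

static bool exceeds_mtu(const struct pkt *p, unsigned int mtu)
{
	if (p->len <= mtu)
		return false;				/* fits as-is */
	if (p->gso_size &&
	    p->hdr_len + p->gso_size <= mtu)		/* each segment fits */
		return false;
	return true;
}

int main(void)
{
	struct pkt big_gso  = { .len = 64000, .gso_size = 1400, .hdr_len = 60 };
	struct pkt big_flat = { .len = 3000,  .gso_size = 0,    .hdr_len = 60 };

	printf("GSO 64k over 1500 MTU: %s\n",
	       exceeds_mtu(&big_gso, 1500) ? "drop/frag" : "forward");
	printf("flat 3k over 1500 MTU: %s\n",
	       exceeds_mtu(&big_flat, 1500) ? "drop/frag" : "forward");
	return 0;
}
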
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index bed57ee65f7b..6b7f075f811f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -99,6 +99,10 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
target, maniptype))
return false;
+
+ /* must reload, offset might have changed */
+ ipv6h = (void *)skb->data + iphdroff;
+
manip_addr:
if (maniptype == NF_NAT_MANIP_SRC)
ipv6h->saddr = target->src.u3.in6;
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index cc5174c7254c..36be3cf0adef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -180,8 +180,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
*dest = 0;
- again:
- rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+ lookup_flags);
if (rt->dst.error)
goto put_rt_err;
@@ -189,15 +189,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev) {
- /* multipath route? Try again with F_IFACE */
- if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
- lookup_flags |= RT6_LOOKUP_F_IFACE;
- fl6.flowi6_oif = oif->ifindex;
- ip6_rt_put(rt);
- goto again;
- }
- }
+ if (oif && oif != rt->rt6i_idev->dev)
+ goto put_rt_err;
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..6e57028d2e91 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -290,7 +290,7 @@ int snmp6_register_dev(struct inet6_dev *idev)
if (!net->mib.proc_net_devsnmp6)
return -ENOENT;
- p = proc_create_data(idev->dev->name, S_IRUGO,
+ p = proc_create_data(idev->dev->name, 0444,
net->mib.proc_net_devsnmp6,
&snmp6_dev_seq_fops, idev);
if (!p)
@@ -314,11 +314,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
static int __net_init ipv6_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat6", 0444, net->proc_net,
&sockstat6_seq_fops))
return -ENOMEM;
- if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
+ if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
goto proc_snmp6_fail;
net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
@@ -354,4 +354,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c25339b1984..5eb9b08947ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1318,7 +1318,7 @@ static const struct file_operations raw6_seq_fops = {
static int __net_init raw6_init_net(struct net *net)
{
- if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
+ if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9dcfadddd800..ba8d5df50ebe 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -128,7 +128,7 @@ struct uncached_list {
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
-static void rt6_uncached_list_add(struct rt6_info *rt)
+void rt6_uncached_list_add(struct rt6_info *rt)
{
struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
@@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt)
spin_unlock_bh(&ul->lock);
}
-static void rt6_uncached_list_del(struct rt6_info *rt)
+void rt6_uncached_list_del(struct rt6_info *rt)
{
if (!list_empty(&rt->rt6i_uncached)) {
struct uncached_list *ul = rt->rt6i_uncached_list;
@@ -450,8 +450,10 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+ struct rt6_info *match,
struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
int strict)
{
struct rt6_info *sibling, *next_sibling;
@@ -460,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+ fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
@@ -914,7 +916,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
@@ -929,8 +933,8 @@ restart:
rt = rt6_device_match(net, rt, &fl6->saddr,
fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6,
- fl6->flowi6_oif, flags);
+ rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+ skb, flags);
}
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
@@ -954,14 +958,15 @@ restart:
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags)
+ const struct sk_buff *skb, int flags)
{
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int strict)
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int strict)
{
struct flowi6 fl6 = {
.flowi6_oif = oif,
@@ -975,7 +980,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
@@ -1509,7 +1514,30 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
}
}
-static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
+ struct rt6_info *rt, int mtu)
+{
+ /* If the new MTU is lower than the route PMTU, this new MTU will be the
+ * lowest MTU in the path: always allow updating the route PMTU to
+ * reflect PMTU decreases.
+ *
+ * If the new MTU is higher, and the route PMTU is equal to the local
+ * MTU, this means the old MTU is the lowest in the path, so allow
+ * updating it: if other nodes now have lower MTUs, PMTU discovery will
+ * handle this.
+ */
+
+ if (dst_mtu(&rt->dst) >= mtu)
+ return true;
+
+ if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
+ return true;
+
+ return false;
+}
+
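
rt6_mtu_change_route_allowed() distills the PMTU rule its comment states: always accept a decrease, and accept an increase only when the cached route PMTU equals the local link MTU, i.e. this hop was the path bottleneck. A standalone check of the predicate with illustrative numbers:

#include <stdbool.h>
#include <stdio.h>

/* rt_pmtu is the route's cached PMTU, link_mtu6 the device's current
 * IPv6 MTU, mtu the newly proposed value. */
static bool mtu_change_allowed(unsigned int rt_pmtu,
			       unsigned int link_mtu6,
			       unsigned int mtu)
{
	if (rt_pmtu >= mtu)		/* decrease: always take it */
		return true;
	if (rt_pmtu == link_mtu6)	/* increase, and we were the bottleneck */
		return true;
	return false;			/* increase, bottleneck is elsewhere */
}

int main(void)
{
	/* decrease from 1500 to 1400: allowed */
	printf("%d\n", mtu_change_allowed(1500, 1500, 1400));
	/* increase to 9000 while we were the 1500 bottleneck: allowed */
	printf("%d\n", mtu_change_allowed(1500, 1500, 9000));
	/* increase, but a remote 1280 hop set the PMTU: rejected,
	 * PMTU discovery must raise it, not a local MTU change */
	printf("%d\n", mtu_change_allowed(1280, 1500, 9000));
	return 0;
}
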
+static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
+ struct rt6_info *rt, int mtu)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
@@ -1518,20 +1546,22 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
- if (bucket) {
- for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
- struct rt6_info *entry = rt6_ex->rt6i;
- /* For RTF_CACHE with rt6i_pmtu == 0
- * (i.e. a redirected route),
- * the metrics of its rt->dst.from has already
- * been updated.
- */
- if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
- entry->rt6i_pmtu = mtu;
- }
- bucket++;
+ if (!bucket)
+ return;
+
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+
+ /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
+ * route), the metrics of its rt->dst.from have already
+ * been updated.
+ */
+ if (entry->rt6i_pmtu &&
+ rt6_mtu_change_route_allowed(idev, entry, mtu))
+ entry->rt6i_pmtu = mtu;
}
+ bucket++;
}
}
@@ -1647,7 +1677,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *rt_cache;
@@ -1669,7 +1700,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
redo_rt6_select:
rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(rt, fl6, oif, strict);
+ rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
@@ -1768,28 +1799,35 @@ uncached_rt_out:
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
- struct flow_keys *keys)
+ struct flow_keys *keys,
+ struct flow_keys *flkeys)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
const struct ipv6hdr *key_iph = outer_iph;
+ struct flow_keys *_flkeys = flkeys;
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
@@ -1811,26 +1849,76 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
goto out;
key_iph = inner_iph;
+ _flkeys = NULL;
out:
- memset(keys, 0, sizeof(*keys));
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys->addrs.v6addrs.src = key_iph->saddr;
- keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
- keys->basic.ip_proto = key_iph->nexthdr;
+ if (_flkeys) {
+ keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+ keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+ keys->tags.flow_label = _flkeys->tags.flow_label;
+ keys->basic.ip_proto = _flkeys->basic.ip_proto;
+ } else {
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+ }
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
+ u32 mhash;
- if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys) >> 1;
+ switch (ip6_multipath_hash_policy(net)) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+ } else {
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
+ case 1:
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+ hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.ports.src = fl6->fl6_sport;
+ hash_keys.ports.dst = fl6->fl6_dport;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
}
+ mhash = flow_hash_from_keys(&hash_keys);
- return get_hash_from_flowi6(fl6) >> 1;
+ return mhash >> 1;
}
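
The reworked rt6_multipath_hash() selects which fields feed the hash from the per-netns policy: 0 hashes the L3 triple (addresses plus flow label), 1 hashes the 5-tuple, reusing pre-dissected flow keys when the caller supplies them. A compact model with a stand-in hash function; the per-policy field set is the point, not the hash itself:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_keys {		/* subset of struct flow_keys */
	uint32_t src, dst;	/* stand-ins for the v6 addresses */
	uint32_t flow_label;
	uint16_t sport, dport;
	uint8_t  proto;
};

static uint32_t toy_hash(const void *p, size_t n)	/* FNV-1a */
{
	const uint8_t *b = p;
	uint32_t h = 2166136261u;

	while (n--) { h ^= *b++; h *= 16777619u; }
	return h;
}

static uint32_t multipath_hash(const struct toy_keys *fk, int policy)
{
	struct toy_keys hk;

	memset(&hk, 0, sizeof(hk));	/* zero unused fields and padding */
	hk.src = fk->src;
	hk.dst = fk->dst;
	hk.proto = fk->proto;
	if (policy == 0) {
		hk.flow_label = fk->flow_label;	/* L3: label, no ports */
	} else {
		hk.sport = fk->sport;		/* L4: ports, no label */
		hk.dport = fk->dport;
	}
	return toy_hash(&hk, sizeof(hk)) >> 1;	/* mirrors the final >> 1 */
}

int main(void)
{
	struct toy_keys a = { 1, 2, 0xbeef, 1111, 80, 6 };
	struct toy_keys b = a;

	b.sport = 2222;		/* same flow label, different source port */
	printf("L3: %u vs %u (equal)\n",
	       multipath_hash(&a, 0), multipath_hash(&b, 0));
	printf("L4: %u vs %u (differ)\n",
	       multipath_hash(&a, 1), multipath_hash(&b, 1));
	return 0;
}
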
void ip6_route_input(struct sk_buff *skb)
@@ -1847,20 +1935,29 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
+ struct flow_keys *flkeys = NULL, _flkeys;
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+ if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+ flkeys = &_flkeys;
+
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb);
- skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+ skb_dst_set(skb,
+ ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
@@ -1888,7 +1985,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
@@ -2137,6 +2234,7 @@ struct ip6rd_flowi {
static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
@@ -2210,8 +2308,9 @@ out:
};
static struct dst_entry *ip6_route_redirect(struct net *net,
- const struct flowi6 *fl6,
- const struct in6_addr *gateway)
+ const struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ const struct in6_addr *gateway)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip6rd_flowi rdfl;
@@ -2219,7 +2318,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
rdfl.fl6 = *fl6;
rdfl.gateway = *gateway;
- return fib6_rule_lookup(net, &rdfl.fl6,
+ return fib6_rule_lookup(net, &rdfl.fl6, skb,
flags, __ip6_route_redirect);
}
@@ -2239,7 +2338,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
fl6.flowlabel = ip6_flowinfo(iph);
fl6.flowi6_uid = uid;
- dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2261,7 +2360,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
fl6.saddr = iph->daddr;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2463,7 +2562,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
flags |= RT6_LOOKUP_F_HAS_SADDR;
flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
- rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
/* if table lookup failed, fall back to full lookup */
if (rt == net->ipv6.ip6_null_entry) {
@@ -2476,7 +2575,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
static int ip6_route_check_nh_onlink(struct net *net,
struct fib6_config *cfg,
- struct net_device *dev,
+ const struct net_device *dev,
struct netlink_ext_ack *extack)
{
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
@@ -2526,7 +2625,7 @@ static int ip6_route_check_nh(struct net *net,
}
if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
if (!grt)
goto out;
@@ -2552,6 +2651,79 @@ out:
return err;
}
+static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
+ struct net_device **_dev, struct inet6_dev **idev,
+ struct netlink_ext_ack *extack)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ int gwa_type = ipv6_addr_type(gw_addr);
+ bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
+ const struct net_device *dev = *_dev;
+ bool need_addr_check = !dev;
+ int err = -EINVAL;
+
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ if (dev &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
+ /* IPv6 strictly inhibits using not link-local
+ * addresses as nexthop address.
+ * Otherwise, router will not able to send redirects.
+ * It is very good, but in some (rare!) circumstances
+ * (SIT, PtP, NBMA NOARP links) it is handy to allow
+ * some exceptions. --ANK
+ * We allow IPv4-mapped nexthops to support RFC4798-type
+ * addressing
+ */
+ if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ goto out;
+ }
+
+ if (cfg->fc_flags & RTNH_F_ONLINK)
+ err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
+ else
+ err = ip6_route_check_nh(net, cfg, _dev, idev);
+
+ if (err)
+ goto out;
+ }
+
+ /* reload in case device was changed */
+ dev = *_dev;
+
+ err = -EINVAL;
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
+ goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
+ goto out;
+ }
+
+ /* if we did not check gw_addr above, do so now that the
+ * egress device has been resolved.
+ */
+ if (need_addr_check &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
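
ip6_validate_gw() fixes the order of the checks it consolidates: reject a gateway that is a local address (deferred until the egress device is resolved, if necessary), enforce that non-link-local gateways are unicast or IPv4-mapped, then sanity-check the device. A condensed sketch of that decision order using simplified stand-ins for the IPV6_ADDR_* type bits (the values and names are hypothetical):

#include <stdbool.h>
#include <stdio.h>

#define ADDR_UNICAST	0x1
#define ADDR_LINKLOCAL	0x2
#define ADDR_MAPPED	0x4

struct gw {
	int type;	/* ipv6_addr_type() result */
	bool is_local;	/* would ipv6_chk_addr_and_flags() match? */
	bool have_dev;	/* egress device known or resolvable */
};

static const char *validate_gw(const struct gw *gw)
{
	if (gw->have_dev && gw->is_local)
		return "gateway can not be a local address";

	if (gw->type != (ADDR_LINKLOCAL | ADDR_UNICAST)) {
		/* non-link-local nexthops: unicast or v4-mapped only */
		if (!(gw->type & (ADDR_UNICAST | ADDR_MAPPED)))
			return "invalid gateway address";
		/* here the kernel resolves the device via the nh checks */
	}

	if (!gw->have_dev)
		return "egress device not specified";

	if (gw->is_local)	/* deferred check, device now known */
		return "gateway can not be a local address";

	return "ok";
}

int main(void)
{
	struct gw ll  = { ADDR_LINKLOCAL | ADDR_UNICAST, false, true };
	struct gw bad = { 0, false, true };	/* e.g. multicast-typed gw */

	printf("link-local unicast: %s\n", validate_gw(&ll));
	printf("non-unicast:        %s\n", validate_gw(&bad));
	return 0;
}
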
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2671,14 +2843,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (err)
goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate);
- if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_output = rt->dst.output;
- rt->dst.output = lwtunnel_output;
- }
- if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_input = rt->dst.input;
- rt->dst.input = lwtunnel_input;
- }
+ lwtunnel_set_redirect(&rt->dst);
}
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
@@ -2741,61 +2906,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
}
if (cfg->fc_flags & RTF_GATEWAY) {
- const struct in6_addr *gw_addr;
- int gwa_type;
-
- gw_addr = &cfg->fc_gateway;
- gwa_type = ipv6_addr_type(gw_addr);
-
- /* if gw_addr is local we will fail to detect this in case
- * address is still TENTATIVE (DAD in progress). rt6_lookup()
- * will return already-added prefix route via interface that
- * prefix route was assigned to, which might be non-loopback.
- */
- err = -EINVAL;
- if (ipv6_chk_addr_and_flags(net, gw_addr,
- gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0)) {
- NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ if (err)
goto out;
- }
- rt->rt6i_gateway = *gw_addr;
-
- if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- /* IPv6 strictly inhibits using not link-local
- addresses as nexthop address.
- Otherwise, router will not able to send redirects.
- It is very good, but in some (rare!) circumstances
- (SIT, PtP, NBMA NOARP links) it is handy to allow
- some exceptions. --ANK
- We allow IPv4-mapped nexthops to support RFC4798-type
- addressing
- */
- if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED))) {
- NL_SET_ERR_MSG(extack,
- "Invalid gateway address");
- goto out;
- }
- if (cfg->fc_flags & RTNH_F_ONLINK) {
- err = ip6_route_check_nh_onlink(net, cfg, dev,
- extack);
- } else {
- err = ip6_route_check_nh(net, cfg, &dev, &idev);
- }
- if (err)
- goto out;
- }
- err = -EINVAL;
- if (!dev) {
- NL_SET_ERR_MSG(extack, "Egress device not specified");
- goto out;
- } else if (dev->flags & IFF_LOOPBACK) {
- NL_SET_ERR_MSG(extack,
- "Egress device can not be loopback device for this route");
- goto out;
- }
+ rt->rt6i_gateway = cfg->fc_gateway;
}
err = -ENODEV;
@@ -3809,25 +3924,13 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
Since RFC 1981 doesn't include administrative MTU increase
update PMTU increase is a MUST. (i.e. jumbo frame)
*/
- /*
- If new MTU is less than route PMTU, this new MTU will be the
- lowest MTU in the path, update the route PMTU to reflect PMTU
- decreases; if new MTU is greater than route PMTU, and the
- old MTU is the lowest MTU in the path, update the route PMTU
- to reflect the increase. In this case if the other nodes' MTU
- also have the lowest MTU, TOO BIG MESSAGE will be lead to
- PMTU discovery.
- */
if (rt->dst.dev == arg->dev &&
- dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
spin_lock_bh(&rt6_exception_lock);
- if (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
+ rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
- }
- rt6_exceptions_update_pmtu(rt, arg->mtu);
+ rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
spin_unlock_bh(&rt6_exception_lock);
}
return 0;
@@ -4099,6 +4202,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
r_cfg.fc_encap_type = nla_get_u16(nla);
}
+ r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
rt = ip6_route_info_create(&r_cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -4598,7 +4702,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
rcu_read_unlock();
} else {
@@ -4963,7 +5067,7 @@ static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
- proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
+ proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
#endif
return 0;
}
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index bd6cc688bd19..7a78dcfda68a 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -93,7 +93,8 @@ static void set_tun_src(struct net *net, struct net_device *dev,
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(dst->dev);
struct ipv6hdr *hdr, *inner_hdr;
struct ipv6_sr_hdr *isrh;
int hdrlen, tot_len, err;
@@ -134,7 +135,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
- set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
@@ -418,7 +419,7 @@ static int seg6_build_state(struct nlattr *nla,
slwt = seg6_lwt_lwtunnel(newts);
- err = dst_cache_init(&slwt->cache, GFP_KERNEL);
+ err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
if (err) {
kfree(newts);
return err;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index ba3767ef5e93..45722327375a 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
if (!tbl_id) {
- dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
} else {
struct fib6_table *table;
@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
if (!table)
goto out;
- rt = ip6_pol_route(net, table, 0, &fl6, flags);
+ rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
dst = &rt->dst;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3873d3877135..1522bcfd253f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel *t = netdev_priv(dev);
- if (t->dev == sitn->fb_tunnel_dev) {
+ if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
t->ip6rd.relay_prefix = 0;
t->ip6rd.prefixlen = 16;
@@ -1578,6 +1578,13 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
return err;
+ if (tb[IFLA_MTU]) {
+ u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+
+ if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len)
+ dev->mtu = mtu;
+ }
+
#ifdef CONFIG_IPV6_SIT_6RD
if (ipip6_netlink_6rd_parms(data, &ip6rd))
err = ipip6_tunnel_update_6rd(nt, &ip6rd);
@@ -1828,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net)
sitn->tunnels[2] = sitn->tunnels_r;
sitn->tunnels[3] = sitn->tunnels_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
+
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
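
The IFLA_MTU hunk in ipip6_newlink() accepts a user-requested MTU only inside the valid SIT range: at least IPV6_MIN_MTU (1280) and at most 0xFFF8 minus the outer header length; out-of-range requests silently keep the default. A tiny check of the clamp, assuming a hypothetical 14-byte hard_header_len:

#include <stdio.h>

#define IPV6_MIN_MTU 1280u

/* Returns the MTU actually applied: the requested value if it is in
 * range, otherwise the device default is kept. */
static unsigned int apply_mtu(unsigned int dev_mtu, unsigned int req,
			      unsigned int hard_header_len)
{
	if (req >= IPV6_MIN_MTU && req <= 0xFFF8 - hard_header_len)
		return req;
	return dev_mtu;		/* out of range: request ignored */
}

int main(void)
{
	printf("%u\n", apply_mtu(1480, 9000, 14));	/* accepted */
	printf("%u\n", apply_mtu(1480, 1000, 14));	/* < 1280: kept 1480 */
	printf("%u\n", apply_mtu(1480, 65528, 14));	/* > 0xFFF8-14: kept */
	return 0;
}
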
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index a789a8ac6a64..6fbdef630152 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,14 +16,31 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#include <net/netevent.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+static int zero;
static int one = 1;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_policy);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
static struct ctl_table ipv6_table_template[] = {
{
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv6.sysctl.multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_rt6_multipath_hash_policy,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+	ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
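
proc_rt6_multipath_hash_policy() above wraps proc_dointvec_minmax() so that a successful write also raises NETEVENT_IPV6_MPATH_HASH_UPDATE, letting e.g. switch-offload drivers resynchronize their ECMP hash configuration. From userspace the knob is an ordinary proc file; a minimal toggle, assuming the path this table entry creates:

#include <stdio.h>

/* Flip the IPv6 ECMP hash policy: "0" = L3, "1" = L4 (5-tuple).
 * Requires CAP_NET_ADMIN; values outside 0..1 are rejected. */
int main(void)
{
	const char *path = "/proc/sys/net/ipv6/fib_multipath_hash_policy";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	fputs("1\n", f);
	fclose(f);
	return 0;
}
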
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 412139f4eccd..883df0ad5bfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1451,6 +1451,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1470,10 +1471,20 @@ process:
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v6_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52e3ea0e6f50..ad30f5e31969 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1509,34 +1509,34 @@ void udp6_proc_exit(struct net *net)
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
- .name = "UDPv6",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip6_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udpv6_destroy_sock,
- .setsockopt = udpv6_setsockopt,
- .getsockopt = udpv6_getsockopt,
- .sendmsg = udpv6_sendmsg,
- .recvmsg = udpv6_recvmsg,
- .release_cb = ip6_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v6_rehash,
- .get_port = udp_v6_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp6_sock),
- .h.udp_table = &udp_table,
+ .name = "UDPv6",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+ .sendmsg = udpv6_sendmsg,
+ .recvmsg = udpv6_recvmsg,
+ .release_cb = ip6_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v6_rehash,
+ .get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp6_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udpv6_setsockopt,
- .compat_getsockopt = compat_udpv6_getsockopt,
+ .compat_setsockopt = compat_udpv6_setsockopt,
+ .compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index bb935a3b7fea..de1b0b8c53b0 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -92,7 +92,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
- eth_hdr(skb)->h_proto = skb->protocol;
+ if (skb->mac_len)
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8ae87d4ec5ff..5959ce9620eb 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -82,7 +82,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if ((!skb_is_gso(skb) && skb->len > mtu) ||
(skb_is_gso(skb) &&
- skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) {
+ !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
skb->dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 09fb44ee3b45..416fe67271a9 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
xdst->u.rt6.rt6i_src = rt->rt6i_src;
+ INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
+ rt6_uncached_list_add(&xdst->u.rt6);
+ atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
return 0;
}
@@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
+ if (xdst->u.rt6.rt6i_uncached_list)
+ rt6_uncached_list_del(&xdst->u.rt6);
xfrm_dst_destroy(xdst);
}
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b15075a5c227..16f434791763 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
{
xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
}
-
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 81ce15ffb878..893a022f9620 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -2432,9 +2432,11 @@ static int afiucv_iucv_init(void)
af_iucv_dev->driver = &af_iucv_driver;
err = device_register(af_iucv_dev);
if (err)
- goto out_driver;
+ goto out_iucv_dev;
return 0;
+out_iucv_dev:
+ put_device(af_iucv_dev);
out_driver:
driver_unregister(&af_iucv_driver);
out_iucv:
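The af_iucv fix above follows the documented driver-core rule: once device_register() has been called, the struct device is owned by its kobject, so a failed registration must be rolled back with put_device(), which frees through the release callback, and never by freeing the device directly. The general shape:

	err = device_register(dev);
	if (err) {
		put_device(dev);	/* drops the ref device_register() took;
					 * never kfree(dev) at this point */
		return err;
	}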
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 9d5649e4e8b7..1fac92543094 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -269,7 +269,7 @@ static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo)
struct proc_dir_entry *p;
int rc = 0;
- p = proc_create_data(muxinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(muxinfo->name, 0444, net->proc_net,
muxinfo->seq_fops, muxinfo);
if (!p)
rc = -ENOMEM;
@@ -406,7 +406,7 @@ static int kcm_proc_init_net(struct net *net)
{
int err;
- if (!proc_create("kcm_stats", S_IRUGO, net->proc_net,
+ if (!proc_create("kcm_stats", 0444, net->proc_net,
&kcm_stats_seq_fops)) {
err = -ENOMEM;
goto out_kcm_stats;
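The S_IRUGO -> 0444 substitutions here (and the matching ones in net/llc, net/l2tp/l2tp_ppp.c and the mac80211 minstrel debugfs files below) are purely cosmetic; checkpatch prefers literal octal modes as easier to read. For reference:

	/* S_IRUGO == (S_IRUSR | S_IRGRP | S_IROTH) == 0444
	 * S_IWUGO == (S_IWUSR | S_IWGRP | S_IWOTH) == 0222
	 * so S_IRUGO | S_IWUGO == 0666, matching the minstrel change below.
	 */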
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index f297d53a11aa..dc76bc346829 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1381,24 +1381,32 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
.parse_msg = kcm_parse_func_strparser,
.read_sock_done = kcm_read_sock_done,
};
- int err;
+ int err = 0;
csk = csock->sk;
if (!csk)
return -EINVAL;
+ lock_sock(csk);
+
/* Only allow TCP sockets to be attached for now */
if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
- csk->sk_protocol != IPPROTO_TCP)
- return -EOPNOTSUPP;
+ csk->sk_protocol != IPPROTO_TCP) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
/* Don't allow listeners or closed sockets */
- if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
- return -EOPNOTSUPP;
+ if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
- if (!psock)
- return -ENOMEM;
+ if (!psock) {
+ err = -ENOMEM;
+ goto out;
+ }
psock->mux = mux;
psock->sk = csk;
@@ -1407,7 +1415,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
err = strp_init(&psock->strp, csk, &cb);
if (err) {
kmem_cache_free(kcm_psockp, psock);
- return err;
+ goto out;
}
write_lock_bh(&csk->sk_callback_lock);
@@ -1417,9 +1425,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
*/
if (csk->sk_user_data) {
write_unlock_bh(&csk->sk_callback_lock);
+ strp_stop(&psock->strp);
strp_done(&psock->strp);
kmem_cache_free(kcm_psockp, psock);
- return -EALREADY;
+ err = -EALREADY;
+ goto out;
}
psock->save_data_ready = csk->sk_data_ready;
@@ -1455,7 +1465,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
/* Schedule RX work in case there are already bytes queued */
strp_check_rcv(&psock->strp);
- return 0;
+out:
+ release_sock(csk);
+
+ return err;
}
static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
@@ -1507,6 +1520,7 @@ static void kcm_unattach(struct kcm_psock *psock)
if (WARN_ON(psock->rx_kcm)) {
write_unlock_bh(&csk->sk_callback_lock);
+ release_sock(csk);
return;
}
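The kcm_attach() rework above holds lock_sock() on the lower socket across the whole attach, closing races with concurrent attach/unattach; to keep the lock balanced, every early return becomes goto out. A condensed sketch of the shape (bad_state() stands in for the real checks):

	int attach(struct sock *csk)
	{
		int err = 0;

		lock_sock(csk);
		if (bad_state(csk)) {
			err = -EOPNOTSUPP;
			goto out;	/* never return with the lock held */
		}
		/* ... allocate and wire up the psock ... */
	out:
		release_sock(csk);
		return err;
	}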
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 194a7483bb93..14b67dfacc4b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -111,6 +111,13 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
+#if IS_ENABLED(CONFIG_IPV6)
+static bool l2tp_sk_is_v6(struct sock *sk)
+{
+ return sk->sk_family == PF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr);
+}
+#endif
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
{
@@ -136,51 +143,6 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
}
-/* Lookup the tunnel socket, possibly involving the fs code if the socket is
- * owned by userspace. A struct sock returned from this function must be
- * released using l2tp_tunnel_sock_put once you're done with it.
- */
-static struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel)
-{
- int err = 0;
- struct socket *sock = NULL;
- struct sock *sk = NULL;
-
- if (!tunnel)
- goto out;
-
- if (tunnel->fd >= 0) {
- /* Socket is owned by userspace, who might be in the process
- * of closing it. Look the socket up using the fd to ensure
- * consistency.
- */
- sock = sockfd_lookup(tunnel->fd, &err);
- if (sock)
- sk = sock->sk;
- } else {
- /* Socket is owned by kernelspace */
- sk = tunnel->sock;
- sock_hold(sk);
- }
-
-out:
- return sk;
-}
-
-/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */
-static void l2tp_tunnel_sock_put(struct sock *sk)
-{
- struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
- if (tunnel) {
- if (tunnel->fd >= 0) {
- /* Socket is owned by userspace */
- sockfd_put(sk->sk_socket);
- }
- sock_put(sk);
- }
- sock_put(sk);
-}
-
/* Session hash list.
* The session_id SHOULD be random according to RFC2661, but several
* L2TP implementations (Cisco and Microsoft) use incrementing
@@ -193,6 +155,13 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
}
+void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
+{
+ sock_put(tunnel->sock);
+ /* the tunnel is freed in the socket destructor */
+}
+EXPORT_SYMBOL(l2tp_tunnel_free);
+
/* Lookup a tunnel. A new reference is held on the returned tunnel. */
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
{
@@ -345,13 +314,11 @@ int l2tp_session_register(struct l2tp_session *session,
}
l2tp_tunnel_inc_refcount(tunnel);
- sock_hold(tunnel->sock);
hlist_add_head_rcu(&session->global_hlist, g_head);
spin_unlock_bh(&pn->l2tp_session_hlist_lock);
} else {
l2tp_tunnel_inc_refcount(tunnel);
- sock_hold(tunnel->sock);
}
hlist_add_head(&session->hlist, head);
@@ -969,7 +936,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_tunnel *tunnel;
- tunnel = l2tp_sock_to_tunnel(sk);
+ tunnel = l2tp_tunnel(sk);
if (tunnel == NULL)
goto pass_up;
@@ -977,13 +944,10 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
tunnel->name, skb->len);
if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook))
- goto pass_up_put;
+ goto pass_up;
- sock_put(sk);
return 0;
-pass_up_put:
- sock_put(sk);
pass_up:
return 1;
}
@@ -1092,7 +1056,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->ignore_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
+ if (l2tp_sk_is_v6(tunnel->sock))
error = inet6_csk_xmit(tunnel->sock, skb, NULL);
else
#endif
@@ -1155,6 +1119,15 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
goto out_unlock;
}
+ /* Userspace may change the connection status of a userspace-provided
+ * socket at run time: we must check it under the socket lock
+ */
+ if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) {
+ kfree_skb(skb);
+ ret = NET_XMIT_DROP;
+ goto out_unlock;
+ }
+
/* Get routing info from the tunnel socket */
skb_dst_drop(skb);
skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
@@ -1174,7 +1147,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Calculate UDP checksum if configured to do so */
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
+ if (l2tp_sk_is_v6(sk))
udp6_set_csum(udp_get_no_check6_tx(sk),
skb, &inet6_sk(sk)->saddr,
&sk->sk_v6_daddr, udp_len);
@@ -1207,14 +1180,12 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
static void l2tp_tunnel_destruct(struct sock *sk)
{
struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
- struct l2tp_net *pn;
if (tunnel == NULL)
goto end;
l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name);
-
/* Disable udp encapsulation */
switch (tunnel->encap) {
case L2TP_ENCAPTYPE_UDP:
@@ -1231,18 +1202,11 @@ static void l2tp_tunnel_destruct(struct sock *sk)
sk->sk_destruct = tunnel->old_sk_destruct;
sk->sk_user_data = NULL;
- /* Remove the tunnel struct from the tunnel list */
- pn = l2tp_pernet(tunnel->l2tp_net);
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_del_rcu(&tunnel->list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
- tunnel->sock = NULL;
- l2tp_tunnel_dec_refcount(tunnel);
-
/* Call the original destructor */
if (sk->sk_destruct)
(*sk->sk_destruct)(sk);
+
+ kfree_rcu(tunnel, rcu);
end:
return;
}
@@ -1303,49 +1267,43 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
/* Tunnel socket destroy hook for UDP encapsulation */
static void l2tp_udp_encap_destroy(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
- if (tunnel) {
- l2tp_tunnel_closeall(tunnel);
- sock_put(sk);
- }
+ struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
+
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
}
/* Workqueue tunnel deletion function */
static void l2tp_tunnel_del_work(struct work_struct *work)
{
- struct l2tp_tunnel *tunnel = NULL;
- struct socket *sock = NULL;
- struct sock *sk = NULL;
-
- tunnel = container_of(work, struct l2tp_tunnel, del_work);
+ struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel,
+ del_work);
+ struct sock *sk = tunnel->sock;
+ struct socket *sock = sk->sk_socket;
+ struct l2tp_net *pn;
l2tp_tunnel_closeall(tunnel);
- sk = l2tp_tunnel_sock_lookup(tunnel);
- if (!sk)
- goto out;
-
- sock = sk->sk_socket;
-
- /* If the tunnel socket was created by userspace, then go through the
- * inet layer to shut the socket down, and let userspace close it.
- * Otherwise, if we created the socket directly within the kernel, use
+ /* If the tunnel socket was created within the kernel, use
* the sk API to release it here.
- * In either case the tunnel resources are freed in the socket
- * destructor when the tunnel socket goes away.
*/
- if (tunnel->fd >= 0) {
- if (sock)
- inet_shutdown(sock, 2);
- } else {
+ if (tunnel->fd < 0) {
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}
}
- l2tp_tunnel_sock_put(sk);
-out:
+ /* Remove the tunnel struct from the tunnel list */
+ pn = l2tp_pernet(tunnel->l2tp_net);
+ spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+ list_del_rcu(&tunnel->list);
+ spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+ /* drop initial ref */
+ l2tp_tunnel_dec_refcount(tunnel);
+
+ /* drop workqueue ref */
l2tp_tunnel_dec_refcount(tunnel);
}
@@ -1515,9 +1473,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
encap = cfg->encap;
/* Quick sanity checks */
+ err = -EPROTONOSUPPORT;
+ if (sk->sk_type != SOCK_DGRAM) {
+ pr_debug("tunl %hu: fd %d wrong socket type\n",
+ tunnel_id, fd);
+ goto err;
+ }
switch (encap) {
case L2TP_ENCAPTYPE_UDP:
- err = -EPROTONOSUPPORT;
if (sk->sk_protocol != IPPROTO_UDP) {
pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
@@ -1525,7 +1488,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
}
break;
case L2TP_ENCAPTYPE_IP:
- err = -EPROTONOSUPPORT;
if (sk->sk_protocol != IPPROTO_L2TP) {
pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
@@ -1565,24 +1527,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
if (cfg != NULL)
tunnel->debug = cfg->debug;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- if (ipv6_addr_v4mapped(&np->saddr) &&
- ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
- struct inet_sock *inet = inet_sk(sk);
-
- tunnel->v4mapped = true;
- inet->inet_saddr = np->saddr.s6_addr32[3];
- inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3];
- inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3];
- } else {
- tunnel->v4mapped = false;
- }
- }
-#endif
-
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
if (encap == L2TP_ENCAPTYPE_UDP) {
@@ -1598,13 +1542,22 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
sk->sk_user_data = tunnel;
}
+ /* Bump the reference count. The tunnel context is deleted
+ * only when this drops to zero. A reference is also held on
+ * the tunnel socket to ensure that it is not released while
+ * the tunnel is extant. Must be done before sk_destruct is
+ * set.
+ */
+ refcount_set(&tunnel->ref_count, 1);
+ sock_hold(sk);
+ tunnel->sock = sk;
+ tunnel->fd = fd;
+
/* Hook on the tunnel socket destructor so that we can cleanup
* if the tunnel socket goes away.
*/
tunnel->old_sk_destruct = sk->sk_destruct;
sk->sk_destruct = &l2tp_tunnel_destruct;
- tunnel->sock = sk;
- tunnel->fd = fd;
lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
sk->sk_allocation = GFP_ATOMIC;
@@ -1614,11 +1567,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
/* Add tunnel to our list */
INIT_LIST_HEAD(&tunnel->list);
-
- /* Bump the reference count. The tunnel context is deleted
- * only when this drops to zero. Must be done before list insertion
- */
- refcount_set(&tunnel->ref_count, 1);
spin_lock_bh(&pn->l2tp_tunnel_list_lock);
list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
@@ -1659,8 +1607,6 @@ void l2tp_session_free(struct l2tp_session *session)
if (tunnel) {
BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
- sock_put(tunnel->sock);
- session->tunnel = NULL;
l2tp_tunnel_dec_refcount(tunnel);
}
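On the l2tp_sk_is_v6() helper introduced above: a socket with sk_family == PF_INET6 still carries IPv4 traffic when its peer is a v4-mapped address, and since userspace may reconnect its tunnel socket at any time, the old create-time tunnel->v4mapped cache could go stale; the helper re-evaluates at each use. Simplified, ipv6_addr_v4mapped() tests for the ::ffff:a.b.c.d form:

	/* sketch, equivalent to the stock helper modulo word-size tricks */
	static inline bool addr_is_v4mapped(const struct in6_addr *a)
	{
		return a->s6_addr32[0] == 0 &&
		       a->s6_addr32[1] == 0 &&
		       a->s6_addr32[2] == htonl(0x0000ffff);
	}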
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9bbee90e9963..2718d0b284d0 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -188,9 +188,6 @@ struct l2tp_tunnel {
struct sock *sock; /* Parent socket */
int fd; /* Parent fd, if tunnel socket
* was created by userspace */
-#if IS_ENABLED(CONFIG_IPV6)
- bool v4mapped;
-#endif
struct work_struct del_work;
@@ -214,27 +211,8 @@ static inline void *l2tp_session_priv(struct l2tp_session *session)
return &session->priv[0];
}
-static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk)
-{
- struct l2tp_tunnel *tunnel;
-
- if (sk == NULL)
- return NULL;
-
- sock_hold(sk);
- tunnel = (struct l2tp_tunnel *)(sk->sk_user_data);
- if (tunnel == NULL) {
- sock_put(sk);
- goto out;
- }
-
- BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-out:
- return tunnel;
-}
-
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
+void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
@@ -283,7 +261,7 @@ static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
{
if (refcount_dec_and_test(&tunnel->ref_count))
- kfree_rcu(tunnel, rcu);
+ l2tp_tunnel_free(tunnel);
}
/* Session reference counts. Incremented when code obtains a reference
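The refcount change above relies on refcount_dec_and_test() returning true only for the caller that drops the last reference, so exactly one path reaches l2tp_tunnel_free(); that in turn puts the socket reference taken at tunnel creation, and the tunnel memory itself is released from the socket destructor:

	if (refcount_dec_and_test(&tunnel->ref_count))	/* true once, on final put */
		l2tp_tunnel_free(tunnel);	/* sock_put(); destructor frees */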
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4614585e1720..a9c05b2bc1b0 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -234,17 +234,13 @@ static void l2tp_ip_close(struct sock *sk, long timeout)
static void l2tp_ip_destroy_sock(struct sock *sk)
{
struct sk_buff *skb;
- struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel = sk->sk_user_data;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
kfree_skb(skb);
- if (tunnel) {
- l2tp_tunnel_closeall(tunnel);
- sock_put(sk);
- }
-
- sk_refcnt_debug_dec(sk);
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
}
static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index efea58b66295..957369192ca1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -248,16 +248,14 @@ static void l2tp_ip6_close(struct sock *sk, long timeout)
static void l2tp_ip6_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel = sk->sk_user_data;
lock_sock(sk);
ip6_flush_pending_frames(sk);
release_sock(sk);
- if (tunnel) {
- l2tp_tunnel_closeall(tunnel);
- sock_put(sk);
- }
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
inet6_destroy_sock(sk);
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 99a03c72db4f..d6deca11da19 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -416,20 +416,28 @@ abort:
* Session (and tunnel control) socket create/destroy.
*****************************************************************************/
+static void pppol2tp_put_sk(struct rcu_head *head)
+{
+ struct pppol2tp_session *ps;
+
+ ps = container_of(head, typeof(*ps), rcu);
+ sock_put(ps->__sk);
+}
+
/* Called by l2tp_core when a session socket is being closed.
*/
static void pppol2tp_session_close(struct l2tp_session *session)
{
- struct sock *sk;
-
- BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+ struct pppol2tp_session *ps;
- sk = pppol2tp_session_get_sock(session);
- if (sk) {
- if (sk->sk_socket)
- inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
- sock_put(sk);
- }
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ if (ps->__sk)
+ call_rcu(&ps->rcu, pppol2tp_put_sk);
+ mutex_unlock(&ps->sk_lock);
}
/* Really kill the session socket. (Called from sock_put() if
@@ -449,14 +457,6 @@ static void pppol2tp_session_destruct(struct sock *sk)
}
}
-static void pppol2tp_put_sk(struct rcu_head *head)
-{
- struct pppol2tp_session *ps;
-
- ps = container_of(head, typeof(*ps), rcu);
- sock_put(ps->__sk);
-}
-
/* Called when the PPPoX socket (session) is closed.
*/
static int pppol2tp_release(struct socket *sock)
@@ -480,26 +480,17 @@ static int pppol2tp_release(struct socket *sock)
sock_orphan(sk);
sock->sk = NULL;
+ /* If the socket is associated with a session,
+ * l2tp_session_delete will call pppol2tp_session_close which
+ * will drop the session's ref on the socket.
+ */
session = pppol2tp_sock_to_session(sk);
-
- if (session != NULL) {
- struct pppol2tp_session *ps;
-
+ if (session) {
l2tp_session_delete(session);
-
- ps = l2tp_session_priv(session);
- mutex_lock(&ps->sk_lock);
- ps->__sk = rcu_dereference_protected(ps->sk,
- lockdep_is_held(&ps->sk_lock));
- RCU_INIT_POINTER(ps->sk, NULL);
- mutex_unlock(&ps->sk_lock);
- call_rcu(&ps->rcu, pppol2tp_put_sk);
-
- /* Rely on the sock_put() call at the end of the function for
- * dropping the reference held by pppol2tp_sock_to_session().
- * The last reference will be dropped by pppol2tp_put_sk().
- */
+ /* drop the ref obtained by pppol2tp_sock_to_session */
+ sock_put(sk);
}
+
release_sock(sk);
/* This will delete the session context via
@@ -796,6 +787,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
out_no_ppp:
/* This is how we get the session context from the socket. */
+ sock_hold(sk);
sk->sk_user_data = session;
rcu_assign_pointer(ps->sk, sk);
mutex_unlock(&ps->sk_lock);
@@ -1750,7 +1742,7 @@ static __net_init int pppol2tp_init_net(struct net *net)
struct proc_dir_entry *pde;
int err = 0;
- pde = proc_create("pppol2tp", S_IRUGO, net->proc_net,
+ pde = proc_create("pppol2tp", 0444, net->proc_net,
&pppol2tp_proc_fops);
if (!pde) {
err = -ENOMEM;
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 66821e8a2b7a..62ea0aed94b4 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -249,11 +249,11 @@ int __init llc_proc_init(void)
if (!llc_proc_dir)
goto out;
- p = proc_create("socket", S_IRUGO, llc_proc_dir, &llc_seq_socket_fops);
+ p = proc_create("socket", 0444, llc_proc_dir, &llc_seq_socket_fops);
if (!p)
goto out_socket;
- p = proc_create("core", S_IRUGO, llc_proc_dir, &llc_seq_core_fops);
+ p = proc_create("core", 0444, llc_proc_dir, &llc_seq_core_fops);
if (!p)
goto out_core;
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index d90928f50226..a7f7b8ff4729 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap,
const struct llc_addr *laddr,
struct sk_buff *skb)
{
- int i = 0, count = 256 / sizeof(struct sock *);
- struct sock *sk, *stack[count];
+ int i = 0;
+ struct sock *sk;
+ struct sock *stack[256 / sizeof(struct sock *)];
struct llc_sock *llc;
struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex);
@@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap,
continue;
sock_hold(sk);
- if (i < count)
+ if (i < ARRAY_SIZE(stack))
stack[i++] = sk;
else {
llc_do_mcast(sap, skb, stack, i);
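The llc_sap_mcast() change above removes a variable-length array: count was a non-constant local, so stack[count] was formally a VLA even though its value never varied. Sizing the array with a constant expression keeps the same 256-byte footprint and lets the bound be written with ARRAY_SIZE():

	/* ARRAY_SIZE() per include/linux/kernel.h, simplified */
	#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))

	struct sock *stack[256 / sizeof(struct sock *)];	/* 32 entries on LP64 */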
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index a8b1616cec41..e83c19d4c292 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -8,6 +8,7 @@
* Copyright 2007, Michael Wu <[email protected]>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -297,16 +298,23 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
+ struct tid_ampdu_rx *tid_rx;
+
ht_dbg_ratelimited(sta->sdata,
"updated AddBA Req from %pM on tid %u\n",
sta->sta.addr, tid);
/* We have no API to update the timeout value in the
- * driver so reject the timeout update.
+ * driver so reject the timeout update if the timeout
+ * changed. If it did not change, i.e., no real update,
+ * just reply with success.
*/
- status = WLAN_STATUS_REQUEST_DECLINED;
- ieee80211_send_addba_resp(sta->sdata, sta->sta.addr,
- tid, dialog_token, status,
- 1, buf_size, timeout);
+ rcu_read_lock();
+ tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
+ if (tid_rx && tid_rx->timeout == timeout)
+ status = WLAN_STATUS_SUCCESS;
+ else
+ status = WLAN_STATUS_REQUEST_DECLINED;
+ rcu_read_unlock();
goto end;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 46028e12e216..fd68f6fb02d7 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2685,6 +2685,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
ieee80211_recalc_ps(local);
ieee80211_recalc_ps_vif(sdata);
+ ieee80211_check_fast_rx_iface(sdata);
return 0;
}
@@ -2892,7 +2893,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
}
if (beacon->probe_resp_len) {
new_beacon->probe_resp_len = beacon->probe_resp_len;
- beacon->probe_resp = pos;
+ new_beacon->probe_resp = pos;
memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
pos += beacon->probe_resp_len;
}
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1f466d12a6bc..b5adf3625d16 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -212,6 +212,8 @@ static const char *hw_flag_names[] = {
FLAG(REPORTS_LOW_ACK),
FLAG(SUPPORTS_TX_FRAG),
FLAG(SUPPORTS_TDLS_BUFFER_STA),
+ FLAG(DEAUTH_NEED_MGD_TX_PREP),
+ FLAG(DOESNT_SUPPORT_QOS_NDP),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 444ea8d127fe..4105081dc1df 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -160,12 +160,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
sta->cparams.ecn ? "yes" : "no");
p += scnprintf(p,
bufsz+buf-p,
- "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n");
+ "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz+buf-p,
- "%d %d %u %u %u %u %u %u %u %u %u\n",
+ "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
txqi->tin.backlog_bytes,
@@ -176,7 +176,11 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
txqi->tin.overlimit,
txqi->tin.collisions,
txqi->tin.tx_bytes,
- txqi->tin.tx_packets);
+ txqi->tin.tx_packets,
+ txqi->flags,
+ txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
+ txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
+ txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
}
rcu_read_unlock();
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 26900025de2f..ae9c33cd8ada 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1467,7 +1467,7 @@ struct ieee802_11_elems {
const struct ieee80211_timeout_interval_ie *timeout_int;
const u8 *opmode_notif;
const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
- const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
+ struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie;
/* length of them, respectively */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5fe01f82df12..d13ba064951f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1324,8 +1324,7 @@ static void ieee80211_iface_work(struct work_struct *work)
mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, mgmt->sa);
if (sta) {
- u16 tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
__ieee80211_stop_rx_ba_session(
sta, tid, WLAN_BACK_RECIPIENT,
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 73ac607beb5d..6a381cbe1e33 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1255,13 +1255,12 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
}
static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_mgmt *mgmt, size_t len)
+ struct ieee80211_mgmt *mgmt, size_t len,
+ struct ieee802_11_elems *elems)
{
struct ieee80211_mgmt *mgmt_fwd;
struct sk_buff *skb;
struct ieee80211_local *local = sdata->local;
- u8 *pos = mgmt->u.action.u.chan_switch.variable;
- size_t offset_ttl;
skb = dev_alloc_skb(local->tx_headroom + len);
if (!skb)
@@ -1269,13 +1268,9 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
skb_reserve(skb, local->tx_headroom);
mgmt_fwd = skb_put(skb, len);
- /* offset_ttl is based on whether the secondary channel
- * offset is available or not. Subtract 1 from the mesh TTL
- * and disable the initiator flag before forwarding.
- */
- offset_ttl = (len < 42) ? 7 : 10;
- *(pos + offset_ttl) -= 1;
- *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
+ elems->mesh_chansw_params_ie->mesh_ttl--;
+ elems->mesh_chansw_params_ie->mesh_flags &=
+ ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
memcpy(mgmt_fwd, mgmt, len);
eth_broadcast_addr(mgmt_fwd->da);
@@ -1323,7 +1318,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
/* forward or re-broadcast the CSA frame */
if (fwd_csa) {
- if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0)
+ if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0)
mcsa_dbg(sdata, "Failed to forward the CSA frame");
}
}
diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index 408649bd4702..37e172701a63 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -35,7 +35,7 @@ static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key,
da = ieee80211_get_DA(hdr);
sa = ieee80211_get_SA(hdr);
if (ieee80211_is_data_qos(hdr->frame_control))
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
else
tid = 0;
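The ieee80211_get_tid() conversions in michael.c above and in mlme.c, the minstrel code, rx.c, tx.c and wpa.c below all collapse the same open-coded pair of steps. The helper is presumably of this shape (sketch; the TID is the low four bits of the 16-bit QoS Control field):

	static inline u8 get_tid(struct ieee80211_hdr *hdr)
	{
		u8 *qc = ieee80211_get_qos_ctl(hdr);

		return qc[0] & IEEE80211_QOS_CTL_TID_MASK;
	}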
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 39b660b9a908..fe4aefb06d9f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -7,6 +7,7 @@
* Copyright 2007, Michael Wu <[email protected]>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -896,7 +897,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
+ skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
+ !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
if (!skb)
return;
@@ -2008,9 +2010,22 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ieee80211_flush_queues(local, sdata, true);
/* deauthenticate/disassociate now */
- if (tx || frame_buf)
+ if (tx || frame_buf) {
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+ /*
+ * In multi channel scenarios guarantee that the virtual
+ * interface is granted immediate airtime to transmit the
+ * deauthentication frame by calling mgd_prepare_tx, if the
+ * driver requested so.
+ */
+ if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
+ !ifmgd->have_beacon)
+ drv_mgd_prepare_tx(sdata->local, sdata);
+
ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
reason, tx, frame_buf);
+ }
/* flush out frame - make sure the deauth was actually sent */
if (tx)
@@ -2151,7 +2166,7 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
u16 tx_time)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u16 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
int ac = ieee80211_ac_from_tid(tid);
struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
unsigned long now = jiffies;
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 9766c1cc4b0a..8221bc5582ab 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -690,7 +690,7 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
#ifdef CONFIG_MAC80211_DEBUGFS
mp->fixed_rate_idx = (u32) -1;
mp->dbg_fixed_rate = debugfs_create_u32("fixed_rate_idx",
- S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx);
+ 0666, debugfsdir, &mp->fixed_rate_idx);
#endif
minstrel_init_cck_rates(mp);
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 36fc971deb86..9ad7d63d3e5b 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -214,11 +214,11 @@ minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
{
struct minstrel_sta_info *mi = priv_sta;
- mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi,
- &minstrel_stat_fops);
+ mi->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, mi,
+ &minstrel_stat_fops);
- mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, dir,
- mi, &minstrel_stat_csv_fops);
+ mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, mi,
+ &minstrel_stat_csv_fops);
}
void
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 4a5bdad9f303..fb586b6e5d49 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -669,7 +669,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
return;
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
if (likely(sta->ampdu_mlme.tid_tx[tid]))
return;
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 7d969e300fb3..bfcc03152dc6 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -303,10 +303,10 @@ minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
{
struct minstrel_ht_sta_priv *msp = priv_sta;
- msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp,
- &minstrel_ht_stat_fops);
- msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO,
- dir, msp, &minstrel_ht_stat_csv_fops);
+ msp->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, msp,
+ &minstrel_ht_stat_fops);
+ msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, msp,
+ &minstrel_ht_stat_csv_fops);
}
void
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fd580614085b..9c898a3688c6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -439,6 +439,10 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN)
+ flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_EOF_BIT)
+ flags |= IEEE80211_RADIOTAP_AMPDU_EOF;
put_unaligned_le16(flags, pos);
pos += 2;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
@@ -1185,7 +1189,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
ack_policy = *ieee80211_get_qos_ctl(hdr) &
IEEE80211_QOS_CTL_ACK_POLICY_MASK;
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_agg_rx) {
@@ -1524,9 +1528,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
ieee80211_has_pm(hdr->frame_control) &&
(ieee80211_is_data_qos(hdr->frame_control) ||
ieee80211_is_qos_nullfunc(hdr->frame_control))) {
- u8 tid;
-
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u8 tid = ieee80211_get_tid(hdr);
ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid);
}
@@ -2351,39 +2353,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
}
static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
{
struct net_device *dev = rx->sdata->dev;
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
__le16 fc = hdr->frame_control;
struct sk_buff_head frame_list;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
struct ethhdr ethhdr;
const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
- if (unlikely(!ieee80211_is_data(fc)))
- return RX_CONTINUE;
-
- if (unlikely(!ieee80211_is_data_present(fc)))
- return RX_DROP_MONITOR;
-
- if (!(status->rx_flags & IEEE80211_RX_AMSDU))
- return RX_CONTINUE;
-
if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
- switch (rx->sdata->vif.type) {
- case NL80211_IFTYPE_AP_VLAN:
- if (!rx->sdata->u.vlan.sta)
- return RX_DROP_UNUSABLE;
- break;
- case NL80211_IFTYPE_STATION:
- if (!rx->sdata->u.mgd.use_4addr)
- return RX_DROP_UNUSABLE;
- break;
- default:
- return RX_DROP_UNUSABLE;
- }
check_da = NULL;
check_sa = NULL;
} else switch (rx->sdata->vif.type) {
@@ -2403,15 +2383,13 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
break;
}
- if (is_multicast_ether_addr(hdr->addr1))
- return RX_DROP_UNUSABLE;
-
skb->dev = dev;
__skb_queue_head_init(&frame_list);
if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
rx->sdata->vif.addr,
- rx->sdata->vif.type))
+ rx->sdata->vif.type,
+ data_offset))
return RX_DROP_UNUSABLE;
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2433,6 +2411,44 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
return RX_QUEUED;
}
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+{
+ struct sk_buff *skb = rx->skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ __le16 fc = hdr->frame_control;
+
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data(fc)))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data_present(fc)))
+ return RX_DROP_MONITOR;
+
+ if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+ switch (rx->sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ if (!rx->sdata->u.vlan.sta)
+ return RX_DROP_UNUSABLE;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (!rx->sdata->u.mgd.use_4addr)
+ return RX_DROP_UNUSABLE;
+ break;
+ default:
+ return RX_DROP_UNUSABLE;
+ }
+ }
+
+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_UNUSABLE;
+
+ return __ieee80211_rx_h_amsdu(rx, 0);
+}
+
#ifdef CONFIG_MAC80211_MESH
static ieee80211_rx_result
ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
@@ -2533,11 +2549,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
fwd_skb = skb_copy_expand(skb, local->tx_headroom +
sdata->encrypt_headroom, 0, GFP_ATOMIC);
- if (!fwd_skb) {
- net_info_ratelimited("%s: failed to clone mesh frame\n",
- sdata->name);
+ if (!fwd_skb)
goto out;
- }
fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data;
fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
@@ -2848,6 +2861,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
case WLAN_HT_ACTION_SMPS: {
struct ieee80211_supported_band *sband;
enum ieee80211_smps_mode smps_mode;
+ struct sta_opmode_info sta_opmode = {};
/* convert to HT capability */
switch (mgmt->u.action.u.ht_smps.smps_control) {
@@ -2868,17 +2882,24 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
if (rx->sta->sta.smps_mode == smps_mode)
goto handled;
rx->sta->sta.smps_mode = smps_mode;
+ sta_opmode.smps_mode = smps_mode;
+ sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
sband = rx->local->hw.wiphy->bands[status->band];
rate_control_rate_update(local, sband, rx->sta,
IEEE80211_RC_SMPS_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ rx->sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
goto handled;
}
case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
struct ieee80211_supported_band *sband;
u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
enum ieee80211_sta_rx_bandwidth max_bw, new_bw;
+ struct sta_opmode_info sta_opmode = {};
/* If it doesn't support 40 MHz it can't change ... */
if (!(rx->sta->sta.ht_cap.cap &
@@ -2899,9 +2920,15 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
rx->sta->sta.bandwidth = new_bw;
sband = rx->local->hw.wiphy->bands[status->band];
+ sta_opmode.bw = new_bw;
+ sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
rate_control_rate_update(local, sband, rx->sta,
IEEE80211_RC_BW_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ rx->sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
goto handled;
}
default:
@@ -3731,15 +3758,6 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- /* 4-addr is harder to deal with, later maybe */
- if (sdata->u.mgd.use_4addr)
- goto clear;
- /* software powersave is a huge mess, avoid all of it */
- if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
- goto clear;
- if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
- !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
- goto clear;
if (sta->sta.tdls) {
fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
@@ -3751,6 +3769,23 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
fastrx.expected_ds_bits =
cpu_to_le16(IEEE80211_FCTL_FROMDS);
}
+
+ if (sdata->u.mgd.use_4addr && !sta->sta.tdls) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_TODS);
+ fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ }
+
+ if (!sdata->u.mgd.powersave)
+ break;
+
+ /* software powersave is a huge mess, avoid all of it */
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
+ goto clear;
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+ !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
+ goto clear;
break;
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
@@ -3767,6 +3802,15 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN ||
!sdata->u.vlan.sta);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+ sdata->u.vlan.sta) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_FROMDS);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ fastrx.internal_forward = 0;
+ }
+
break;
default:
goto clear;
@@ -3865,7 +3909,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct sta_info *sta = rx->sta;
int orig_len = skb->len;
- int snap_offs = ieee80211_hdrlen(hdr->frame_control);
+ int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ int snap_offs = hdrlen;
struct {
u8 snap[sizeof(rfc1042_header)];
__be16 proto;
@@ -3896,10 +3941,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
(status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS)
return false;
- /* we don't deal with A-MSDU deaggregation here */
- if (status->rx_flags & IEEE80211_RX_AMSDU)
- return false;
-
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return false;
@@ -3921,7 +3962,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS |
IEEE80211_FCTL_TODS)) !=
fast_rx->expected_ds_bits)
- goto drop;
+ return false;
/* assign the key to drop unencrypted frames (later)
* and strip the IV/MIC if necessary
@@ -3931,21 +3972,24 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
snap_offs += IEEE80211_CCMP_HDR_LEN;
}
- if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
- goto drop;
- payload = (void *)(skb->data + snap_offs);
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
+ if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
+ goto drop;
- if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
- return false;
+ payload = (void *)(skb->data + snap_offs);
- /* Don't handle these here since they require special code.
- * Accept AARP and IPX even though they should come with a
- * bridge-tunnel header - but if we get them this way then
- * there's little point in discarding them.
- */
- if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
- payload->proto == fast_rx->control_port_protocol))
- return false;
+ if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
+ return false;
+
+ /* Don't handle these here since they require special code.
+ * Accept AARP and IPX even though they should come with a
+ * bridge-tunnel header - but if we get them this way then
+ * there's little point in discarding them.
+ */
+ if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
+ payload->proto == fast_rx->control_port_protocol))
+ return false;
+ }
/* after this point, don't punt to the slowpath! */
@@ -3959,12 +4003,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
}
/* statistics part of ieee80211_rx_h_sta_process() */
- stats->last_rx = jiffies;
- stats->last_rate = sta_stats_encode_rate(status);
-
- stats->fragments++;
- stats->packets++;
-
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
if (!fast_rx->uses_rss)
@@ -3993,6 +4031,20 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
if (rx->key && !ieee80211_has_protected(hdr->frame_control))
goto drop;
+ if (status->rx_flags & IEEE80211_RX_AMSDU) {
+ if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) !=
+ RX_QUEUED)
+ goto drop;
+
+ return true;
+ }
+
+ stats->last_rx = jiffies;
+ stats->last_rate = sta_stats_encode_rate(status);
+
+ stats->fragments++;
+ stats->packets++;
+
/* do the header conversion - first grab the addresses */
ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
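Summary of the fast-rx changes above: A-MSDUs no longer bounce to the slow path; the statistics updates are deferred until deaggregation can no longer fail, and the frame is handed to __ieee80211_rx_h_amsdu() with the offset of the A-MSDU payload relative to the 802.11 header:

	/* snap_offs equals hdrlen, plus IEEE80211_CCMP_HDR_LEN when encrypted,
	 * so (snap_offs - hdrlen) is what must be skipped past the header
	 * before the first A-MSDU subframe.
	 */
	__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen);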
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index ee0181778a42..029334835747 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -8,6 +8,7 @@
* Copyright 2007, Michael Wu <[email protected]>
* Copyright 2007-2008, Intel Corporation
* Copyright 2008, Johannes Berg <[email protected]>
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -27,7 +28,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
u32 sta_flags, u8 *bssid,
struct ieee80211_csa_ie *csa_ie)
{
- enum nl80211_band new_band;
+ enum nl80211_band new_band = current_band;
int new_freq;
u8 new_chan_no;
struct ieee80211_channel *new_chan;
@@ -55,15 +56,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
elems->ext_chansw_ie->new_operating_class,
&new_band)) {
sdata_info(sdata,
- "cannot understand ECSA IE operating class %d, disconnecting\n",
+ "cannot understand ECSA IE operating class, %d, ignoring\n",
elems->ext_chansw_ie->new_operating_class);
- return -EINVAL;
}
new_chan_no = elems->ext_chansw_ie->new_ch_num;
csa_ie->count = elems->ext_chansw_ie->count;
csa_ie->mode = elems->ext_chansw_ie->mode;
} else if (elems->ch_switch_ie) {
- new_band = current_band;
new_chan_no = elems->ch_switch_ie->new_ch_num;
csa_ie->count = elems->ch_switch_ie->count;
csa_ie->mode = elems->ch_switch_ie->mode;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 0c5627f8a104..655c3d8b0d80 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -314,7 +314,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (ieee80211_hw_check(hw, USES_RSS)) {
sta->pcpu_rx_stats =
- alloc_percpu(struct ieee80211_sta_rx_stats);
+ alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp);
if (!sta->pcpu_rx_stats)
goto free;
}
@@ -433,6 +433,7 @@ free_txq:
if (sta->sta.txq[0])
kfree(to_txq_info(sta->sta.txq[0]));
free:
+ free_percpu(sta->pcpu_rx_stats);
#ifdef CONFIG_MAC80211_MESH
kfree(sta->mesh);
#endif
@@ -2287,6 +2288,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
sinfo->expected_throughput = thr;
}
+
+ if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
+ sta->status_stats.ack_signal_filled) {
+ sinfo->ack_signal = sta->status_stats.last_ack_signal;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
+ }
}
u32 sta_get_expected_throughput(struct sta_info *sta)
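Two details in the sta_info.c hunks above: alloc_percpu() is hard-wired to GFP_KERNEL, so passing the caller's gfp through alloc_percpu_gfp() matters because sta_info_alloc() can run in atomic context; and the error path may call free_percpu() unconditionally, since free_percpu(NULL) is a no-op:

	sta->pcpu_rx_stats =
		alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp);
	...
	free:
		free_percpu(sta->pcpu_rx_stats);	/* safe even when NULL */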
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index cd53619435b6..f64eb86ca64b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -548,6 +548,8 @@ struct sta_info {
u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
unsigned long last_ack;
+ s8 last_ack_signal;
+ bool ack_signal_filled;
} status_stats;
/* Updated from TX path only, no locking requirements */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index da7427a41529..743e89c5926c 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -187,9 +187,16 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
struct ieee80211_mgmt *mgmt = (void *) skb->data;
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb);
- if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
sta->status_stats.last_ack = jiffies;
+ if (txinfo->status.is_valid_ack_signal) {
+ sta->status_stats.last_ack_signal =
+ (s8)txinfo->status.ack_signal;
+ sta->status_stats.ack_signal_filled = true;
+ }
+ }
if (ieee80211_is_data_qos(mgmt->frame_control)) {
struct ieee80211_hdr *hdr = (void *) skb->data;
@@ -487,6 +494,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
ieee80211_is_qos_nullfunc(hdr->frame_control))
cfg80211_probe_status(sdata->dev, hdr->addr1,
cookie, acked,
+ info->status.ack_signal,
+ info->status.is_valid_ack_signal,
GFP_ATOMIC);
else
cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 25904af38839..933c67b5f845 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -797,7 +797,6 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
- u8 *qc;
int tid;
/*
@@ -844,9 +843,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
return TX_CONTINUE;
/* include per-STA, per-TID sequence counter */
-
- qc = ieee80211_get_qos_ctl(hdr);
- tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tx->sta->tx_stats.msdu[tid]++;
hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
@@ -1158,7 +1155,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
int tid;
- u8 *qc;
memset(tx, 0, sizeof(*tx));
tx->skb = skb;
@@ -1198,8 +1194,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
!ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) {
struct tid_ampdu_tx *tid_tx;
- qc = ieee80211_get_qos_ctl(hdr);
- tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
if (tid_tx) {
@@ -1921,7 +1916,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_hdr *hdr;
int headroom;
bool may_encrypt;
@@ -3574,6 +3569,14 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
if (!IS_ERR_OR_NULL(sta)) {
struct ieee80211_fast_tx *fast_tx;
+ /* We need a bit of data queued to build aggregates properly, so
+ * instruct the TCP stack to allow more than a single ms of data
+ * to be queued in the stack. The value is a bit-shift of 1
+ * second, so 8 is ~4ms of queued data. Only affects local TCP
+ * sockets.
+ */
+ sk_pacing_shift_update(skb->sk, 8);
+
fast_tx = rcu_dereference(sta->fast_tx);
if (fast_tx &&
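On the sk_pacing_shift_update() call above: the shift expresses the queueing budget as a power-of-two fraction of one second of data at the socket's pacing rate, so shift 8 permits pacing_rate / 256 bytes in flight, roughly 3.9 ms. TCP's small-queues check consumes it along these lines (sketch of the era's logic in tcp_write_xmit()):

	/* allow at least two skbs, else about (1 s >> pacing_shift) of data */
	limit = max(2 * skb->truesize,
		    sk->sk_pacing_rate >> sk->sk_pacing_shift);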
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index b9276ac849fa..5714dee76b12 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -447,6 +447,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
enum nl80211_band band)
{
enum ieee80211_sta_rx_bandwidth new_bw;
+ struct sta_opmode_info sta_opmode = {};
u32 changed = 0;
u8 nss;
@@ -460,7 +461,9 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
if (sta->sta.rx_nss != nss) {
sta->sta.rx_nss = nss;
+ sta_opmode.rx_nss = nss;
changed |= IEEE80211_RC_NSS_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
}
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
@@ -481,9 +484,15 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
new_bw = ieee80211_sta_cur_vht_bw(sta);
if (new_bw != sta->sta.bandwidth) {
sta->sta.bandwidth = new_bw;
+ sta_opmode.bw = new_bw;
changed |= IEEE80211_RC_BW_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
}
+ if (sta_opmode.changed)
+ cfg80211_sta_opmode_change_notify(sdata->dev, sta->addr,
+ &sta_opmode, GFP_KERNEL);
+
return changed;
}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 785056cb76f6..58d0b258b684 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -340,7 +340,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
a4_included = ieee80211_has_a4(hdr->frame_control);
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
@@ -601,8 +601,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
aad[23] = 0;
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
@@ -867,8 +866,7 @@ ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index e545a3c9365f..7a4de6d618b1 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -122,7 +122,7 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->len <= mtu)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
diff --git a/net/ncsi/Makefile b/net/ncsi/Makefile
index dd12b564f2e7..436ef68331f2 100644
--- a/net/ncsi/Makefile
+++ b/net/ncsi/Makefile
@@ -1,4 +1,4 @@
#
# Makefile for NCSI API
#
-obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o
+obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o ncsi-netlink.o
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index d30f7bd741d0..8da84312cd3b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -276,6 +276,8 @@ struct ncsi_dev_priv {
unsigned int package_num; /* Number of packages */
struct list_head packages; /* List of packages */
struct ncsi_channel *hot_channel; /* Channel was ever active */
+ struct ncsi_package *force_package; /* Force a specific package */
+ struct ncsi_channel *force_channel; /* Force a specific channel */
struct ncsi_request requests[256]; /* Request table */
unsigned int request_id; /* Last used request ID */
#define NCSI_REQ_START_IDX 1
@@ -318,6 +320,7 @@ extern spinlock_t ncsi_dev_lock;
list_for_each_entry_rcu(nc, &np->channels, node)
/* Resources */
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index);
int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data);
int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data);
int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index c989211bbabc..c3695ba0cf94 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -12,7 +12,6 @@
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/netlink.h>
#include <net/ncsi.h>
#include <net/net_namespace.h>
@@ -23,6 +22,7 @@
#include "internal.h"
#include "ncsi-pkt.h"
+#include "ncsi-netlink.h"
LIST_HEAD(ncsi_dev_list);
DEFINE_SPINLOCK(ncsi_dev_lock);
@@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table)
return sizes[table];
}
-static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
{
struct ncsi_channel_filter *ncf;
int size;
@@ -965,20 +965,37 @@ error:
static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
{
- struct ncsi_package *np;
- struct ncsi_channel *nc, *found, *hot_nc;
+ struct ncsi_package *np, *force_package;
+ struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
struct ncsi_channel_mode *ncm;
unsigned long flags;
spin_lock_irqsave(&ndp->lock, flags);
hot_nc = ndp->hot_channel;
+ force_channel = ndp->force_channel;
+ force_package = ndp->force_package;
spin_unlock_irqrestore(&ndp->lock, flags);
+ /* Force a specific channel whether or not it has link if we have been
+ * configured to do so
+ */
+ if (force_package && force_channel) {
+ found = force_channel;
+ ncm = &found->modes[NCSI_MODE_LINK];
+ if (!(ncm->data[2] & 0x1))
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Channel %u forced, but it is link down\n",
+ found->id);
+ goto out;
+ }
+
/* The search is done once an inactive channel with up
* link is found.
*/
found = NULL;
NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (ndp->force_package && np != ndp->force_package)
+ continue;
NCSI_FOR_EACH_CHANNEL(np, nc) {
spin_lock_irqsave(&nc->lock, flags);
@@ -1594,6 +1611,9 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
ndp->ptype.dev = dev;
dev_add_pack(&ndp->ptype);
+ /* Set up generic netlink interface */
+ ncsi_init_netlink(dev);
+
return nd;
}
EXPORT_SYMBOL_GPL(ncsi_register_dev);
@@ -1673,6 +1693,8 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
#endif
spin_unlock_irqrestore(&ncsi_dev_lock, flags);
+ ncsi_unregister_netlink(nd->dev);
+
kfree(ndp);
}
EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
new file mode 100644
index 000000000000..8d7e849d4825
--- /dev/null
+++ b/net/ncsi/ncsi-netlink.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <net/genetlink.h>
+#include <net/ncsi.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <uapi/linux/ncsi.h>
+
+#include "internal.h"
+#include "ncsi-netlink.h"
+
+static struct genl_family ncsi_genl_family;
+
+static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = {
+ [NCSI_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [NCSI_ATTR_PACKAGE_LIST] = { .type = NLA_NESTED },
+ [NCSI_ATTR_PACKAGE_ID] = { .type = NLA_U32 },
+ [NCSI_ATTR_CHANNEL_ID] = { .type = NLA_U32 },
+};
+
+static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
+{
+ struct ncsi_dev_priv *ndp;
+ struct net_device *dev;
+ struct ncsi_dev *nd;
+
+ if (!net)
+ return NULL;
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev) {
+ pr_err("NCSI netlink: No device for ifindex %u\n", ifindex);
+ return NULL;
+ }
+
+ nd = ncsi_find_dev(dev);
+ ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
+
+ dev_put(dev);
+ return ndp;
+}
+
+static int ncsi_write_channel_info(struct sk_buff *skb,
+ struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *nc)
+{
+ struct nlattr *vid_nest;
+ struct ncsi_channel_filter *ncf;
+ struct ncsi_channel_mode *m;
+ u32 *data;
+ int i;
+
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id);
+ m = &nc->modes[NCSI_MODE_LINK];
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
+ if (nc->state == NCSI_CHANNEL_ACTIVE)
+ nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
+ if (ndp->force_channel == nc)
+ nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
+
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+ nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
+
+ vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
+ if (!vid_nest)
+ return -ENOMEM;
+ ncf = nc->filters[NCSI_FILTER_VLAN];
+ i = -1;
+ if (ncf) {
+ while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total,
+ i + 1)) < ncf->total) {
+ data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, i);
+ /* Uninitialised channels will have 'zero' vlan ids */
+ if (!data || !*data)
+ continue;
+ nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID,
+ *(u16 *)data);
+ }
+ }
+ nla_nest_end(skb, vid_nest);
+
+ return 0;
+}
+
+static int ncsi_write_package_info(struct sk_buff *skb,
+ struct ncsi_dev_priv *ndp, unsigned int id)
+{
+ struct nlattr *pnest, *cnest, *nest;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ bool found;
+ int rc;
+
+ if (id > ndp->package_num) {
+ netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id);
+ return -ENODEV;
+ }
+
+ found = false;
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (np->id != id)
+ continue;
+ pnest = nla_nest_start(skb, NCSI_PKG_ATTR);
+ if (!pnest)
+ return -ENOMEM;
+ nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ if (ndp->force_package == np)
+ nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
+ cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
+ if (!cnest) {
+ nla_nest_cancel(skb, pnest);
+ return -ENOMEM;
+ }
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR);
+ if (!nest) {
+ nla_nest_cancel(skb, cnest);
+ nla_nest_cancel(skb, pnest);
+ return -ENOMEM;
+ }
+ rc = ncsi_write_channel_info(skb, ndp, nc);
+ if (rc) {
+ nla_nest_cancel(skb, nest);
+ nla_nest_cancel(skb, cnest);
+ nla_nest_cancel(skb, pnest);
+ return rc;
+ }
+ nla_nest_end(skb, nest);
+ }
+ nla_nest_end(skb, cnest);
+ nla_nest_end(skb, pnest);
+ found = true;
+ }
+
+ if (!found)
+ return -ENODEV;
+
+ return 0;
+}
+
+static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned int package_id;
+ struct sk_buff *skb;
+ struct nlattr *attr;
+ void *hdr;
+ int rc;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(genl_info_net(info),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+ if (!hdr) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+
+ attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+ if (!attr) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+ rc = ncsi_write_package_info(skb, ndp, package_id);
+
+ if (rc) {
+ nla_nest_cancel(skb, attr);
+ goto err;
+ }
+
+ nla_nest_end(skb, attr);
+
+ genlmsg_end(skb, hdr);
+ return genlmsg_reply(skb, info);
+
+err:
+ genlmsg_cancel(skb, hdr);
+ kfree_skb(skb);
+ return rc;
+}
+
+static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *attrs[NCSI_ATTR_MAX + 1];
+ struct ncsi_package *np, *package;
+ struct ncsi_dev_priv *ndp;
+ unsigned int package_id;
+ struct nlattr *attr;
+ void *hdr;
+ int rc;
+
+ rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX,
+ ncsi_genl_policy, NULL);
+ if (rc)
+ return rc;
+
+ if (!attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(sock_net(skb->sk),
+ nla_get_u32(attrs[NCSI_ATTR_IFINDEX]));
+
+ if (!ndp)
+ return -ENODEV;
+
+ package_id = cb->args[0];
+ package = NULL;
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ if (np->id == package_id)
+ package = np;
+
+ if (!package)
+ return 0; /* done */
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+ if (!hdr) {
+ rc = -EMSGSIZE;
+ goto err;
+ }
+
+ attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+ if (!attr) {
+ rc = -EMSGSIZE;
+ goto err;
+ }
+ rc = ncsi_write_package_info(skb, ndp, package->id);
+ if (rc) {
+ nla_nest_cancel(skb, attr);
+ goto err;
+ }
+
+ nla_nest_end(skb, attr);
+ genlmsg_end(skb, hdr);
+
+ cb->args[0] = package_id + 1;
+
+ return skb->len;
+err:
+ genlmsg_cancel(skb, hdr);
+ return rc;
+}
+
+static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_package *np, *package;
+ struct ncsi_channel *nc, *channel;
+ u32 package_id, channel_id;
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(sock_net(msg->sk),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+ package = NULL;
+
+ spin_lock_irqsave(&ndp->lock, flags);
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ if (np->id == package_id)
+ package = np;
+ if (!package) {
+ /* The user has set a package that does not exist */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return -ERANGE;
+ }
+
+ channel = NULL;
+ if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+ /* Allow any channel */
+ channel_id = NCSI_RESERVED_CHANNEL;
+ } else {
+ channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+ NCSI_FOR_EACH_CHANNEL(package, nc)
+ if (nc->id == channel_id)
+ channel = nc;
+ }
+
+ if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
+ /* The user has set a channel that does not exist on this
+ * package
+ */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
+ channel_id);
+ return -ERANGE;
+ }
+
+ ndp->force_package = package;
+ ndp->force_channel = channel;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
+ package_id, channel_id,
+ channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+
+ /* Bounce the NCSI channel to apply the changes */
+ ncsi_stop_dev(&ndp->ndev);
+ ncsi_start_dev(&ndp->ndev);
+
+ return 0;
+}
+
+static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(sock_net(msg->sk),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ /* Clear any override */
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->force_package = NULL;
+ ndp->force_channel = NULL;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
+
+ /* Bounce the NCSI channel to apply the changes */
+ ncsi_stop_dev(&ndp->ndev);
+ ncsi_start_dev(&ndp->ndev);
+
+ return 0;
+}
+
+static const struct genl_ops ncsi_ops[] = {
+ {
+ .cmd = NCSI_CMD_PKG_INFO,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_pkg_info_nl,
+ .dumpit = ncsi_pkg_info_all_nl,
+ .flags = 0,
+ },
+ {
+ .cmd = NCSI_CMD_SET_INTERFACE,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_set_interface_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = NCSI_CMD_CLEAR_INTERFACE,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_clear_interface_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+static struct genl_family ncsi_genl_family __ro_after_init = {
+ .name = "NCSI",
+ .version = 0,
+ .maxattr = NCSI_ATTR_MAX,
+ .module = THIS_MODULE,
+ .ops = ncsi_ops,
+ .n_ops = ARRAY_SIZE(ncsi_ops),
+};
+
+int ncsi_init_netlink(struct net_device *dev)
+{
+ int rc;
+
+ rc = genl_register_family(&ncsi_genl_family);
+ if (rc)
+ netdev_err(dev, "ncsi: failed to register netlink family\n");
+
+ return rc;
+}
+
+int ncsi_unregister_netlink(struct net_device *dev)
+{
+ int rc;
+
+ rc = genl_unregister_family(&ncsi_genl_family);
+ if (rc)
+ netdev_err(dev, "ncsi: failed to unregister netlink family\n");
+
+ return rc;
+}
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
new file mode 100644
index 000000000000..91a5c256f8c4
--- /dev/null
+++ b/net/ncsi/ncsi-netlink.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NCSI_NETLINK_H__
+#define __NCSI_NETLINK_H__
+
+#include <linux/netdevice.h>
+
+#include "internal.h"
+
+int ncsi_init_netlink(struct net_device *dev);
+int ncsi_unregister_netlink(struct net_device *dev);
+
+#endif /* __NCSI_NETLINK_H__ */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 975a85a48d39..bc4bd247bb7d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -2094,7 +2094,7 @@ static struct pernet_operations ip_set_net_ops = {
.init = ip_set_net_init,
.exit = ip_set_net_exit,
.id = &ip_set_net_id,
- .size = sizeof(struct ip_set_net)
+ .size = sizeof(struct ip_set_net),
};
static int __init
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 3e17d32b629d..58d5d05aec24 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -260,7 +260,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
buf_len = strlen(buf);
ct = nf_ct_get(skb, &ctinfo);
- if (ct && (ct->status & IPS_NAT_MASK)) {
+ if (ct) {
bool mangled;
/* If mangling fails this function will return 0
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 496ce173f0c1..cc11bf890eb9 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -33,7 +33,7 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
MODULE_ALIAS_NFCT_HELPER("netbios_ns");
static unsigned int timeout __read_mostly = 3;
-module_param(timeout, uint, S_IRUSR);
+module_param(timeout, uint, 0400);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
static struct nf_conntrack_expect_policy exp_policy = {
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
index 87b95a2c270c..1b18f43ad226 100644
--- a/net/netfilter/nf_conntrack_snmp.c
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -26,7 +26,7 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_NFCT_HELPER("snmp");
static unsigned int timeout __read_mostly = 30;
-module_param(timeout, uint, S_IRUSR);
+module_param(timeout, uint, 0400);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
int (*nf_nat_snmp_hook)(struct sk_buff *skb,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9123fdec5e14..037fec54c850 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -495,7 +495,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net)
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(pde, root_uid, root_gid);
- pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
+ pde = proc_create("nf_conntrack", 0444, net->proc_net_stat,
&ct_cpu_seq_fops);
if (!pde)
goto out_stat_nf_conntrack;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index c2c1b16b7538..6d0357817cda 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -549,7 +549,7 @@ static int __net_init nf_log_net_init(struct net *net)
int ret = -ENOMEM;
#ifdef CONFIG_PROC_FS
- if (!proc_create("nf_log", S_IRUGO,
+ if (!proc_create("nf_log", 0444,
net->nf.proc_netfilter, &nflog_file_ops))
return ret;
#endif
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index fbce552a796e..7d7466dbf663 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -41,7 +41,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
const struct nf_conn *ct,
u16 *rover)
{
- unsigned int range_size, min, i;
+ unsigned int range_size, min, max, i;
__be16 *portptr;
u_int16_t off;
@@ -71,7 +71,10 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
}
} else {
min = ntohs(range->min_proto.all);
- range_size = ntohs(range->max_proto.all) - min + 1;
+ max = ntohs(range->max_proto.all);
+ if (unlikely(max < min))
+ swap(max, min);
+ range_size = max - min + 1;
}
if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 92139a087260..6039b350abbe 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -325,7 +325,7 @@ static const struct file_operations synproxy_cpu_seq_fops = {
static int __net_init synproxy_proc_init(struct net *net)
{
- if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat,
+ if (!proc_create("synproxy", 0444, net->proc_net_stat,
&synproxy_cpu_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8b9fe30de0cd..c4acc7340eb1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5037,9 +5037,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nf_flowtable_type *type;
+ struct nft_flowtable *flowtable, *ft;
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct nft_flowtable *flowtable;
struct nft_table *table;
struct nft_ctx ctx;
int err, i, k;
@@ -5099,6 +5099,22 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err3;
for (i = 0; i < flowtable->ops_len; i++) {
+ if (!flowtable->ops[i].dev)
+ continue;
+
+ list_for_each_entry(ft, &table->flowtables, list) {
+ for (k = 0; k < ft->ops_len; k++) {
+ if (!ft->ops[k].dev)
+ continue;
+
+ if (flowtable->ops[i].dev == ft->ops[k].dev &&
+ flowtable->ops[i].pf == ft->ops[k].pf) {
+ err = -EBUSY;
+ goto err4;
+ }
+ }
+ }
+
err = nf_register_net_hook(net, &flowtable->ops[i]);
if (err < 0)
goto err4;
@@ -5120,7 +5136,7 @@ err5:
i = flowtable->ops_len;
err4:
for (k = i - 1; k >= 0; k--)
- nf_unregister_net_hook(net, &flowtable->ops[i]);
+ nf_unregister_net_hook(net, &flowtable->ops[k]);
kfree(flowtable->ops);
err3:
@@ -5145,6 +5161,11 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_ctx ctx;
+ if (!nla[NFTA_FLOWTABLE_TABLE] ||
+ (!nla[NFTA_FLOWTABLE_NAME] &&
+ !nla[NFTA_FLOWTABLE_HANDLE]))
+ return -EINVAL;
+
table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
family, genmask);
if (IS_ERR(table))
@@ -5402,6 +5423,7 @@ err:
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
cancel_delayed_work_sync(&flowtable->data.gc_work);
+ kfree(flowtable->ops);
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
rhashtable_destroy(&flowtable->data.rhashtable);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8bba23160a68..0b839c38800f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -833,11 +833,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
if (diff > skb_tailroom(e->skb)) {
nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
diff, GFP_ATOMIC);
- if (!nskb) {
- printk(KERN_WARNING "nf_queue: OOM "
- "in mangle, dropping packet\n");
+ if (!nskb)
return -ENOMEM;
- }
kfree_skb(e->skb);
e->skb = nskb;
}
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 3f1624ee056f..d40591fe1b2f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -674,7 +674,7 @@ static const struct nft_set_ops *
nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
u32 flags)
{
- if (desc->size) {
+ if (desc->size && !(flags & NFT_SET_TIMEOUT)) {
switch (desc->klen) {
case 4:
return &nft_hash_fast_ops;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2f685ee1f9c8..4aa01c90e9d1 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -423,6 +423,36 @@ textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
return buf;
}
+/**
+ * xt_check_proc_name - check that name is suitable for /proc file creation
+ *
+ * @name: file name candidate
+ * @size: length of buffer
+ *
+ * Some x_tables modules wish to create a file in /proc.
+ * This function makes sure that the name is suitable for this
+ * purpose; it checks that the name is NUL terminated and isn't a
+ * 'special' name, like "..".
+ *
+ * Returns a negative number on error or 0 if the name is usable.
+ */
+int xt_check_proc_name(const char *name, unsigned int size)
+{
+ if (name[0] == '\0')
+ return -EINVAL;
+
+ if (strnlen(name, size) == size)
+ return -ENAMETOOLONG;
+
+ if (strcmp(name, ".") == 0 ||
+ strcmp(name, "..") == 0 ||
+ strchr(name, '/'))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(xt_check_proc_name);
+
int xt_check_match(struct xt_mtchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
@@ -434,36 +464,35 @@ int xt_check_match(struct xt_mtchk_param *par,
* ebt_among is exempt from centralized matchsize checking
* because it uses a dynamic-size data set.
*/
- pr_err("%s_tables: %s.%u match: invalid size "
- "%u (kernel) != (user) %u\n",
- xt_prefix[par->family], par->match->name,
- par->match->revision,
- XT_ALIGN(par->match->matchsize), size);
+ pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->revision,
+ XT_ALIGN(par->match->matchsize), size);
return -EINVAL;
}
if (par->match->table != NULL &&
strcmp(par->match->table, par->table) != 0) {
- pr_err("%s_tables: %s match: only valid in %s table, not %s\n",
- xt_prefix[par->family], par->match->name,
- par->match->table, par->table);
+ pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->table, par->table);
return -EINVAL;
}
if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
char used[64], allow[64];
- pr_err("%s_tables: %s match: used from hooks %s, but only "
- "valid from %s\n",
- xt_prefix[par->family], par->match->name,
- textify_hooks(used, sizeof(used), par->hook_mask,
- par->family),
- textify_hooks(allow, sizeof(allow), par->match->hooks,
- par->family));
+ pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n",
+ xt_prefix[par->family], par->match->name,
+ textify_hooks(used, sizeof(used),
+ par->hook_mask, par->family),
+ textify_hooks(allow, sizeof(allow),
+ par->match->hooks,
+ par->family));
return -EINVAL;
}
if (par->match->proto && (par->match->proto != proto || inv_proto)) {
- pr_err("%s_tables: %s match: only valid for protocol %u\n",
- xt_prefix[par->family], par->match->name,
- par->match->proto);
+ pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->proto);
return -EINVAL;
}
if (par->match->checkentry != NULL) {
@@ -814,36 +843,35 @@ int xt_check_target(struct xt_tgchk_param *par,
int ret;
if (XT_ALIGN(par->target->targetsize) != size) {
- pr_err("%s_tables: %s.%u target: invalid size "
- "%u (kernel) != (user) %u\n",
- xt_prefix[par->family], par->target->name,
- par->target->revision,
- XT_ALIGN(par->target->targetsize), size);
+ pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->revision,
+ XT_ALIGN(par->target->targetsize), size);
return -EINVAL;
}
if (par->target->table != NULL &&
strcmp(par->target->table, par->table) != 0) {
- pr_err("%s_tables: %s target: only valid in %s table, not %s\n",
- xt_prefix[par->family], par->target->name,
- par->target->table, par->table);
+ pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->table, par->table);
return -EINVAL;
}
if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
char used[64], allow[64];
- pr_err("%s_tables: %s target: used from hooks %s, but only "
- "usable from %s\n",
- xt_prefix[par->family], par->target->name,
- textify_hooks(used, sizeof(used), par->hook_mask,
- par->family),
- textify_hooks(allow, sizeof(allow), par->target->hooks,
- par->family));
+ pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n",
+ xt_prefix[par->family], par->target->name,
+ textify_hooks(used, sizeof(used),
+ par->hook_mask, par->family),
+ textify_hooks(allow, sizeof(allow),
+ par->target->hooks,
+ par->family));
return -EINVAL;
}
if (par->target->proto && (par->target->proto != proto || inv_proto)) {
- pr_err("%s_tables: %s target: only valid for protocol %u\n",
- xt_prefix[par->family], par->target->name,
- par->target->proto);
+ pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->proto);
return -EINVAL;
}
if (par->target->checkentry != NULL) {
@@ -1004,10 +1032,6 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
if (sz < sizeof(*info))
return NULL;
- /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
- if ((size >> PAGE_SHIFT) + 2 > totalram_pages)
- return NULL;
-
/* __GFP_NORETRY is not fully supported by kvmalloc but it should
* work reasonably well if sz is too large and bail out rather
* than shoot all processes down before realizing there is nothing
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index c502419d6306..f368ee6741db 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -120,8 +120,8 @@ static int audit_tg_check(const struct xt_tgchk_param *par)
const struct xt_audit_info *info = par->targinfo;
if (info->type > XT_AUDIT_TYPE_MAX) {
- pr_info("Audit type out of range (valid range: 0..%hhu)\n",
- XT_AUDIT_TYPE_MAX);
+ pr_info_ratelimited("Audit type out of range (valid range: 0..%hhu)\n",
+ XT_AUDIT_TYPE_MAX);
return -ERANGE;
}
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
index 0f642ef8cd26..9f4151ec3e06 100644
--- a/net/netfilter/xt_CHECKSUM.c
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -36,13 +36,13 @@ static int checksum_tg_check(const struct xt_tgchk_param *par)
const struct xt_CHECKSUM_info *einfo = par->targinfo;
if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
- pr_info("unsupported CHECKSUM operation %x\n", einfo->operation);
+ pr_info_ratelimited("unsupported CHECKSUM operation %x\n",
+ einfo->operation);
return -EINVAL;
}
- if (!einfo->operation) {
- pr_info("no CHECKSUM operation enabled\n");
+ if (!einfo->operation)
return -EINVAL;
- }
+
return 0;
}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index da56c06a443c..f3f1caac949b 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -91,8 +91,8 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
if (strcmp(par->table, "mangle") != 0 &&
strcmp(par->table, "security") != 0) {
- pr_info("target only valid in the \'mangle\' "
- "or \'security\' tables, not \'%s\'.\n", par->table);
+ pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
+ par->table);
return -EINVAL;
}
@@ -102,14 +102,14 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
break;
default:
- pr_info("invalid mode: %hu\n", info->mode);
+ pr_info_ratelimited("invalid mode: %hu\n", info->mode);
return -EINVAL;
}
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 5a152e2acfd5..8790190c6feb 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -82,15 +82,14 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
proto = xt_ct_find_proto(par);
if (!proto) {
- pr_info("You must specify a L4 protocol, and not use "
- "inversions on it.\n");
+ pr_info_ratelimited("You must specify a L4 protocol and not use inversions on it\n");
return -ENOENT;
}
helper = nf_conntrack_helper_try_module_get(helper_name, par->family,
proto);
if (helper == NULL) {
- pr_info("No such helper \"%s\"\n", helper_name);
+ pr_info_ratelimited("No such helper \"%s\"\n", helper_name);
return -ENOENT;
}
@@ -124,6 +123,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
+ const char *errmsg = NULL;
int ret = 0;
u8 proto;
@@ -131,29 +131,29 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
if (timeout_find_get == NULL) {
ret = -ENOENT;
- pr_info("Timeout policy base is empty\n");
+ errmsg = "Timeout policy base is empty";
goto out;
}
proto = xt_ct_find_proto(par);
if (!proto) {
ret = -EINVAL;
- pr_info("You must specify a L4 protocol, and not use "
- "inversions on it.\n");
+ errmsg = "You must specify a L4 protocol and not use inversions on it";
goto out;
}
timeout = timeout_find_get(par->net, timeout_name);
if (timeout == NULL) {
ret = -ENOENT;
- pr_info("No such timeout policy \"%s\"\n", timeout_name);
+ pr_info_ratelimited("No such timeout policy \"%s\"\n",
+ timeout_name);
goto out;
}
if (timeout->l3num != par->family) {
ret = -EINVAL;
- pr_info("Timeout policy `%s' can only be used by L3 protocol "
- "number %d\n", timeout_name, timeout->l3num);
+ pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
+ timeout_name, 3, timeout->l3num);
goto err_put_timeout;
}
/* Make sure the timeout policy matches any existing protocol tracker,
@@ -162,9 +162,8 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
l4proto = __nf_ct_l4proto_find(par->family, proto);
if (timeout->l4proto->l4proto != l4proto->l4proto) {
ret = -EINVAL;
- pr_info("Timeout policy `%s' can only be used by L4 protocol "
- "number %d\n",
- timeout_name, timeout->l4proto->l4proto);
+ pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
+ timeout_name, 4, timeout->l4proto->l4proto);
goto err_put_timeout;
}
timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
@@ -180,6 +179,8 @@ err_put_timeout:
__xt_ct_tg_timeout_put(timeout);
out:
rcu_read_unlock();
+ if (errmsg)
+ pr_info_ratelimited("%s\n", errmsg);
return ret;
#else
return -EOPNOTSUPP;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 3f83d38c4e5b..098ed851b7a7 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -66,10 +66,8 @@ static int dscp_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_DSCP_info *info = par->targinfo;
- if (info->dscp > XT_DSCP_MAX) {
- pr_info("dscp %x out of range\n", info->dscp);
+ if (info->dscp > XT_DSCP_MAX)
return -EDOM;
- }
return 0;
}
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 1535e87ed9bd..4653b071bed4 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -105,10 +105,8 @@ static int ttl_tg_check(const struct xt_tgchk_param *par)
{
const struct ipt_TTL_info *info = par->targinfo;
- if (info->mode > IPT_TTL_MAXMODE) {
- pr_info("TTL: invalid or unknown mode %u\n", info->mode);
+ if (info->mode > IPT_TTL_MAXMODE)
return -EINVAL;
- }
if (info->mode != IPT_TTL_SET && info->ttl == 0)
return -EINVAL;
return 0;
@@ -118,15 +116,10 @@ static int hl_tg6_check(const struct xt_tgchk_param *par)
{
const struct ip6t_HL_info *info = par->targinfo;
- if (info->mode > IP6T_HL_MAXMODE) {
- pr_info("invalid or unknown mode %u\n", info->mode);
+ if (info->mode > IP6T_HL_MAXMODE)
return -EINVAL;
- }
- if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
- pr_info("increment/decrement does not "
- "make sense with value 0\n");
+ if (info->mode != IP6T_HL_SET && info->hop_limit == 0)
return -EINVAL;
- }
return 0;
}
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 60e6dbe12460..9c75f419cd80 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -9,6 +9,8 @@
* the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
@@ -312,29 +314,30 @@ hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
static int hmark_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_hmark_info *info = par->targinfo;
+ const char *errmsg = "proto mask must be zero with L3 mode";
- if (!info->hmodulus) {
- pr_info("xt_HMARK: hash modulus can't be zero\n");
+ if (!info->hmodulus)
return -EINVAL;
- }
+
if (info->proto_mask &&
- (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) {
- pr_info("xt_HMARK: proto mask must be zero with L3 mode\n");
- return -EINVAL;
- }
+ (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
+ goto err;
+
if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
(info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
- XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) {
- pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n");
+ XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
return -EINVAL;
- }
+
if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
(info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
- pr_info("xt_HMARK: spi-set and port-set can't be combined\n");
- return -EINVAL;
+ errmsg = "spi-set and port-set can't be combined";
+ goto err;
}
return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
}
static struct xt_target hmark_tg_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 6c2482b709b1..5ee859193783 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -132,7 +132,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
ret = -ENOMEM;
goto out_free_timer;
}
- info->timer->attr.attr.mode = S_IRUGO;
+ info->timer->attr.attr.mode = 0444;
info->timer->attr.show = idletimer_tg_show;
ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
@@ -146,11 +146,11 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
info->timer->refcnt = 1;
+ INIT_WORK(&info->timer->work, idletimer_tg_work);
+
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
- INIT_WORK(&info->timer->work, idletimer_tg_work);
-
return 0;
out_free_attr:
@@ -191,7 +191,10 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
pr_debug("timeout value is zero\n");
return -EINVAL;
}
-
+ if (info->timeout >= INT_MAX / 1000) {
+ pr_debug("timeout value is too big\n");
+ return -EINVAL;
+ }
if (info->label[0] == '\0' ||
strnlen(info->label,
MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 1dcad893df78..19846445504d 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -111,10 +111,8 @@ static int led_tg_check(const struct xt_tgchk_param *par)
struct xt_led_info_internal *ledinternal;
int err;
- if (ledinfo->id[0] == '\0') {
- pr_info("No 'id' parameter given.\n");
+ if (ledinfo->id[0] == '\0')
return -EINVAL;
- }
mutex_lock(&xt_led_mutex);
@@ -138,13 +136,14 @@ static int led_tg_check(const struct xt_tgchk_param *par)
err = led_trigger_register(&ledinternal->netfilter_led_trigger);
if (err) {
- pr_err("Trigger name is already in use.\n");
+ pr_info_ratelimited("Trigger name is already in use.\n");
goto exit_alloc;
}
- /* See if we need to set up a timer */
- if (ledinfo->delay > 0)
- timer_setup(&ledinternal->timer, led_timeout_callback, 0);
+ /* Since the ledinternal timer can be shared between multiple targets,
+ * always set it up, even if the current target does not need it
+ */
+ timer_setup(&ledinternal->timer, led_timeout_callback, 0);
list_add_tail(&ledinternal->list, &xt_led_triggers);
@@ -181,8 +180,7 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par)
list_del(&ledinternal->list);
- if (ledinfo->delay > 0)
- del_timer_sync(&ledinternal->timer);
+ del_timer_sync(&ledinternal->timer);
led_trigger_unregister(&ledinternal->netfilter_led_trigger);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index a360b99a958a..a9aca80a32ae 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -8,6 +8,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -67,13 +69,13 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
init_hashrandom(&jhash_initval);
if (info->queues_total == 0) {
- pr_err("NFQUEUE: number of total queues is 0\n");
+ pr_info_ratelimited("number of total queues is 0\n");
return -EINVAL;
}
maxid = info->queues_total - 1 + info->queuenum;
if (maxid > 0xffff) {
- pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
- info->queues_total, maxid);
+ pr_info_ratelimited("number of queues (%u) out of range (got %u)\n",
+ info->queues_total, maxid);
return -ERANGE;
}
if (par->target->revision == 2 && info->flags > 1)
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 9faf5e050b79..4ad5fe27e08b 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -60,18 +60,20 @@ static int checkentry_lsm(struct xt_secmark_target_info *info)
&info->secid);
if (err) {
if (err == -EINVAL)
- pr_info("invalid security context \'%s\'\n", info->secctx);
+ pr_info_ratelimited("invalid security context \'%s\'\n",
+ info->secctx);
return err;
}
if (!info->secid) {
- pr_info("unable to map security context \'%s\'\n", info->secctx);
+ pr_info_ratelimited("unable to map security context \'%s\'\n",
+ info->secctx);
return -ENOENT;
}
err = security_secmark_relabel_packet(info->secid);
if (err) {
- pr_info("unable to obtain relabeling permission\n");
+ pr_info_ratelimited("unable to obtain relabeling permission\n");
return err;
}
@@ -86,14 +88,14 @@ static int secmark_tg_check(const struct xt_tgchk_param *par)
if (strcmp(par->table, "mangle") != 0 &&
strcmp(par->table, "security") != 0) {
- pr_info("target only valid in the \'mangle\' "
- "or \'security\' tables, not \'%s\'.\n", par->table);
+ pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
+ par->table);
return -EINVAL;
}
if (mode && mode != info->mode) {
- pr_info("mode already set to %hu cannot mix with "
- "rules for mode %hu\n", mode, info->mode);
+ pr_info_ratelimited("mode already set to %hu cannot mix with rules for mode %hu\n",
+ mode, info->mode);
return -EINVAL;
}
@@ -101,7 +103,7 @@ static int secmark_tg_check(const struct xt_tgchk_param *par)
case SECMARK_MODE_SEL:
break;
default:
- pr_info("invalid mode: %hu\n", info->mode);
+ pr_info_ratelimited("invalid mode: %hu\n", info->mode);
return -EINVAL;
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 99bb8e410f22..98efb202f8b4 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -273,8 +273,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
(par->hook_mask & ~((1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING))) != 0) {
- pr_info("path-MTU clamping only supported in "
- "FORWARD, OUTPUT and POSTROUTING hooks\n");
+ pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
if (par->nft_compat)
@@ -283,7 +282,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
- pr_info("Only works on TCP SYN packets\n");
+ pr_info_ratelimited("Only works on TCP SYN packets\n");
return -EINVAL;
}
@@ -298,8 +297,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
(par->hook_mask & ~((1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING))) != 0) {
- pr_info("path-MTU clamping only supported in "
- "FORWARD, OUTPUT and POSTROUTING hooks\n");
+ pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
if (par->nft_compat)
@@ -308,7 +306,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
- pr_info("Only works on TCP SYN packets\n");
+ pr_info_ratelimited("Only works on TCP SYN packets\n");
return -EINVAL;
}
#endif
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 17d7705e3bd4..8c89323c06af 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -540,8 +540,7 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
!(i->invflags & IP6T_INV_PROTO))
return 0;
- pr_info("Can be used only in combination with "
- "either -p tcp or -p udp\n");
+ pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
return -EINVAL;
}
#endif
@@ -559,8 +558,7 @@ static int tproxy_tg4_check(const struct xt_tgchk_param *par)
&& !(i->invflags & IPT_INV_PROTO))
return 0;
- pr_info("Can be used only in combination with "
- "either -p tcp or -p udp\n");
+ pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
return -EINVAL;
}
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 911a7c0da504..89e281b3bfc2 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -164,48 +164,47 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
{
+ const char *errmsg = "both incoming and outgoing interface limitation cannot be selected";
struct xt_addrtype_info_v1 *info = par->matchinfo;
if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
- info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
- pr_info("both incoming and outgoing "
- "interface limitation cannot be selected\n");
- return -EINVAL;
- }
+ info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
+ goto err;
if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN)) &&
info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
- pr_info("output interface limitation "
- "not valid in PREROUTING and INPUT\n");
- return -EINVAL;
+ errmsg = "output interface limitation not valid in PREROUTING and INPUT";
+ goto err;
}
if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_OUT)) &&
info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
- pr_info("input interface limitation "
- "not valid in POSTROUTING and OUTPUT\n");
- return -EINVAL;
+ errmsg = "input interface limitation not valid in POSTROUTING and OUTPUT";
+ goto err;
}
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
if (par->family == NFPROTO_IPV6) {
if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
- pr_err("ipv6 BLACKHOLE matching not supported\n");
- return -EINVAL;
+ errmsg = "ipv6 BLACKHOLE matching not supported";
+ goto err;
}
if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
- pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
- return -EINVAL;
+ errmsg = "ipv6 PROHIBIT (THROW, NAT ..) matching not supported";
+ goto err;
}
if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
- pr_err("ipv6 does not support BROADCAST matching\n");
- return -EINVAL;
+ errmsg = "ipv6 does not support BROADCAST matching";
+ goto err;
}
}
#endif
return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
}
static struct xt_match addrtype_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 06b090d8e901..a2cf8a6236d6 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -7,6 +7,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/skbuff.h>
@@ -34,7 +36,7 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
program.filter = insns;
if (bpf_prog_create(ret, &program)) {
- pr_info("bpf: check failed: parse error\n");
+ pr_info_ratelimited("check failed: parse error\n");
return -EINVAL;
}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 891f4e7e8ea7..7df2dece57d3 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -12,6 +12,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/netfilter/x_tables.h>
@@ -48,7 +50,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
}
if (info->has_path && info->has_classid) {
- pr_info("xt_cgroup: both path and classid specified\n");
+ pr_info_ratelimited("path and classid specified\n");
return -EINVAL;
}
@@ -56,8 +58,8 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
if (info->has_path) {
cgrp = cgroup_get_from_path(info->path);
if (IS_ERR(cgrp)) {
- pr_info("xt_cgroup: invalid path, errno=%ld\n",
- PTR_ERR(cgrp));
+ pr_info_ratelimited("invalid path, errno=%ld\n",
+ PTR_ERR(cgrp));
return -EINVAL;
}
info->priv = cgrp;
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 57ef175dfbfa..0068688995c8 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -135,14 +135,12 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
struct xt_cluster_match_info *info = par->matchinfo;
if (info->total_nodes > XT_CLUSTER_NODES_MAX) {
- pr_info("you have exceeded the maximum "
- "number of cluster nodes (%u > %u)\n",
- info->total_nodes, XT_CLUSTER_NODES_MAX);
+ pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n",
+ info->total_nodes, XT_CLUSTER_NODES_MAX);
return -EINVAL;
}
if (info->node_mask >= (1ULL << info->total_nodes)) {
- pr_info("this node mask cannot be "
- "higher than the total number of nodes\n");
+ pr_info_ratelimited("node mask cannot exceed total number of nodes\n");
return -EDOM;
}
return 0;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index cad0b7b5eb35..93cb018c3055 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -112,8 +112,8 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
/*
* This filter cannot function correctly unless connection tracking
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
index 23372879e6e3..4fa4efd24353 100644
--- a/net/netfilter/xt_connlabel.c
+++ b/net/netfilter/xt_connlabel.c
@@ -57,14 +57,15 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par)
int ret;
if (info->options & ~options) {
- pr_err("Unknown options in mask %x\n", info->options);
+ pr_info_ratelimited("Unknown options in mask %x\n",
+ info->options);
return -EINVAL;
}
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index ec377cc6a369..809639ce6f5a 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -79,8 +79,8 @@ static int connmark_tg_check(const struct xt_tgchk_param *par)
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
@@ -109,8 +109,8 @@ static int connmark_mt_check(const struct xt_mtchk_param *par)
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 39cf1d019240..df80fe7d391c 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -272,8 +272,8 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 236ac8008909..a4c2b862f820 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -46,10 +46,8 @@ static int dscp_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_dscp_info *info = par->matchinfo;
- if (info->dscp > XT_DSCP_MAX) {
- pr_info("dscp %x out of range\n", info->dscp);
+ if (info->dscp > XT_DSCP_MAX)
return -EDOM;
- }
return 0;
}
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
index 3c831a8efebc..c7ad4afa5fb8 100644
--- a/net/netfilter/xt_ecn.c
+++ b/net/netfilter/xt_ecn.c
@@ -97,7 +97,7 @@ static int ecn_mt_check4(const struct xt_mtchk_param *par)
if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
(ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
- pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+ pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n");
return -EINVAL;
}
@@ -139,7 +139,7 @@ static int ecn_mt_check6(const struct xt_mtchk_param *par)
if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
(ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) {
- pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+ pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n");
return -EINVAL;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index ca6847403ca2..3360f13dc208 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -523,7 +523,8 @@ static u64 user2rate(u64 user)
if (user != 0) {
return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
} else {
- pr_warn("invalid rate from userspace: %llu\n", user);
+ pr_info_ratelimited("invalid rate from userspace: %llu\n",
+ user);
return 0;
}
}
@@ -774,7 +775,7 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
if (!dh->rateinfo.prev_window &&
(dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
return !(cfg->mode & XT_HASHLIMIT_INVERT);
} else {
goto overlimit;
@@ -865,33 +866,34 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
}
if (cfg->mode & ~XT_HASHLIMIT_ALL) {
- pr_info("Unknown mode mask %X, kernel too old?\n",
- cfg->mode);
+ pr_info_ratelimited("Unknown mode mask %X, kernel too old?\n",
+ cfg->mode);
return -EINVAL;
}
/* Check for overflow. */
if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
if (cfg->avg == 0 || cfg->avg > U32_MAX) {
- pr_info("hashlimit invalid rate\n");
+ pr_info_ratelimited("invalid rate\n");
return -ERANGE;
}
if (cfg->interval == 0) {
- pr_info("hashlimit invalid interval\n");
+ pr_info_ratelimited("invalid interval\n");
return -EINVAL;
}
} else if (cfg->mode & XT_HASHLIMIT_BYTES) {
if (user2credits_byte(cfg->avg) == 0) {
- pr_info("overflow, rate too high: %llu\n", cfg->avg);
+ pr_info_ratelimited("overflow, rate too high: %llu\n",
+ cfg->avg);
return -EINVAL;
}
} else if (cfg->burst == 0 ||
- user2credits(cfg->avg * cfg->burst, revision) <
- user2credits(cfg->avg, revision)) {
- pr_info("overflow, try lower: %llu/%llu\n",
- cfg->avg, cfg->burst);
- return -ERANGE;
+ user2credits(cfg->avg * cfg->burst, revision) <
+ user2credits(cfg->avg, revision)) {
+ pr_info_ratelimited("overflow, try lower: %llu/%llu\n",
+ cfg->avg, cfg->burst);
+ return -ERANGE;
}
mutex_lock(&hashlimit_mutex);
@@ -915,8 +917,9 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
struct hashlimit_cfg3 cfg = {};
int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
@@ -933,8 +936,9 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
struct hashlimit_cfg3 cfg = {};
int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
@@ -948,9 +952,11 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
static int hashlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
info->name, 3);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 38a78151c0e9..fd077aeaaed9 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -61,8 +61,8 @@ static int helper_mt_check(const struct xt_mtchk_param *par)
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
info->name[sizeof(info->name) - 1] = '\0';
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index 7ca64a50db04..57f1df575701 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -72,7 +72,7 @@ static int comp_mt_check(const struct xt_mtchk_param *par)
/* Must specify no unknown invflags */
if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) {
- pr_err("unknown flags %X\n", compinfo->invflags);
+ pr_info_ratelimited("unknown flags %X\n", compinfo->invflags);
return -EINVAL;
}
return 0;
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 42540d26c2b8..1d950a6100af 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -158,7 +158,8 @@ static int ipvs_mt_check(const struct xt_mtchk_param *par)
&& par->family != NFPROTO_IPV6
#endif
) {
- pr_info("protocol family %u not supported\n", par->family);
+ pr_info_ratelimited("protocol family %u not supported\n",
+ par->family);
return -EINVAL;
}
diff --git a/net/netfilter/xt_l2tp.c b/net/netfilter/xt_l2tp.c
index 8aee572771f2..c43482bf48e6 100644
--- a/net/netfilter/xt_l2tp.c
+++ b/net/netfilter/xt_l2tp.c
@@ -216,7 +216,7 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par)
/* Check for invalid flags */
if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION |
XT_L2TP_TYPE)) {
- pr_info("unknown flags: %x\n", info->flags);
+ pr_info_ratelimited("unknown flags: %x\n", info->flags);
return -EINVAL;
}
@@ -225,7 +225,8 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par)
(!(info->flags & XT_L2TP_SID)) &&
((!(info->flags & XT_L2TP_TYPE)) ||
(info->type != XT_L2TP_TYPE_CONTROL))) {
- pr_info("invalid flags combination: %x\n", info->flags);
+ pr_info_ratelimited("invalid flags combination: %x\n",
+ info->flags);
return -EINVAL;
}
@@ -234,19 +235,22 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par)
*/
if (info->flags & XT_L2TP_VERSION) {
if ((info->version < 2) || (info->version > 3)) {
- pr_info("wrong L2TP version: %u\n", info->version);
+ pr_info_ratelimited("wrong L2TP version: %u\n",
+ info->version);
return -EINVAL;
}
if (info->version == 2) {
if ((info->flags & XT_L2TP_TID) &&
(info->tid > 0xffff)) {
- pr_info("v2 tid > 0xffff: %u\n", info->tid);
+ pr_info_ratelimited("v2 tid > 0xffff: %u\n",
+ info->tid);
return -EINVAL;
}
if ((info->flags & XT_L2TP_SID) &&
(info->sid > 0xffff)) {
- pr_info("v2 sid > 0xffff: %u\n", info->sid);
+ pr_info_ratelimited("v2 sid > 0xffff: %u\n",
+ info->sid);
return -EINVAL;
}
}
@@ -268,13 +272,13 @@ static int l2tp_mt_check4(const struct xt_mtchk_param *par)
if ((ip->proto != IPPROTO_UDP) &&
(ip->proto != IPPROTO_L2TP)) {
- pr_info("missing protocol rule (udp|l2tpip)\n");
+ pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n");
return -EINVAL;
}
if ((ip->proto == IPPROTO_L2TP) &&
(info->version == 2)) {
- pr_info("v2 doesn't support IP mode\n");
+ pr_info_ratelimited("v2 doesn't support IP mode\n");
return -EINVAL;
}
@@ -295,13 +299,13 @@ static int l2tp_mt_check6(const struct xt_mtchk_param *par)
if ((ip->proto != IPPROTO_UDP) &&
(ip->proto != IPPROTO_L2TP)) {
- pr_info("missing protocol rule (udp|l2tpip)\n");
+ pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n");
return -EINVAL;
}
if ((ip->proto == IPPROTO_L2TP) &&
(info->version == 2)) {
- pr_info("v2 doesn't support IP mode\n");
+ pr_info_ratelimited("v2 doesn't support IP mode\n");
return -EINVAL;
}
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 61403b77361c..55d18cd67635 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -106,8 +106,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
/* Check for overflow. */
if (r->burst == 0
|| user2credits(r->avg * r->burst) < user2credits(r->avg)) {
- pr_info("Overflow, try lower: %u/%u\n",
- r->avg, r->burst);
+ pr_info_ratelimited("Overflow, try lower: %u/%u\n",
+ r->avg, r->burst);
return -ERANGE;
}
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 0fd14d1eb09d..bdb689cdc829 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -8,6 +8,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
@@ -19,8 +21,7 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
if (mr->rangesize != 1) {
- pr_info("%s: multiple ranges no longer supported\n",
- par->target->name);
+ pr_info_ratelimited("multiple ranges no longer supported\n");
return -EINVAL;
}
return nf_ct_netns_get(par->net, par->family);
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 6f92d25590a8..c8674deed4eb 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -6,6 +6,8 @@
* it under the terms of the GNU General Public License version 2 (or any
* later at your option) as published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -39,8 +41,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
- pr_info("xt_nfacct: accounting object with name `%s' "
- "does not exists\n", info->name);
+ pr_info_ratelimited("accounting object `%s' does not exists\n",
+ info->name);
return -ENOENT;
}
info->nfacct = nfacct;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index bb33598e4530..9d6d67b953ac 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -107,9 +107,7 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
- pr_info("using --physdev-out and --physdev-is-out are only "
- "supported in the FORWARD and POSTROUTING chains with "
- "bridged traffic.\n");
+ pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n");
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;
}
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 5639fb03bdd9..13f8ccf946d6 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -132,26 +132,29 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int policy_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_policy_info *info = par->matchinfo;
+ const char *errmsg = "neither incoming nor outgoing policy selected";
+
+ if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT)))
+ goto err;
- if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
- pr_info("neither incoming nor outgoing policy selected\n");
- return -EINVAL;
- }
if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
- pr_info("output policy not valid in PREROUTING and INPUT\n");
- return -EINVAL;
+ errmsg = "output policy not valid in PREROUTING and INPUT";
+ goto err;
}
if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
- pr_info("input policy not valid in POSTROUTING and OUTPUT\n");
- return -EINVAL;
+ errmsg = "input policy not valid in POSTROUTING and OUTPUT";
+ goto err;
}
if (info->len > XT_POLICY_MAX_ELEM) {
- pr_info("too many policy elements\n");
- return -EINVAL;
+ errmsg = "too many policy elements";
+ goto err;
}
return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
}
static struct xt_match policy_mt_reg[] __read_mostly = {
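
The xt_policy rewrite above folds three separate pr_info() calls into one exit label, so the ratelimit conversion needs only a single call site. The same shape, reduced to a standalone sketch with made-up flag values:

    #include <stdio.h>

    #define MATCH_IN  0x1
    #define MATCH_OUT 0x2

    static int policy_check(unsigned int flags, int hooked_on_input)
    {
            const char *errmsg = "neither incoming nor outgoing policy selected";

            if (!(flags & (MATCH_IN | MATCH_OUT)))
                    goto err;
            if (hooked_on_input && (flags & MATCH_OUT)) {
                    errmsg = "output policy not valid in PREROUTING and INPUT";
                    goto err;
            }
            return 0;
    err:
            fprintf(stderr, "%s\n", errmsg);  /* one throttle-able call site */
            return -1;
    }

    int main(void)
    {
            return policy_check(MATCH_OUT, 1) ? 1 : 0;
    }
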
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 245fa350a7a8..9bbfc17ce3ec 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -51,8 +51,8 @@ static unsigned int ip_list_gid __read_mostly;
module_param(ip_list_tot, uint, 0400);
module_param(ip_list_hash_size, uint, 0400);
module_param(ip_list_perms, uint, 0400);
-module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR);
-module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR);
+module_param(ip_list_uid, uint, 0644);
+module_param(ip_list_gid, uint, 0644);
MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
@@ -342,8 +342,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
net_get_random_once(&hash_rnd, sizeof(hash_rnd));
if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
- pr_info("Unsupported user space flags (%08x)\n",
- info->check_set);
+ pr_info_ratelimited("Unsupported userspace flags (%08x)\n",
+ info->check_set);
return -EINVAL;
}
if (hweight8(info->check_set &
@@ -357,13 +357,13 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
return -EINVAL;
if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) {
- pr_info("hitcount (%u) is larger than allowed maximum (%u)\n",
- info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
+ pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n",
+ info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
return -EINVAL;
}
- if (info->name[0] == '\0' ||
- strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot)
nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1;
@@ -587,7 +587,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
add = true;
break;
default:
- pr_info("Need \"+ip\", \"-ip\" or \"/\"\n");
+ pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n");
return -EINVAL;
}
@@ -601,10 +601,8 @@ recent_mt_proc_write(struct file *file, const char __user *input,
succ = in4_pton(c, size, (void *)&addr, '\n', NULL);
}
- if (!succ) {
- pr_info("illegal address written to procfs\n");
+ if (!succ)
return -EINVAL;
- }
spin_lock_bh(&recent_lock);
e = recent_entry_lookup(t, &addr, family, 0);
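
xt_recent's open-coded name validation (non-empty, NUL-terminated within the buffer) moves to the shared xt_check_proc_name() helper; since the name becomes a file under /proc/net/xt_recent/, such a validator also has to reject path components. A plausible user-space sketch of the checks — the upstream helper's exact body may differ:

    #include <string.h>
    #include <errno.h>
    #include <stdio.h>

    static int check_proc_name(const char *name, unsigned int size)
    {
            if (name[0] == '\0')
                    return -EINVAL;
            if (strnlen(name, size) == size)
                    return -ENAMETOOLONG;       /* not NUL-terminated */
            if (strchr(name, '/') ||
                !strcmp(name, ".") || !strcmp(name, ".."))
                    return -EINVAL;             /* would escape the directory */
            return 0;
    }

    int main(void)
    {
            printf("%d %d\n", check_proc_name("good", 32),
                   check_proc_name("../evil", 32));
            return 0;
    }
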
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 16b6b11ee83f..6f4c5217d835 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -92,12 +92,12 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find set identified by id %u to match\n",
- info->match_set.index);
+ pr_info_ratelimited("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
return -ENOENT;
}
if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
- pr_warn("Protocol error: set match dimension is over the limit!\n");
+ pr_info_ratelimited("set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
@@ -143,12 +143,12 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find set identified by id %u to match\n",
- info->match_set.index);
+ pr_info_ratelimited("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
return -ENOENT;
}
if (info->match_set.dim > IPSET_DIM_MAX) {
- pr_warn("Protocol error: set match dimension is over the limit!\n");
+ pr_info_ratelimited("set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
@@ -241,8 +241,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
@@ -250,8 +250,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
if (info->del_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
@@ -259,7 +259,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
}
if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 ||
info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
- pr_warn("Protocol error: SET target dimension is over the limit!\n");
+ pr_info_ratelimited("SET target dimension over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -316,8 +316,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
@@ -325,8 +325,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
if (info->del_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
@@ -334,7 +334,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
}
if (info->add_set.dim > IPSET_DIM_MAX ||
info->del_set.dim > IPSET_DIM_MAX) {
- pr_warn("Protocol error: SET target dimension is over the limit!\n");
+ pr_info_ratelimited("SET target dimension over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -444,8 +444,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
index = ip_set_nfnl_get_byindex(par->net,
info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
@@ -454,8 +454,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
index = ip_set_nfnl_get_byindex(par->net,
info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net,
info->add_set.index);
@@ -465,7 +465,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
if (info->map_set.index != IPSET_INVALID_ID) {
if (strncmp(par->table, "mangle", 7)) {
- pr_warn("--map-set only usable from mangle table\n");
+ pr_info_ratelimited("--map-set only usable from mangle table\n");
return -EINVAL;
}
if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
@@ -473,14 +473,14 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
!(par->hook_mask & (1 << NF_INET_FORWARD |
1 << NF_INET_LOCAL_OUT |
1 << NF_INET_POST_ROUTING))) {
- pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
+ pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
return -EINVAL;
}
index = ip_set_nfnl_get_byindex(par->net,
info->map_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find map_set index %u as target\n",
- info->map_set.index);
+ pr_info_ratelimited("Cannot find map_set index %u as target\n",
+ info->map_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net,
info->add_set.index);
@@ -494,7 +494,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.dim > IPSET_DIM_MAX ||
info->del_set.dim > IPSET_DIM_MAX ||
info->map_set.dim > IPSET_DIM_MAX) {
- pr_warn("Protocol error: SET target dimension is over the limit!\n");
+ pr_info_ratelimited("SET target dimension over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 575d2153e3b8..2ac7f674d19b 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -171,7 +171,8 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par)
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V1) {
- pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1);
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V1);
return -EINVAL;
}
return 0;
@@ -187,7 +188,8 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par)
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V2) {
- pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2);
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V2);
return -EINVAL;
}
return 0;
@@ -203,8 +205,8 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par)
if (err)
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V3) {
- pr_info("unknown flags 0x%x\n",
- info->flags & ~XT_SOCKET_FLAGS_V3);
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V3);
return -EINVAL;
}
return 0;
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 5fbd79194d21..0b41c0befe3c 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -44,8 +44,8 @@ static int state_mt_check(const struct xt_mtchk_param *par)
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 1b01eec1fbda..0160f505e337 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -235,13 +235,13 @@ static int time_mt_check(const struct xt_mtchk_param *par)
if (info->daytime_start > XT_TIME_MAX_DAYTIME ||
info->daytime_stop > XT_TIME_MAX_DAYTIME) {
- pr_info("invalid argument - start or "
- "stop time greater than 23:59:59\n");
+ pr_info_ratelimited("invalid argument - start or stop time greater than 23:59:59\n");
return -EDOM;
}
if (info->flags & ~XT_TIME_ALL_FLAGS) {
- pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS);
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_TIME_ALL_FLAGS);
return -EINVAL;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3c8af14330b5..f1b02d87e336 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2307,7 +2307,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
if (cb->start) {
ret = cb->start(cb);
if (ret)
- goto error_unlock;
+ goto error_put;
}
nlk->cb_running = true;
@@ -2327,6 +2327,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
*/
return -EINTR;
+error_put:
+ module_put(control->module);
error_unlock:
sock_put(sk);
mutex_unlock(nlk->cb_mutex);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 6f02499ef007..b9ce82c9440f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1106,7 +1106,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
if (!err)
delivered = true;
else if (err != -ESRCH)
- goto error;
+ return err;
return delivered ? 0 : -ESRCH;
error:
kfree_skb(skb);
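
nlmsg_multicast(), like netlink_broadcast() beneath it, consumes the skb even when delivery fails, so once the final (non-cloned) skb has been handed over, the old `goto error` path's kfree_skb() was a double free; returning the error directly leaves the error label for the earlier loop iterations that still own a clone. The ownership rule, as a tiny consume-on-error analog:

    #include <stdlib.h>

    /* analog of a consume-on-failure API: callee owns buf from here on */
    static int deliver(char *buf)
    {
            free(buf);      /* consumed on success and on failure */
            return -1;      /* pretend no listeners */
    }

    int main(void)
    {
            char *buf = malloc(16);

            if (!buf)
                    return 1;
            if (deliver(buf) < 0)
                    return 0;   /* must not free(buf) again here */
            return 0;
    }
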
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 35bb6807927f..4221d98a314b 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1450,9 +1450,9 @@ static int __init nr_proto_init(void)
nr_loopback_init();
- proc_create("nr", S_IRUGO, init_net.proc_net, &nr_info_fops);
- proc_create("nr_neigh", S_IRUGO, init_net.proc_net, &nr_neigh_fops);
- proc_create("nr_nodes", S_IRUGO, init_net.proc_net, &nr_nodes_fops);
+ proc_create("nr", 0444, init_net.proc_net, &nr_info_fops);
+ proc_create("nr_neigh", 0444, init_net.proc_net, &nr_neigh_fops);
+ proc_create("nr_nodes", 0444, init_net.proc_net, &nr_nodes_fops);
out:
return rc;
fail:
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 367d8c027101..2ceefa183cee 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -149,6 +149,10 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
pr_debug("uri: %s, len: %zu\n", uri, uri_len);
+ /* sdreq->tlv_len is u8, takes uri_len, + 3 for header, + 1 for NULL */
+ if (WARN_ON_ONCE(uri_len > U8_MAX - 4))
+ return NULL;
+
sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
if (sdreq == NULL)
return NULL;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index c0b83dc9d993..f018eafc2a0d 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -61,7 +61,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
};
static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
- [NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
+ [NFC_SDP_ATTR_URI] = { .type = NLA_STRING,
+ .len = U8_MAX - 4 },
[NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
};
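
Both NFC hunks enforce the same bound from opposite ends: the TLV length byte is a u8 and has to carry uri_len plus 3 bytes of header plus 1 terminating NUL, so any URI longer than U8_MAX - 4 = 251 bytes would wrap the length. The netlink policy rejects such strings at parse time, which should make the WARN_ON_ONCE() in nfc_llcp_build_sdreq_tlv() unreachable via netlink. The wrap, demonstrated:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int uri_len = 252;         /* one byte past the limit */
            uint8_t tlv_len = uri_len + 3 + 1;  /* 256 truncates to 0 */

            printf("tlv_len = %u (max safe uri_len = %u)\n",
                   tlv_len, UINT8_MAX - 4);
            return 0;
    }
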
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 04b94281a30b..b891a91577f8 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -242,14 +242,20 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+ if (band->rate == 0) {
+ err = -EINVAL;
+ goto exit_free_meter;
+ }
+
band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
/* Figure out max delta_t that is enough to fill any bucket.
* Keep max_delta_t size to the bucket units:
* pkts => 1/1000 packets, kilobits => bits.
+ *
+ * Start with a full bucket.
*/
- band_max_delta_t = (band->burst_size + band->rate) * 1000;
- /* Start with a full bucket. */
- band->bucket = band_max_delta_t;
+ band->bucket = (band->burst_size + band->rate) * 1000;
+ band_max_delta_t = band->bucket / band->rate;
if (band_max_delta_t > meter->max_delta_t)
meter->max_delta_t = band_max_delta_t;
band++;
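
In the meter's units a bucket is credited `rate` tokens per millisecond, so the time to refill a full bucket is bucket / rate milliseconds; the old code stored the raw bucket size (a token count) in band_max_delta_t (a time). The new rate == 0 check also keeps that division well-defined. Worked numbers, assuming the units I read off the comment (pkts => 1/1000 packets, kilobits => bits):

    #include <stdio.h>

    int main(void)
    {
            unsigned int rate = 100, burst = 400;        /* example values */
            unsigned int bucket = (burst + rate) * 1000; /* 500000 tokens  */
            unsigned int max_dt = bucket / rate;         /* 5000 ms refill */

            printf("bucket = %u tokens, max_delta_t = %u ms\n",
                   bucket, max_dt);
            return 0;
    }
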
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index b6c8524032a0..f81c1d0ddff4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
return 0;
}
-static unsigned int packet_length(const struct sk_buff *skb,
- struct net_device *dev)
+static int packet_length(const struct sk_buff *skb,
+ struct net_device *dev)
{
- unsigned int length = skb->len - dev->hard_header_len;
+ int length = skb->len - dev->hard_header_len;
if (!skb_vlan_tag_present(skb) &&
eth_type_vlan(skb->protocol))
@@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb,
* account for 802.1ad. e.g. is_skb_forwardable().
*/
- return length;
+ return length > 0 ? length : 0;
}
void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
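
The signedness change matters for runt frames: with skb->len smaller than the device's hard_header_len, the unsigned subtraction wrapped to a value near 4 GiB, which then falsely failed the caller's MTU comparison. Demonstration with made-up lengths:

    #include <stdio.h>

    int main(void)
    {
            unsigned int skb_len = 14, hard_header_len = 18;
            unsigned int wrapped = skb_len - hard_header_len;
            int fixed = (int)skb_len - (int)hard_header_len;

            printf("unsigned: %u\n", wrapped);               /* 4294967292 */
            printf("signed, clamped: %d\n", fixed > 0 ? fixed : 0); /* 0 */
            return 0;
    }
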
diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
index 50615d5efac1..9cf089b9754e 100644
--- a/net/qrtr/smd.c
+++ b/net/qrtr/smd.c
@@ -114,5 +114,6 @@ static struct rpmsg_driver qcom_smd_qrtr_driver = {
module_rpmsg_driver(qcom_smd_qrtr_driver);
+MODULE_ALIAS("rpmsg:IPCRTR");
MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
MODULE_LICENSE("GPL v2");
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 0a8eefd256b3..ab751a150f70 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
rds_send_drop_to(rs, NULL);
rds_rdma_drop_keys(rs);
rds_notify_queue_get(rs, NULL);
+ rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue);
spin_lock_bh(&rds_sock_lock);
list_del_init(&rs->rs_item);
@@ -144,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
- * update, or a RDMA completion).
+ * update, or a RDMA completion, or a MSG_ZEROCOPY completion).
*
* EPOLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
@@ -178,10 +179,13 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue) ||
- !list_empty(&rs->rs_notify_queue))
+ !list_empty(&rs->rs_notify_queue) ||
+ !list_empty(&rs->rs_zcookie_queue.zcookie_head))
mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
mask |= (EPOLLOUT | EPOLLWRNORM);
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
/* clear state any time we wake a seen-congested socket */
@@ -511,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
INIT_LIST_HEAD(&rs->rs_recv_queue);
INIT_LIST_HEAD(&rs->rs_notify_queue);
INIT_LIST_HEAD(&rs->rs_cong_list);
+ rds_message_zcopy_queue_init(&rs->rs_zcookie_queue);
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 94e190febfdd..abef75da89a7 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -224,7 +224,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (rds_destroy_pending(conn))
ret = -ENETDOWN;
else
- ret = trans->conn_alloc(conn, gfp);
+ ret = trans->conn_alloc(conn, GFP_ATOMIC);
if (ret) {
rcu_read_unlock();
kfree(conn->c_path);
@@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
+ u64 *buffer,
size_t item_len)
{
- uint64_t buffer[(item_len + 7) / 8];
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
@@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_conn_path *, void *),
+ u64 *buffer,
size_t item_len)
{
- u64 buffer[(item_len + 7) / 8];
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
@@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8];
+
rds_walk_conn_path_info(sock, len, iter, lens,
rds_conn_info_visitor,
+ buffer,
sizeof(struct rds_info_connection));
}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 50a88f3e7e39..02deee29e7f1 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8];
+
rds_for_each_conn_info(sock, len, iter, lens,
rds_ib_conn_info_visitor,
+ buffer,
sizeof(struct rds_info_rdma_connection));
}
diff --git a/net/rds/message.c b/net/rds/message.c
index 4318cc9b78f7..a35f76971984 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -33,6 +33,9 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/errqueue.h>
#include "rds.h"
@@ -45,7 +48,6 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
[RDS_EXTHDR_GEN_NUM] = sizeof(u32),
};
-
void rds_message_addref(struct rds_message *rm)
{
rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
@@ -53,20 +55,107 @@ void rds_message_addref(struct rds_message *rm)
}
EXPORT_SYMBOL_GPL(rds_message_addref);
+static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
+{
+ struct rds_zcopy_cookies *ck = &info->zcookies;
+ int ncookies = ck->num;
+
+ if (ncookies == RDS_MAX_ZCOOKIES)
+ return false;
+ ck->cookies[ncookies] = cookie;
+ ck->num = ++ncookies;
+ return true;
+}
+
+static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
+{
+ return container_of(znotif, struct rds_msg_zcopy_info, znotif);
+}
+
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q)
+{
+ unsigned long flags;
+ LIST_HEAD(copy);
+ struct rds_msg_zcopy_info *info, *tmp;
+
+ spin_lock_irqsave(&q->lock, flags);
+ list_splice(&q->zcookie_head, &copy);
+ INIT_LIST_HEAD(&q->zcookie_head);
+ spin_unlock_irqrestore(&q->lock, flags);
+
+ list_for_each_entry_safe(info, tmp, &copy, rs_zcookie_next) {
+ list_del(&info->rs_zcookie_next);
+ kfree(info);
+ }
+}
+
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+ struct rds_znotifier *znotif)
+{
+ struct rds_msg_zcopy_info *info;
+ struct rds_msg_zcopy_queue *q;
+ u32 cookie = znotif->z_cookie;
+ struct rds_zcopy_cookies *ck;
+ struct list_head *head;
+ unsigned long flags;
+
+ mm_unaccount_pinned_pages(&znotif->z_mmp);
+ q = &rs->rs_zcookie_queue;
+ spin_lock_irqsave(&q->lock, flags);
+ head = &q->zcookie_head;
+ if (!list_empty(head)) {
+ info = list_entry(head, struct rds_msg_zcopy_info,
+ rs_zcookie_next);
+ if (info && rds_zcookie_add(info, cookie)) {
+ spin_unlock_irqrestore(&q->lock, flags);
+ kfree(rds_info_from_znotifier(znotif));
+ /* caller invokes rds_wake_sk_sleep() */
+ return;
+ }
+ }
+
+ info = rds_info_from_znotifier(znotif);
+ ck = &info->zcookies;
+ memset(ck, 0, sizeof(*ck));
+ WARN_ON(!rds_zcookie_add(info, cookie));
+ list_add_tail(&q->zcookie_head, &info->rs_zcookie_next);
+
+ spin_unlock_irqrestore(&q->lock, flags);
+ /* caller invokes rds_wake_sk_sleep() */
+}
+
/*
* This relies on dma_map_sg() not touching sg[].page during merging.
*/
static void rds_message_purge(struct rds_message *rm)
{
- unsigned long i;
+ unsigned long i, flags;
+ bool zcopy = false;
if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
return;
+ spin_lock_irqsave(&rm->m_rs_lock, flags);
+ if (rm->m_rs) {
+ struct rds_sock *rs = rm->m_rs;
+
+ if (rm->data.op_mmp_znotifier) {
+ zcopy = true;
+ rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+ rds_wake_sk_sleep(rs);
+ rm->data.op_mmp_znotifier = NULL;
+ }
+ sock_put(rds_rs_to_sk(rs));
+ rm->m_rs = NULL;
+ }
+ spin_unlock_irqrestore(&rm->m_rs_lock, flags);
+
for (i = 0; i < rm->data.op_nents; i++) {
- rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
/* XXX will have to put_page for page refs */
- __free_page(sg_page(&rm->data.op_sg[i]));
+ if (!zcopy)
+ __free_page(sg_page(&rm->data.op_sg[i]));
+ else
+ put_page(sg_page(&rm->data.op_sg[i]));
}
rm->data.op_nents = 0;
@@ -266,12 +355,13 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
return rm;
}
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
+static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
{
- unsigned long to_copy, nbytes;
- unsigned long sg_off;
struct scatterlist *sg;
int ret = 0;
+ int length = iov_iter_count(from);
+ int total_copied = 0;
+ struct rds_msg_zcopy_info *info;
rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
@@ -279,8 +369,67 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
* now allocate and copy in the data payload.
*/
sg = rm->data.op_sg;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&info->rs_zcookie_next);
+ rm->data.op_mmp_znotifier = &info->znotif;
+ if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+ length)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ while (iov_iter_count(from)) {
+ struct page *pages;
+ size_t start;
+ ssize_t copied;
+
+ copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+ 1, &start);
+ if (copied < 0) {
+ struct mmpin *mmp;
+ int i;
+
+ for (i = 0; i < rm->data.op_nents; i++)
+ put_page(sg_page(&rm->data.op_sg[i]));
+ mmp = &rm->data.op_mmp_znotifier->z_mmp;
+ mm_unaccount_pinned_pages(mmp);
+ ret = -EFAULT;
+ goto err;
+ }
+ total_copied += copied;
+ iov_iter_advance(from, copied);
+ length -= copied;
+ sg_set_page(sg, pages, copied, start);
+ rm->data.op_nents++;
+ sg++;
+ }
+ WARN_ON_ONCE(length != 0);
+ return ret;
+err:
+ kfree(info);
+ rm->data.op_mmp_znotifier = NULL;
+ return ret;
+}
+
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+ bool zcopy)
+{
+ unsigned long to_copy, nbytes;
+ unsigned long sg_off;
+ struct scatterlist *sg;
+ int ret = 0;
+
+ rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
+
+ /* now allocate and copy in the data payload. */
+ sg = rm->data.op_sg;
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
+ if (zcopy)
+ return rds_message_zcopy_from_user(rm, from);
+
while (iov_iter_count(from)) {
if (!sg_page(sg)) {
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 7301b9b01890..b04c333d9d1c 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -356,6 +356,30 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
#define RDS_MSG_PAGEVEC 7
#define RDS_MSG_FLUSH 8
+struct rds_znotifier {
+ struct mmpin z_mmp;
+ u32 z_cookie;
+};
+
+struct rds_msg_zcopy_info {
+ struct list_head rs_zcookie_next;
+ union {
+ struct rds_znotifier znotif;
+ struct rds_zcopy_cookies zcookies;
+ };
+};
+
+struct rds_msg_zcopy_queue {
+ struct list_head zcookie_head;
+ spinlock_t lock; /* protects zcookie_head queue */
+};
+
+static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q)
+{
+ spin_lock_init(&q->lock);
+ INIT_LIST_HEAD(&q->zcookie_head);
+}
+
struct rds_message {
refcount_t m_refcount;
struct list_head m_sock_item;
@@ -436,6 +460,7 @@ struct rds_message {
unsigned int op_count;
unsigned int op_dmasg;
unsigned int op_dmaoff;
+ struct rds_znotifier *op_mmp_znotifier;
struct scatterlist *op_sg;
} data;
};
@@ -589,6 +614,7 @@ struct rds_sock {
/* Socket receive path trace points*/
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+ struct rds_msg_zcopy_queue rs_zcookie_queue;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -709,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
+ u64 *buffer,
size_t item_len);
__printf(2, 3)
@@ -771,7 +798,8 @@ rds_conn_connecting(struct rds_connection *conn)
/* message.c */
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+ bool zcopy);
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
__be16 dport, u64 seq);
@@ -786,6 +814,7 @@ void rds_message_addref(struct rds_message *rm);
void rds_message_put(struct rds_message *rm);
void rds_message_wait(struct rds_message *rm);
void rds_message_unmapped(struct rds_message *rm);
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info);
static inline void rds_message_make_checksum(struct rds_header *hdr)
{
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b25bcfe411ca..de50e2126e40 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -577,6 +577,41 @@ out:
return ret;
}
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+ struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue;
+ struct rds_msg_zcopy_info *info = NULL;
+ struct rds_zcopy_cookies *done;
+ unsigned long flags;
+
+ if (!msg->msg_control)
+ return false;
+
+ if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+ msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+ return false;
+
+ spin_lock_irqsave(&q->lock, flags);
+ if (!list_empty(&q->zcookie_head)) {
+ info = list_entry(q->zcookie_head.next,
+ struct rds_msg_zcopy_info, rs_zcookie_next);
+ list_del(&info->rs_zcookie_next);
+ }
+ spin_unlock_irqrestore(&q->lock, flags);
+ if (!info)
+ return false;
+ done = &info->zcookies;
+ if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+ done)) {
+ spin_lock_irqsave(&q->lock, flags);
+ list_add(&info->rs_zcookie_next, &q->zcookie_head);
+ spin_unlock_irqrestore(&q->lock, flags);
+ return false;
+ }
+ kfree(info);
+ return true;
+}
+
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int msg_flags)
{
@@ -594,6 +629,8 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (msg_flags & MSG_OOB)
goto out;
+ if (msg_flags & MSG_ERRQUEUE)
+ return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR);
while (1) {
/* If there are pending notifications, do those - and nothing else */
@@ -609,7 +646,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (!rds_next_incoming(rs, &inc)) {
if (nonblock) {
- ret = -EAGAIN;
+ bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+ ret = reaped ? 0 : -EAGAIN;
break;
}
@@ -658,6 +697,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
ret = -EFAULT;
goto out;
}
+ rds_recvmsg_zcookie(rs, msg);
rds_stats_inc(s_recv_delivered);
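
With the hunk above, a non-blocking recvmsg() that finds no data but does reap cookies now returns 0 instead of -EAGAIN, so an application can poll for completions alone. A hedged user-space sketch (SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION and struct rds_zcopy_cookies come from <linux/rds.h>; error handling trimmed):

    #include <string.h>
    #include <sys/socket.h>
    #include <linux/rds.h>

    static void reap_zcookies(int fd)
    {
            char ctl[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
            struct msghdr msg;
            struct cmsghdr *cm;

            memset(&msg, 0, sizeof(msg));
            msg.msg_control = ctl;
            msg.msg_controllen = sizeof(ctl);
            if (recvmsg(fd, &msg, MSG_DONTWAIT) < 0)
                    return;
            for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                    if (cm->cmsg_level == SOL_RDS &&
                        cm->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
                            struct rds_zcopy_cookies *ck =
                                    (struct rds_zcopy_cookies *)CMSG_DATA(cm);
                            /* ck->cookies[0 .. ck->num-1] may be reused */
                            (void)ck;
                    }
            }
    }
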
diff --git a/net/rds/send.c b/net/rds/send.c
index b1b0022b8370..acad04243b41 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -649,7 +649,6 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status)
rm->rdma.op_notifier = NULL;
}
was_on_sock = 1;
- rm->m_rs = NULL;
}
spin_unlock(&rs->rs_lock);
@@ -756,9 +755,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
*/
if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
spin_unlock_irqrestore(&cp->cp_lock, flags);
- spin_lock_irqsave(&rm->m_rs_lock, flags);
- rm->m_rs = NULL;
- spin_unlock_irqrestore(&rm->m_rs_lock, flags);
continue;
}
list_del_init(&rm->m_conn_item);
@@ -774,7 +770,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
spin_unlock(&rs->rs_lock);
- rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
@@ -798,7 +793,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
spin_unlock(&rs->rs_lock);
- rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
@@ -849,6 +843,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
rds_message_addref(rm);
+ sock_hold(rds_rs_to_sk(rs));
rm->m_rs = rs;
/* The code ordering is a little weird, but we're
@@ -880,12 +875,13 @@ out:
* rds_message is getting to be quite complicated, and we'd like to allocate
* it all in one go. This figures out how big it needs to be up front.
*/
-static int rds_rm_size(struct msghdr *msg, int data_len)
+static int rds_rm_size(struct msghdr *msg, int num_sgs)
{
struct cmsghdr *cmsg;
int size = 0;
int cmsg_groups = 0;
int retval;
+ bool zcopy_cookie = false;
for_each_cmsghdr(cmsg, msg) {
if (!CMSG_OK(msg, cmsg))
@@ -904,6 +900,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
break;
+ case RDS_CMSG_ZCOPY_COOKIE:
+ zcopy_cookie = true;
+ /* fall through */
+
case RDS_CMSG_RDMA_DEST:
case RDS_CMSG_RDMA_MAP:
cmsg_groups |= 2;
@@ -924,7 +924,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
}
- size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
+ if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
+ return -EINVAL;
+
+ size += num_sgs * sizeof(struct scatterlist);
/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
if (cmsg_groups == 3)
@@ -933,6 +936,19 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
return size;
}
+static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
+ struct cmsghdr *cmsg)
+{
+ u32 *cookie;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
+ !rm->data.op_mmp_znotifier)
+ return -EINVAL;
+ cookie = CMSG_DATA(cmsg);
+ rm->data.op_mmp_znotifier->z_cookie = *cookie;
+ return 0;
+}
+
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
struct msghdr *msg, int *allocated_mr)
{
@@ -975,6 +991,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
ret = rds_cmsg_atomic(rs, rm, cmsg);
break;
+ case RDS_CMSG_ZCOPY_COOKIE:
+ ret = rds_cmsg_zcopy(rs, rm, cmsg);
+ break;
+
default:
return -EINVAL;
}
@@ -1045,10 +1065,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
long timeo = sock_sndtimeo(sk, nonblock);
struct rds_conn_path *cpath;
size_t total_payload_len = payload_len, rdma_payload_len = 0;
+ bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
+ sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
+ int num_sgs = ceil(payload_len, PAGE_SIZE);
/* Mirror Linux UDP's handling of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
- if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
+ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -1092,8 +1115,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
goto out;
}
+ if (zcopy) {
+ if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
+ }
/* size of rm including all sgs */
- ret = rds_rm_size(msg, payload_len);
+ ret = rds_rm_size(msg, num_sgs);
if (ret < 0)
goto out;
@@ -1105,12 +1135,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Attach data to the rm */
if (payload_len) {
- rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+ rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
if (!rm->data.op_sg) {
ret = -ENOMEM;
goto out;
}
- ret = rds_message_copy_from_user(rm, &msg->msg_iter);
+ ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
if (ret)
goto out;
}
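
Taken together, the sendmsg() hunks gate zero-copy on three things: SO_ZEROCOPY set on the socket, a TCP transport (anything else gets -EOPNOTSUPP), and an RDS_CMSG_ZCOPY_COOKIE control message accompanying MSG_ZEROCOPY (rds_rm_size() returns -EINVAL otherwise). A hedged user-space sketch of such a send; constants come from <linux/rds.h>, and the fallback defines cover older libc headers (SO_ZEROCOPY is 60 in asm-generic, MSG_ZEROCOPY is 0x4000000):

    #include <string.h>
    #include <sys/uio.h>
    #include <sys/socket.h>
    #include <linux/rds.h>

    #ifndef SO_ZEROCOPY
    #define SO_ZEROCOPY 60
    #endif
    #ifndef MSG_ZEROCOPY
    #define MSG_ZEROCOPY 0x4000000
    #endif

    static ssize_t send_zcopy(int fd, void *buf, size_t len, unsigned int cookie)
    {
            char ctl[CMSG_SPACE(sizeof(cookie))];
            struct iovec iov = { .iov_base = buf, .iov_len = len };
            struct msghdr msg;
            struct cmsghdr *cm;
            int one = 1;

            if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
                    return -1;
            memset(&msg, 0, sizeof(msg));
            memset(ctl, 0, sizeof(ctl));
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = ctl;
            msg.msg_controllen = sizeof(ctl);
            cm = CMSG_FIRSTHDR(&msg);
            cm->cmsg_level = SOL_RDS;
            cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
            cm->cmsg_len = CMSG_LEN(sizeof(cookie));
            memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
            return sendmsg(fd, &msg, MSG_ZEROCOPY);
    }
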
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08230a145042..351a28474667 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -272,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
if (!tc->t_tcp_node_detached)
list_del(&tc->t_tcp_node);
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
kmem_cache_free(rds_tcp_conn_slab, tc);
}
@@ -308,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
rdsdebug("rds_conn_path [%d] tc %p\n", i,
conn->c_path[i].cp_transport_data);
}
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
tc = conn->c_path[i].cp_transport_data;
tc->t_tcp_node_detached = false;
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
fail:
if (ret) {
for (j = 0; j < i; j++)
@@ -484,39 +485,6 @@ fail:
return err;
}
-static void __net_exit rds_tcp_exit_net(struct net *net)
-{
- struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
-
- if (rtn->rds_tcp_sysctl)
- unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
-
- if (net != &init_net && rtn->ctl_table)
- kfree(rtn->ctl_table);
-
- /* If rds_tcp_exit_net() is called as a result of netns deletion,
- * the rds_tcp_kill_sock() device notifier would already have cleaned
- * up the listen socket, thus there is no work to do in this function.
- *
- * If rds_tcp_exit_net() is called as a result of module unload,
- * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
- * we do need to clean up the listen socket here.
- */
- if (rtn->rds_tcp_listen_sock) {
- struct socket *lsock = rtn->rds_tcp_listen_sock;
-
- rtn->rds_tcp_listen_sock = NULL;
- rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
- }
-}
-
-static struct pernet_operations rds_tcp_net_ops = {
- .init = rds_tcp_init_net,
- .exit = rds_tcp_exit_net,
- .id = &rds_tcp_netid,
- .size = sizeof(struct rds_tcp_net),
-};
-
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
@@ -526,7 +494,7 @@ static void rds_tcp_kill_sock(struct net *net)
rtn->rds_tcp_listen_sock = NULL;
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -539,45 +507,42 @@ static void rds_tcp_kill_sock(struct net *net)
tc->t_tcp_node_detached = true;
}
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_conn_destroy(tc->t_cpath->cp_conn);
}
-void *rds_tcp_listen_sock_def_readable(struct net *net)
+static void __net_exit rds_tcp_exit_net(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
- struct socket *lsock = rtn->rds_tcp_listen_sock;
- if (!lsock)
- return NULL;
+ rds_tcp_kill_sock(net);
- return lsock->sk->sk_user_data;
+ if (rtn->rds_tcp_sysctl)
+ unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+
+ if (net != &init_net && rtn->ctl_table)
+ kfree(rtn->ctl_table);
}
-static int rds_tcp_dev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static struct pernet_operations rds_tcp_net_ops = {
+ .init = rds_tcp_init_net,
+ .exit = rds_tcp_exit_net,
+ .id = &rds_tcp_netid,
+ .size = sizeof(struct rds_tcp_net),
+};
+
+void *rds_tcp_listen_sock_def_readable(struct net *net)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
- /* rds-tcp registers as a pernet subys, so the ->exit will only
- * get invoked after network acitivity has quiesced. We need to
- * clean up all sockets to quiesce network activity, and use
- * the unregistration of the per-net loopback device as a trigger
- * to start that cleanup.
- */
- if (event == NETDEV_UNREGISTER_FINAL &&
- dev->ifindex == LOOPBACK_IFINDEX)
- rds_tcp_kill_sock(dev_net(dev));
+ if (!lsock)
+ return NULL;
- return NOTIFY_DONE;
+ return lsock->sk->sk_user_data;
}
-static struct notifier_block rds_tcp_dev_notifier = {
- .notifier_call = rds_tcp_dev_event,
- .priority = -10, /* must be called after other network notifiers */
-};
-
/* when sysctl is used to modify some kernel socket parameters, this
* function resets the RDS connections in that netns so that we can
* restart with new parameters. The assumption is that such reset
@@ -587,7 +552,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -597,7 +562,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
/* reconnect with new parameters */
rds_conn_path_drop(tc->t_cpath, false);
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
}
static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
@@ -623,9 +588,7 @@ static void rds_tcp_exit(void)
rds_tcp_set_unloading();
synchronize_rcu();
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
- unregister_pernet_subsys(&rds_tcp_net_ops);
- if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
- pr_warn("could not unregister rds_tcp_dev_notifier\n");
+ unregister_pernet_device(&rds_tcp_net_ops);
rds_tcp_destroy_conns();
rds_trans_unregister(&rds_tcp_transport);
rds_tcp_recv_exit();
@@ -649,24 +612,15 @@ static int rds_tcp_init(void)
if (ret)
goto out_slab;
- ret = register_pernet_subsys(&rds_tcp_net_ops);
+ ret = register_pernet_device(&rds_tcp_net_ops);
if (ret)
goto out_recv;
- ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
- if (ret) {
- pr_warn("could not register rds_tcp_dev_notifier\n");
- goto out_pernet;
- }
-
rds_trans_register(&rds_tcp_transport);
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
goto out;
-
-out_pernet:
- unregister_pernet_subsys(&rds_tcp_net_ops);
out_recv:
rds_tcp_recv_exit();
out_slab:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index c061d6eb465d..22571189f21e 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -142,12 +142,20 @@ int rds_tcp_accept_one(struct socket *sock)
if (ret)
goto out;
- new_sock->type = sock->type;
- new_sock->ops = sock->ops;
ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
if (ret < 0)
goto out;
+ /* sock_create_lite() does not get a hold on the owner module so we
+ * need to do it here. Note that sock_release() uses sock->ops to
+ * determine if it needs to decrement the reference count. So set
+ * sock->ops after calling accept() in case that fails. And there's
+ * no need to do try_module_get() as the listener should have a hold
+ * already.
+ */
+ new_sock->ops = sock->ops;
+ __module_get(new_sock->ops->owner);
+
ret = rds_tcp_keepalive(new_sock);
if (ret < 0)
goto out;
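
The comment in this hunk is precise about why: sock_release() tears down through sock->ops, doing roughly the following (paraphrased from net/socket.c, not verbatim), so a socket whose ops were copied by hand must own a matching module reference, and the ops must only be set once accept() can no longer fail:

    /* release side, paraphrased: */
    if (sock->ops) {
            struct module *owner = sock->ops->owner;

            sock->ops->release(sock);
            sock->ops = NULL;
            module_put(owner);      /* pairs with the __module_get() above */
    }
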
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 5170373b797c..9ff5e0a76593 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1567,12 +1567,12 @@ static int __init rose_proto_init(void)
rose_add_loopback_neigh();
- proc_create("rose", S_IRUGO, init_net.proc_net, &rose_info_fops);
- proc_create("rose_neigh", S_IRUGO, init_net.proc_net,
+ proc_create("rose", 0444, init_net.proc_net, &rose_info_fops);
+ proc_create("rose_neigh", 0444, init_net.proc_net,
&rose_neigh_fops);
- proc_create("rose_nodes", S_IRUGO, init_net.proc_net,
+ proc_create("rose_nodes", 0444, init_net.proc_net,
&rose_nodes_fops);
- proc_create("rose_routes", S_IRUGO, init_net.proc_net,
+ proc_create("rose_routes", 0444, init_net.proc_net,
&rose_routes_fops);
out:
return rc;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0c9c18aa7c77..9e1c2c6b6a67 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -32,7 +32,7 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_RXRPC);
unsigned int rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
-module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug, rxrpc_debug, uint, 0644);
MODULE_PARM_DESC(debug, "RxRPC debugging mask");
static struct proto rxrpc_proto;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 42410e910aff..cf73dc006c3b 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -445,7 +445,7 @@ send_fragmentable:
(char *)&opt, sizeof(opt));
if (ret == 0) {
ret = kernel_sendmsg(conn->params.local->socket, &msg,
- iov, 1, iov[0].iov_len);
+ iov, 2, len);
opt = IPV6_PMTUDISC_DO;
kernel_setsockopt(conn->params.local->socket,
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index cc21e8db25b0..7bff716e911e 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
unsigned int *_offset, unsigned int *_len)
{
unsigned int offset = sizeof(struct rxrpc_wire_header);
- unsigned int len = *_len;
+ unsigned int len;
int ret;
u8 annotation = *_annotation;
@@ -517,9 +517,10 @@ try_again:
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
sizeof(unsigned int), &id32);
} else {
+ unsigned long idl = call->user_call_ID;
+
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
- sizeof(unsigned long),
- &call->user_call_ID);
+ sizeof(unsigned long), &idl);
}
if (ret < 0)
goto error_unlock_call;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f24a6ae6819a..a01169fb5325 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -658,6 +658,18 @@ config NET_EMATCH_IPSET
To compile this code as a module, choose M here: the
module will be called em_ipset.
+config NET_EMATCH_IPT
+ tristate "IPtables Matches"
+ depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
+ ---help---
+ Say Y here to be able to classify packets based on iptables
+ matches.
+ The currently supported match is "policy", which allows packet
+ classification based on the IPsec policy used during decapsulation.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_ipt.
+
config NET_CLS_ACT
bool "Actions"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 5b635447e3f8..8811d3804878 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -75,3 +75,4 @@ obj-$(CONFIG_NET_EMATCH_META) += em_meta.o
obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o
obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o
obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o
+obj-$(CONFIG_NET_EMATCH_IPT) += em_ipt.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index eba6682727dd..0d78b58e1898 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
}
EXPORT_SYMBOL(__tcf_idr_release);
+static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+{
+ u32 cookie_len = 0;
+
+ if (act->act_cookie)
+ cookie_len = nla_total_size(act->act_cookie->len);
+
+ return nla_total_size(0) /* action number nested */
+ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+ + cookie_len /* TCA_ACT_COOKIE */
+ + nla_total_size(0) /* TCA_ACT_STATS nested */
+ /* TCA_STATS_BASIC */
+ + nla_total_size_64bit(sizeof(struct gnet_stats_basic))
+ /* TCA_STATS_QUEUE */
+ + nla_total_size_64bit(sizeof(struct gnet_stats_queue))
+ + nla_total_size(0) /* TCA_OPTIONS nested */
+ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
+}
+
+static size_t tcf_action_full_attrs_size(size_t sz)
+{
+ return NLMSG_HDRLEN /* struct nlmsghdr */
+ + sizeof(struct tcamsg)
+ + nla_total_size(0) /* TCA_ACT_TAB nested */
+ + sz;
+}
+
+static size_t tcf_action_fill_size(const struct tc_action *act)
+{
+ size_t sz = tcf_action_shared_attrs_size(act);
+
+ if (act->ops->get_fill_size)
+ return act->ops->get_fill_size(act) + sz;
+ return sz;
+}
+
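
nla_total_size(n) evaluates to NLA_ALIGN(NLA_HDRLEN + n): a 4-byte attribute header plus the payload, rounded up to a multiple of 4. The new helpers sum those worst-case sizes so the delete/notify path can allocate one skb big enough for large action batches instead of hoping NLMSG_GOODSIZE suffices. The arithmetic, as a standalone sketch:

    #include <stdio.h>

    #define NLA_ALIGNTO  4
    #define NLA_ALIGN(n) (((n) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
    #define NLA_HDRLEN   NLA_ALIGN(4)    /* struct nlattr is 4 bytes */

    static unsigned int nla_total_size(unsigned int payload)
    {
            return NLA_ALIGN(NLA_HDRLEN + payload);
    }

    int main(void)
    {
            /* TCA_ACT_KIND carries IFNAMSIZ (16) bytes: 4 + 16 = 20 */
            printf("kind attr: %u bytes\n", nla_total_size(16));
            /* a nest contributes its header even when "empty": 4 bytes */
            printf("empty nest: %u bytes\n", nla_total_size(0));
            return 0;
    }
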
static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -202,7 +238,8 @@ nla_put_failure:
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -211,7 +248,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(idrinfo, skb, cb);
} else {
- WARN(1, "tcf_generic_walker: unknown action %d\n", type);
+ WARN(1, "tcf_generic_walker: unknown command %d\n", type);
+ NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
return -EINVAL;
}
}
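
The extack plumbing threaded through these act_api.c hunks records a static string alongside the errno; the netlink core returns it to user space in the NLMSGERR_ATTR_MSG attribute of the NLMSG_ERROR reply, which is how iproute2 can print a readable reason instead of a bare "Invalid argument". Illustrative in-kernel usage (not a function from this patch):

    static int example_check(const struct nlattr *kind,
                             struct netlink_ext_ack *extack)
    {
            if (!kind) {
                    /* must be a string literal; only a pointer is stored */
                    NL_SET_ERR_MSG(extack, "TC action kind must be specified");
                    return -EINVAL;
            }
            return 0;
    }
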
@@ -258,14 +296,6 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
}
EXPORT_SYMBOL(tcf_idr_check);
-void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
-{
- if (est)
- gen_kill_estimator(&a->tcfa_rate_est);
- free_tcf(a);
-}
-EXPORT_SYMBOL(tcf_idr_cleanup);
-
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats)
@@ -605,7 +635,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind)
+ char *name, int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action *a;
struct tc_action_ops *a_o;
@@ -616,31 +647,40 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
int err;
if (name == NULL) {
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
- if (kind == NULL)
+ if (!kind) {
+ NL_SET_ERR_MSG(extack, "TC action kind must be specified");
goto err_out;
- if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+ }
+ if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
goto err_out;
+ }
if (tb[TCA_ACT_COOKIE]) {
int cklen = nla_len(tb[TCA_ACT_COOKIE]);
- if (cklen > TC_COOKIE_MAX_SIZE)
+ if (cklen > TC_COOKIE_MAX_SIZE) {
+ NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
goto err_out;
+ }
cookie = nla_memdup_cookie(tb);
if (!cookie) {
+ NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
err = -ENOMEM;
goto err_out;
}
}
} else {
- err = -EINVAL;
- if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+ if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
+ err = -EINVAL;
goto err_out;
+ }
}
a_o = tc_lookup_action_n(act_name);
@@ -663,15 +703,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
goto err_mod;
}
#endif
+ NL_SET_ERR_MSG(extack, "Failed to load TC action module");
err = -ENOENT;
goto err_out;
}
/* backward compatibility for policer */
if (name == NULL)
- err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
+ err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
+ extack);
else
- err = a_o->init(net, nla, est, &a, ovr, bind);
+ err = a_o->init(net, nla, est, &a, ovr, bind, extack);
if (err < 0)
goto err_mod;
@@ -697,6 +739,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
list_add_tail(&a->list, &actions);
tcf_action_destroy(&actions, bind);
+ NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
return ERR_PTR(err);
}
}
@@ -726,29 +769,35 @@ static void cleanup_a(struct list_head *actions, int ovr)
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions)
+ struct list_head *actions, size_t *attr_size,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
+ size_t sz = 0;
int err;
int i;
- err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (err < 0)
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
+ act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
+ extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
}
act->order = i;
+ sz += tcf_action_fill_size(act);
if (ovr)
act->tcfa_refcnt++;
list_add_tail(&act->list, actions);
}
+ *attr_size = tcf_action_full_attrs_size(sz);
+
/* Remove the temp refcnt which was necessary to protect against
* destroying an existing action which was being replaced
*/
@@ -822,7 +871,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
+ if (!nest)
goto out_nlmsg_trim;
if (tcf_action_dump(skb, actions, bind, ref) < 0)
@@ -840,7 +889,8 @@ out_nlmsg_trim:
static int
tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
- struct list_head *actions, int event)
+ struct list_head *actions, int event,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
@@ -849,6 +899,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
0, 0) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
}
@@ -857,7 +908,8 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
}
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid)
+ struct nlmsghdr *n, u32 portid,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX + 1];
const struct tc_action_ops *ops;
@@ -865,22 +917,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
int index;
int err;
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
if (tb[TCA_ACT_INDEX] == NULL ||
- nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
+ nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
+ NL_SET_ERR_MSG(extack, "Invalid TC action index value");
goto err_out;
+ }
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -EINVAL;
ops = tc_lookup_action(tb[TCA_ACT_KIND]);
- if (!ops) /* could happen in batch of actions */
+ if (!ops) { /* could happen in batch of actions */
+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
goto err_out;
+ }
err = -ENOENT;
- if (ops->lookup(net, &a, index) == 0)
+ if (ops->lookup(net, &a, index, extack) == 0)
goto err_mod;
module_put(ops->owner);
@@ -893,7 +949,8 @@ err_out:
}
static int tca_action_flush(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid)
+ struct nlmsghdr *n, u32 portid,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
unsigned char *b;
@@ -907,39 +964,45 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
int err = -ENOMEM;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb) {
- pr_debug("tca_action_flush: failed skb alloc\n");
+ if (!skb)
return err;
- }
b = skb_tail_pointer(skb);
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
ops = tc_lookup_action(kind);
- if (!ops) /*some idjot trying to flush unknown action */
+ if (!ops) { /*some idjot trying to flush unknown action */
+ NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
goto err_out;
+ }
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
sizeof(*t), 0);
- if (!nlh)
+ if (!nlh) {
+ NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
goto out_module_put;
+ }
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
+ if (!nest) {
+ NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
goto out_module_put;
+ }
- err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
- if (err <= 0)
+ err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
+ if (err <= 0) {
+ nla_nest_cancel(skb, nest);
goto out_module_put;
+ }
nla_nest_end(skb, nest);
@@ -950,6 +1013,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
return 0;
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
return err;
@@ -962,17 +1027,19 @@ err_out:
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
int ret;
struct sk_buff *skb;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+ GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 1) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
return -EINVAL;
}
@@ -980,6 +1047,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
/* now do the delete */
ret = tcf_action_destroy(actions, 0);
if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "Failed to delete TC action");
kfree_skb(skb);
return ret;
}
@@ -993,38 +1061,43 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
- u32 portid, int event)
+ u32 portid, int event, struct netlink_ext_ack *extack)
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
+ size_t attr_size = 0;
LIST_HEAD(actions);
- ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+ ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (ret < 0)
return ret;
if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
- if (tb[1] != NULL)
- return tca_action_flush(net, tb[1], n, portid);
- else
- return -EINVAL;
+ if (tb[1])
+ return tca_action_flush(net, tb[1], n, portid, extack);
+
+ NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
+ return -EINVAL;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_get_1(net, tb[i], n, portid);
+ act = tcf_action_get_1(net, tb[i], n, portid, extack);
if (IS_ERR(act)) {
ret = PTR_ERR(act);
goto err;
}
act->order = i;
+ attr_size += tcf_action_fill_size(act);
list_add_tail(&act->list, &actions);
}
+ attr_size = tcf_action_full_attrs_size(attr_size);
+
if (event == RTM_GETACTION)
- ret = tcf_get_notify(net, portid, n, &actions, event);
+ ret = tcf_get_notify(net, portid, n, &actions, event, extack);
else { /* delete */
- ret = tcf_del_notify(net, n, &actions, portid);
+ ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
if (ret)
goto err;
return ret;
@@ -1037,17 +1110,19 @@ err:
static int
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
int err = 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+ GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
}
@@ -1060,16 +1135,19 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid, int ovr)
+ struct nlmsghdr *n, u32 portid, int ovr,
+ struct netlink_ext_ack *extack)
{
+ size_t attr_size = 0;
int ret = 0;
LIST_HEAD(actions);
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+ &attr_size, extack);
if (ret)
return ret;
- return tcf_add_notify(net, n, &actions, portid);
+ return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
}
static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
@@ -1097,7 +1175,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
return ret;
if (tca[TCA_ACT_TAB] == NULL) {
- pr_notice("tc_ctl_action: received NO action attribs\n");
+ NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
return -EINVAL;
}
@@ -1113,17 +1191,18 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
replay:
- ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
+ ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+ extack);
if (ret == -EAGAIN)
goto replay;
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- portid, RTM_DELACTION);
+ portid, RTM_DELACTION, extack);
break;
case RTM_GETACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- portid, RTM_GETACTION);
+ portid, RTM_GETACTION, extack);
break;
default:
BUG();
@@ -1218,7 +1297,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto out_module_put;
- ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
+ ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
if (ret < 0)
goto out_module_put;
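
The act_api.c changes do two things: they thread a struct netlink_ext_ack through the init/get/flush paths so failures carry a human-readable message back over netlink, and they pre-size notification skbs from a per-action attribute estimate instead of always allocating NLMSG_GOODSIZE. A minimal sketch of the allocation pattern used in tcf_add_notify() and tcf_del_notify(), assuming the estimate accumulated via tcf_action_full_attrs_size():

	/* Sketch only: never allocate below the default, but grow the
	 * skb when the estimated dump of all actions would not fit.
	 */
	static struct sk_buff *notify_skb_alloc(size_t attr_size)
	{
		size_t len = attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE
							 : attr_size;

		return alloc_skb(len, GFP_KERNEL);
	}
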
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b3f2c15affa7..9092531d45d8 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -272,7 +272,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind)
+ int replace, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -352,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return res;
out:
if (res == ACT_P_CREATED)
- tcf_idr_cleanup(*act, est);
+ tcf_idr_release(*act, bind);
return ret;
}
@@ -367,14 +367,16 @@ static void tcf_bpf_cleanup(struct tc_action *act)
static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
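
The tcf_idr_cleanup() to tcf_idr_release() substitution here recurs below in act_ipt, act_pedit, act_police and act_simple: when init fails after the action was freshly created (ACT_P_CREATED), the reference taken by tcf_idr_create() has to be dropped as well, not just the estimator unlinked, or the IDR slot leaks. A sketch of the shared error-path shape, with names as in the patch:

	out:
		if (res == ACT_P_CREATED)
			tcf_idr_release(*act, bind);	/* drops the idr ref too */
		return ret;
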
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b15ba84e0c8..e4b880fa51fe 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,7 +96,8 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
@@ -176,14 +177,16 @@ nla_put_failure:
static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b7ba9b06b147..7e28b2ce1437 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_old, *params_new;
@@ -350,7 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
{
struct sctphdr *sctph;
- if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)
+ if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
return 1;
sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph));
@@ -626,19 +626,22 @@ static void tcf_csum_cleanup(struct tc_action *a)
struct tcf_csum_params *params;
params = rcu_dereference_protected(p->params, 1);
- kfree_rcu(params, rcu);
+ if (params)
+ kfree_rcu(params, rcu);
}
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b56986d41c87..4dc4f153cad8 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -56,7 +56,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -201,20 +201,35 @@ nla_put_failure:
static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
return tcf_idr_search(tn, a, index);
}
+static size_t tcf_gact_get_fill_size(const struct tc_action *act)
+{
+ size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */
+
+#ifdef CONFIG_GACT_PROB
+ if (to_gact(act)->tcfg_ptype)
+ /* TCA_GACT_PROB */
+ sz += nla_total_size(sizeof(struct tc_gact_p));
+#endif
+
+ return sz;
+}
+
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
.type = TCA_ACT_GACT,
@@ -225,6 +240,7 @@ static struct tc_action_ops act_gact_ops = {
.init = tcf_gact_init,
.walk = tcf_gact_walker,
.lookup = tcf_gact_search,
+ .get_fill_size = tcf_gact_get_fill_size,
.size = sizeof(struct tcf_gact),
};
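
gact is the first action to implement the new .get_fill_size callback that feeds tcf_action_fill_size() in act_api.c. The estimate is built from nla_total_size() so attribute padding is accounted for; a hypothetical action carrying one fixed struct plus an optional u32 would size itself roughly like this (illustrative only; example_get_fill_size and some_optional_attr_is_set are not from the patch):

	static size_t example_get_fill_size(const struct tc_action *act)
	{
		size_t sz = nla_total_size(sizeof(struct tc_gact)); /* PARMS */

		if (some_optional_attr_is_set(act))	/* hypothetical helper */
			sz += nla_total_size(sizeof(u32));

		return sz;
	}
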
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 5954e992685a..a5994cf0512b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -447,7 +447,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -824,14 +824,16 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 06e380ae0928..14c312d7908f 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -80,9 +80,12 @@ static void ipt_destroy_target(struct xt_entry_target *t)
static void tcf_ipt_release(struct tc_action *a)
{
struct tcf_ipt *ipt = to_ipt(a);
- ipt_destroy_target(ipt->tcfi_t);
+
+ if (ipt->tcfi_t) {
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_t);
+ }
kfree(ipt->tcfi_tname);
- kfree(ipt->tcfi_t);
}
static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
@@ -187,13 +190,13 @@ err2:
kfree(tname);
err1:
if (ret == ACT_P_CREATED)
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return err;
}
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
bind);
@@ -201,7 +204,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
bind);
@@ -303,14 +306,16 @@ nla_put_failure:
static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
@@ -351,14 +356,16 @@ static struct pernet_operations ipt_net_ops = {
static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e6ff88f72900..fd34015331ab 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -69,7 +69,7 @@ static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -80,13 +80,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
bool exists = false;
int ret;
- if (nla == NULL)
+ if (!nla) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
+ }
+ ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack);
if (ret < 0)
return ret;
- if (tb[TCA_MIRRED_PARMS] == NULL)
+ if (!tb[TCA_MIRRED_PARMS]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters");
return -EINVAL;
+ }
parm = nla_data(tb[TCA_MIRRED_PARMS]);
exists = tcf_idr_check(tn, parm->index, a, bind);
@@ -102,6 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
default:
if (exists)
tcf_idr_release(*a, bind);
+ NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
return -EINVAL;
}
if (parm->ifindex) {
@@ -117,8 +122,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- if (dev == NULL)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
return -EINVAL;
+ }
ret = tcf_idr_create(tn, parm->index, est, a,
&act_mirred_ops, bind, true);
if (ret)
@@ -265,14 +272,16 @@ nla_put_failure:
static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
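
mirred uses NL_SET_ERR_MSG_MOD() rather than the plain macro; the _MOD variant prefixes the message with the module name so userspace can attribute the failure to act_mirred. Simplified shape of the helpers, condensed from include/linux/netlink.h (the real macros guard the extack pointer the same way):

	#define NL_SET_ERR_MSG(extack, msg) do {		\
		static const char __msg[] = msg;		\
		if (extack)					\
			(extack)->_msg = __msg;			\
	} while (0)

	#define NL_SET_ERR_MSG_MOD(extack, msg) \
		NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
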
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 98c6a4b2f523..4b5848b6c252 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -37,7 +37,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
};
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action **a, int ovr, int bind)
+ struct tc_action **a, int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -277,14 +278,16 @@ nla_put_failure:
static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 349beaffb29e..8a925c72db5f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -132,7 +132,7 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -176,7 +176,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p = to_pedit(*a);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
kfree(keys_ex);
return -ENOMEM;
}
@@ -419,14 +419,16 @@ nla_put_failure:
static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 95d3c9097b25..4e72bc2a0dfb 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -58,11 +58,12 @@ static struct tc_action_ops act_police_ops;
static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -74,7 +75,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_act_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
int ret = 0, err;
struct nlattr *tb[TCA_POLICE_MAX + 1];
@@ -194,7 +196,7 @@ failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return err;
}
@@ -304,7 +306,8 @@ nla_put_failure:
return -1;
}
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1ba0df238756..5db358497c9e 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -103,7 +103,8 @@ static void tcf_sample_cleanup(struct tc_action *a)
psample_group = rtnl_dereference(s->psample_group);
RCU_INIT_POINTER(s->psample_group, NULL);
- psample_group_put(psample_group);
+ if (psample_group)
+ psample_group_put(psample_group);
}
static bool tcf_sample_dev_ok_push(struct net_device *dev)
@@ -202,14 +203,16 @@ nla_put_failure:
static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 425eac11f6da..9618b4a83cee 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -79,7 +79,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -121,7 +121,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
d = to_defact(*a);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return ret;
}
d->tcf_action = parm->action;
@@ -170,14 +170,16 @@ nla_put_failure:
static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 5a3f691bb545..ddf69fc01bdf 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -66,7 +66,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
@@ -208,14 +208,16 @@ nla_put_failure:
static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index fa975262dbac..bbcbdce732cc 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -84,7 +84,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -152,7 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
ASSERT_RTNL();
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
- if (ovr)
+ if (ret == ACT_P_CREATED)
tcf_idr_release(*a, bind);
return -ENOMEM;
}
@@ -190,7 +190,8 @@ static void tcf_skbmod_cleanup(struct tc_action *a)
struct tcf_skbmod_params *p;
p = rcu_dereference_protected(d->skbmod_p, 1);
- kfree_rcu(p, rcu);
+ if (p)
+ kfree_rcu(p, rcu);
}
static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
@@ -232,14 +233,16 @@ nla_put_failure:
static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 0e23aac09ad6..626dac81a48a 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -70,7 +70,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -153,6 +153,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
break;
default:
+ ret = -EINVAL;
goto err_out;
}
@@ -207,11 +208,12 @@ static void tunnel_key_release(struct tc_action *a)
struct tcf_tunnel_key_params *params;
params = rcu_dereference_protected(t->params, 1);
+ if (params) {
+ if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+ dst_release(&params->tcft_enc_metadata->dst);
- if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
- dst_release(&params->tcft_enc_metadata->dst);
-
- kfree_rcu(params, rcu);
+ kfree_rcu(params, rcu);
+ }
}
static int tunnel_key_dump_addresses(struct sk_buff *skb,
@@ -291,14 +293,16 @@ nla_put_failure:
static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index e1a1b3f3983a..853604685965 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -109,7 +109,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -117,7 +117,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct tc_vlan *parm;
struct tcf_vlan *v;
int action;
- __be16 push_vid = 0;
+ u16 push_vid = 0;
__be16 push_proto = 0;
u8 push_prio = 0;
bool exists = false;
@@ -195,7 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
ASSERT_RTNL();
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
- if (ovr)
+ if (ret == ACT_P_CREATED)
tcf_idr_release(*a, bind);
return -ENOMEM;
}
@@ -225,7 +225,8 @@ static void tcf_vlan_cleanup(struct tc_action *a)
struct tcf_vlan_params *p;
p = rcu_dereference_protected(v->vlan_p, 1);
- kfree_rcu(p, rcu);
+ if (p)
+ kfree_rcu(p, rcu);
}
static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
@@ -267,14 +268,16 @@ nla_put_failure:
static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
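
csum, skbmod, tunnel_key, sample and vlan all gain the same guard in their cleanup/release callbacks: ->cleanup() can now run for an action whose init failed before the parameter block was attached, so the pointer may legitimately be NULL. The guarded form, as used throughout these files:

	p = rcu_dereference_protected(v->vlan_p, 1);
	if (p)			/* init may have failed before attach */
		kfree_rcu(p, rcu);
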
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2bc1bc23d42e..b66754f52a9f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -376,17 +376,12 @@ struct tcf_net {
static unsigned int tcf_net_id;
static int tcf_block_insert(struct tcf_block *block, struct net *net,
- u32 block_index, struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
- int err;
- err = idr_alloc_u32(&tn->idr, block, &block_index, block_index,
- GFP_KERNEL);
- if (err)
- return err;
- block->index = block_index;
- return 0;
+ return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
+ GFP_KERNEL);
}
static void tcf_block_remove(struct tcf_block *block, struct net *net)
@@ -397,6 +392,7 @@ static void tcf_block_remove(struct tcf_block *block, struct net *net)
}
static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
+ u32 block_index,
struct netlink_ext_ack *extack)
{
struct tcf_block *block;
@@ -419,10 +415,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
err = -ENOMEM;
goto err_chain_create;
}
- block->net = qdisc_net(q);
block->refcnt = 1;
block->net = net;
- block->q = q;
+ block->index = block_index;
+
+ /* Don't store q pointer for blocks which are shared */
+ if (!tcf_block_shared(block))
+ block->q = q;
return block;
err_chain_create:
@@ -518,13 +517,12 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
}
if (!block) {
- block = tcf_block_create(net, q, extack);
+ block = tcf_block_create(net, q, ei->block_index, extack);
if (IS_ERR(block))
return PTR_ERR(block);
created = true;
- if (ei->block_index) {
- err = tcf_block_insert(block, net,
- ei->block_index, extack);
+ if (tcf_block_shared(block)) {
+ err = tcf_block_insert(block, net, extack);
if (err)
goto err_block_insert;
}
@@ -1399,13 +1397,18 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
if (!tcf_chain_dump(chain, q, parent, skb, cb,
- index_start, &index))
+ index_start, &index)) {
+ err = -EMSGSIZE;
break;
+ }
}
cb->args[0] = index;
out:
+ /* If we made no progress, the error (EMSGSIZE) is real */
+ if (skb->len == 0 && err)
+ return err;
return skb->len;
}
@@ -1430,11 +1433,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
#ifdef CONFIG_NET_CLS_ACT
{
struct tc_action *act;
+ size_t attr_size = 0;
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND);
+ TCA_ACT_BIND, extack);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -1447,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- &actions);
+ &actions, &attr_size, extack);
if (err)
return err;
list_for_each_entry(act, &actions, list)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7d0ce2c40f93..d964e60c730e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb,
fl_set_key_flag(key, mask, flags_key, flags_mask,
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_FIRST_FRAG);
return 0;
}
@@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
fl_get_key_flag(flags_key, flags_mask, &key, &mask,
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_FIRST_FRAG);
_key = cpu_to_be32(key);
_mask = cpu_to_be32(mask);
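
The flower addition exposes FLOW_DIS_FIRST_FRAG, letting a filter distinguish the first fragment (which still carries the L4 header) from later ones. Assuming matching iproute2 support, usage would look roughly like the following; the keyword spelling is an assumption and depends on the iproute2 version:

	tc filter add dev eth0 ingress protocol ip flower \
		ip_flags firstfrag action drop
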
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 6c7601a530e3..ed8b6a24b9e9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -96,7 +96,7 @@ struct tc_u_hnode {
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
- struct tcf_block *block;
+ void *ptr;
int refcnt;
struct idr handle_idr;
struct hlist_node hnode;
@@ -330,9 +330,25 @@ static struct hlist_head *tc_u_common_hash;
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
+static void *tc_u_common_ptr(const struct tcf_proto *tp)
+{
+ struct tcf_block *block = tp->chain->block;
+
+ /* The block sharing is currently supported only
+ * for classless qdiscs. In that case we use block
+ * for tc_u_common identification. In case the
+ * block is not shared, block->q is a valid pointer
+ * and we can use that. That works for classful qdiscs.
+ */
+ if (tcf_block_shared(block))
+ return block;
+ else
+ return block->q;
+}
+
static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
- return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
+ return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT);
}
static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
@@ -342,7 +358,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
h = tc_u_hash(tp);
hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
- if (tc->block == tp->chain->block)
+ if (tc->ptr == tc_u_common_ptr(tp))
return tc;
}
return NULL;
@@ -371,7 +387,7 @@ static int u32_init(struct tcf_proto *tp)
kfree(root_ht);
return -ENOBUFS;
}
- tp_c->block = tp->chain->block;
+ tp_c->ptr = tc_u_common_ptr(tp);
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
new file mode 100644
index 000000000000..a5f34e930eff
--- /dev/null
+++ b/net/sched/em_ipt.c
@@ -0,0 +1,257 @@
+/*
+ * net/sched/em_ipt.c IPtables matches Ematch
+ *
+ * (c) 2018 Eyal Birger <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_ipt.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/pkt_cls.h>
+
+struct em_ipt_match {
+ const struct xt_match *match;
+ u32 hook;
+ u8 match_data[0] __aligned(8);
+};
+
+struct em_ipt_xt_match {
+ char *match_name;
+ int (*validate_match_data)(struct nlattr **tb, u8 mrev);
+};
+
+static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
+ [TCA_EM_IPT_MATCH_NAME] = { .type = NLA_STRING,
+ .len = XT_EXTENSION_MAXNAMELEN },
+ [TCA_EM_IPT_MATCH_REVISION] = { .type = NLA_U8 },
+ [TCA_EM_IPT_HOOK] = { .type = NLA_U32 },
+ [TCA_EM_IPT_NFPROTO] = { .type = NLA_U8 },
+ [TCA_EM_IPT_MATCH_DATA] = { .type = NLA_UNSPEC },
+};
+
+static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
+{
+ struct xt_mtchk_param mtpar = {};
+ union {
+ struct ipt_entry e4;
+ struct ip6t_entry e6;
+ } e = {};
+
+ mtpar.net = net;
+ mtpar.table = "filter";
+ mtpar.hook_mask = 1 << im->hook;
+ mtpar.family = im->match->family;
+ mtpar.match = im->match;
+ mtpar.entryinfo = &e;
+ mtpar.matchinfo = (void *)im->match_data;
+ return xt_check_match(&mtpar, mdata_len, 0, 0);
+}
+
+static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+ if (mrev != 0) {
+ pr_err("only policy match revision 0 supported");
+ return -EINVAL;
+ }
+
+ if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) {
+ pr_err("policy can only be matched on NF_INET_PRE_ROUTING");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
+ {
+ .match_name = "policy",
+ .validate_match_data = policy_validate_match_data
+ },
+ {}
+};
+
+static struct xt_match *get_xt_match(struct nlattr **tb)
+{
+ const struct em_ipt_xt_match *m;
+ struct nlattr *mname_attr;
+ u8 nfproto, mrev = 0;
+ int ret;
+
+ mname_attr = tb[TCA_EM_IPT_MATCH_NAME];
+ for (m = em_ipt_xt_matches; m->match_name; m++) {
+ if (!nla_strcmp(mname_attr, m->match_name))
+ break;
+ }
+
+ if (!m->match_name) {
+ pr_err("Unsupported xt match");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (tb[TCA_EM_IPT_MATCH_REVISION])
+ mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);
+
+ ret = m->validate_match_data(tb, mrev);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+ return xt_request_find_match(nfproto, m->match_name, mrev);
+}
+
+static int em_ipt_change(struct net *net, void *data, int data_len,
+ struct tcf_ematch *em)
+{
+ struct nlattr *tb[TCA_EM_IPT_MAX + 1];
+ struct em_ipt_match *im = NULL;
+ struct xt_match *match;
+ int mdata_len, ret;
+
+ ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
+ NULL);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
+ !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
+ return -EINVAL;
+
+ match = get_xt_match(tb);
+ if (IS_ERR(match)) {
+ pr_err("unable to load match\n");
+ return PTR_ERR(match);
+ }
+
+ mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
+ im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
+ if (!im) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ im->match = match;
+ im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+ nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
+
+ ret = check_match(net, im, mdata_len);
+ if (ret)
+ goto err;
+
+ em->datalen = sizeof(*im) + mdata_len;
+ em->data = (unsigned long)im;
+ return 0;
+
+err:
+ kfree(im);
+ module_put(match->me);
+ return ret;
+}
+
+static void em_ipt_destroy(struct tcf_ematch *em)
+{
+ struct em_ipt_match *im = (void *)em->data;
+
+ if (!im)
+ return;
+
+ if (im->match->destroy) {
+ struct xt_mtdtor_param par = {
+ .net = em->net,
+ .match = im->match,
+ .matchinfo = im->match_data,
+ .family = im->match->family
+ };
+ im->match->destroy(&par);
+ }
+ module_put(im->match->me);
+ kfree((void *)im);
+}
+
+static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ const struct em_ipt_match *im = (const void *)em->data;
+ struct xt_action_param acpar = {};
+ struct net_device *indev = NULL;
+ struct nf_hook_state state;
+ int ret;
+
+ rcu_read_lock();
+
+ if (skb->skb_iif)
+ indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+
+ nf_hook_state_init(&state, im->hook, im->match->family,
+ indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
+
+ acpar.match = im->match;
+ acpar.matchinfo = im->match_data;
+ acpar.state = &state;
+
+ ret = im->match->match(skb, &acpar);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+ struct em_ipt_match *im = (void *)em->data;
+
+ if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+ return -EMSGSIZE;
+ if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
+ im->match->usersize ?: im->match->matchsize,
+ im->match_data) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static struct tcf_ematch_ops em_ipt_ops = {
+ .kind = TCF_EM_IPT,
+ .change = em_ipt_change,
+ .destroy = em_ipt_destroy,
+ .match = em_ipt_match,
+ .dump = em_ipt_dump,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_ipt_ops.link)
+};
+
+static int __init init_em_ipt(void)
+{
+ return tcf_em_register(&em_ipt_ops);
+}
+
+static void __exit exit_em_ipt(void)
+{
+ tcf_em_unregister(&em_ipt_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eyal Birger <[email protected]>");
+MODULE_DESCRIPTION("TC extended match for IPtables matches");
+
+module_init(init_em_ipt);
+module_exit(exit_em_ipt);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);
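
em_ipt lets classifiers reuse netfilter match extensions, initially only xt_policy, restricted to revision 0 and the NF_INET_PRE_ROUTING hook. Assuming iproute2 support for the new ematch, a filter matching inbound IPsec-policy traffic would look roughly like this (illustrative only; exact syntax depends on the iproute2 version):

	tc filter add dev eth0 ingress basic \
		match 'ipt(-m policy --dir in --pol ipsec)' action drop
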
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d512f49ee83c..106dae7e4818 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
unsigned int len)
{
+ bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
* If child was empty even before update then backlog
* counter is screwed and we skip notification because
* parent class is already passive.
+ *
+ * If the original child was offloaded then it is allowed
+ * to be seen as empty, so the parent is notified anyway.
*/
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+ !qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 190570f21b20..7e3fbe9cc936 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -106,6 +106,14 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
__skb_queue_tail(&q->skb_bad_txq, skb);
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_inc(q, skb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ } else {
+ qdisc_qstats_backlog_inc(q, skb);
+ q->q.qlen++;
+ }
+
if (lock)
spin_unlock(lock);
}
@@ -196,14 +204,6 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
break;
if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
qdisc_enqueue_skb_bad_txq(q, nskb);
-
- if (qdisc_is_percpu_stats(q)) {
- qdisc_qstats_cpu_backlog_inc(q, nskb);
- qdisc_qstats_cpu_qlen_inc(q);
- } else {
- qdisc_qstats_backlog_inc(q, nskb);
- q->q.qlen++;
- }
break;
}
skb->next = nskb;
@@ -628,6 +628,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
int band = prio2band[skb->priority & TC_PRIO_MAX];
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct skb_array *q = band2list(priv, band);
+ unsigned int pkt_len = qdisc_pkt_len(skb);
int err;
err = skb_array_produce(q, skb);
@@ -636,7 +637,10 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
return qdisc_drop_cpu(skb, qdisc, to_free);
qdisc_qstats_cpu_qlen_inc(qdisc);
- qdisc_qstats_cpu_backlog_inc(qdisc, skb);
+ /* Note: skb cannot be used after skb_array_produce(),
+ * so we must not use qdisc_qstats_cpu_backlog_inc()
+ */
+ this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
return NET_XMIT_SUCCESS;
}
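
The pfifo_fast change closes a use-after-free window: once skb_array_produce() succeeds, a consumer on another CPU may already have freed the skb, so its length has to be read beforehand. The ordering that matters, condensed from the hunk above:

	unsigned int pkt_len = qdisc_pkt_len(skb);	/* read before handoff */

	err = skb_array_produce(q, skb);		/* skb may be freed now */
	if (!err)
		this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
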
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7c179addebcd..7d6801fc5340 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -509,7 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
if (unlikely(sch->q.qlen >= sch->limit))
- return qdisc_drop(skb, sch, to_free);
+ return qdisc_drop_all(skb, sch, to_free);
qdisc_qstats_backlog_inc(sch, skb);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index efbf51f35778..222e53d3d27a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
sch->q.qlen = 0;
}
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
{
- struct prio_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload opt = {
.handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
- if (enable) {
+ if (qopt) {
opt.command = TC_PRIO_REPLACE;
- opt.replace_params.bands = q->bands;
- memcpy(&opt.replace_params.priomap, q->prio2band,
+ opt.replace_params.bands = qopt->bands;
+ memcpy(&opt.replace_params.priomap, qopt->priomap,
TC_PRIO_MAX + 1);
opt.replace_params.qstats = &sch->qstats;
} else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- prio_offload(sch, false);
+ prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
}
+ prio_offload(sch, qopt);
sch_tree_lock(sch);
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_unlock(sch);
- prio_offload(sch, true);
return 0;
}
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
+ struct tc_prio_qopt_offload graft_offload;
+ struct net_device *dev = qdisc_dev(sch);
unsigned long band = arg - 1;
+ bool any_qdisc_is_offloaded;
+ int err;
if (new == NULL)
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->queues[band]);
+
+ if (!tc_can_offload(dev))
+ return 0;
+
+ graft_offload.handle = sch->handle;
+ graft_offload.parent = sch->parent;
+ graft_offload.graft_params.band = band;
+ graft_offload.graft_params.child_handle = new->handle;
+ graft_offload.command = TC_PRIO_GRAFT;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+ &graft_offload);
+
+ /* Don't report an error if the graft is part of a destroy operation. */
+ if (err && new != &noop_qdisc) {
+ /* Don't report error if the parent, the old child and the new
+ * one are not offloaded.
+ */
+ any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+ any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+ if (*old)
+ any_qdisc_is_offloaded |= (*old)->flags &
+ TCQ_F_OFFLOADED;
+
+ if (any_qdisc_is_offloaded)
+ NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+ }
+
return 0;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 229172d509cc..03225a8df973 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -188,7 +188,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
int ret;
if (qdisc_pkt_len(skb) > q->max_size) {
- if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
+ if (skb_is_gso(skb) &&
+ skb_gso_validate_mac_len(skb, q->max_size))
return tbf_segment(skb, sch, to_free);
return qdisc_drop(skb, sch, to_free);
}
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6776582ec449..e845e4588535 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -15,6 +15,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
offload.o stream_sched.o stream_sched_prio.o \
stream_sched_rr.o stream_interleave.o
+sctp_diag-y := diag.o
+
sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
sctp-$(CONFIG_PROC_FS) += proc.o
sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 00667c50efa7..e64630cd3331 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp)
return NULL;
INIT_LIST_HEAD(&new->key_list);
+ refcount_set(&new->refcnt, 1);
new->key_id = key_id;
return new;
}
/* Free the shared key structure */
-static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
+static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key)
{
BUG_ON(!list_empty(&sh_key->key_list));
sctp_auth_key_put(sh_key->key);
@@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
kfree(sh_key);
}
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key)
+{
+ if (refcount_dec_and_test(&sh_key->refcnt))
+ sctp_auth_shkey_destroy(sh_key);
+}
+
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key)
+{
+ refcount_inc(&sh_key->refcnt);
+}
+
/* Destroy the entire key list. This is done during the
* association and endpoint free process.
*/
@@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys)
key_for_each_safe(ep_key, tmp, keys) {
list_del_init(&ep_key->key_list);
- sctp_auth_shkey_free(ep_key);
+ sctp_auth_shkey_release(ep_key);
}
}
@@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
sctp_auth_key_put(asoc->asoc_shared_key);
asoc->asoc_shared_key = secret;
+ asoc->shkey = ep_key;
/* Update send queue in case any chunk already in there now
* needs authenticating
*/
list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) {
- if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc))
+ if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) {
chunk->auth = 1;
+ if (!chunk->shkey) {
+ chunk->shkey = asoc->shkey;
+ sctp_auth_shkey_hold(chunk->shkey);
+ }
+ }
}
return 0;
@@ -431,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey(
/* First search associations set of endpoint pair shared keys */
key_for_each(key, &asoc->endpoint_shared_keys) {
- if (key->key_id == key_id)
- return key;
+ if (key->key_id == key_id) {
+ if (!key->deactivated)
+ return key;
+ break;
+ }
}
return NULL;
@@ -703,16 +724,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
* after the AUTH chunk in the SCTP packet.
*/
void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
- struct sk_buff *skb,
- struct sctp_auth_chunk *auth,
- gfp_t gfp)
+ struct sk_buff *skb, struct sctp_auth_chunk *auth,
+ struct sctp_shared_key *ep_key, gfp_t gfp)
{
- struct crypto_shash *tfm;
struct sctp_auth_bytes *asoc_key;
+ struct crypto_shash *tfm;
__u16 key_id, hmac_id;
- __u8 *digest;
unsigned char *end;
int free_key = 0;
+ __u8 *digest;
/* Extract the info we need:
* - hmac id
@@ -724,12 +744,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
if (key_id == asoc->active_key_id)
asoc_key = asoc->asoc_shared_key;
else {
- struct sctp_shared_key *ep_key;
-
- ep_key = sctp_auth_get_shkey(asoc, key_id);
- if (!ep_key)
- return;
-
+ /* ep_key can't be NULL here */
asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp);
if (!asoc_key)
return;
@@ -829,7 +844,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
struct sctp_association *asoc,
struct sctp_authkey *auth_key)
{
- struct sctp_shared_key *cur_key = NULL;
+ struct sctp_shared_key *cur_key, *shkey;
struct sctp_auth_bytes *key;
struct list_head *sh_keys;
int replace = 0;
@@ -842,46 +857,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
else
sh_keys = &ep->endpoint_shared_keys;
- key_for_each(cur_key, sh_keys) {
- if (cur_key->key_id == auth_key->sca_keynumber) {
+ key_for_each(shkey, sh_keys) {
+ if (shkey->key_id == auth_key->sca_keynumber) {
replace = 1;
break;
}
}
- /* If we are not replacing a key id, we need to allocate
- * a shared key.
- */
- if (!replace) {
- cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber,
- GFP_KERNEL);
- if (!cur_key)
- return -ENOMEM;
- }
+ cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL);
+ if (!cur_key)
+ return -ENOMEM;
/* Create a new key data based on the info passed in */
key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
- if (!key)
- goto nomem;
+ if (!key) {
+ kfree(cur_key);
+ return -ENOMEM;
+ }
memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
+ cur_key->key = key;
- /* If we are replacing, remove the old keys data from the
- * key id. If we are adding new key id, add it to the
- * list.
- */
- if (replace)
- sctp_auth_key_put(cur_key->key);
- else
- list_add(&cur_key->key_list, sh_keys);
+ if (replace) {
+ list_del_init(&shkey->key_list);
+ sctp_auth_shkey_release(shkey);
+ }
+ list_add(&cur_key->key_list, sh_keys);
- cur_key->key = key;
return 0;
-nomem:
- if (!replace)
- sctp_auth_shkey_free(cur_key);
-
- return -ENOMEM;
}
int sctp_auth_set_active_key(struct sctp_endpoint *ep,
@@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
}
}
- if (!found)
+ if (!found || key->deactivated)
return -EINVAL;
if (asoc) {
@@ -952,7 +955,58 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
/* Delete the shared key */
list_del_init(&key->key_list);
- sctp_auth_shkey_free(key);
+ sctp_auth_shkey_release(key);
+
+ return 0;
+}
+
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+ struct sctp_association *asoc, __u16 key_id)
+{
+ struct sctp_shared_key *key;
+ struct list_head *sh_keys;
+ int found = 0;
+
+ /* The key identifier MUST NOT be the current active key
+ * The key identifier MUST correspond to an existing key
+ */
+ if (asoc) {
+ if (asoc->active_key_id == key_id)
+ return -EINVAL;
+
+ sh_keys = &asoc->endpoint_shared_keys;
+ } else {
+ if (ep->active_key_id == key_id)
+ return -EINVAL;
+
+ sh_keys = &ep->endpoint_shared_keys;
+ }
+
+ key_for_each(key, sh_keys) {
+ if (key->key_id == key_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found)
+ return -EINVAL;
+
+ /* refcnt == 1 with the key still on the list means nothing else
+ * holds a reference, and deactivated is about to be set, so notify
+ * userland that this shkey can be freed.
+ */
+ if (asoc && !list_empty(&key->key_list) &&
+ refcount_read(&key->refcnt) == 1) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, key->key_id,
+ SCTP_AUTH_FREE_KEY, GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+
+ key->deactivated = 1;
return 0;
}
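sctp_auth_deact_key_id() above only marks the key; the SCTP_AUTH_FREE_KEY event tells the application when a deactivated key is no longer referenced and can safely be deleted. A sketch of consuming these notifications, assuming authentication events were subscribed to beforehand:

#include <stdio.h>
#include <netinet/sctp.h>

/* Dispatch an SCTP_AUTHENTICATION_EVENT notification read via recvmsg(). */
static void handle_auth_event(const union sctp_notification *notif)
{
	const struct sctp_authkey_event *ev = &notif->sn_authkey_event;

	switch (ev->auth_indication) {
	case SCTP_AUTH_NEW_KEY:  /* peer switched to this key number */
		printf("key %u now in use\n", ev->auth_keynumber);
		break;
	case SCTP_AUTH_FREE_KEY: /* deactivated key is no longer referenced */
		printf("key %u may be deleted\n", ev->auth_keynumber);
		break;
	case SCTP_AUTH_NO_AUTH:  /* peer did not negotiate SCTP-AUTH */
		printf("association is not authenticated\n");
		break;
	}
}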
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 991a530c6b31..f889a84f264d 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -168,6 +168,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
{
size_t len, first_len, max_data, remaining;
size_t msg_len = iov_iter_count(from);
+ struct sctp_shared_key *shkey = NULL;
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
struct sctp_datamsg *msg;
@@ -204,6 +205,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (hmac_desc)
max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
hmac_desc->hmac_len);
+
+ if (sinfo->sinfo_tsn &&
+ sinfo->sinfo_ssn != asoc->active_key_id) {
+ shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn);
+ if (!shkey) {
+ err = -EINVAL;
+ goto errout;
+ }
+ } else {
+ shkey = asoc->shkey;
+ }
}
/* Check what's our max considering the above */
@@ -275,6 +287,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (err < 0)
goto errout_chunk_free;
+ chunk->shkey = shkey;
+
/* Put the chunk->skb back into the form expected by send. */
__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr -
chunk->skb->data);
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 291c97b07058..8f6c2e8c0953 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -81,6 +81,12 @@ const char *sctp_cname(const union sctp_subtype cid)
case SCTP_CID_RECONF:
return "RECONF";
+ case SCTP_CID_I_DATA:
+ return "I_DATA";
+
+ case SCTP_CID_I_FWD_TSN:
+ return "I_FWD_TSN";
+
default:
break;
}
diff --git a/net/sctp/sctp_diag.c b/net/sctp/diag.c
index a72a7d925d46..078f01a8d582 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/diag.c
@@ -1,3 +1,34 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions implement sctp diag support.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ * lksctp developers <[email protected]>
+ *
+ * Written or modified by:
+ * Xin Long <[email protected]>
+ */
+
#include <linux/module.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8b3146816519..e2f5a3ee41a7 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -349,8 +349,8 @@ out:
/* Look for any peeled off association from the endpoint that matches the
* given peer address.
*/
-int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
- const union sctp_addr *paddr)
+bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
+ const union sctp_addr *paddr)
{
struct sctp_sockaddr_entry *addr;
struct sctp_bind_addr *bp;
@@ -362,10 +362,10 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
*/
list_for_each_entry(addr, &bp->address_list, list) {
if (sctp_has_association(net, &addr->a, paddr))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/* Do delayed input processing. This is scheduled by sctp_rcv().
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 141c9c466ec1..ba8a6e6c36fa 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -106,6 +106,7 @@ int sctp_rcv(struct sk_buff *skb)
int family;
struct sctp_af *af;
struct net *net = dev_net(skb->dev);
+ bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb);
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
@@ -123,8 +124,7 @@ int sctp_rcv(struct sk_buff *skb)
* it's better to just linearize it, otherwise CRC computation
* takes longer.
*/
- if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
- skb_linearize(skb)) ||
+ if ((!is_gso && skb_linearize(skb)) ||
!pskb_may_pull(skb, sizeof(struct sctphdr)))
goto discard_it;
@@ -135,7 +135,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
else if (!sctp_checksum_disable &&
- !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+ !is_gso &&
sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;
@@ -897,15 +897,12 @@ int sctp_hash_transport(struct sctp_transport *t)
rhl_for_each_entry_rcu(transport, tmp, list, node)
if (transport->asoc->ep == t->asoc->ep) {
rcu_read_unlock();
- err = -EEXIST;
- goto out;
+ return -EEXIST;
}
rcu_read_unlock();
err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
&t->node, sctp_hash_params);
-
-out:
if (err)
pr_err_once("insert transport fail, errno %d\n", err);
@@ -1013,19 +1010,18 @@ struct sctp_association *sctp_lookup_association(struct net *net,
}
/* Is there an association matching the given local and peer addresses? */
-int sctp_has_association(struct net *net,
- const union sctp_addr *laddr,
- const union sctp_addr *paddr)
+bool sctp_has_association(struct net *net,
+ const union sctp_addr *laddr,
+ const union sctp_addr *paddr)
{
- struct sctp_association *asoc;
struct sctp_transport *transport;
- if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
+ if (sctp_lookup_association(net, laddr, paddr, &transport)) {
sctp_transport_put(transport);
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/*
@@ -1221,7 +1217,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
* issue as packets hitting this are mostly INIT or INIT-ACK and
* those cannot be on GSO-style anyway.
*/
- if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
+ if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
return NULL;
ch = (struct sctp_chunkhdr *)skb->data;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 48392552ee7c..23ebc5318edc 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -170,7 +170,7 @@ next_chunk:
chunk = list_entry(entry, struct sctp_chunk, list);
- if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
+ if (skb_is_gso(chunk->skb) && skb_is_gso_sctp(chunk->skb)) {
/* GSO-marked skbs but without frags, handle
* them normally
*/
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index aeea6da81441..fd2684ad94c8 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net)
if (!ent)
pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
}
-
-/* Cleanup the objcount entry in the proc filesystem. */
-void sctp_dbg_objcnt_exit(struct net *net)
-{
- remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
-}
-
-
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 35bc7106d182..123e9f2dc226 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,7 +45,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
- if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+ if (!skb_is_gso_sctp(skb))
goto out;
sh = sctp_hdr(skb);
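The helper replacing the open-coded gso_type tests is not part of this diff; as introduced in include/linux/skbuff.h alongside these callers, it is essentially:

static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
}

On the receive path it is paired with skb_is_gso(), which tests gso_size, so a non-GSO skb carrying a stale gso_type no longer takes the GSO branch.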
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 01a26ee051e3..d6e1c90cc09a 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -241,10 +241,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
if (!chunk->auth)
return retval;
- auth = sctp_make_auth(asoc);
+ auth = sctp_make_auth(asoc, chunk->shkey->key_id);
if (!auth)
return retval;
+ auth->shkey = chunk->shkey;
+ sctp_auth_shkey_hold(auth->shkey);
+
retval = __sctp_packet_append_chunk(pkt, auth);
if (retval != SCTP_XMIT_OK)
@@ -490,7 +493,8 @@ merge:
}
if (auth) {
- sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
+ sctp_auth_calculate_hmac(tp->asoc, nskb, auth,
+ packet->auth->shkey, gfp);
/* free auth if no more chunks, or add it back */
if (list_empty(&packet->chunk_list))
sctp_chunk_free(packet->auth);
@@ -770,6 +774,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
enum sctp_xmit retval = SCTP_XMIT_OK;
size_t psize, pmtu, maxsize;
+ /* Don't bundle in this packet if this chunk's auth key doesn't
+ * match other chunks already enqueued on this packet. Also,
+ * don't bundle a chunk that has an auth key if other chunks in
+ * this packet don't have one.
+ */
+ if ((packet->auth && chunk->shkey != packet->auth->shkey) ||
+ (!packet->auth && chunk->shkey &&
+ chunk->chunk_hdr->type != SCTP_CID_AUTH))
+ return SCTP_XMIT_PMTU_FULL;
+
psize = packet->size;
if (packet->transport->asoc)
pmtu = packet->transport->asoc->pathmtu;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 537545ebcb0e..1d9ccc6dab2b 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = {
.release = single_release_net,
};
-/* Set up the proc fs entry for 'snmp' object. */
-int __net_init sctp_snmp_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_snmp_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(struct net *net)
-{
- remove_proc_entry("snmp", net->sctp.proc_net_sctp);
-}
-
/* Dump local addresses of an association/endpoint. */
static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
{
@@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = {
.release = seq_release_net,
};
-/* Set up the proc fs entry for 'eps' object. */
-int __net_init sctp_eps_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_eps_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(struct net *net)
-{
- remove_proc_entry("eps", net->sctp.proc_net_sctp);
-}
-
struct sctp_ht_iter {
struct seq_net_private p;
struct rhashtable_iter hti;
@@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = {
.release = seq_release_net,
};
-/* Set up the proc fs entry for 'assocs' object. */
-int __net_init sctp_assocs_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_assocs_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(struct net *net)
-{
- remove_proc_entry("assocs", net->sctp.proc_net_sctp);
-}
-
static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
{
struct sctp_association *assoc;
@@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = {
.show = sctp_remaddr_seq_show,
};
-/* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(struct net *net)
-{
- remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
-}
-
static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_remaddr_ops,
@@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = {
.release = seq_release_net,
};
-int __net_init sctp_remaddr_proc_init(struct net *net)
+/* Set up the proc fs entry for the SCTP protocol. */
+int __net_init sctp_proc_init(struct net *net)
{
- struct proc_dir_entry *p;
-
- p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_remaddr_seq_fops);
- if (!p)
+ net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+ if (!net->sctp.proc_net_sctp)
return -ENOMEM;
+ if (!proc_create("snmp", 0444, net->sctp.proc_net_sctp,
+ &sctp_snmp_seq_fops))
+ goto cleanup;
+ if (!proc_create("eps", 0444, net->sctp.proc_net_sctp,
+ &sctp_eps_seq_fops))
+ goto cleanup;
+ if (!proc_create("assocs", 0444, net->sctp.proc_net_sctp,
+ &sctp_assocs_seq_fops))
+ goto cleanup;
+ if (!proc_create("remaddr", 0444, net->sctp.proc_net_sctp,
+ &sctp_remaddr_seq_fops))
+ goto cleanup;
return 0;
+
+cleanup:
+ remove_proc_subtree("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+ return -ENOMEM;
}
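The four per-file init/exit helpers can collapse into the single function above because remove_proc_subtree() tears down an entry together with everything beneath it. The prototype relied on here (from include/linux/proc_fs.h) is:

/* Remove the named entry under 'parent' and all of its descendants. */
int remove_proc_subtree(const char *name, struct proc_dir_entry *parent);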
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 91813e686c67..84a09f599131 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -80,56 +80,6 @@ long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
-/* Set up the proc fs entry for the SCTP protocol. */
-static int __net_init sctp_proc_init(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
- if (!net->sctp.proc_net_sctp)
- goto out_proc_net_sctp;
- if (sctp_snmp_proc_init(net))
- goto out_snmp_proc_init;
- if (sctp_eps_proc_init(net))
- goto out_eps_proc_init;
- if (sctp_assocs_proc_init(net))
- goto out_assocs_proc_init;
- if (sctp_remaddr_proc_init(net))
- goto out_remaddr_proc_init;
-
- return 0;
-
-out_remaddr_proc_init:
- sctp_assocs_proc_exit(net);
-out_assocs_proc_init:
- sctp_eps_proc_exit(net);
-out_eps_proc_init:
- sctp_snmp_proc_exit(net);
-out_snmp_proc_init:
- remove_proc_entry("sctp", net->proc_net);
- net->sctp.proc_net_sctp = NULL;
-out_proc_net_sctp:
- return -ENOMEM;
-#endif /* CONFIG_PROC_FS */
- return 0;
-}
-
-/* Clean up the proc fs entry for the SCTP protocol.
- * Note: Do not make this __exit as it is used in the init error
- * path.
- */
-static void sctp_proc_exit(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- sctp_snmp_proc_exit(net);
- sctp_eps_proc_exit(net);
- sctp_assocs_proc_exit(net);
- sctp_remaddr_proc_exit(net);
-
- remove_proc_entry("sctp", net->proc_net);
- net->sctp.proc_net_sctp = NULL;
-#endif
-}
-
/* Private helper to extract ipv4 address and stash them in
* the protocol structure.
*/
@@ -1285,10 +1235,12 @@ static int __net_init sctp_defaults_init(struct net *net)
if (status)
goto err_init_mibs;
+#ifdef CONFIG_PROC_FS
/* Initialize proc fs directory. */
status = sctp_proc_init(net);
if (status)
goto err_init_proc;
+#endif
sctp_dbg_objcnt_init(net);
@@ -1320,9 +1272,10 @@ static void __net_exit sctp_defaults_exit(struct net *net)
sctp_free_addr_wq(net);
sctp_free_local_addr_list(net);
- sctp_dbg_objcnt_exit(net);
-
- sctp_proc_exit(net);
+#ifdef CONFIG_PROC_FS
+ remove_proc_subtree("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+#endif
cleanup_sctp_mibs(net);
sctp_sysctl_net_unregister(net);
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d01475f5f710..cc20bc39ee7c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -87,7 +87,28 @@ static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
/* Control chunk destructor */
static void sctp_control_release_owner(struct sk_buff *skb)
{
- /*TODO: do memory release */
+ struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg;
+
+ if (chunk->shkey) {
+ struct sctp_shared_key *shkey = chunk->shkey;
+ struct sctp_association *asoc = chunk->asoc;
+
+ /* refcnt == 2 and !list_empty mean that after this release the
+ * key is no longer in use anywhere, so if it has been deactivated
+ * it's time to notify userland that this shkey can be freed.
+ */
+ if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+ refcount_read(&shkey->refcnt) == 2) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+ SCTP_AUTH_FREE_KEY,
+ GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+ sctp_auth_shkey_release(chunk->shkey);
+ }
}
static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
@@ -102,7 +123,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
*
* For now, don't do anything.
*/
+ if (chunk->auth) {
+ chunk->shkey = asoc->shkey;
+ sctp_auth_shkey_hold(chunk->shkey);
+ }
skb->sk = asoc ? asoc->base.sk : NULL;
+ skb_shinfo(skb)->destructor_arg = chunk;
skb->destructor = sctp_control_release_owner;
}
@@ -1271,7 +1297,8 @@ nodata:
return retval;
}
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+ __u16 key_id)
{
struct sctp_authhdr auth_hdr;
struct sctp_hmac *hmac_desc;
@@ -1289,7 +1316,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
return NULL;
auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
- auth_hdr.shkey_id = htons(asoc->active_key_id);
+ auth_hdr.shkey_id = htons(key_id);
retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
&auth_hdr);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b71e7fb0a20a..298112ca8c06 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
+static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands,
+ struct sctp_association *asoc)
+{
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+}
+
/* Helper function to generate an adaptation indication event */
static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
struct sctp_association *asoc)
@@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
case SCTP_CMD_ADAPTATION_IND:
sctp_cmd_adaptation_ind(commands, asoc);
break;
+ case SCTP_CMD_PEER_NO_AUTH:
+ sctp_cmd_peer_no_auth(commands, asoc);
+ break;
case SCTP_CMD_ASSOC_SHKEY:
error = sctp_auth_asoc_init_active_key(asoc,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index eb7905ffe5f2..cc56a67dbb4d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -659,7 +659,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
void *arg,
struct sctp_cmd_seq *commands)
{
- struct sctp_ulpevent *ev, *ai_ev = NULL;
+ struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL;
struct sctp_association *new_asoc;
struct sctp_init_chunk *peer_init;
struct sctp_chunk *chunk = arg;
@@ -820,6 +820,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
goto nomem_aiev;
}
+ if (!new_asoc->peer.auth_capable) {
+ auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0,
+ SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!auth_ev)
+ goto nomem_authev;
+ }
+
/* Add all the state machine commands now since we've created
* everything. This way we don't introduce memory corruptions
* during side-effect processing and correctly count established
@@ -847,8 +855,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ai_ev));
+ if (auth_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(auth_ev));
+
return SCTP_DISPOSITION_CONSUME;
+nomem_authev:
+ sctp_ulpevent_free(ai_ev);
nomem_aiev:
sctp_ulpevent_free(ev);
nomem_ev:
@@ -953,6 +967,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
SCTP_ULPEVENT(ev));
}
+ if (!asoc->peer.auth_capable) {
+ ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!ev)
+ goto nomem;
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(ev));
+ }
+
return SCTP_DISPOSITION_CONSUME;
nomem:
return SCTP_DISPOSITION_NOMEM;
@@ -1908,6 +1931,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
if (asoc->peer.adaptation_ind)
sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL());
+ if (!asoc->peer.auth_capable)
+ sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL());
+
return SCTP_DISPOSITION_CONSUME;
nomem:
@@ -1954,7 +1980,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
- struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
+ struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL;
struct sctp_chunk *repl;
/* Clarification from Implementor's Guide:
@@ -2001,6 +2027,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
goto nomem;
}
+
+ if (!asoc->peer.auth_capable) {
+ auth_ev = sctp_ulpevent_make_authkey(asoc, 0,
+ SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!auth_ev)
+ goto nomem;
+ }
}
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -2015,10 +2049,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
if (ai_ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ai_ev));
+ if (auth_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(auth_ev));
return SCTP_DISPOSITION_CONSUME;
nomem:
+ if (auth_ev)
+ sctp_ulpevent_free(auth_ev);
if (ai_ev)
sctp_ulpevent_free(ai_ev);
if (ev)
@@ -4114,6 +4153,7 @@ static enum sctp_ierror sctp_sf_authenticate(
const union sctp_subtype type,
struct sctp_chunk *chunk)
{
+ struct sctp_shared_key *sh_key = NULL;
struct sctp_authhdr *auth_hdr;
__u8 *save_digest, *digest;
struct sctp_hmac *hmac;
@@ -4135,9 +4175,11 @@ static enum sctp_ierror sctp_sf_authenticate(
* configured
*/
key_id = ntohs(auth_hdr->shkey_id);
- if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id))
- return SCTP_IERROR_AUTH_BAD_KEYID;
-
+ if (key_id != asoc->active_key_id) {
+ sh_key = sctp_auth_get_shkey(asoc, key_id);
+ if (!sh_key)
+ return SCTP_IERROR_AUTH_BAD_KEYID;
+ }
/* Make sure that the length of the signature matches what
* we expect.
@@ -4166,7 +4208,7 @@ static enum sctp_ierror sctp_sf_authenticate(
sctp_auth_calculate_hmac(asoc, chunk->skb,
(struct sctp_auth_chunk *)chunk->chunk_hdr,
- GFP_ATOMIC);
+ sh_key, GFP_ATOMIC);
/* Discard the packet if the digests do not match */
if (memcmp(save_digest, digest, sig_len)) {
@@ -4243,7 +4285,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
struct sctp_ulpevent *ev;
ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id),
- SCTP_AUTH_NEWKEY, GFP_ATOMIC);
+ SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
if (!ev)
return -ENOMEM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bf271f8c2dc9..7a10ae3c3d82 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
/* The sndbuf space is tracked per association. */
sctp_association_hold(asoc);
+ if (chunk->shkey)
+ sctp_auth_shkey_hold(chunk->shkey);
+
skb_set_owner_w(chunk->skb, sk);
chunk->skb->destructor = sctp_wfree;
@@ -1606,396 +1609,303 @@ static int sctp_error(struct sock *sk, int flags, int err)
static int sctp_msghdr_parse(const struct msghdr *msg,
struct sctp_cmsgs *cmsgs);
-static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs,
+ struct sctp_sndrcvinfo *srinfo,
+ const struct msghdr *msg, size_t msg_len)
{
- struct net *net = sock_net(sk);
- struct sctp_sock *sp;
- struct sctp_endpoint *ep;
- struct sctp_association *new_asoc = NULL, *asoc = NULL;
- struct sctp_transport *transport, *chunk_tp;
- struct sctp_chunk *chunk;
- union sctp_addr to;
- struct sockaddr *msg_name = NULL;
- struct sctp_sndrcvinfo default_sinfo;
- struct sctp_sndrcvinfo *sinfo;
- struct sctp_initmsg *sinit;
- sctp_assoc_t associd = 0;
- struct sctp_cmsgs cmsgs = { NULL };
- enum sctp_scope scope;
- bool fill_sinfo_ttl = false, wait_connect = false;
- struct sctp_datamsg *datamsg;
- int msg_flags = msg->msg_flags;
- __u16 sinfo_flags = 0;
- long timeo;
+ __u16 sflags;
int err;
- err = 0;
- sp = sctp_sk(sk);
- ep = sp->ep;
-
- pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk,
- msg, msg_len, ep);
+ if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP))
+ return -EPIPE;
- /* We cannot send a message over a TCP-style listening socket. */
- if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
- err = -EPIPE;
- goto out_nounlock;
- }
+ if (msg_len > sk->sk_sndbuf)
+ return -EMSGSIZE;
- /* Parse out the SCTP CMSGs. */
- err = sctp_msghdr_parse(msg, &cmsgs);
+ memset(cmsgs, 0, sizeof(*cmsgs));
+ err = sctp_msghdr_parse(msg, cmsgs);
if (err) {
pr_debug("%s: msghdr parse err:%x\n", __func__, err);
- goto out_nounlock;
+ return err;
}
- /* Fetch the destination address for this packet. This
- * address only selects the association--it is not necessarily
- * the address we will send to.
- * For a peeled-off socket, msg_name is ignored.
- */
- if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
- int msg_namelen = msg->msg_namelen;
+ memset(srinfo, 0, sizeof(*srinfo));
+ if (cmsgs->srinfo) {
+ srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream;
+ srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags;
+ srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid;
+ srinfo->sinfo_context = cmsgs->srinfo->sinfo_context;
+ srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id;
+ srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive;
+ }
- err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
- msg_namelen);
- if (err)
- return err;
+ if (cmsgs->sinfo) {
+ srinfo->sinfo_stream = cmsgs->sinfo->snd_sid;
+ srinfo->sinfo_flags = cmsgs->sinfo->snd_flags;
+ srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid;
+ srinfo->sinfo_context = cmsgs->sinfo->snd_context;
+ srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id;
+ }
- if (msg_namelen > sizeof(to))
- msg_namelen = sizeof(to);
- memcpy(&to, msg->msg_name, msg_namelen);
- msg_name = msg->msg_name;
+ if (cmsgs->prinfo) {
+ srinfo->sinfo_timetolive = cmsgs->prinfo->pr_value;
+ SCTP_PR_SET_POLICY(srinfo->sinfo_flags,
+ cmsgs->prinfo->pr_policy);
}
- sinit = cmsgs.init;
- if (cmsgs.sinfo != NULL) {
- memset(&default_sinfo, 0, sizeof(default_sinfo));
- default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
- default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
- default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
- default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
- default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
+ sflags = srinfo->sinfo_flags;
+ if (!sflags && msg_len)
+ return 0;
- sinfo = &default_sinfo;
- fill_sinfo_ttl = true;
- } else {
- sinfo = cmsgs.srinfo;
- }
- /* Did the user specify SNDINFO/SNDRCVINFO? */
- if (sinfo) {
- sinfo_flags = sinfo->sinfo_flags;
- associd = sinfo->sinfo_assoc_id;
- }
+ if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT)))
+ return -EINVAL;
- pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__,
- msg_len, sinfo_flags);
+ if (((sflags & SCTP_EOF) && msg_len > 0) ||
+ (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0))
+ return -EINVAL;
- /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
- if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
- err = -EINVAL;
- goto out_nounlock;
- }
+ if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name)
+ return -EINVAL;
- /* If SCTP_EOF is set, no data can be sent. Disallow sending zero
- * length messages when SCTP_EOF|SCTP_ABORT is not set.
- * If SCTP_ABORT is set, the message length could be non zero with
- * the msg_iov set to the user abort reason.
- */
- if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
- (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
- err = -EINVAL;
- goto out_nounlock;
- }
+ return 0;
+}
- /* If SCTP_ADDR_OVER is set, there must be an address
- * specified in msg_name.
- */
- if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
- err = -EINVAL;
- goto out_nounlock;
- }
+static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
+ struct sctp_cmsgs *cmsgs,
+ union sctp_addr *daddr,
+ struct sctp_transport **tp)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct net *net = sock_net(sk);
+ struct sctp_association *asoc;
+ enum sctp_scope scope;
+ struct cmsghdr *cmsg;
+ int err;
- transport = NULL;
+ *tp = NULL;
- pr_debug("%s: about to look up association\n", __func__);
+ if (sflags & (SCTP_EOF | SCTP_ABORT))
+ return -EINVAL;
- lock_sock(sk);
+ if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) ||
+ sctp_sstate(sk, CLOSING)))
+ return -EADDRNOTAVAIL;
- /* If a msg_name has been specified, assume this is to be used. */
- if (msg_name) {
- /* Look for a matching association on the endpoint. */
- asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+ if (sctp_endpoint_is_peeled_off(ep, daddr))
+ return -EADDRNOTAVAIL;
- /* If we could not find a matching association on the
- * endpoint, make sure that it is not a TCP-style
- * socket that already has an association or there is
- * no peeled-off association on another socket.
- */
- if (!asoc &&
- ((sctp_style(sk, TCP) &&
- (sctp_sstate(sk, ESTABLISHED) ||
- sctp_sstate(sk, CLOSING))) ||
- sctp_endpoint_is_peeled_off(ep, &to))) {
- err = -EADDRNOTAVAIL;
- goto out_unlock;
- }
+ if (!ep->base.bind_addr.port) {
+ if (sctp_autobind(sk))
+ return -EAGAIN;
} else {
- asoc = sctp_id2assoc(sk, associd);
- if (!asoc) {
- err = -EPIPE;
- goto out_unlock;
- }
+ if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+ return -EACCES;
}
- if (asoc) {
- pr_debug("%s: just looked up association:%p\n", __func__, asoc);
+ scope = sctp_scope(daddr);
- /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
- * socket that has an association in CLOSED state. This can
- * happen when an accepted socket has an association that is
- * already CLOSED.
- */
- if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
- err = -EPIPE;
- goto out_unlock;
- }
+ asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+ if (!asoc)
+ return -ENOMEM;
+
+ if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
+ err = -ENOMEM;
+ goto free;
+ }
- if (sinfo_flags & SCTP_EOF) {
- pr_debug("%s: shutting down association:%p\n",
- __func__, asoc);
+ if (cmsgs->init) {
+ struct sctp_initmsg *init = cmsgs->init;
- sctp_primitive_SHUTDOWN(net, asoc, NULL);
- err = 0;
- goto out_unlock;
+ if (init->sinit_num_ostreams) {
+ __u16 outcnt = init->sinit_num_ostreams;
+
+ asoc->c.sinit_num_ostreams = outcnt;
+ /* outcnt has been changed, need to re-init stream */
+ err = sctp_stream_init(&asoc->stream, outcnt, 0,
+ GFP_KERNEL);
+ if (err)
+ goto free;
}
- if (sinfo_flags & SCTP_ABORT) {
- chunk = sctp_make_abort_user(asoc, msg, msg_len);
- if (!chunk) {
- err = -ENOMEM;
- goto out_unlock;
- }
+ if (init->sinit_max_instreams)
+ asoc->c.sinit_max_instreams = init->sinit_max_instreams;
- pr_debug("%s: aborting association:%p\n",
- __func__, asoc);
+ if (init->sinit_max_attempts)
+ asoc->max_init_attempts = init->sinit_max_attempts;
- sctp_primitive_ABORT(net, asoc, chunk);
- err = 0;
- goto out_unlock;
- }
+ if (init->sinit_max_init_timeo)
+ asoc->max_init_timeo =
+ msecs_to_jiffies(init->sinit_max_init_timeo);
}
- /* Do we need to create the association? */
- if (!asoc) {
- pr_debug("%s: there is no association yet\n", __func__);
+ *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+ if (!*tp) {
+ err = -ENOMEM;
+ goto free;
+ }
- if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
- err = -EINVAL;
- goto out_unlock;
- }
+ if (!cmsgs->addrs_msg)
+ return 0;
- /* Check for invalid stream against the stream counts,
- * either the default or the user specified stream counts.
- */
- if (sinfo) {
- if (!sinit || !sinit->sinit_num_ostreams) {
- /* Check against the defaults. */
- if (sinfo->sinfo_stream >=
- sp->initmsg.sinit_num_ostreams) {
- err = -EINVAL;
- goto out_unlock;
- }
- } else {
- /* Check against the requested. */
- if (sinfo->sinfo_stream >=
- sinit->sinit_num_ostreams) {
- err = -EINVAL;
- goto out_unlock;
- }
- }
- }
+ /* sendv addr list parse */
+ for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
+ struct sctp_transport *transport;
+ struct sctp_association *old;
+ union sctp_addr _daddr;
+ int dlen;
- /*
- * API 3.1.2 bind() - UDP Style Syntax
- * If a bind() or sctp_bindx() is not called prior to a
- * sendmsg() call that initiates a new association, the
- * system picks an ephemeral port and will choose an address
- * set equivalent to binding with a wildcard address.
- */
- if (!ep->base.bind_addr.port) {
- if (sctp_autobind(sk)) {
- err = -EAGAIN;
- goto out_unlock;
+ if (cmsg->cmsg_level != IPPROTO_SCTP ||
+ (cmsg->cmsg_type != SCTP_DSTADDRV4 &&
+ cmsg->cmsg_type != SCTP_DSTADDRV6))
+ continue;
+
+ daddr = &_daddr;
+ memset(daddr, 0, sizeof(*daddr));
+ dlen = cmsg->cmsg_len - sizeof(struct cmsghdr);
+ if (cmsg->cmsg_type == SCTP_DSTADDRV4) {
+ if (dlen < sizeof(struct in_addr)) {
+ err = -EINVAL;
+ goto free;
}
+
+ dlen = sizeof(struct in_addr);
+ daddr->v4.sin_family = AF_INET;
+ daddr->v4.sin_port = htons(asoc->peer.port);
+ memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen);
} else {
- /*
- * If an unprivileged user inherits a one-to-many
- * style socket with open associations on a privileged
- * port, it MAY be permitted to accept new associations,
- * but it SHOULD NOT be permitted to open new
- * associations.
- */
- if (ep->base.bind_addr.port < inet_prot_sock(net) &&
- !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
- err = -EACCES;
- goto out_unlock;
+ if (dlen < sizeof(struct in6_addr)) {
+ err = -EINVAL;
+ goto free;
}
- }
- scope = sctp_scope(&to);
- new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
- if (!new_asoc) {
- err = -ENOMEM;
- goto out_unlock;
+ dlen = sizeof(struct in6_addr);
+ daddr->v6.sin6_family = AF_INET6;
+ daddr->v6.sin6_port = htons(asoc->peer.port);
+ memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
}
- asoc = new_asoc;
- err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
- if (err < 0) {
- err = -ENOMEM;
- goto out_free;
+ err = sctp_verify_addr(sk, daddr, sizeof(*daddr));
+ if (err)
+ goto free;
+
+ old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+ if (old && old != asoc) {
+ if (old->state >= SCTP_STATE_ESTABLISHED)
+ err = -EISCONN;
+ else
+ err = -EALREADY;
+ goto free;
}
- /* If the SCTP_INIT ancillary data is specified, set all
- * the association init values accordingly.
- */
- if (sinit) {
- if (sinit->sinit_num_ostreams) {
- __u16 outcnt = sinit->sinit_num_ostreams;
-
- asoc->c.sinit_num_ostreams = outcnt;
- /* outcnt has been changed, so re-init stream */
- err = sctp_stream_init(&asoc->stream, outcnt, 0,
- GFP_KERNEL);
- if (err)
- goto out_free;
- }
- if (sinit->sinit_max_instreams) {
- asoc->c.sinit_max_instreams =
- sinit->sinit_max_instreams;
- }
- if (sinit->sinit_max_attempts) {
- asoc->max_init_attempts
- = sinit->sinit_max_attempts;
- }
- if (sinit->sinit_max_init_timeo) {
- asoc->max_init_timeo =
- msecs_to_jiffies(sinit->sinit_max_init_timeo);
- }
+ if (sctp_endpoint_is_peeled_off(ep, daddr)) {
+ err = -EADDRNOTAVAIL;
+ goto free;
}
- /* Prime the peer's transport structures. */
- transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
+ transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
+ SCTP_UNKNOWN);
if (!transport) {
err = -ENOMEM;
- goto out_free;
+ goto free;
}
}
- /* ASSERT: we have a valid association at this point. */
- pr_debug("%s: we have a valid association\n", __func__);
+ return 0;
- if (!sinfo) {
- /* If the user didn't specify SNDINFO/SNDRCVINFO, make up
- * one with some defaults.
- */
- memset(&default_sinfo, 0, sizeof(default_sinfo));
- default_sinfo.sinfo_stream = asoc->default_stream;
- default_sinfo.sinfo_flags = asoc->default_flags;
- default_sinfo.sinfo_ppid = asoc->default_ppid;
- default_sinfo.sinfo_context = asoc->default_context;
- default_sinfo.sinfo_timetolive = asoc->default_timetolive;
- default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
-
- sinfo = &default_sinfo;
- } else if (fill_sinfo_ttl) {
- /* In case SNDINFO was specified, we still need to fill
- * it with a default ttl from the assoc here.
- */
- sinfo->sinfo_timetolive = asoc->default_timetolive;
- }
+free:
+ sctp_association_free(asoc);
+ return err;
+}
- /* API 7.1.7, the sndbuf size per association bounds the
- * maximum size of data that can be sent in a single send call.
- */
- if (msg_len > sk->sk_sndbuf) {
- err = -EMSGSIZE;
- goto out_free;
+static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
+ __u16 sflags, struct msghdr *msg,
+ size_t msg_len)
+{
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+
+ if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP))
+ return -EPIPE;
+
+ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP) &&
+ !sctp_state(asoc, ESTABLISHED))
+ return 0;
+
+ if (sflags & SCTP_EOF) {
+ pr_debug("%s: shutting down association:%p\n", __func__, asoc);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
+
+ return 0;
}
- if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(asoc);
+ if (sflags & SCTP_ABORT) {
+ struct sctp_chunk *chunk;
- /* If fragmentation is disabled and the message length exceeds the
- * association fragmentation point, return EMSGSIZE. The I-D
- * does not specify what this error is, but this looks like
- * a great fit.
- */
- if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
- err = -EMSGSIZE;
- goto out_free;
+ chunk = sctp_make_abort_user(asoc, msg, msg_len);
+ if (!chunk)
+ return -ENOMEM;
+
+ pr_debug("%s: aborting association:%p\n", __func__, asoc);
+ sctp_primitive_ABORT(net, asoc, chunk);
+
+ return 0;
}
- /* Check for invalid stream. */
+ return 1;
+}
+
+static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
+ struct msghdr *msg, size_t msg_len,
+ struct sctp_transport *transport,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+ struct sctp_datamsg *datamsg;
+ bool wait_connect = false;
+ struct sctp_chunk *chunk;
+ long timeo;
+ int err;
+
if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
err = -EINVAL;
- goto out_free;
+ goto err;
}
- /* Allocate sctp_stream_out_ext if not already done */
if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
if (err)
- goto out_free;
+ goto err;
}
+ if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+ err = -EMSGSIZE;
+ goto err;
+ }
+
+ if (asoc->pmtu_pending)
+ sctp_assoc_pending_pmtu(asoc);
+
if (sctp_wspace(asoc) < msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
- timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
- /* sk can be changed by peel off when waiting for buf. */
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err) {
- if (err == -ESRCH) {
- /* asoc is already dead. */
- new_asoc = NULL;
- err = -EPIPE;
- }
- goto out_free;
- }
+ if (err)
+ goto err;
}
- /* If an address is passed with the sendto/sendmsg call, it is used
- * to override the primary destination address in the TCP model, or
- * when SCTP_ADDR_OVER flag is set in the UDP model.
- */
- if ((sctp_style(sk, TCP) && msg_name) ||
- (sinfo_flags & SCTP_ADDR_OVER)) {
- chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
- if (!chunk_tp) {
- err = -EINVAL;
- goto out_free;
- }
- } else
- chunk_tp = NULL;
-
- /* Auto-connect, if we aren't connected already. */
if (sctp_state(asoc, CLOSED)) {
err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
- if (err < 0)
- goto out_free;
+ if (err)
+ goto err;
- /* If stream interleave is enabled, wait_connect has to be
- * done earlier than data enqueue, as it needs to make data
- * or idata according to asoc->intl_enable which is set
- * after connection is done.
- */
- if (sctp_sk(asoc->base.sk)->strm_interleave) {
+ if (sctp_sk(sk)->strm_interleave) {
timeo = sock_sndtimeo(sk, 0);
err = sctp_wait_for_connect(asoc, &timeo);
if (err)
- goto out_unlock;
+ goto err;
} else {
wait_connect = true;
}
@@ -2003,73 +1913,186 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
pr_debug("%s: we associated primitively\n", __func__);
}
- /* Break the message into multiple chunks of maximum size. */
datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
if (IS_ERR(datamsg)) {
err = PTR_ERR(datamsg);
- goto out_free;
+ goto err;
}
+
asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
- /* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
sctp_chunk_hold(chunk);
-
- /* Do accounting for the write space. */
sctp_set_owner_w(chunk);
-
- chunk->transport = chunk_tp;
+ chunk->transport = transport;
}
- /* Send it to the lower layers. Note: all chunks
- * must either fail or succeed. The lower layer
- * works that way today. Keep it that way or this
- * breaks.
- */
err = sctp_primitive_SEND(net, asoc, datamsg);
- /* Did the lower layer accept the chunk? */
if (err) {
sctp_datamsg_free(datamsg);
- goto out_free;
+ goto err;
}
pr_debug("%s: we sent primitively\n", __func__);
sctp_datamsg_put(datamsg);
- err = msg_len;
if (unlikely(wait_connect)) {
- timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
sctp_wait_for_connect(asoc, &timeo);
}
- /* If we are already past ASSOCIATE, the lower
- * layers are responsible for association cleanup.
- */
- goto out_unlock;
+ err = msg_len;
-out_free:
- if (new_asoc)
- sctp_association_free(asoc);
-out_unlock:
- release_sock(sk);
+err:
+ return err;
+}
-out_nounlock:
- return sctp_error(sk, msg_flags, err);
+static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
+ const struct msghdr *msg,
+ struct sctp_cmsgs *cmsgs)
+{
+ union sctp_addr *daddr = NULL;
+ int err;
-#if 0
-do_sock_err:
- if (msg_len)
- err = msg_len;
- else
- err = sock_error(sk);
- goto out;
+ if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
+ int len = msg->msg_namelen;
-do_interrupted:
- if (msg_len)
- err = msg_len;
- goto out;
-#endif /* 0 */
+ if (len > sizeof(*daddr))
+ len = sizeof(*daddr);
+
+ daddr = (union sctp_addr *)msg->msg_name;
+
+ err = sctp_verify_addr(sk, daddr, len);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return daddr;
+}
+
+static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct sctp_cmsgs *cmsgs)
+{
+ if (!cmsgs->srinfo && !cmsgs->sinfo) {
+ sinfo->sinfo_stream = asoc->default_stream;
+ sinfo->sinfo_ppid = asoc->default_ppid;
+ sinfo->sinfo_context = asoc->default_context;
+ sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+
+ if (!cmsgs->prinfo)
+ sinfo->sinfo_flags = asoc->default_flags;
+ }
+
+ if (!cmsgs->srinfo && !cmsgs->prinfo)
+ sinfo->sinfo_timetolive = asoc->default_timetolive;
+
+ if (cmsgs->authinfo) {
+ /* Reuse sinfo_tsn to indicate that authinfo was set and
+ * sinfo_ssn to save the keyid on tx path.
+ */
+ sinfo->sinfo_tsn = 1;
+ sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber;
+ }
+}
+
+static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct sctp_transport *transport = NULL;
+ struct sctp_sndrcvinfo _sinfo, *sinfo;
+ struct sctp_association *asoc;
+ struct sctp_cmsgs cmsgs;
+ union sctp_addr *daddr;
+ bool new = false;
+ __u16 sflags;
+ int err;
+
+ /* Parse and get snd_info */
+ err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len);
+ if (err)
+ goto out;
+
+ sinfo = &_sinfo;
+ sflags = sinfo->sinfo_flags;
+
+ /* Get daddr from msg */
+ daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
+ if (IS_ERR(daddr)) {
+ err = PTR_ERR(daddr);
+ goto out;
+ }
+
+ lock_sock(sk);
+
+ /* SCTP_SENDALL process */
+ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
+ list_for_each_entry(asoc, &ep->asocs, asocs) {
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+ msg_len);
+ if (err == 0)
+ continue;
+ if (err < 0)
+ goto out_unlock;
+
+ sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+ err = sctp_sendmsg_to_asoc(asoc, msg, msg_len,
+ NULL, sinfo);
+ if (err < 0)
+ goto out_unlock;
+
+ iov_iter_revert(&msg->msg_iter, err);
+ }
+
+ goto out_unlock;
+ }
+
+ /* Get and check or create asoc */
+ if (daddr) {
+ asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+ if (asoc) {
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+ msg_len);
+ if (err <= 0)
+ goto out_unlock;
+ } else {
+ err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
+ &transport);
+ if (err)
+ goto out_unlock;
+
+ asoc = transport->asoc;
+ new = true;
+ }
+
+ if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
+ transport = NULL;
+ } else {
+ asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id);
+ if (!asoc) {
+ err = -EPIPE;
+ goto out_unlock;
+ }
+
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len);
+ if (err <= 0)
+ goto out_unlock;
+ }
+
+ /* Update snd_info with the asoc */
+ sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+ /* Send msg to the asoc */
+ err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo);
+ if (err < 0 && err != -ESRCH && new)
+ sctp_association_free(asoc);
+
+out_unlock:
+ release_sock(sk);
+out:
+ return sctp_error(sk, msg->msg_flags, err);
}
/* This is an extended version of skb_pull() that removes the data from the
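In the rewritten sctp_sendmsg() above, the SCTP_SENDALL branch walks every association on a one-to-many socket and reverts the iov after each send so every peer receives the full payload. A userspace sketch of triggering it through an SCTP_SNDINFO cmsg, error handling elided:

#include <string.h>
#include <sys/socket.h>
#include <netinet/sctp.h>

/* Broadcast one message to all associations on a one-to-many socket. */
static ssize_t send_to_all(int fd, const void *data, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(struct sctp_sndinfo))] = {0};
	struct sctp_sndinfo *snd;
	struct msghdr msg = {0};
	struct cmsghdr *cmsg;
	struct iovec iov;

	iov.iov_base = (void *)data;
	iov.iov_len = len;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_SNDINFO;
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndinfo));
	snd = (struct sctp_sndinfo *)CMSG_DATA(cmsg);
	memset(snd, 0, sizeof(*snd));
	snd->snd_flags = SCTP_SENDALL;

	return sendmsg(fd, &msg, 0);
}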
@@ -3624,6 +3647,33 @@ static int sctp_setsockopt_del_key(struct sock *sk,
}
/*
+ * 8.3.4 Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY)
+ *
+ * This set option will deactivate a shared secret key.
+ */
+static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct sctp_authkeyid val;
+ struct sctp_association *asoc;
+
+ if (!ep->auth_enable)
+ return -EACCES;
+
+ if (optlen != sizeof(struct sctp_authkeyid))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, optlen))
+ return -EFAULT;
+
+ asoc = sctp_id2assoc(sk, val.scact_assoc_id);
+ if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
+
+ return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+}
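A minimal userspace sketch of the new option; it reuses struct sctp_authkeyid exactly as SCTP_AUTH_ACTIVE_KEY and SCTP_AUTH_DELETE_KEY do:

#include <stdint.h>
#include <sys/socket.h>
#include <netinet/sctp.h>

/* Deactivate shared key 'keynum'; SCTP_AUTH_FREE_KEY is delivered once
 * no in-flight chunk references it any more.
 */
static int deactivate_auth_key(int fd, sctp_assoc_t assoc_id, uint16_t keynum)
{
	struct sctp_authkeyid val = {
		.scact_assoc_id = assoc_id,
		.scact_keynumber = keynum,
	};

	return setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_DEACTIVATE_KEY,
			  &val, sizeof(val));
}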
+
+/*
* 8.1.23 SCTP_AUTO_ASCONF
*
* This option will enable or disable the use of the automatic generation of
@@ -4215,6 +4265,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_AUTH_DELETE_KEY:
retval = sctp_setsockopt_del_key(sk, optval, optlen);
break;
+ case SCTP_AUTH_DEACTIVATE_KEY:
+ retval = sctp_setsockopt_deactivate_key(sk, optval, optlen);
+ break;
case SCTP_AUTO_ASCONF:
retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
break;
@@ -7189,6 +7242,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_AUTH_KEY:
case SCTP_AUTH_CHUNK:
case SCTP_AUTH_DELETE_KEY:
+ case SCTP_AUTH_DEACTIVATE_KEY:
retval = -EOPNOTSUPP;
break;
case SCTP_HMAC_IDENT:
@@ -7811,8 +7865,8 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
- SCTP_ABORT | SCTP_EOF))
+ SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+ SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -7835,10 +7889,60 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
- SCTP_ABORT | SCTP_EOF))
+ SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+ SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
+ case SCTP_PRINFO:
+ /* SCTP Socket API Extension
+ * 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo
+ */
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_prinfo)))
+ return -EINVAL;
+
+ cmsgs->prinfo = CMSG_DATA(cmsg);
+ if (cmsgs->prinfo->pr_policy & ~SCTP_PR_SCTP_MASK)
+ return -EINVAL;
+
+ if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE)
+ cmsgs->prinfo->pr_value = 0;
+ break;
+ case SCTP_AUTHINFO:
+ /* SCTP Socket API Extension
+ * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo
+ */
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo)))
+ return -EINVAL;
+
+ cmsgs->authinfo = CMSG_DATA(cmsg);
+ break;
+ case SCTP_DSTADDRV4:
+ case SCTP_DSTADDRV6:
+ /* SCTP Socket API Extension
+ * 5.3.9/10 SCTP Destination IPv4/6 Address Structure (SCTP_DSTADDRV4/6)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_DSTADDRV4 struct in_addr
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_DSTADDRV6 struct in6_addr
+ */
+ cmsgs->addrs_msg = my_msg;
+ break;
default:
return -EINVAL;
}
@@ -8062,6 +8166,26 @@ static void sctp_wfree(struct sk_buff *skb)
sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
+ if (chunk->shkey) {
+ struct sctp_shared_key *shkey = chunk->shkey;
+
+ /* refcnt == 2 and !list_empty mean that after this release the
+ * key is no longer in use anywhere, so if it has been deactivated
+ * it's time to notify userland that this shkey can be freed.
+ */
+ if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+ refcount_read(&shkey->refcnt) == 2) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+ SCTP_AUTH_FREE_KEY,
+ GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+ sctp_auth_shkey_release(chunk->shkey);
+ }
+
sock_wfree(skb);
sctp_wake_up_waiters(sk, asoc);
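The SCTP_DSTADDRV4/SCTP_DSTADDRV6 cmsgs parsed earlier in this file let a single sendmsg() set up an association with additional peer addresses, which is what the sctp_sendv() wrappers build on. A sketch of attaching one extra IPv4 destination; the addresses are placeholders and error handling is elided:

#include <arpa/inet.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/sctp.h>

/* First send to a new peer, proposing a second address for the
 * association. msg_name carries the primary destination.
 */
static ssize_t send_with_extra_addr(int fd, struct sockaddr_in *primary,
				    const void *data, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(struct in_addr))] = {0};
	struct msghdr msg = {0};
	struct cmsghdr *cmsg;
	struct in_addr extra;
	struct iovec iov;

	inet_pton(AF_INET, "192.0.2.2", &extra);	/* placeholder */

	iov.iov_base = (void *)data;
	iov.iov_len = len;
	msg.msg_name = primary;
	msg.msg_namelen = sizeof(*primary);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_DSTADDRV4;
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
	memcpy(CMSG_DATA(cmsg), &extra, sizeof(extra));

	return sendmsg(fd, &msg, 0);
}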
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index cedf672487f9..f799043abec9 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -6,7 +6,7 @@
*
* This file is part of the SCTP kernel implementation
*
- * These functions manipulate sctp tsn mapping array.
+ * This file contains sctp stream manipulation primitives and helpers.
*
* This SCTP implementation is free software;
* you can redistribute it and/or modify it under the terms of
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 8c7cf8f08711..d3764c181299 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -3,7 +3,8 @@
*
* This file is part of the SCTP kernel implementation
*
- * These functions manipulate sctp stream queue/scheduling.
+ * These functions implement sctp stream message interleaving, mostly
+ * covering I-DATA and I-FORWARD-TSN chunk processing.
*
* This SCTP implementation is free software;
* you can redistribute it and/or modify it under the terms of
@@ -954,12 +955,8 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
__u32 freed = 0;
__u16 needed;
- if (chunk) {
- needed = ntohs(chunk->chunk_hdr->length);
- needed -= sizeof(struct sctp_idata_chunk);
- } else {
- needed = SCTP_DEFAULT_MAXWINDOW;
- }
+ needed = ntohs(chunk->chunk_hdr->length) -
+ sizeof(struct sctp_idata_chunk);
if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed);
@@ -971,9 +968,8 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
needed);
}
- if (chunk && freed >= needed)
- if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
- sctp_intl_start_pd(ulpq, gfp);
+ if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
+ sctp_intl_start_pd(ulpq, gfp);
sk_mem_reclaim(asoc->base.sk);
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 38ae22b65e77..5f8046c62d90 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -7,13 +7,11 @@
* applicable with RoCE-cards only
*
* Initial restrictions:
- * - non-blocking connect postponed
- * - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
* - support for urgent data postponed
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <[email protected]>
* based on prototype from Frank Blaschka
@@ -24,7 +22,6 @@
#include <linux/module.h>
#include <linux/socket.h>
-#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
@@ -66,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
+static struct smc_hashinfo smc_v6_hashinfo = {
+ .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
+};
+
int smc_hash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
@@ -105,6 +106,18 @@ struct proto smc_proto = {
};
EXPORT_SYMBOL_GPL(smc_proto);
+struct proto smc_proto6 = {
+ .name = "SMC6",
+ .owner = THIS_MODULE,
+ .keepalive = smc_set_keepalive,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v6_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+EXPORT_SYMBOL_GPL(smc_proto6);
+
static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -161,19 +174,22 @@ static void smc_destruct(struct sock *sk)
sk_refcnt_debug_dec(sk);
}
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+ int protocol)
{
struct smc_sock *smc;
+ struct proto *prot;
struct sock *sk;
- sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
+ prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+ sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
if (!sk)
return NULL;
sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
- sk->sk_protocol = SMCPROTO_SMC;
+ sk->sk_protocol = protocol;
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
@@ -200,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
goto out;
rc = -EAFNOSUPPORT;
+ if (addr->sin_family != AF_INET &&
+ addr->sin_family != AF_INET6 &&
+ addr->sin_family != AF_UNSPEC)
+ goto out;
/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
- if ((addr->sin_family != AF_INET) &&
- ((addr->sin_family != AF_UNSPEC) ||
- (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
+ if (addr->sin_family == AF_UNSPEC &&
+ addr->sin_addr.s_addr != htonl(INADDR_ANY))
goto out;
lock_sock(sk);
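With smc_proto6 registered and smc_bind()/smc_connect() accepting AF_INET6, an SMC socket can now be created for IPv6 just like for IPv4. A minimal sketch; the protocol constants are kernel-internal at this point (net/smc/smc.h), so userspace defines them itself:

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef AF_SMC
#define AF_SMC 43
#endif
#define SMCPROTO_SMC6 1		/* kernel's value at the time of this patch */

static int smc6_listen_socket(uint16_t port)
{
	struct sockaddr_in6 addr;
	int fd;

	fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6);
	if (fd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sin6_family = AF_INET6;
	addr.sin6_addr = in6addr_any;
	addr.sin6_port = htons(port);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 10) < 0)
		return -1;

	return fd;
}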
@@ -273,46 +292,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
- __be32 *subnet, u8 *prefix_len)
-{
- struct dst_entry *dst = sk_dst_get(clcsock->sk);
- struct in_device *in_dev;
- struct sockaddr_in addr;
- int rc = -ENOENT;
-
- if (!dst) {
- rc = -ENOTCONN;
- goto out;
- }
- if (!dst->dev) {
- rc = -ENODEV;
- goto out_rel;
- }
-
- /* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr);
- /* analyze IPv4 specific data of net_device belonging to TCP socket */
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dst->dev);
- for_ifa(in_dev) {
- if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
- continue;
- *prefix_len = inet_mask_len(ifa->ifa_mask);
- *subnet = ifa->ifa_address & ifa->ifa_mask;
- rc = 0;
- break;
- } endfor_ifa(in_dev);
- rcu_read_unlock();
-
-out_rel:
- dst_release(dst);
-out:
- return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
@@ -332,6 +312,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
return rc;
}
+ if (link->llc_confirm_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
rc = smc_ib_modify_qp_rts(link);
if (rc)
return SMC_CLC_DECL_INTERR;
@@ -346,11 +329,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
- gid, SMC_LLC_RESP);
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;
- return rc;
+ /* receive ADD LINK request from server over RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ /* send add link reject message, only one link supported for now */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -372,19 +377,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
- spin_lock_bh(&smc_lgr_list.lock);
- /* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
-}
-
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
- struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
@@ -438,8 +433,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
- ibport, &aclc.lcl, srv_first_contact);
+ local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+ srv_first_contact);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -498,8 +493,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(
- smc, &smcibdev->gid[ibport - 1]);
+ reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) {
rc = reason_code;
goto out_err_unlock;
@@ -556,9 +550,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
/* separate smc parameter checking to be safe */
if (alen < sizeof(addr->sa_family))
goto out_err;
- if (addr->sa_family != AF_INET)
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
goto out_err;
- smc->addr = addr; /* needed for nonblocking connect */
lock_sock(sk);
switch (sk->sk_state) {
@@ -599,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
int rc;
release_sock(lsk);
- new_sk = smc_sock_alloc(sock_net(lsk), NULL);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
if (!new_sk) {
rc = -ENOMEM;
lsk->sk_err = ENOMEM;
@@ -748,9 +741,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
+ return rc;
}
- return rc;
+ if (link->llc_confirm_resp_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
+ /* send ADD LINK request to client over the RoCE fabric */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ /* receive ADD LINK response from client over the RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
/* setup for RDMA connection of server */
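For orientation, a sketch of the single-link activation handshake as implemented by the reworked smc_clnt_conf_first_link() and smc_serv_conf_first_link() above (both sides now end in SMC_LNK_ACTIVE; the reject reason comes from smc_llc_send_add_link() further down):

	server                               client
	CONFIRM LINK (req)       ------>     wait on llc_confirm
	wait on llc_confirm_resp <------     CONFIRM LINK (resp)
	ADD LINK (req)           ------>     wait on llc_add
	wait on llc_add_resp     <------     ADD LINK (resp, reject: no alt path)
	link->state = SMC_LNK_ACTIVE         link->state = SMC_LNK_ACTIVE
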
@@ -766,13 +784,10 @@ static void smc_listen_work(struct work_struct *work)
struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
- struct sockaddr_in peeraddr;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
int rc = 0;
- __be32 subnet;
- u8 prefix_len;
u8 ibport;
/* check if peer is smc capable */
@@ -807,28 +822,19 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
- /* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
- if (rc) {
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
- goto decline_rdma;
- }
-
pclc = (struct smc_clc_msg_proposal *)&buf;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
- if (pclc_prfx->outgoing_subnet != subnet ||
- pclc_prfx->prefix_len != prefix_len) {
+
+ rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
+ if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
- /* get address of the peer connected to the internal TCP socket */
- kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
-
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc->lcl, 0);
+ local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+ 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -977,10 +983,6 @@ out:
lsmc->clcsock = NULL;
}
release_sock(lsk);
- /* no more listening, wake up smc_close_wait_listen_clcsock and
- * accept
- */
- lsk->sk_state_change(lsk);
sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
@@ -1382,6 +1384,7 @@ static const struct proto_ops smc_sock_ops = {
static int smc_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
struct smc_sock *smc;
struct sock *sk;
int rc;
@@ -1391,22 +1394,24 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
goto out;
rc = -EPROTONOSUPPORT;
- if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
+ if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
goto out;
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
- sk = smc_sock_alloc(net, sock);
+ sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk(sk);
smc->use_fallback = false; /* assume rdma capability first */
- rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
- IPPROTO_TCP, &smc->clcsock);
- if (rc)
+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc->clcsock);
+ if (rc) {
sk_common_release(sk);
+ goto out;
+ }
smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
@@ -1442,16 +1447,23 @@ static int __init smc_init(void)
rc = proto_register(&smc_proto, 1);
if (rc) {
- pr_err("%s: proto_register fails with %d\n", __func__, rc);
+ pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
goto out_pnet;
}
+ rc = proto_register(&smc_proto6, 1);
+ if (rc) {
+ pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
+ goto out_proto;
+ }
+
rc = sock_register(&smc_sock_family_ops);
if (rc) {
pr_err("%s: sock_register fails with %d\n", __func__, rc);
- goto out_proto;
+ goto out_proto6;
}
INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
+ INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
rc = smc_ib_register_client();
if (rc) {
@@ -1464,6 +1476,8 @@ static int __init smc_init(void)
out_sock:
sock_unregister(PF_SMC);
+out_proto6:
+ proto_unregister(&smc_proto6);
out_proto:
proto_unregister(&smc_proto);
out_pnet:
@@ -1482,11 +1496,13 @@ static void __exit smc_exit(void)
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
+ cancel_delayed_work_sync(&lgr->free_work);
smc_lgr_free(lgr); /* free link group */
}
static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
+ proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
}
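With a second proto registered for IPv6, user space selects the variant through the protocol argument of socket(2). A minimal, hypothetical client sketch against the new interface (AF_SMC is 43 in linux/socket.h; the address and port are made up):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#ifndef AF_SMC
#define AF_SMC 43		/* from linux/socket.h */
#endif
#define SMCPROTO_SMC6 1		/* SMC protocol, IPv6 (net/smc/smc.h) */

int main(void)
{
	struct sockaddr_in6 dst = { .sin6_family = AF_INET6 };
	int fd;

	/* the protocol argument now selects SMCPROTO_*, not IPPROTO_TCP */
	fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6);
	if (fd < 0) {
		perror("socket");
		return 1;
	}
	dst.sin6_port = htons(12345);				/* made up */
	inet_pton(AF_INET6, "2001:db8::1", &dst.sin6_addr);	/* made up */
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("connect");
	close(fd);
	return 0;
}
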
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 9518986c97b1..e4829a2f46ba 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -18,11 +18,13 @@
#include "smc_ib.h"
-#define SMCPROTO_SMC 0 /* SMC protocol */
+#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
+#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
#define SMC_MAX_PORTS 2 /* Max # of ports */
extern struct proto smc_proto;
+extern struct proto smc_proto6;
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
@@ -172,7 +174,6 @@ struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
- struct sockaddr *addr; /* inet connect address */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
@@ -263,10 +264,8 @@ static inline bool using_ipsec(struct smc_sock *smc)
struct smc_clc_msg_local;
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
- u8 *prefix_len);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 3cd086e5bd28..b42395d24cba 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -269,7 +269,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved))
return; /* short message */
- if (cdc->len != sizeof(*cdc))
+ if (cdc->len != SMC_WR_TX_SIZE)
return; /* invalid message */
smc_cdc_msg_recv(cdc, link, wc->wr_id);
}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8ac51583a063..64fbc3230e6c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -5,15 +5,17 @@
* CLC (connection layer control) handshake over initial TCP socket to
* prepare for RDMA traffic
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <[email protected]>
*/
#include <linux/in.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
+#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -22,6 +24,9 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
@@ -70,6 +75,172 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}
+/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
+static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(ipv4, ifa))
+ continue;
+ prop->prefix_len = inet_mask_len(ifa->ifa_mask);
+ prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
+ /* prop->ipv6_prefixes_cnt = 0; already done by memset before */
+ return 0;
+ } endfor_ifa(in_dev);
+ return -ENOENT;
+}
+
+/* fill CLC proposal msg with ipv6 prefixes from device */
+static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
+ struct inet6_ifaddr *ifa;
+ int cnt = 0;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* use a maximum of 8 IPv6 prefixes from device */
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
+ &ifa->addr, ifa->prefix_len);
+ ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
+ cnt++;
+ if (cnt == SMC_CLC_MAX_V6_PREFIX)
+ break;
+ }
+ prop->ipv6_prefixes_cnt = cnt;
+ if (cnt)
+ return 0;
+#endif
+ return -ENOENT;
+}
+
+/* retrieve and set prefixes in CLC proposal msg */
+static int smc_clc_prfx_set(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct sockaddr_storage addrs;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr;
+ int rc = -ENOENT;
+
+ memset(prop, 0, sizeof(*prop));
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ /* get address to which the internal TCP socket is bound */
+ kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ /* analyze IP specific data of net_device belonging to TCP socket */
+ addr6 = (struct sockaddr_in6 *)&addrs;
+ rcu_read_lock();
+ if (addrs.ss_family == PF_INET) {
+ /* IPv4 */
+ addr = (struct sockaddr_in *)&addrs;
+ rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
+ } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
+ /* mapped IPv4 address - peer is IPv4 only */
+ rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
+ prop);
+ } else {
+ /* IPv6 */
+ rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
+ }
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
+/* match ipv4 addrs of dev against addr in CLC proposal */
+static int smc_clc_prfx_match4_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
+ inet_ifa_match(prop->outgoing_subnet, ifa))
+ return 0;
+ } endfor_ifa(in_dev);
+
+ return -ENOENT;
+}
+
+/* match ipv6 addrs of dev against addrs in CLC proposal */
+static int smc_clc_prfx_match6_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dev);
+ struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct inet6_ifaddr *ifa;
+ int i, max;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
+ ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
+ max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ for (i = 0; i < max; i++) {
+ if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
+ ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
+ ifa->prefix_len))
+ return 0;
+ }
+ }
+#endif
+ return -ENOENT;
+}
+
+/* check if proposed prefixes match one of our device prefixes */
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ int rc;
+
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ rcu_read_lock();
+ if (!prop->ipv6_prefixes_cnt)
+ rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
+ else
+ rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -189,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
+ struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
struct smc_clc_msg_trail trl;
+ int len, i, plen, rc;
int reason_code = 0;
- struct kvec vec[3];
+ struct kvec vec[4];
struct msghdr msg;
- int len, plen, rc;
+
+ /* retrieve ip prefixes for CLC proposal msg */
+ rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
+ if (rc)
+ return SMC_CLC_DECL_CNFERR; /* configuration error */
/* send SMC Proposal CLC message */
- plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
+ plen = sizeof(pclc) + sizeof(pclc_prfx) +
+ (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
+ sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
@@ -209,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
pclc.iparea_offset = htons(0);
- memset(&pclc_prfx, 0, sizeof(pclc_prfx));
- /* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
- &pclc_prfx.prefix_len);
- if (rc)
- return SMC_CLC_DECL_CNFERR; /* configuration error */
- pclc_prfx.ipv6_prefixes_cnt = 0;
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
- vec[0].iov_base = &pclc;
- vec[0].iov_len = sizeof(pclc);
- vec[1].iov_base = &pclc_prfx;
- vec[1].iov_len = sizeof(pclc_prfx);
- vec[2].iov_base = &trl;
- vec[2].iov_len = sizeof(trl);
+ i = 0;
+ vec[i].iov_base = &pclc;
+ vec[i++].iov_len = sizeof(pclc);
+ vec[i].iov_base = &pclc_prfx;
+ vec[i++].iov_len = sizeof(pclc_prfx);
+ if (pclc_prfx.ipv6_prefixes_cnt > 0) {
+ vec[i].iov_base = &ipv6_prfx[0];
+ vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
+ sizeof(ipv6_prfx[0]);
+ }
+ vec[i].iov_base = &trl;
+ vec[i++].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
- len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
+ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
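smc_clc_prfx_set6_rcu() above relies on the kernel's ipv6_addr_prefix() helper to mask each interface address down to its routing prefix before copying it into the proposal. A small user-space analogue of that masking step (a sketch; addr_prefix() and the sample address are inventions for illustration):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* mask an address down to its /plen routing prefix, like ipv6_addr_prefix() */
static void addr_prefix(struct in6_addr *pfx, const struct in6_addr *addr,
			int plen)
{
	int o = plen >> 3, b = plen & 7;

	memset(pfx, 0, sizeof(*pfx));
	memcpy(pfx->s6_addr, addr->s6_addr, o);
	if (b)
		pfx->s6_addr[o] = addr->s6_addr[o] & (0xff << (8 - b));
}

int main(void)
{
	struct in6_addr a, p;
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET6, "2001:db8:abcd:12::1", &a);	/* sample address */
	addr_prefix(&p, &a, 56);
	printf("/56 prefix: %s\n", inet_ntop(AF_INET6, &p, buf, sizeof(buf)));
	return 0;
}
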
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index c145a0f36a68..63bf1dc2c1f9 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -22,9 +22,6 @@
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
#define SMC_CLC_V1 0x1 /* SMC version */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
+#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
@@ -62,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
+#define SMC_CLC_MAX_V6_PREFIX 8
+
+/* Struct would be 4 byte aligned, but it is used in an array that is sent
+ * to peers and must conform to RFC7609, hence we need to use packed here.
+ */
struct smc_clc_ipv6_prefix {
- u8 prefix[4];
+ struct in6_addr prefix;
u8 prefix_len;
-} __packed;
+} __packed; /* format defined in RFC7609 */
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
@@ -81,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
} __aligned(4);
#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
-#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \
+ sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
SMC_CLC_PROPOSAL_MAX_OFFSET + \
+ sizeof(struct smc_clc_msg_proposal_prefix) + \
SMC_CLC_PROPOSAL_MAX_PREFIX + \
sizeof(struct smc_clc_msg_trail))
@@ -124,9 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
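The widened maximum follows directly from the packed layout above: struct in6_addr (16 bytes) plus one length byte gives a 17-byte entry, so SMC_CLC_PROPOSAL_MAX_PREFIX is now 8 * 17 = 136 bytes. A standalone check of just that arithmetic (the mirrored structs are illustrative, not kernel code):

#include <assert.h>
#include <stdint.h>

struct in6 { uint8_t s6_addr[16]; };	/* stand-in for struct in6_addr */

struct clc_ipv6_prefix {		/* mirrors smc_clc_ipv6_prefix */
	struct in6 prefix;
	uint8_t prefix_len;
} __attribute__((packed));

#define SMC_CLC_MAX_V6_PREFIX 8

int main(void)
{
	static_assert(sizeof(struct clc_ipv6_prefix) == 17, "packed entry");
	static_assert(SMC_CLC_MAX_V6_PREFIX *
		      sizeof(struct clc_ipv6_prefix) == 136, "max prefix area");
	return 0;
}
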
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index e339c0186dcf..fa41d9881741 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -30,27 +30,6 @@ static void smc_close_cleanup_listen(struct sock *parent)
smc_close_non_accepted(sk);
}
-static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct sock *sk = &smc->sk;
- signed long timeout;
-
- timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
- add_wait_queue(sk_sleep(sk), &wait);
- do {
- release_sock(sk);
- if (smc->clcsock)
- timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
- timeout);
- sched_annotate_sleep();
- lock_sock(sk);
- if (!smc->clcsock)
- break;
- } while (timeout);
- remove_wait_queue(sk_sleep(sk), &wait);
-}
-
/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
@@ -204,9 +183,11 @@ again:
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
- smc_close_wait_listen_clcsock(smc);
}
smc_close_cleanup_listen(sk);
+ release_sock(sk);
+ flush_work(&smc->tcp_listen_work);
+ lock_sock(sk);
break;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2424c7100aaf..f44f6803f7ff 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -32,6 +32,17 @@
static u32 smc_lgr_num; /* unique link group number */
+static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
+{
+ /* client link group creation always follows the server link group
+ * creation. For the client, use a somewhat higher removal delay time,
+ * otherwise there is a risk of out-of-sync link groups.
+ */
+ mod_delayed_work(system_wq, &lgr->free_work,
+ lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
+}
+
/* Register connection's alert token in our lookup structure.
* To use rbtrees we have to implement our own insert core.
* Requires @conns_lock
@@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
write_unlock_bh(&lgr->conns_lock);
if (!reduced || lgr->conns_num)
return;
- /* client link group creation always follows the server link group
- * creation. For client use a somewhat higher removal delay time,
- * otherwise there is a risk of out-of-sync link groups.
- */
- mod_delayed_work(system_wq, &lgr->free_work,
- lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
- SMC_LGR_FREE_DELAY_SERV);
+ smc_lgr_schedule_free_work(lgr);
}
static void smc_lgr_free_work(struct work_struct *work)
@@ -140,11 +145,12 @@ static void smc_lgr_free_work(struct work_struct *work)
list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
spin_unlock_bh(&smc_lgr_list.lock);
- smc_lgr_free(lgr);
+ if (!delayed_work_pending(&lgr->free_work))
+ smc_lgr_free(lgr);
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
char *peer_systemid, unsigned short vlan_id)
{
@@ -161,7 +167,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
}
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
lgr->sync_err = false;
- lgr->daddr = peer_in_addr;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lgr->vlan_id = vlan_id;
rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +182,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
+ lnk->state = SMC_LNK_ACTIVATING;
+ lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = smcibdev;
lnk->ibport = ibport;
lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -198,6 +205,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
goto destroy_qp;
init_completion(&lnk->llc_confirm);
init_completion(&lnk->llc_confirm_resp);
+ init_completion(&lnk->llc_add);
+ init_completion(&lnk->llc_add_resp);
smc->conn.lgr = lgr;
rwlock_init(&lgr->conns_lock);
@@ -306,6 +315,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
kfree(lgr);
}
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* terminate linkgroup abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
@@ -313,15 +331,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
struct smc_sock *smc;
struct rb_node *node;
- spin_lock_bh(&smc_lgr_list.lock);
- if (list_empty(&lgr->list)) {
- /* termination already triggered */
- spin_unlock_bh(&smc_lgr_list.lock);
- return;
- }
- /* do not use this link group for new connections */
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ smc_lgr_forget(lgr);
write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
@@ -339,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
}
write_unlock_bh(&lgr->conns_lock);
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+ smc_lgr_schedule_free_work(lgr);
}
/* Determine vlan of internal TCP socket.
@@ -400,7 +411,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact)
{
@@ -457,7 +468,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
create:
if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+ rc = smc_lgr_create(smc, smcibdev, ibport,
lcl->id_for_peer, vlan_id);
if (rc)
goto out;
@@ -465,7 +476,7 @@ create:
rc = smc_link_determine_gid(conn->lgr);
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
- conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg);
+ conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
#ifndef KERNEL_HAS_ATOMIC64
spin_lock_init(&conn->acurs_lock);
#endif
@@ -698,27 +709,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
- struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
- u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
- struct smc_link_group *lgr = conn->lgr;
- u32 rkey = ntohl(clc->rmb_rkey);
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
(lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
test_bit(i, lgr->rtokens_used_mask)) {
- conn->rtoken_idx = i;
+ /* already in list */
+ return i;
+ }
+ }
+ i = smc_rmb_reserve_rtoken_idx(lgr);
+ if (i < 0)
+ return i;
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+ u32 rkey = ntohl(nw_rkey);
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ test_bit(i, lgr->rtokens_used_mask)) {
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+ clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
}
- conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+ return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc)
+{
+ conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
return 0;
}
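smc_rtoken_add() is idempotent for a known (rkey, dma_addr) pair and otherwise claims a free slot; smc_rtoken_delete() clears a slot by rkey. A condensed user-space sketch of the same table pattern (a plain flag array stands in for the kernel's rtokens_used_mask bitmap; names and return codes are illustrative):

#include <stdint.h>
#include <string.h>

#define RMBS_MAX 255			/* stands in for SMC_RMBS_PER_LGR_MAX */

struct rtoken { uint32_t rkey; uint64_t dma_addr; };

static struct rtoken tab[RMBS_MAX];
static unsigned char used[RMBS_MAX];	/* the kernel uses a bitmap */

/* idempotent add: return the existing index, else claim a free slot */
static int rtoken_add(uint64_t dma_addr, uint32_t rkey)
{
	int i, free_idx = -1;

	for (i = 0; i < RMBS_MAX; i++) {
		if (used[i] && tab[i].rkey == rkey &&
		    tab[i].dma_addr == dma_addr)
			return i;		/* already in list */
		if (!used[i] && free_idx < 0)
			free_idx = i;
	}
	if (free_idx < 0)
		return -1;			/* -ENOSPC in the kernel */
	tab[free_idx].rkey = rkey;
	tab[free_idx].dma_addr = dma_addr;
	used[free_idx] = 1;
	return free_idx;
}

/* clear a slot by rkey */
static int rtoken_delete(uint32_t rkey)
{
	int i;

	for (i = 0; i < RMBS_MAX; i++) {
		if (used[i] && tab[i].rkey == rkey) {
			memset(&tab[i], 0, sizeof(tab[i]));
			used[i] = 0;
			return 0;
		}
	}
	return -1;				/* -ENOENT in the kernel */
}

int main(void)
{
	int idx = rtoken_add(0x1000, 0xabcd);	/* sample values */

	return rtoken_delete(0xabcd) || idx < 0;
}
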
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fe691bf9af91..07e2a393e6d9 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,6 +32,12 @@ enum smc_lgr_role { /* possible roles of a link group */
SMC_SERV /* server */
};
+enum smc_link_state { /* possible states of a link */
+ SMC_LNK_INACTIVE, /* link is inactive */
+ SMC_LNK_ACTIVATING, /* link is being activated */
+ SMC_LNK_ACTIVE /* link is active */
+};
+
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
struct smc_wr_buf {
@@ -87,8 +93,14 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+
+ enum smc_link_state state; /* state of link */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+ int llc_confirm_rc; /* rc from confirm link msg */
+ int llc_confirm_resp_rc; /* rc from conf_resp msg */
+ struct completion llc_add; /* wait for rx of add link */
+ struct completion llc_add_resp; /* wait for rx of add link rsp*/
};
/* For now we just allow one parallel link per link group. The SMC protocol
@@ -124,7 +136,6 @@ struct smc_rtoken { /* address/key of remote RMB */
struct smc_link_group {
struct list_head list;
enum smc_lgr_role role; /* client or server */
- __be32 daddr; /* destination ip address */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
@@ -186,10 +197,13 @@ struct smc_sock;
struct smc_clc_msg_accept_confirm;
void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
int smc_buf_create(struct smc_sock *smc);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 2a8957bd6d38..26df554f7588 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -23,6 +23,8 @@
#include "smc_wr.h"
#include "smc.h"
+#define SMC_MAX_CQE 32766 /* max. # of completion queue elements */
+
#define SMC_QP_MIN_RNR_TIMER 5
#define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */
#define SMC_QP_RETRY_CNT 7 /* 7: infinite */
@@ -438,9 +440,15 @@ out:
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
{
struct ib_cq_init_attr cqattr = {
- .cqe = SMC_WR_MAX_CQE, .comp_vector = 0 };
+ .cqe = SMC_MAX_CQE, .comp_vector = 0 };
+ int cqe_size_order, smc_order;
long rc;
+ /* the calculated number of cq entries must fit the mlx5 cq allocation */
+ cqe_size_order = cache_line_size() == 128 ? 7 : 6;
+ smc_order = MAX_ORDER - cqe_size_order - 1;
+ if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
+ cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
smc_wr_tx_cq_handler, NULL,
smcibdev, &cqattr);
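Worked numbers for the new cap, assuming MAX_ORDER = 11 and PAGE_SIZE = 4096 (common defaults, but assumptions here): a 64-byte cache line yields a budget of 2^4 * 4096 = 65536, a 128-byte line 2^3 * 4096 = 32768, which equals SMC_MAX_CQE + 2 exactly, so 32766 entries pass uncapped in both cases. A sketch reproducing the calculation:

#include <stdio.h>

#define SMC_MAX_CQE 32766
#define MAX_ORDER   11			/* assumed kernel default */
#define PAGE_SZ     4096		/* assumed page size */

int main(void)
{
	int cls[2] = { 64, 128 };
	int i;

	for (i = 0; i < 2; i++) {
		int cqe_size_order = cls[i] == 128 ? 7 : 6;
		int smc_order = MAX_ORDER - cqe_size_order - 1;
		long budget = (1L << smc_order) * PAGE_SZ;
		long cqe = SMC_MAX_CQE;

		if (SMC_MAX_CQE + 2 > budget)
			cqe = budget - 2;	/* the capped value */
		printf("cache line %3d: budget %6ld -> cqe %ld\n",
		       cls[i], budget, cqe);
	}
	return 0;
}
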
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 92fe4cc8c82c..ea4b21981b4b 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -4,9 +4,6 @@
*
* Link Layer Control (LLC)
*
- * For now, we only support the necessary "confirm link" functionality
- * which happens for the first RoCE link after successful CLC handshake.
- *
* Copyright IBM Corp. 2016
*
* Author(s): Klaus Wacker <[email protected]>
@@ -21,6 +18,122 @@
#include "smc_clc.h"
#include "smc_llc.h"
+#define SMC_LLC_DATA_LEN 40
+
+struct smc_llc_hdr {
+ struct smc_wr_rx_hdr common;
+ u8 length; /* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 add_link_rej_rsn:4,
+ reserved:4;
+#endif
+ u8 flags;
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
+
+struct smc_llc_msg_confirm_link { /* type 0x01 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 link_uid[SMC_LGR_ID_SIZE];
+ u8 max_links;
+ u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS 2
+
+struct smc_llc_msg_add_link { /* type 0x02 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 reserved2[2];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 flags2; /* QP mtu */
+ u8 initial_psn[3];
+ u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
+
+struct smc_llc_msg_del_link { /* type 0x04 */
+ struct smc_llc_hdr hd;
+ u8 link_num;
+ __be32 reason;
+ u8 reserved[35];
+} __packed; /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link { /* type 0x07 */
+ struct smc_llc_hdr hd;
+ u8 user_data[16];
+ u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+ union {
+ u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
+ /* is actually the num of rtokens, first */
+ /* rtoken is always for the current link */
+ u8 link_id; /* link id of the rtoken */
+ };
+ __be32 rmb_key;
+ __be64 rmb_vaddr;
+} __packed; /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG 3
+
+struct smc_llc_msg_confirm_rkey { /* type 0x06 */
+ struct smc_llc_hdr hd;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+ u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX 8
+#define SMC_LLC_FLAG_RKEY_NEG 0x20
+
+struct smc_llc_msg_delete_rkey { /* type 0x09 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ u8 err_mask;
+ u8 reserved[2];
+ __be32 rkey[8];
+ u8 reserved2[4];
+};
+
+union smc_llc_msg {
+ struct smc_llc_msg_confirm_link confirm_link;
+ struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_del_link delete_link;
+
+ struct smc_llc_msg_confirm_rkey confirm_rkey;
+ struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+ struct smc_llc_msg_delete_rkey delete_rkey;
+
+ struct smc_llc_msg_test_link test_link;
+ struct {
+ struct smc_llc_hdr hdr;
+ u8 data[SMC_LLC_DATA_LEN];
+ } raw;
+};
+
+#define SMC_LLC_FLAG_RESP 0x80
+
/********************************** send *************************************/
struct smc_llc_tx_pend {
@@ -87,14 +200,112 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(confllc->sender_mac, mac, ETH_ALEN);
memcpy(confllc->sender_gid, gid, SMC_GID_SIZE);
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
- /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
+ confllc->link_num = link->link_id;
memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+ confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+ union ib_gid *gid,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_add_link *addllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ memset(addllc, 0, sizeof(*addllc));
+ addllc->hd.common.type = SMC_LLC_ADD_LINK;
+ addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP) {
+ addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* always reject more links for now */
+ addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+ addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ }
+ memcpy(addllc->sender_mac, mac, ETH_ALEN);
+ memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_del_link *delllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ delllc = (struct smc_llc_msg_del_link *)wr_buf;
+ memset(delllc, 0, sizeof(*delllc));
+ delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
+ if (reqresp == SMC_LLC_RESP)
+ delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* DEL_LINK_ALL because only 1 link supported */
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ delllc->link_num = link->link_id;
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_test_link *testllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ testllc = (struct smc_llc_msg_test_link *)wr_buf;
+ memset(testllc, 0, sizeof(*testllc));
+ testllc->hd.common.type = SMC_LLC_TEST_LINK;
+ testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+ if (reqresp == SMC_LLC_RESP)
+ testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ memcpy(wr_buf, llcbuf, llclen);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
@@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
struct smc_llc_msg_confirm_link *llc)
{
struct smc_link_group *lgr;
+ int conf_rc;
lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ /* RMBE eyecatchers are not supported */
+ if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+ conf_rc = 0;
+ else
+ conf_rc = ENOTSUPP;
+
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV)
+ if (lgr->role == SMC_SERV &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_resp_rc = conf_rc;
complete(&link->llc_confirm_resp);
+ }
} else {
- if (lgr->role == SMC_CLNT) {
+ if (lgr->role == SMC_CLNT &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
}
}
+static void smc_llc_rx_add_link(struct smc_link *link,
+ struct smc_llc_msg_add_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (link->state == SMC_LNK_ACTIVATING)
+ complete(&link->llc_add_resp);
+ } else {
+ if (link->state == SMC_LNK_ACTIVATING) {
+ complete(&link->llc_add);
+ return;
+ }
+
+ if (lgr->role == SMC_SERV) {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+
+ } else {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ }
+ }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+ struct smc_llc_msg_del_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (lgr->role == SMC_SERV)
+ smc_lgr_terminate(lgr);
+ } else {
+ if (lgr->role == SMC_SERV) {
+ smc_lgr_forget(lgr);
+ smc_llc_send_delete_link(link, SMC_LLC_REQ);
+ } else {
+ smc_llc_send_delete_link(link, SMC_LLC_RESP);
+ smc_lgr_terminate(lgr);
+ }
+ }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+ struct smc_llc_msg_test_link *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+ }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ int rc;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ rc = smc_rtoken_add(lgr,
+ llc->rtoken[0].rmb_vaddr,
+ llc->rtoken[0].rmb_key);
+
+ /* ignore rtokens for other links, we have only one link */
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (rc < 0)
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ /* ignore rtokens for other links, we have only one link */
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+ struct smc_llc_msg_delete_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ u8 err_mask = 0;
+ int i, max;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(lgr, llc->rkey[i]))
+ err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+ }
+
+ if (err_mask) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = err_mask;
+ }
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
return; /* short message */
if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */
- if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+ switch (llc->raw.hdr.common.type) {
+ case SMC_LLC_TEST_LINK:
+ smc_llc_rx_test_link(link, &llc->test_link);
+ break;
+ case SMC_LLC_CONFIRM_LINK:
smc_llc_rx_confirm_link(link, &llc->confirm_link);
+ break;
+ case SMC_LLC_ADD_LINK:
+ smc_llc_rx_add_link(link, &llc->add_link);
+ break;
+ case SMC_LLC_DELETE_LINK:
+ smc_llc_rx_delete_link(link, &llc->delete_link);
+ break;
+ case SMC_LLC_CONFIRM_RKEY:
+ smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+ break;
+ case SMC_LLC_CONFIRM_RKEY_CONT:
+ smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+ break;
+ case SMC_LLC_DELETE_RKEY:
+ smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+ break;
+ }
}
/***************************** init, exit, misc ******************************/
@@ -140,6 +510,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
.type = SMC_LLC_CONFIRM_LINK
},
{
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_TEST_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY_CONT
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_RKEY
+ },
+ {
.handler = NULL,
}
};
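Every LLC message above occupies one fixed 44-byte slot: a 4-byte header plus SMC_LLC_DATA_LEN (40) bytes of body, matching the 44 noted next to hd.length and the length check in smc_llc_rx_handler(). A minimal mirror of one message type to sanity-check that layout (local struct names are illustrative):

#include <assert.h>
#include <stdint.h>

struct llc_hdr {			/* mirrors struct smc_llc_hdr */
	uint8_t type;			/* smc_wr_rx_hdr common */
	uint8_t length;			/* always 44 */
	uint8_t rsn_bits;		/* reserved/add_link_rej_rsn byte */
	uint8_t flags;
};

struct llc_test_link {			/* mirrors smc_llc_msg_test_link */
	struct llc_hdr hd;
	uint8_t user_data[16];
	uint8_t reserved[24];
};

int main(void)
{
	/* 4-byte header + 40 bytes of body = the 44 checked on receive */
	static_assert(sizeof(struct llc_test_link) == 44, "LLC wire size");
	return 0;
}
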
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 51b27ce90dbd..e4a7d5e234d5 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -18,6 +18,7 @@
#define SMC_LLC_FLAG_RESP 0x80
#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
+#define SMC_LLC_WAIT_TIME (2 * HZ)
enum smc_llc_reqresp {
SMC_LLC_REQ,
@@ -26,39 +27,23 @@ enum smc_llc_reqresp {
enum smc_llc_msg_type {
SMC_LLC_CONFIRM_LINK = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN 40
-
-struct smc_llc_hdr {
- struct smc_wr_rx_hdr common;
- u8 length; /* 44 */
- u8 reserved;
- u8 flags;
-};
-
-struct smc_llc_msg_confirm_link { /* type 0x01 */
- struct smc_llc_hdr hd;
- u8 sender_mac[ETH_ALEN];
- u8 sender_gid[SMC_GID_SIZE];
- u8 sender_qp_num[3];
- u8 link_num;
- u8 link_uid[SMC_LGR_ID_SIZE];
- u8 max_links;
- u8 reserved[9];
-};
-
-union smc_llc_msg {
- struct smc_llc_msg_confirm_link confirm_link;
- struct {
- struct smc_llc_hdr hdr;
- u8 data[SMC_LLC_DATA_LEN];
- } raw;
+ SMC_LLC_ADD_LINK = 0x02,
+ SMC_LLC_DELETE_LINK = 0x04,
+ SMC_LLC_CONFIRM_RKEY = 0x06,
+ SMC_LLC_TEST_LINK = 0x07,
+ SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
+ SMC_LLC_DELETE_RKEY = 0x09,
};
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+ enum smc_llc_reqresp reqresp);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index ef0c3494c9cb..210bec3c3ebe 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -19,7 +19,6 @@
#include "smc.h"
#include "smc_core.h"
-#define SMC_WR_MAX_CQE 32768 /* max. # of completion queue elements */
#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */
#define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
diff --git a/net/socket.c b/net/socket.c
index fac8246a8ae8..3d1948d27a25 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,7 +104,6 @@
#include <linux/ipv6_route.h>
#include <linux/route.h>
#include <linux/sockios.h>
-#include <linux/atalk.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>
@@ -234,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
return __put_user(klen, ulen);
}
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
@@ -991,10 +990,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* what to do with it - that's up to the protocol still.
*/
-static struct ns_common *get_net_ns(struct ns_common *ns)
+struct ns_common *get_net_ns(struct ns_common *ns)
{
return &get_net(container_of(ns, struct net, ns))->ns;
}
+EXPORT_SYMBOL_GPL(get_net_ns);
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
@@ -2289,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (!sock)
return err;
- err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
+ if (likely(!(flags & MSG_ERRQUEUE))) {
+ err = sock_error(sock->sk);
+ if (err) {
+ datagrams = err;
+ goto out_put;
+ }
}
entry = mmsg;
@@ -2588,6 +2590,11 @@ void sock_unregister(int family)
}
EXPORT_SYMBOL(sock_unregister);
+bool sock_is_registered(int family)
+{
+ return family < NPROTO && rcu_access_pointer(net_families[family]);
+}
+
static int __init sock_init(void)
{
int err;
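The __sys_recvmmsg() change matters to MSG_ZEROCOPY users: a pending socket error no longer aborts a call that only wants to drain the error queue. A hypothetical sketch of such a drain (batch count and control-buffer size are arbitrary):

#define _GNU_SOURCE
#include <string.h>
#include <sys/socket.h>

#define BATCH 8

/* read up to BATCH queued completion notifications in one call */
static int drain_errqueue(int fd)
{
	struct mmsghdr msgs[BATCH];
	char ctrl[BATCH][128];
	int i;

	memset(msgs, 0, sizeof(msgs));
	for (i = 0; i < BATCH; i++) {
		msgs[i].msg_hdr.msg_control = ctrl[i];
		msgs[i].msg_hdr.msg_controllen = sizeof(ctrl[i]);
	}
	/* with this fix, sock_error() is skipped for MSG_ERRQUEUE */
	return recvmmsg(fd, msgs, BATCH, MSG_ERRQUEUE | MSG_DONTWAIT, NULL);
}
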
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 26531193fce4..5089dbb96d58 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1375,7 +1375,7 @@ static int create_use_gss_proxy_proc_entry(struct net *net)
struct proc_dir_entry **p = &sn->use_gssp_proc;
sn->use_gss_proxy = -1;
- *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR,
+ *p = proc_create_data("use-gss-proxy", S_IFREG | 0600,
sn->proc_net_rpc,
&use_gss_proxy_ops, net);
if (!*p)
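For reference while reading the mode conversions in the surrounding sunrpc hunks, the standard POSIX bit values behind them:

	S_IRUSR 0400   S_IWUSR 0200   S_IXUSR 0100
	S_IRUGO 0444   S_IWUGO 0222   S_IXUGO 0111

	hence S_IFREG|S_IRUSR|S_IWUSR -> S_IFREG | 0600,
	      S_IFREG|S_IRUSR         -> S_IFREG | 0400,
	      S_IFDIR|S_IRUGO|S_IXUGO -> S_IFDIR | 0555,
	and the rpc_mkpipe_dentry() masks ~S_IRUGO/~S_IWUGO -> ~0444/~0222.
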
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8a7e1c774f9c..c536cc24b3d1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1621,20 +1621,20 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
if (cd->procfs == NULL)
goto out_nomem;
- p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
+ p = proc_create_data("flush", S_IFREG | 0600,
cd->procfs, &cache_flush_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
if (cd->cache_request || cd->cache_parse) {
- p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
- cd->procfs, &cache_file_operations_procfs, cd);
+ p = proc_create_data("channel", S_IFREG | 0600, cd->procfs,
+ &cache_file_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
}
if (cd->cache_show) {
- p = proc_create_data("content", S_IFREG|S_IRUSR,
- cd->procfs, &content_file_operations_procfs, cd);
+ p = proc_create_data("content", S_IFREG | 0400, cd->procfs,
+ &content_file_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
}
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e980d2a493de..45a033329cd4 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -139,7 +139,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
return;
/* make tasks file */
- if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs,
+ if (!debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs,
clnt, &tasks_fops))
goto out_err;
@@ -241,7 +241,7 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
return;
/* make tasks file */
- if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs,
+ if (!debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs,
xprt, &xprt_info_fops)) {
debugfs_remove_recursive(xprt->debugfs);
xprt->debugfs = NULL;
@@ -317,7 +317,7 @@ inject_fault_dir(struct dentry *topdir)
if (!faultdir)
return NULL;
- if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
+ if (!debugfs_create_file("disconnect", S_IFREG | 0400, faultdir,
NULL, &fault_disconnect_fops))
return NULL;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index fc97fc3ed637..0f08934b2cea 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -820,13 +820,13 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
{
struct dentry *dentry;
struct inode *dir = d_inode(parent);
- umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR;
+ umode_t umode = S_IFIFO | 0600;
int err;
if (pipe->ops->upcall == NULL)
- umode &= ~S_IRUGO;
+ umode &= ~0444;
if (pipe->ops->downcall == NULL)
- umode &= ~S_IWUGO;
+ umode &= ~0222;
inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
@@ -1035,7 +1035,7 @@ static const struct rpc_filelist authfiles[] = {
[RPCAUTH_info] = {
.name = "info",
.i_fop = &rpc_info_operations,
- .mode = S_IFREG | S_IRUSR,
+ .mode = S_IFREG | 0400,
},
};
@@ -1068,8 +1068,8 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
{
struct dentry *ret;
- ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
- rpc_clntdir_populate, rpc_client);
+ ret = rpc_mkdir_populate(dentry, name, 0555, NULL,
+ rpc_clntdir_populate, rpc_client);
if (!IS_ERR(ret)) {
rpc_client->cl_pipedir_objects.pdh_dentry = ret;
rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
@@ -1096,17 +1096,17 @@ static const struct rpc_filelist cache_pipefs_files[3] = {
[0] = {
.name = "channel",
.i_fop = &cache_file_operations_pipefs,
- .mode = S_IFREG|S_IRUSR|S_IWUSR,
+ .mode = S_IFREG | 0600,
},
[1] = {
.name = "content",
.i_fop = &content_file_operations_pipefs,
- .mode = S_IFREG|S_IRUSR,
+ .mode = S_IFREG | 0400,
},
[2] = {
.name = "flush",
.i_fop = &cache_flush_operations_pipefs,
- .mode = S_IFREG|S_IRUSR|S_IWUSR,
+ .mode = S_IFREG | 0600,
},
};
@@ -1164,39 +1164,39 @@ enum {
static const struct rpc_filelist files[] = {
[RPCAUTH_lockd] = {
.name = "lockd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_mount] = {
.name = "mount",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfs] = {
.name = "nfs",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_portmap] = {
.name = "portmap",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_statd] = {
.name = "statd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfsd4_cb] = {
.name = "nfsd4_cb",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_cache] = {
.name = "cache",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfsd] = {
.name = "nfsd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_gssd] = {
.name = "gssd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
};
@@ -1261,7 +1261,7 @@ EXPORT_SYMBOL_GPL(rpc_put_sb_net);
static const struct rpc_filelist gssd_dummy_clnt_dir[] = {
[0] = {
.name = "clntXX",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
};
@@ -1310,7 +1310,7 @@ static const struct rpc_filelist gssd_dummy_info_file[] = {
[0] = {
.name = "info",
.i_fop = &rpc_dummy_info_operations,
- .mode = S_IFREG | S_IRUSR,
+ .mode = S_IFREG | 0400,
},
};
@@ -1397,7 +1397,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
sb->s_d_op = &simple_dentry_operations;
sb->s_time_gran = 1;
- inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
+ inode = rpc_get_inode(sb, S_IFDIR | 0555);
sb->s_root = root = d_make_root(inode);
if (!root)
return -ENOMEM;
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index c25a3a149dc4..e450212121d2 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -34,3 +34,11 @@ config TIPC_MEDIA_UDP
Saying Y here will enable support for running TIPC over IP/UDP
bool
default y
+
+config TIPC_DIAG
+ tristate "TIPC: socket monitoring interface"
+ depends on TIPC
+ default y
+ ---help---
+ Support for the TIPC socket monitoring interface used by the ss tool.
+ If unsure, say Y.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 37bb0bfbd936..aca168f2abb1 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,8 +9,13 @@ tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- server.o socket.o group.o
+ topsrv.o socket.o group.o
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
+
+
+obj-$(CONFIG_TIPC_DIAG) += diag.o
+
+tipc_diag-y := diag.o
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 48fd3b5a73fb..b88d48d00913 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -1,7 +1,7 @@
/*
* net/tipc/addr.c: TIPC address utility routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -34,113 +34,90 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <linux/kernel.h>
#include "addr.h"
#include "core.h"
-/**
- * in_own_cluster - test for cluster inclusion; <0.0.0> always matches
- */
-int in_own_cluster(struct net *net, u32 addr)
-{
- return in_own_cluster_exact(net, addr) || !addr;
-}
-
-int in_own_cluster_exact(struct net *net, u32 addr)
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- return !((addr ^ tn->own_addr) >> 12);
+ if (!domain || (domain == addr))
+ return true;
+ if (!legacy_format)
+ return false;
+ if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
+ return true;
+ if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */
+ return true;
+ return false;
}
-/**
- * in_own_node - test for node inclusion; <0.0.0> always matches
- */
-int in_own_node(struct net *net, u32 addr)
+void tipc_set_node_id(struct net *net, u8 *id)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
+ u32 *tmp = (u32 *)id;
- return (addr == tn->own_addr) || !addr;
+ memcpy(tn->node_id, id, NODE_ID_LEN);
+ tipc_nodeid2string(tn->node_id_string, id);
+ tn->trial_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3];
+ pr_info("Own node identity %s, cluster identity %u\n",
+ tipc_own_id_string(net), tn->net_id);
}
-/**
- * addr_domain - convert 2-bit scope value to equivalent message lookup domain
- *
- * Needed when address of a named message must be looked up a second time
- * after a network hop.
- */
-u32 addr_domain(struct net *net, u32 sc)
+void tipc_set_node_addr(struct net *net, u32 addr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
+ u8 node_id[NODE_ID_LEN] = {0,};
- if (likely(sc == TIPC_NODE_SCOPE))
- return tn->own_addr;
- if (sc == TIPC_CLUSTER_SCOPE)
- return tipc_cluster_mask(tn->own_addr);
- return tipc_zone_mask(tn->own_addr);
+ tn->node_addr = addr;
+ if (!tipc_own_id(net)) {
+ sprintf(node_id, "%x", addr);
+ tipc_set_node_id(net, node_id);
+ }
+ tn->trial_addr = addr;
+ pr_info("32-bit node address hash set to %x\n", addr);
}
-/**
- * tipc_addr_domain_valid - validates a network domain address
- *
- * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
- * where Z, C, and N are non-zero.
- *
- * Returns 1 if domain address is valid, otherwise 0
- */
-int tipc_addr_domain_valid(u32 addr)
+char *tipc_nodeid2string(char *str, u8 *id)
{
- u32 n = tipc_node(addr);
- u32 c = tipc_cluster(addr);
- u32 z = tipc_zone(addr);
-
- if (n && (!z || !c))
- return 0;
- if (c && !z)
- return 0;
- return 1;
-}
+ int i;
+ u8 c;
-/**
- * tipc_addr_node_valid - validates a proposed network address for this node
- *
- * Accepts <Z.C.N>, where Z, C, and N are non-zero.
- *
- * Returns 1 if address can be used, otherwise 0
- */
-int tipc_addr_node_valid(u32 addr)
-{
- return tipc_addr_domain_valid(addr) && tipc_node(addr);
-}
+ /* Already a string ? */
+ for (i = 0; i < NODE_ID_LEN; i++) {
+ c = id[i];
+ if (c >= '0' && c <= '9')
+ continue;
+ if (c >= 'A' && c <= 'Z')
+ continue;
+ if (c >= 'a' && c <= 'z')
+ continue;
+ if (c == '.')
+ continue;
+ if (c == ':')
+ continue;
+ if (c == '_')
+ continue;
+ if (c == '-')
+ continue;
+ if (c == '@')
+ continue;
+ if (c != 0)
+ break;
+ }
+ if (i == NODE_ID_LEN) {
+ memcpy(str, id, NODE_ID_LEN);
+ str[NODE_ID_LEN] = 0;
+ return str;
+ }
-int tipc_in_scope(u32 domain, u32 addr)
-{
- if (!domain || (domain == addr))
- return 1;
- if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
- return 1;
- if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */
- return 1;
- return 0;
-}
+ /* Translate to hex string */
+ for (i = 0; i < NODE_ID_LEN; i++)
+ sprintf(&str[2 * i], "%02x", id[i]);
-/**
- * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
- */
-int tipc_addr_scope(u32 domain)
-{
- if (likely(!domain))
- return TIPC_ZONE_SCOPE;
- if (tipc_node(domain))
- return TIPC_NODE_SCOPE;
- if (tipc_cluster(domain))
- return TIPC_CLUSTER_SCOPE;
- return TIPC_ZONE_SCOPE;
-}
+ /* Strip off trailing zeroes */
+ for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--)
+ str[i] = 0;
-char *tipc_addr_string_fill(char *string, u32 addr)
-{
- snprintf(string, 16, "<%u.%u.%u>",
- tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
- return string;
+ return str;
}
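
The conversion above is self-contained enough to exercise in userspace. The following is a hypothetical standalone re-statement of tipc_nodeid2string(); the id_char_ok() helper and the main() driver are illustrative additions, and a bounds guard is added to the zero-stripping loop, which the kernel version can omit because a node identity is never all zeroes:

#include <stdio.h>
#include <string.h>

#define NODE_ID_LEN	16
#define NODE_ID_STR_LEN	(NODE_ID_LEN * 2 + 1)

/* Characters accepted verbatim in a node identity, as in the patch */
static int id_char_ok(unsigned char c)
{
	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
	       (c >= 'a' && c <= 'z') || c == '.' || c == ':' ||
	       c == '_' || c == '-' || c == '@' || c == 0;
}

static char *nodeid2string(char *str, const unsigned char *id)
{
	int i;

	for (i = 0; i < NODE_ID_LEN; i++)	/* already a string? */
		if (!id_char_ok(id[i]))
			break;
	if (i == NODE_ID_LEN) {
		memcpy(str, id, NODE_ID_LEN);
		str[NODE_ID_LEN] = 0;
		return str;
	}
	for (i = 0; i < NODE_ID_LEN; i++)	/* translate to hex */
		sprintf(&str[2 * i], "%02x", id[i]);
	for (i = NODE_ID_STR_LEN - 2; i > 0 && str[i] == '0'; i--)
		str[i] = 0;			/* strip trailing zeroes */
	return str;
}

int main(void)
{
	unsigned char a[NODE_ID_LEN] = "node-17";
	unsigned char b[NODE_ID_LEN] = { 0xde, 0xad, 0xbe, 0xef };
	char s[NODE_ID_STR_LEN];

	printf("%s\n", nodeid2string(s, a));	/* node-17 */
	printf("%s\n", nodeid2string(s, b));	/* deadbeef */
	return 0;
}
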
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index bebb347803ce..31bee0ea7b3e 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -1,7 +1,7 @@
/*
* net/tipc/addr.h: Include file for TIPC address utility routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
* Copyright (c) 2004-2005, Wind River Systems
* All rights reserved.
*
@@ -45,14 +45,21 @@
static inline u32 tipc_own_addr(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ return tipc_net(net)->node_addr;
+}
+
+static inline u8 *tipc_own_id(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
- return tn->own_addr;
+ if (!strlen(tn->node_id_string))
+ return NULL;
+ return tn->node_id;
}
-static inline u32 tipc_zone_mask(u32 addr)
+static inline char *tipc_own_id_string(struct net *net)
{
- return addr & TIPC_ZONE_MASK;
+ return tipc_net(net)->node_id_string;
}
static inline u32 tipc_cluster_mask(u32 addr)
@@ -60,15 +67,25 @@ static inline u32 tipc_cluster_mask(u32 addr)
return addr & TIPC_ZONE_CLUSTER_MASK;
}
-u32 tipc_own_addr(struct net *net);
-int in_own_cluster(struct net *net, u32 addr);
-int in_own_cluster_exact(struct net *net, u32 addr);
-int in_own_node(struct net *net, u32 addr);
-u32 addr_domain(struct net *net, u32 sc);
-int tipc_addr_domain_valid(u32);
-int tipc_addr_node_valid(u32 addr);
-int tipc_in_scope(u32 domain, u32 addr);
-int tipc_addr_scope(u32 domain);
-char *tipc_addr_string_fill(char *string, u32 addr);
+static inline int tipc_node2scope(u32 node)
+{
+ return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE;
+}
+
+static inline int tipc_scope2node(struct net *net, int sc)
+{
+ return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net);
+}
+
+static inline int in_own_node(struct net *net, u32 addr)
+{
+ return addr == tipc_own_addr(net) || !addr;
+}
+
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
+void tipc_set_node_id(struct net *net, u8 *id);
+void tipc_set_node_addr(struct net *net, u32 addr);
+char *tipc_nodeid2string(char *str, u8 *id);
+u32 tipc_node_id2hash(u8 *id128);
#endif
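
To make the legacy matching concrete: with the historical <zone.cluster.node> layout (zone in bits 31-24, cluster in bits 23-12, node in bits 11-0), the new scope test behaves as in the sketch below, assuming the mask values from the TIPC uapi header:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TIPC_ZONE_MASK		0xff000000u	/* <Z.0.0> */
#define TIPC_ZONE_CLUSTER_MASK	0xfffff000u	/* <Z.C.0> */

/* Mirror of tipc_in_scope(): an exact match always wins; the wildcard
 * domains <Z.C.0> and <Z.0.0> only match for legacy-format peers. */
static bool in_scope(bool legacy, uint32_t domain, uint32_t addr)
{
	if (!domain || domain == addr)
		return true;
	if (!legacy)
		return false;
	if (domain == (addr & TIPC_ZONE_CLUSTER_MASK))
		return true;
	if (domain == (addr & TIPC_ZONE_MASK))
		return true;
	return false;
}

int main(void)
{
	uint32_t addr = (1u << 24) | (1u << 12) | 7;	/* <1.1.7> */

	printf("%d\n", in_scope(true, 1u << 24, addr));	 /* 1 */
	printf("%d\n", in_scope(false, 1u << 24, addr)); /* 0 */
	return 0;
}
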
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 37892b3909af..f3711176be45 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -574,5 +574,5 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
{
tipc_dest_list_purge(&nl->list);
nl->remote = 0;
- nl->local = 0;
+ nl->local = false;
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index c8001471da6c..f7d47c89d658 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -210,7 +210,7 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (b)
- tipc_disc_add_dest(b->link_req);
+ tipc_disc_add_dest(b->disc);
rcu_read_unlock();
}
@@ -222,7 +222,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (b)
- tipc_disc_remove_dest(b->link_req);
+ tipc_disc_remove_dest(b->disc);
rcu_read_unlock();
}
@@ -230,88 +230,67 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
* tipc_enable_bearer - enable bearer with the given name
*/
static int tipc_enable_bearer(struct net *net, const char *name,
- u32 disc_domain, u32 priority,
+ u32 disc_domain, u32 prio,
struct nlattr *attr[])
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_bearer_names b_names;
+ int with_this_prio = 1;
struct tipc_bearer *b;
struct tipc_media *m;
- struct tipc_bearer_names b_names;
struct sk_buff *skb;
- char addr_string[16];
- u32 bearer_id;
- u32 with_this_prio;
- u32 i;
+ int bearer_id = 0;
int res = -EINVAL;
+ char *errstr = "";
- if (!tn->own_addr) {
- pr_warn("Bearer <%s> rejected, not supported in standalone mode\n",
- name);
- return -ENOPROTOOPT;
- }
if (!bearer_name_validate(name, &b_names)) {
- pr_warn("Bearer <%s> rejected, illegal name\n", name);
- return -EINVAL;
+ errstr = "illegal name";
+ goto rejected;
}
- if (tipc_addr_domain_valid(disc_domain) &&
- (disc_domain != tn->own_addr)) {
- if (tipc_in_scope(disc_domain, tn->own_addr)) {
- disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK;
- res = 0; /* accept any node in own cluster */
- } else if (in_own_cluster_exact(net, disc_domain))
- res = 0; /* accept specified node in own cluster */
- }
- if (res) {
- pr_warn("Bearer <%s> rejected, illegal discovery domain\n",
- name);
- return -EINVAL;
- }
- if ((priority > TIPC_MAX_LINK_PRI) &&
- (priority != TIPC_MEDIA_LINK_PRI)) {
- pr_warn("Bearer <%s> rejected, illegal priority\n", name);
- return -EINVAL;
+
+ if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
+ errstr = "illegal priority";
+ goto rejected;
}
m = tipc_media_find(b_names.media_name);
if (!m) {
- pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
- name, b_names.media_name);
- return -EINVAL;
+ errstr = "media not registered";
+ goto rejected;
}
- if (priority == TIPC_MEDIA_LINK_PRI)
- priority = m->priority;
+ if (prio == TIPC_MEDIA_LINK_PRI)
+ prio = m->priority;
-restart:
- bearer_id = MAX_BEARERS;
- with_this_prio = 1;
- for (i = MAX_BEARERS; i-- != 0; ) {
- b = rtnl_dereference(tn->bearer_list[i]);
- if (!b) {
- bearer_id = i;
- continue;
- }
+ /* Check new bearer vs existing ones and find free bearer id if any */
+ while (bearer_id < MAX_BEARERS) {
+ b = rtnl_dereference(tn->bearer_list[bearer_id]);
+ if (!b)
+ break;
if (!strcmp(name, b->name)) {
- pr_warn("Bearer <%s> rejected, already enabled\n",
- name);
- return -EINVAL;
+ errstr = "already enabled";
+ goto rejected;
}
- if ((b->priority == priority) &&
- (++with_this_prio > 2)) {
- if (priority-- == 0) {
- pr_warn("Bearer <%s> rejected, duplicate priority\n",
- name);
- return -EINVAL;
- }
- pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
- name, priority + 1, priority);
- goto restart;
+ bearer_id++;
+ if (b->priority != prio)
+ continue;
+ if (++with_this_prio <= 2)
+ continue;
+ pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
+ name, prio);
+ if (prio == TIPC_MIN_LINK_PRI) {
+ errstr = "cannot adjust to lower";
+ goto rejected;
}
+ pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
+ prio--;
+ bearer_id = 0;
+ with_this_prio = 1;
}
+
if (bearer_id >= MAX_BEARERS) {
- pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
- name, MAX_BEARERS);
- return -EINVAL;
+ errstr = "max 3 bearers permitted";
+ goto rejected;
}
b = kzalloc(sizeof(*b), GFP_ATOMIC);
@@ -322,10 +301,9 @@ restart:
b->media = m;
res = m->enable_media(net, b, attr);
if (res) {
- pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
- name, -res);
kfree(b);
- return -EINVAL;
+ errstr = "failed to enable media";
+ goto rejected;
}
b->identity = bearer_id;
@@ -333,15 +311,15 @@ restart:
b->window = m->window;
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
- b->priority = priority;
+ b->priority = prio;
test_and_set_bit_lock(0, &b->up);
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
bearer_disable(net, b);
- pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
- name);
- return -EINVAL;
+ kfree(b);
+ errstr = "failed to create discoverer";
+ goto rejected;
}
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
@@ -353,9 +331,11 @@ restart:
return -ENOMEM;
}
- pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
- name,
- tipc_addr_string_fill(addr_string, disc_domain), priority);
+ pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
+
+ return res;
+rejected:
+ pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr);
return res;
}
@@ -385,8 +365,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
tipc_node_delete_links(net, bearer_id);
b->media->disable_media(b);
RCU_INIT_POINTER(b->media_ptr, NULL);
- if (b->link_req)
- tipc_disc_delete(b->link_req);
+ if (b->disc)
+ tipc_disc_delete(b->disc);
RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
kfree_rcu(b, rcu);
tipc_mon_delete(net, bearer_id);
@@ -395,11 +375,13 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
struct nlattr *attr[])
{
+ char *dev_name = strchr((const char *)b->name, ':') + 1;
+ int hwaddr_len = b->media->hwaddr_len;
+ u8 node_id[NODE_ID_LEN] = {0,};
struct net_device *dev;
- char *driver_name = strchr((const char *)b->name, ':') + 1;
/* Find device with specified name */
- dev = dev_get_by_name(net, driver_name);
+ dev = dev_get_by_name(net, dev_name);
if (!dev)
return -ENODEV;
if (tipc_mtu_bad(dev, 0)) {
@@ -407,6 +389,16 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
return -EINVAL;
}
+ /* Autoconfigure own node identity if needed */
+ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) {
+ memcpy(node_id, dev->dev_addr, hwaddr_len);
+ tipc_net_init(net, node_id, 0);
+ }
+ if (!tipc_own_id(net)) {
+ pr_warn("Failed to obtain node identity\n");
+ return -EINVAL;
+ }
+
/* Associate TIPC bearer with L2 bearer */
rcu_assign_pointer(b->media_ptr, dev);
b->pt.dev = dev;
@@ -414,7 +406,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
b->pt.func = tipc_l2_rcv_msg;
dev_add_pack(&b->pt);
memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
- memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
+ memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len);
b->bcast_addr.media_id = b->media->type_id;
b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
b->mtu = dev->mtu;
@@ -813,7 +805,7 @@ err_out:
return err;
}
-int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
@@ -835,31 +827,36 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
- rtnl_lock();
bearer = tipc_bearer_find(net, name);
- if (!bearer) {
- rtnl_unlock();
+ if (!bearer)
return -EINVAL;
- }
bearer_disable(net, bearer);
- rtnl_unlock();
return 0;
}
-int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
+int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_bearer_disable(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+
+int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *bearer;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- u32 domain;
+ u32 domain = 0;
u32 prio;
prio = TIPC_MEDIA_LINK_PRI;
- domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK;
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
@@ -890,15 +887,18 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
}
+ return tipc_enable_bearer(net, bearer, domain, prio, attrs);
+}
+
+int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
rtnl_lock();
- err = tipc_enable_bearer(net, bearer, domain, prio, attrs);
- if (err) {
- rtnl_unlock();
- return err;
- }
+ err = __tipc_nl_bearer_enable(skb, info);
rtnl_unlock();
- return 0;
+ return err;
}
int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
@@ -944,13 +944,13 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
{
- int err;
- char *name;
struct tipc_bearer *b;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
+ char *name;
+ int err;
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
@@ -965,35 +965,42 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
- rtnl_lock();
b = tipc_bearer_find(net, name);
- if (!b) {
- rtnl_unlock();
+ if (!b)
return -EINVAL;
- }
if (attrs[TIPC_NLA_BEARER_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP],
props);
- if (err) {
- rtnl_unlock();
+ if (err)
return err;
- }
- if (props[TIPC_NLA_PROP_TOL])
+ if (props[TIPC_NLA_PROP_TOL]) {
b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+ tipc_node_apply_tolerance(net, b);
+ }
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
}
- rtnl_unlock();
return 0;
}
+int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_bearer_set(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+
static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
struct tipc_media *media, int nlflags)
{
@@ -1115,7 +1122,7 @@ err_out:
return err;
}
-int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
@@ -1133,22 +1140,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
- rtnl_lock();
m = tipc_media_find(name);
- if (!m) {
- rtnl_unlock();
+ if (!m)
return -EINVAL;
- }
if (attrs[TIPC_NLA_MEDIA_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP],
props);
- if (err) {
- rtnl_unlock();
+ if (err)
return err;
- }
if (props[TIPC_NLA_PROP_TOL])
m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
@@ -1157,7 +1159,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_WIN])
m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
}
- rtnl_unlock();
return 0;
}
+
+int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_media_set(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 42d6eeeb646d..6efcee63a381 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -159,7 +159,7 @@ struct tipc_bearer {
u32 tolerance;
u32 domain;
u32 identity;
- struct tipc_link_req *link_req;
+ struct tipc_discoverer *disc;
char net_plane;
unsigned long up;
};
@@ -188,15 +188,19 @@ extern struct tipc_media udp_media_info;
#endif
int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
int tipc_media_set_priority(const char *name, u32 new_value);
int tipc_media_set_window(const char *name, u32 new_value);
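
The pattern repeated across bearer.c above, splitting each netlink handler into a lockless __tipc_nl_* body and a thin wrapper that takes rtnl_lock(), presumably exists so that callers which already hold rtnl can reuse the handler body without deadlocking. A generic sketch of the same split, with a pthread mutex standing in for rtnl_lock():

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;
static int cfg_val;

/* Lockless body: the caller must already hold cfg_lock */
static int __cfg_set(int val)
{
	cfg_val = val;
	return 0;
}

/* Locked wrapper, shaped like tipc_nl_bearer_set() above */
static int cfg_set(int val)
{
	int err;

	pthread_mutex_lock(&cfg_lock);
	err = __cfg_set(val);
	pthread_mutex_unlock(&cfg_lock);
	return err;
}

int main(void)
{
	cfg_set(42);
	printf("%d\n", cfg_val);
	return 0;
}
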
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 0b982d048fb9..5b38f5164281 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -56,7 +56,11 @@ static int __net_init tipc_init_net(struct net *net)
int err;
tn->net_id = 4711;
- tn->own_addr = 0;
+ tn->node_addr = 0;
+ tn->trial_addr = 0;
+ tn->addr_trial_end = 0;
+ memset(tn->node_id, 0, sizeof(tn->node_id));
+ memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
get_random_bytes(&tn->random, sizeof(int));
INIT_LIST_HEAD(&tn->node_list);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 20b21af2ff14..d0f64ca62d02 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -1,7 +1,7 @@
/*
* net/tipc/core.h: Include file for TIPC global declarations
*
- * Copyright (c) 2005-2006, 2013 Ericsson AB
+ * Copyright (c) 2005-2006, 2013-2018 Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -64,7 +64,7 @@ struct tipc_bearer;
struct tipc_bc_base;
struct tipc_link;
struct tipc_name_table;
-struct tipc_server;
+struct tipc_topsrv;
struct tipc_monitor;
#define TIPC_MOD_VER "2.0.0"
@@ -72,15 +72,22 @@ struct tipc_monitor;
#define NODE_HTABLE_SIZE 512
#define MAX_BEARERS 3
#define TIPC_DEF_MON_THRESHOLD 32
+#define NODE_ID_LEN 16
+#define NODE_ID_STR_LEN (NODE_ID_LEN * 2 + 1)
extern unsigned int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
extern int sysctl_tipc_named_timeout __read_mostly;
struct tipc_net {
- u32 own_addr;
+ u8 node_id[NODE_ID_LEN];
+ u32 node_addr;
+ u32 trial_addr;
+ unsigned long addr_trial_end;
+ char node_id_string[NODE_ID_STR_LEN];
int net_id;
int random;
+ bool legacy_addr_format;
/* Node table and node list */
spinlock_t node_list_lock;
@@ -112,7 +119,7 @@ struct tipc_net {
struct list_head dist_queue;
/* Topology subscription server */
- struct tipc_server *topsrv;
+ struct tipc_topsrv *topsrv;
atomic_t subscription_count;
};
@@ -131,7 +138,12 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
-static inline struct tipc_server *tipc_topsrv(struct net *net)
+static inline struct name_table *tipc_name_table(struct net *net)
+{
+ return tipc_net(net)->nametbl;
+}
+
+static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
{
return tipc_net(net)->topsrv;
}
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
new file mode 100644
index 000000000000..46d9cd62f781
--- /dev/null
+++ b/net/tipc/diag.c
@@ -0,0 +1,114 @@
+/*
+ * net/tipc/diag.c: TIPC socket diag
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "socket.h"
+#include <linux/sock_diag.h>
+#include <linux/tipc_sockets_diag.h>
+
+static u64 __tipc_diag_gen_cookie(struct sock *sk)
+{
+ u32 res[2];
+
+ sock_diag_save_cookie(sk, res);
+ return *((u64 *)res);
+}
+
+static int __tipc_add_sock_diag(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk)
+{
+ struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh);
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0,
+ NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ err = tipc_sk_fill_sock_diag(skb, tsk, req->tidiag_states,
+ __tipc_diag_gen_cookie);
+ if (err)
+ return err;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
+static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag);
+}
+
+static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
+ struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct tipc_sock_diag_req);
+ struct net *net = sock_net(skb->sk);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = tipc_diag_dump,
+ };
+ netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler tipc_sock_diag_handler = {
+ .family = AF_TIPC,
+ .dump = tipc_sock_diag_handler_dump,
+};
+
+static int __init tipc_diag_init(void)
+{
+ return sock_diag_register(&tipc_sock_diag_handler);
+}
+
+static void __exit tipc_diag_exit(void)
+{
+ sock_diag_unregister(&tipc_sock_diag_handler);
+}
+
+module_init(tipc_diag_init);
+module_exit(tipc_diag_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
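
With CONFIG_TIPC_DIAG enabled, the new handler answers SOCK_DIAG_BY_FAMILY dump requests for AF_TIPC, which is what the ss tool issues. A hypothetical minimal userspace client (error handling trimmed; struct tipc_sock_diag_req and its field names are assumed from the companion uapi header added alongside this file):

#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/tipc_sockets_diag.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct tipc_sock_diag_req req;
	} msg = {
		.nlh = {
			.nlmsg_len = sizeof(msg),
			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family = AF_TIPC,
			.tidiag_states = ~0U,	/* all socket states */
		},
	};
	char buf[8192];
	ssize_t n;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	if (fd < 0 || send(fd, &msg, sizeof(msg), 0) < 0)
		return 1;
	while ((n = recv(fd, buf, sizeof(buf), 0)) > 0) {
		struct nlmsghdr *h = (struct nlmsghdr *)buf;

		for (; NLMSG_OK(h, n); h = NLMSG_NEXT(h, n)) {
			if (h->nlmsg_type == NLMSG_DONE)
				goto done;
			printf("sock diag entry, %u bytes\n", h->nlmsg_len);
		}
	}
done:
	close(fd);
	return 0;
}

Each multipart reply message carries the attributes filled in by tipc_sk_fill_sock_diag() for one socket.
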
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 92e4828c6b09..9f666e0650e2 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -1,7 +1,7 @@
/*
* net/tipc/discover.c
*
- * Copyright (c) 2003-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2003-2006, 2014-2018, Ericsson AB
* Copyright (c) 2005-2006, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -39,34 +39,34 @@
#include "discover.h"
/* min delay during bearer start up */
-#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125)
+#define TIPC_DISC_INIT msecs_to_jiffies(125)
/* max delay if bearer has no links */
-#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000)
+#define TIPC_DISC_FAST msecs_to_jiffies(1000)
/* max delay if bearer has links */
-#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000)
+#define TIPC_DISC_SLOW msecs_to_jiffies(60000)
/* indicates no timer in use */
-#define TIPC_LINK_REQ_INACTIVE 0xffffffff
+#define TIPC_DISC_INACTIVE 0xffffffff
/**
- * struct tipc_link_req - information about an ongoing link setup request
+ * struct tipc_discoverer - information about an ongoing link setup request
* @bearer_id: identity of bearer issuing requests
* @net: network namespace instance
* @dest: destination address for request messages
* @domain: network domain to which links can be established
* @num_nodes: number of nodes currently discovered (i.e. with an active link)
* @lock: spinlock for controlling access to requests
- * @buf: request message to be (repeatedly) sent
+ * @skb: request message to be (repeatedly) sent
* @timer: timer governing period between requests
* @timer_intv: current interval between requests (in ms)
*/
-struct tipc_link_req {
+struct tipc_discoverer {
u32 bearer_id;
struct tipc_media_addr dest;
struct net *net;
u32 domain;
int num_nodes;
spinlock_t lock;
- struct sk_buff *buf;
+ struct sk_buff *skb;
struct timer_list timer;
unsigned long timer_intv;
};
@@ -77,22 +77,42 @@ struct tipc_link_req {
* @type: message type (request or response)
* @b: ptr to bearer issuing message
*/
-static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
- struct tipc_bearer *b)
+static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
+ u32 mtyp, struct tipc_bearer *b)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_msg *msg;
+ struct tipc_net *tn = tipc_net(net);
u32 dest_domain = b->domain;
+ struct tipc_msg *hdr;
- msg = buf_msg(buf);
- tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type,
+ hdr = buf_msg(skb);
+ tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp,
MAX_H_SIZE, dest_domain);
- msg_set_non_seq(msg, 1);
- msg_set_node_sig(msg, tn->random);
- msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES);
- msg_set_dest_domain(msg, dest_domain);
- msg_set_bc_netid(msg, tn->net_id);
- b->media->addr2msg(msg_media_addr(msg), &b->addr);
+ msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN);
+ msg_set_non_seq(hdr, 1);
+ msg_set_node_sig(hdr, tn->random);
+ msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES);
+ msg_set_dest_domain(hdr, dest_domain);
+ msg_set_bc_netid(hdr, tn->net_id);
+ b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+ msg_set_node_id(hdr, tipc_own_id(net));
+}
+
+static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst,
+ u32 src, u32 sugg_addr,
+ struct tipc_media_addr *maddr,
+ struct tipc_bearer *b)
+{
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+
+ skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
+ if (!skb)
+ return;
+ hdr = buf_msg(skb);
+ tipc_disc_init_msg(net, skb, mtyp, b);
+ msg_set_sugg_node_addr(hdr, sugg_addr);
+ msg_set_dest_domain(hdr, dst);
+ tipc_bearer_xmit_skb(net, b->identity, skb, maddr);
}
/**
@@ -104,161 +124,207 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
struct tipc_media_addr *media_addr)
{
- char node_addr_str[16];
char media_addr_str[64];
- tipc_addr_string_fill(node_addr_str, node_addr);
tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str),
media_addr);
- pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str,
+ pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr,
media_addr_str, b->name);
}
+/* tipc_disc_addr_trial_msg - handle an address uniqueness trial from peer
+ */
+static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
+ struct tipc_media_addr *maddr,
+ struct tipc_bearer *b,
+ u32 dst, u32 src,
+ u32 sugg_addr,
+ u8 *peer_id,
+ int mtyp)
+{
+ struct net *net = d->net;
+ struct tipc_net *tn = tipc_net(net);
+ bool trial = time_before(jiffies, tn->addr_trial_end);
+ u32 self = tipc_own_addr(net);
+
+ if (mtyp == DSC_TRIAL_FAIL_MSG) {
+ if (!trial)
+ return true;
+
+ /* Ignore if somebody else already gave new suggestion */
+ if (dst != tn->trial_addr)
+ return true;
+
+ /* Otherwise update trial address and restart trial period */
+ tn->trial_addr = sugg_addr;
+ msg_set_prevnode(buf_msg(d->skb), sugg_addr);
+ tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+ return true;
+ }
+
+ /* Apply trial address if we just left trial period */
+ if (!trial && !self) {
+ tipc_net_finalize(net, tn->trial_addr);
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ }
+
+ if (mtyp != DSC_TRIAL_MSG)
+ return false;
+
+ sugg_addr = tipc_node_try_addr(net, peer_id, src);
+ if (sugg_addr)
+ tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src,
+ self, sugg_addr, maddr, b);
+ return true;
+}
+
/**
* tipc_disc_rcv - handle incoming discovery message (request or response)
- * @net: the applicable net namespace
- * @buf: buffer containing message
- * @bearer: bearer that message arrived on
+ * @net: applicable net namespace
+ * @skb: buffer containing message
+ * @b: bearer that message arrived on
*/
void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
- struct tipc_bearer *bearer)
+ struct tipc_bearer *b)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_media_addr maddr;
- struct sk_buff *rskb;
+ struct tipc_net *tn = tipc_net(net);
struct tipc_msg *hdr = buf_msg(skb);
- u32 ddom = msg_dest_domain(hdr);
- u32 onode = msg_prevnode(hdr);
+ u16 caps = msg_node_capabilities(hdr);
+ bool legacy = tn->legacy_addr_format;
+ u32 sugg = msg_sugg_node_addr(hdr);
+ u32 signature = msg_node_sig(hdr);
+ u8 peer_id[NODE_ID_LEN] = {0,};
+ u32 dst = msg_dest_domain(hdr);
u32 net_id = msg_bc_netid(hdr);
+ struct tipc_media_addr maddr;
+ u32 src = msg_prevnode(hdr);
u32 mtyp = msg_type(hdr);
- u32 signature = msg_node_sig(hdr);
- u16 caps = msg_node_capabilities(hdr);
- bool respond = false;
bool dupl_addr = false;
+ bool respond = false;
+ u32 self;
int err;
- err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
- kfree_skb(skb);
- if (err)
- return;
+ skb_linearize(skb);
+ hdr = buf_msg(skb);
- /* Ensure message from node is valid and communication is permitted */
- if (net_id != tn->net_id)
+ if (caps & TIPC_NODE_ID128)
+ memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN);
+ else
+ sprintf(peer_id, "%x", src);
+
+ err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr));
+ kfree_skb(skb);
+ if (err || maddr.broadcast) {
+ pr_warn_ratelimited("Rcv corrupt discovery message\n");
return;
- if (maddr.broadcast)
+ }
+ /* Ignore discovery messages from own node */
+ if (!memcmp(&maddr, &b->addr, sizeof(maddr)))
return;
- if (!tipc_addr_domain_valid(ddom))
+ if (net_id != tn->net_id)
return;
- if (!tipc_addr_node_valid(onode))
+ if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst,
+ src, sugg, peer_id, mtyp))
return;
+ self = tipc_own_addr(net);
- if (in_own_node(net, onode)) {
- if (memcmp(&maddr, &bearer->addr, sizeof(maddr)))
- disc_dupl_alert(bearer, tn->own_addr, &maddr);
+ /* Message from somebody using this node's address */
+ if (in_own_node(net, src)) {
+ disc_dupl_alert(b, self, &maddr);
return;
}
- if (!tipc_in_scope(ddom, tn->own_addr))
+ if (!tipc_in_scope(legacy, dst, self))
return;
- if (!tipc_in_scope(bearer->domain, onode))
+ if (!tipc_in_scope(legacy, b->domain, src))
return;
-
- tipc_node_check_dest(net, onode, bearer, caps, signature,
+ tipc_node_check_dest(net, src, peer_id, b, caps, signature,
&maddr, &respond, &dupl_addr);
if (dupl_addr)
- disc_dupl_alert(bearer, onode, &maddr);
-
- /* Send response, if necessary */
- if (respond && (mtyp == DSC_REQ_MSG)) {
- rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
- if (!rskb)
- return;
- tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
- tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr);
- }
+ disc_dupl_alert(b, src, &maddr);
+ if (!respond)
+ return;
+ if (mtyp != DSC_REQ_MSG)
+ return;
+ tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b);
}
-/**
- * disc_update - update frequency of periodic link setup requests
- * @req: ptr to link request structure
- *
- * Reinitiates discovery process if discovery object has no associated nodes
- * and is either not currently searching or is searching at a slow rate
+/* tipc_disc_add_dest - increment set of discovered nodes
*/
-static void disc_update(struct tipc_link_req *req)
+void tipc_disc_add_dest(struct tipc_discoverer *d)
{
- if (!req->num_nodes) {
- if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) ||
- (req->timer_intv > TIPC_LINK_REQ_FAST)) {
- req->timer_intv = TIPC_LINK_REQ_INIT;
- mod_timer(&req->timer, jiffies + req->timer_intv);
- }
- }
+ spin_lock_bh(&d->lock);
+ d->num_nodes++;
+ spin_unlock_bh(&d->lock);
}
-/**
- * tipc_disc_add_dest - increment set of discovered nodes
- * @req: ptr to link request structure
+/* tipc_disc_remove_dest - decrement set of discovered nodes
*/
-void tipc_disc_add_dest(struct tipc_link_req *req)
+void tipc_disc_remove_dest(struct tipc_discoverer *d)
{
- spin_lock_bh(&req->lock);
- req->num_nodes++;
- spin_unlock_bh(&req->lock);
-}
+ int intv, num;
-/**
- * tipc_disc_remove_dest - decrement set of discovered nodes
- * @req: ptr to link request structure
- */
-void tipc_disc_remove_dest(struct tipc_link_req *req)
-{
- spin_lock_bh(&req->lock);
- req->num_nodes--;
- disc_update(req);
- spin_unlock_bh(&req->lock);
+ spin_lock_bh(&d->lock);
+ d->num_nodes--;
+ num = d->num_nodes;
+ intv = d->timer_intv;
+ if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST)) {
+ d->timer_intv = TIPC_DISC_INIT;
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ }
+ spin_unlock_bh(&d->lock);
}
-/**
- * disc_timeout - send a periodic link setup request
- * @data: ptr to link request structure
- *
+/* tipc_disc_timeout - send a periodic link setup request
* Called whenever a link setup request timer associated with a bearer expires.
+ * - Keep doubling time between sent requests until limit is reached;
+ * - Hold at fast polling rate if we don't have any associated nodes
+ * - Otherwise hold at slow polling rate
*/
-static void disc_timeout(struct timer_list *t)
+static void tipc_disc_timeout(struct timer_list *t)
{
- struct tipc_link_req *req = from_timer(req, t, timer);
- struct sk_buff *skb;
- int max_delay;
+ struct tipc_discoverer *d = from_timer(d, t, timer);
+ struct tipc_net *tn = tipc_net(d->net);
+ u32 self = tipc_own_addr(d->net);
+ struct tipc_media_addr maddr;
+ struct sk_buff *skb = NULL;
+ struct net *net = d->net;
+ u32 bearer_id;
- spin_lock_bh(&req->lock);
+ spin_lock_bh(&d->lock);
/* Stop searching if only desired node has been found */
- if (tipc_node(req->domain) && req->num_nodes) {
- req->timer_intv = TIPC_LINK_REQ_INACTIVE;
+ if (tipc_node(d->domain) && d->num_nodes) {
+ d->timer_intv = TIPC_DISC_INACTIVE;
goto exit;
}
- /*
- * Send discovery message, then update discovery timer
- *
- * Keep doubling time between requests until limit is reached;
- * hold at fast polling rate if don't have any associated nodes,
- * otherwise hold at slow polling rate
- */
- skb = skb_clone(req->buf, GFP_ATOMIC);
- if (skb)
- tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest);
- req->timer_intv *= 2;
- if (req->num_nodes)
- max_delay = TIPC_LINK_REQ_SLOW;
- else
- max_delay = TIPC_LINK_REQ_FAST;
- if (req->timer_intv > max_delay)
- req->timer_intv = max_delay;
+ /* Did we just leave the address trial period ? */
+ if (!self && !time_before(jiffies, tn->addr_trial_end)) {
+ self = tn->trial_addr;
+ tipc_net_finalize(net, self);
+ msg_set_prevnode(buf_msg(d->skb), self);
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ }
+
+ /* Adjust timeout interval according to discovery phase */
+ if (time_before(jiffies, tn->addr_trial_end)) {
+ d->timer_intv = TIPC_DISC_INIT;
+ } else {
+ d->timer_intv *= 2;
+ if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW)
+ d->timer_intv = TIPC_DISC_SLOW;
+ else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
+ d->timer_intv = TIPC_DISC_FAST;
+ }
- mod_timer(&req->timer, jiffies + req->timer_intv);
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ memcpy(&maddr, &d->dest, sizeof(maddr));
+ skb = skb_clone(d->skb, GFP_ATOMIC);
+ bearer_id = d->bearer_id;
exit:
- spin_unlock_bh(&req->lock);
+ spin_unlock_bh(&d->lock);
+ if (skb)
+ tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr);
}
/**
@@ -273,41 +339,47 @@ exit:
int tipc_disc_create(struct net *net, struct tipc_bearer *b,
struct tipc_media_addr *dest, struct sk_buff **skb)
{
- struct tipc_link_req *req;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_discoverer *d;
- req = kmalloc(sizeof(*req), GFP_ATOMIC);
- if (!req)
+ d = kmalloc(sizeof(*d), GFP_ATOMIC);
+ if (!d)
return -ENOMEM;
- req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
- if (!req->buf) {
- kfree(req);
+ d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
+ if (!d->skb) {
+ kfree(d);
return -ENOMEM;
}
+ tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
- tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
- memcpy(&req->dest, dest, sizeof(*dest));
- req->net = net;
- req->bearer_id = b->identity;
- req->domain = b->domain;
- req->num_nodes = 0;
- req->timer_intv = TIPC_LINK_REQ_INIT;
- spin_lock_init(&req->lock);
- timer_setup(&req->timer, disc_timeout, 0);
- mod_timer(&req->timer, jiffies + req->timer_intv);
- b->link_req = req;
- *skb = skb_clone(req->buf, GFP_ATOMIC);
+ /* Do we need an address trial period first ? */
+ if (!tipc_own_addr(net)) {
+ tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+ msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG);
+ }
+ memcpy(&d->dest, dest, sizeof(*dest));
+ d->net = net;
+ d->bearer_id = b->identity;
+ d->domain = b->domain;
+ d->num_nodes = 0;
+ d->timer_intv = TIPC_DISC_INIT;
+ spin_lock_init(&d->lock);
+ timer_setup(&d->timer, tipc_disc_timeout, 0);
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ b->disc = d;
+ *skb = skb_clone(d->skb, GFP_ATOMIC);
return 0;
}
/**
* tipc_disc_delete - destroy object sending periodic link setup requests
- * @req: ptr to link request structure
+ * @d: ptr to discoverer structure
*/
-void tipc_disc_delete(struct tipc_link_req *req)
+void tipc_disc_delete(struct tipc_discoverer *d)
{
- del_timer_sync(&req->timer);
- kfree_skb(req->buf);
- kfree(req);
+ del_timer_sync(&d->timer);
+ kfree_skb(d->skb);
+ kfree(d);
}
/**
@@ -318,19 +390,21 @@ void tipc_disc_delete(struct tipc_link_req *req)
*/
void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
{
- struct tipc_link_req *req = b->link_req;
+ struct tipc_discoverer *d = b->disc;
+ struct tipc_media_addr maddr;
struct sk_buff *skb;
- spin_lock_bh(&req->lock);
- tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
- req->net = net;
- req->bearer_id = b->identity;
- req->domain = b->domain;
- req->num_nodes = 0;
- req->timer_intv = TIPC_LINK_REQ_INIT;
- mod_timer(&req->timer, jiffies + req->timer_intv);
- skb = skb_clone(req->buf, GFP_ATOMIC);
+ spin_lock_bh(&d->lock);
+ tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
+ d->net = net;
+ d->bearer_id = b->identity;
+ d->domain = b->domain;
+ d->num_nodes = 0;
+ d->timer_intv = TIPC_DISC_INIT;
+ memcpy(&maddr, &d->dest, sizeof(maddr));
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ skb = skb_clone(d->skb, GFP_ATOMIC);
+ spin_unlock_bh(&d->lock);
if (skb)
- tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
- spin_unlock_bh(&req->lock);
+ tipc_bearer_xmit_skb(net, b->identity, skb, &maddr);
}
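
The resulting probe cadence is easiest to see in isolation. Below is a standalone re-statement of the interval policy in tipc_disc_timeout() above, with values in milliseconds as defined in this file (the kernel works in jiffies):

#include <stdbool.h>
#include <stdio.h>

#define TIPC_DISC_INIT	125	/* ms */
#define TIPC_DISC_FAST	1000	/* ms */
#define TIPC_DISC_SLOW	60000	/* ms */

static unsigned int next_intv(unsigned int intv, int num_nodes, bool in_trial)
{
	if (in_trial)			/* keep probing fast during trial */
		return TIPC_DISC_INIT;
	intv *= 2;			/* otherwise double the interval... */
	if (num_nodes && intv > TIPC_DISC_SLOW)
		return TIPC_DISC_SLOW;	/* ...up to slow rate with links */
	if (!num_nodes && intv > TIPC_DISC_FAST)
		return TIPC_DISC_FAST;	/* ...fast rate without links */
	return intv;
}

int main(void)
{
	unsigned int intv = TIPC_DISC_INIT;
	int i;

	for (i = 0; i < 6; i++) {
		printf("%u ms\n", intv);
		intv = next_intv(intv, 0, false);
	}
	/* prints 125, 250, 500, 1000, 1000, 1000 */
	return 0;
}
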
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index b80a335389c0..521d96c41dfd 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -37,14 +37,14 @@
#ifndef _TIPC_DISCOVER_H
#define _TIPC_DISCOVER_H
-struct tipc_link_req;
+struct tipc_discoverer;
int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
struct tipc_media_addr *dest, struct sk_buff **skb);
-void tipc_disc_delete(struct tipc_link_req *req);
+void tipc_disc_delete(struct tipc_discoverer *req);
void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr);
-void tipc_disc_add_dest(struct tipc_link_req *req);
-void tipc_disc_remove_dest(struct tipc_link_req *req);
+void tipc_disc_add_dest(struct tipc_discoverer *req);
+void tipc_disc_remove_dest(struct tipc_discoverer *req);
void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
struct tipc_bearer *b_ptr);
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 122162a31816..d7a7befeddd4 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -37,7 +37,7 @@
#include "addr.h"
#include "group.h"
#include "bcast.h"
-#include "server.h"
+#include "topsrv.h"
#include "msg.h"
#include "socket.h"
#include "node.h"
@@ -189,6 +189,7 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid,
grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
grp->open = group_is_open;
+ *grp->open = false;
filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE;
if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0,
filter, &grp->subid))
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2d6b2aed30e0..1289b4ba404f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -434,14 +434,16 @@ char *tipc_link_name(struct tipc_link *l)
*/
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
- int window, u32 session, u32 ownnode, u32 peer,
- u16 peer_caps,
+ int window, u32 session, u32 self,
+ u32 peer, u8 *peer_id, u16 peer_caps,
struct tipc_link *bc_sndlink,
struct tipc_link *bc_rcvlink,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link **link)
{
+ char peer_str[NODE_ID_STR_LEN] = {0,};
+ char self_str[NODE_ID_STR_LEN] = {0,};
struct tipc_link *l;
l = kzalloc(sizeof(*l), GFP_ATOMIC);
@@ -450,10 +452,18 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
*link = l;
l->session = session;
- /* Note: peer i/f name is completed by reset/activate message */
- sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
- tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
- if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
+ /* Set link name for unicast links only */
+ if (peer_id) {
+ tipc_nodeid2string(self_str, tipc_own_id(net));
+ if (strlen(self_str) > 16)
+ sprintf(self_str, "%x", self);
+ tipc_nodeid2string(peer_str, peer_id);
+ if (strlen(peer_str) > 16)
+ sprintf(peer_str, "%x", peer);
+ }
+ /* Peer i/f name will be completed by reset/activate message */
+ sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str);
+
strcpy(l->if_name, if_name);
l->addr = peer;
l->peer_caps = peer_caps;
@@ -501,7 +511,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
struct tipc_link *l;
if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
- 0, ownnode, peer, peer_caps, bc_sndlink,
+ 0, ownnode, peer, NULL, peer_caps, bc_sndlink,
NULL, inputq, namedq, link))
return false;
@@ -1938,11 +1948,11 @@ msg_full:
int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
struct tipc_link *link, int nlflags)
{
- int err;
- void *hdr;
+ u32 self = tipc_own_addr(net);
struct nlattr *attrs;
struct nlattr *prop;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ void *hdr;
+ int err;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
nlflags, TIPC_NL_LINK_GET);
@@ -1955,8 +1965,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST,
- tipc_cluster_mask(tn->own_addr)))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self)))
goto attr_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
goto attr_msg_full;
@@ -2126,7 +2135,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
+ if (link_is_up(l))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
diff --git a/net/tipc/link.h b/net/tipc/link.h
index d1bd1787a768..ec59348a81e8 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -73,8 +73,8 @@ enum {
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
- int window, u32 session, u32 ownnode, u32 peer,
- u16 peer_caps,
+ int window, u32 session, u32 ownnode,
+ u32 peer, u8 *peer_id, u16 peer_caps,
struct tipc_link *bc_sndlink,
struct tipc_link *bc_rcvlink,
struct sk_buff_head *inputq,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 4e1c6f6450bb..b6c45dccba3d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -580,7 +580,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
msg = buf_msg(skb);
if (msg_reroute_cnt(msg))
return false;
- dnode = addr_domain(net, msg_lookup_scope(msg));
+ dnode = tipc_scope2node(net, msg_lookup_scope(msg));
dport = tipc_nametbl_translate(net, msg_nametype(msg),
msg_nameinst(msg), &dnode);
if (!dport)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index b4ba1b4f9ae7..a4e944d59394 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -550,6 +550,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
*/
#define DSC_REQ_MSG 0
#define DSC_RESP_MSG 1
+#define DSC_TRIAL_MSG 2
+#define DSC_TRIAL_FAIL_MSG 3
/*
* Group protocol message types
@@ -627,7 +629,6 @@ static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n)
msg_set_bits(m, 2, 0, 0xffff, n);
}
-
/*
* Word 4
*/
@@ -925,6 +926,26 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
}
+static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
+{
+ return msg_word(m, 14);
+}
+
+static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n)
+{
+ msg_set_word(m, 14, n);
+}
+
+static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id)
+{
+ memcpy(msg_data(hdr), id, 16);
+}
+
+static inline u8 *msg_node_id(struct tipc_msg *hdr)
+{
+ return (u8 *)msg_data(hdr);
+}
+
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
bool tipc_msg_validate(struct sk_buff **_skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
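
The new accessors place the suggested 32-bit address in header word 14 and the 128-bit node identity in the first 16 bytes of the data area. A toy model of that layout, using host byte order for brevity (the kernel's msg_word()/msg_set_word() handle endianness, and MAX_H_SIZE is the 60-byte maximum TIPC header):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_H_SIZE	60	/* max TIPC header size, bytes */
#define NODE_ID_LEN	16

/* Toy discovery message: 15 header words, then the data area that
 * msg_node_id()/msg_set_node_id() treat as a 16-byte identity. */
struct disc_msg {
	uint32_t hdr[MAX_H_SIZE / 4];
	uint8_t node_id[NODE_ID_LEN];
};

static void set_sugg_node_addr(struct disc_msg *m, uint32_t a)
{
	m->hdr[14] = a;		/* word 14, as in msg_set_sugg_node_addr() */
}

static uint32_t sugg_node_addr(const struct disc_msg *m)
{
	return m->hdr[14];
}

int main(void)
{
	struct disc_msg m = {0};

	set_sugg_node_addr(&m, 0x1001007);
	memcpy(m.node_id, "node-17", 8);
	printf("suggested %x, id %s\n", sugg_node_addr(&m),
	       (char *)m.node_id);
	return 0;
}
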
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 23f8899e0f8c..8240a85b0d0c 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
i->type = htonl(p->type);
i->lower = htonl(p->lower);
i->upper = htonl(p->upper);
- i->ref = htonl(p->ref);
+ i->port = htonl(p->port);
i->key = htonl(p->key);
}
@@ -68,14 +68,14 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
u32 dest)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
+ u32 self = tipc_own_addr(net);
struct tipc_msg *msg;
if (buf != NULL) {
msg = buf_msg(buf);
- tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type,
- INT_H_SIZE, dest);
+ tipc_msg_init(self, msg, NAME_DISTRIBUTOR,
+ type, INT_H_SIZE, dest);
msg_set_size(msg, INT_H_SIZE + size);
}
return buf;
@@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
*/
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff *buf;
+ struct name_table *nt = tipc_name_table(net);
struct distr_item *item;
+ struct sk_buff *skb;
- list_add_tail_rcu(&publ->local_list,
- &tn->nametbl->publ_list[publ->scope]);
-
- if (publ->scope == TIPC_NODE_SCOPE)
+ if (publ->scope == TIPC_NODE_SCOPE) {
+ list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
return NULL;
+ }
+ list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope);
- buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Publication distribution failure\n");
return NULL;
}
- item = (struct distr_item *)msg_data(buf_msg(buf));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
- return buf;
+ return skb;
}
/**
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
struct sk_buff *buf;
struct distr_item *item;
- list_del(&publ->local_list);
+ list_del(&publ->binding_node);
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
- list_for_each_entry(publ, pls, local_list) {
+ list_for_each_entry(publ, pls, binding_node) {
/* Prepare next buffer: */
if (!skb) {
skb = named_prepare_buf(net, PUBLICATION, msg_rem,
@@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
*/
void tipc_named_node_up(struct net *net, u32 dnode)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
struct sk_buff_head head;
__skb_queue_head_init(&head);
rcu_read_lock();
- named_distribute(net, &head, dnode,
- &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
- named_distribute(net, &head, dnode,
- &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
+ named_distribute(net, &head, dnode, &nt->cluster_scope);
rcu_read_unlock();
tipc_node_xmit(net, &head, dnode, 0);
@@ -212,15 +209,15 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
spin_lock_bh(&tn->nametbl_lock);
p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
- publ->node, publ->ref, publ->key);
+ publ->node, publ->port, publ->key);
if (p)
- tipc_node_unsubscribe(net, &p->nodesub_list, addr);
+ tipc_node_unsubscribe(net, &p->binding_node, addr);
spin_unlock_bh(&tn->nametbl_lock);
if (p != publ) {
pr_err("Unable to remove publication from failed node\n"
- " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
- publ->type, publ->lower, publ->node, publ->ref,
+ " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
+ publ->type, publ->lower, publ->node, publ->port,
publ->key);
}
@@ -249,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
{
struct publication *publ, *tmp;
- list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list)
+ list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
tipc_publ_purge(net, publ, addr);
tipc_dist_queue_purge(net, addr);
}
@@ -271,18 +268,18 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
ntohl(i->lower),
ntohl(i->upper),
TIPC_CLUSTER_SCOPE, node,
- ntohl(i->ref), ntohl(i->key));
+ ntohl(i->port), ntohl(i->key));
if (publ) {
- tipc_node_subscribe(net, &publ->nodesub_list, node);
+ tipc_node_subscribe(net, &publ->binding_node, node);
return true;
}
} else if (dtype == WITHDRAWAL) {
publ = tipc_nametbl_remove_publ(net, ntohl(i->type),
ntohl(i->lower),
- node, ntohl(i->ref),
+ node, ntohl(i->port),
ntohl(i->key));
if (publ) {
- tipc_node_unsubscribe(net, &publ->nodesub_list, node);
+ tipc_node_unsubscribe(net, &publ->binding_node, node);
kfree_rcu(publ, rcu);
return true;
}
@@ -321,7 +318,6 @@ void tipc_named_process_backlog(struct net *net)
{
struct distr_queue_item *e, *tmp;
struct tipc_net *tn = net_generic(net, tipc_net_id);
- char addr[16];
unsigned long now = get_jiffies_64();
list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) {
@@ -329,12 +325,11 @@ void tipc_named_process_backlog(struct net *net)
if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype))
continue;
} else {
- tipc_addr_string_fill(addr, e->node);
- pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n",
+ pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %x key=%u\n",
e->dtype, ntohl(e->i.type),
ntohl(e->i.lower),
ntohl(e->i.upper),
- addr, ntohl(e->i.key));
+ e->node, ntohl(e->i.key));
}
list_del(&e->next);
kfree(e);
@@ -382,16 +377,17 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
*/
void tipc_named_reinit(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
struct publication *publ;
- int scope;
+ u32 self = tipc_own_addr(net);
spin_lock_bh(&tn->nametbl_lock);
- for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
- list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope],
- local_list)
- publ->node = tn->own_addr;
+ list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
+ publ->node = self;
+ list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
+ publ->node = self;
spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 1264ba0af937..4753e628d7c4 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -63,7 +63,7 @@ struct distr_item {
__be32 type;
__be32 lower;
__be32 upper;
- __be32 ref;
+ __be32 port;
__be32 key;
};
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ed0457cc99d6..4359605b1bec 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1,7 +1,7 @@
/*
* net/tipc/name_table.c: TIPC name table code
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2008, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -50,24 +50,12 @@
/**
* struct name_info - name sequence publication info
- * @node_list: circular list of publications made by own node
- * @cluster_list: circular list of publications made by own cluster
- * @zone_list: circular list of publications made by own zone
- * @node_list_size: number of entries in "node_list"
- * @cluster_list_size: number of entries in "cluster_list"
- * @zone_list_size: number of entries in "zone_list"
- *
- * Note: The zone list always contains at least one entry, since all
- * publications of the associated name sequence belong to it.
- * (The cluster and node lists may be empty.)
+ * @local_publ: list of publications on own node of this <type,lower,upper>
+ * @all_publ: list of all publications of this <type,lower,upper>
*/
struct name_info {
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
- u32 node_list_size;
- u32 cluster_list_size;
- u32 zone_list_size;
+ struct list_head local_publ;
+ struct list_head all_publ;
};
/**
@@ -114,7 +102,7 @@ static int hash(int x)
* publ_create - create a publication structure
*/
static struct publication *publ_create(u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 port_ref,
+ u32 scope, u32 node, u32 port,
u32 key)
{
struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
@@ -128,9 +116,9 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
publ->upper = upper;
publ->scope = scope;
publ->node = node;
- publ->ref = port_ref;
+ publ->port = port;
publ->key = key;
- INIT_LIST_HEAD(&publ->pport_list);
+ INIT_LIST_HEAD(&publ->binding_sock);
return publ;
}
@@ -249,9 +237,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
info = sseq->info;
/* Check if an identical publication already exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->ref == port) && (publ->key == key) &&
- (!publ->node || (publ->node == node)))
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
+ if (publ->port == port && publ->key == key &&
+ (!publ->node || publ->node == node))
return NULL;
}
} else {
@@ -290,9 +278,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
return NULL;
}
- INIT_LIST_HEAD(&info->node_list);
- INIT_LIST_HEAD(&info->cluster_list);
- INIT_LIST_HEAD(&info->zone_list);
+ INIT_LIST_HEAD(&info->local_publ);
+ INIT_LIST_HEAD(&info->all_publ);
/* Insert new sub-sequence */
sseq = &nseq->sseqs[inspos];
@@ -311,25 +298,17 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
if (!publ)
return NULL;
- list_add(&publ->zone_list, &info->zone_list);
- info->zone_list_size++;
-
- if (in_own_cluster(net, node)) {
- list_add(&publ->cluster_list, &info->cluster_list);
- info->cluster_list_size++;
- }
+ list_add(&publ->all_publ, &info->all_publ);
- if (in_own_node(net, node)) {
- list_add(&publ->node_list, &info->node_list);
- info->node_list_size++;
- }
+ if (in_own_node(net, node))
+ list_add(&publ->local_publ, &info->local_publ);
/* Any subscriptions waiting for notification? */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
- TIPC_PUBLISHED, publ->ref,
- publ->node, publ->scope,
- created_subseq);
+ tipc_sub_report_overlap(s, publ->lower, publ->upper,
+ TIPC_PUBLISHED, publ->port,
+ publ->node, publ->scope,
+ created_subseq);
}
return publ;
}
@@ -348,7 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
static struct publication *tipc_nameseq_remove_publ(struct net *net,
struct name_seq *nseq,
u32 inst, u32 node,
- u32 ref, u32 key)
+ u32 port, u32 key)
{
struct publication *publ;
struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
@@ -363,32 +342,20 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net,
info = sseq->info;
/* Locate publication, if it exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->key == key) && (publ->ref == ref) &&
- (!publ->node || (publ->node == node)))
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
+ if (publ->key == key && publ->port == port &&
+ (!publ->node || publ->node == node))
goto found;
}
return NULL;
found:
- /* Remove publication from zone scope list */
- list_del(&publ->zone_list);
- info->zone_list_size--;
-
- /* Remove publication from cluster scope list, if present */
- if (in_own_cluster(net, node)) {
- list_del(&publ->cluster_list);
- info->cluster_list_size--;
- }
-
- /* Remove publication from node scope list, if present */
- if (in_own_node(net, node)) {
- list_del(&publ->node_list);
- info->node_list_size--;
- }
+ list_del(&publ->all_publ);
+ if (in_own_node(net, node))
+ list_del(&publ->local_publ);
/* Contract subseq list if no more publications for that subseq */
- if (list_empty(&info->zone_list)) {
+ if (list_empty(&info->all_publ)) {
kfree(info);
free = &nseq->sseqs[nseq->first_free--];
memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
@@ -397,10 +364,10 @@ found:
/* Notify any waiting subscriptions */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
- TIPC_WITHDRAWN, publ->ref,
- publ->node, publ->scope,
- removed_subseq);
+ tipc_sub_report_overlap(s, publ->lower, publ->upper,
+ TIPC_WITHDRAWN, publ->port,
+ publ->node, publ->scope,
+ removed_subseq);
}
return publ;
@@ -412,33 +379,38 @@ found:
* sequence overlapping with the requested sequence
*/
static void tipc_nameseq_subscribe(struct name_seq *nseq,
- struct tipc_subscription *s,
- bool status)
+ struct tipc_subscription *sub)
{
struct sub_seq *sseq = nseq->sseqs;
struct tipc_name_seq ns;
+ struct tipc_subscr *s = &sub->evt.s;
+ bool no_status;
- tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ ns.type = tipc_sub_read(s, seq.type);
+ ns.lower = tipc_sub_read(s, seq.lower);
+ ns.upper = tipc_sub_read(s, seq.upper);
+ no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS;
- tipc_subscrp_get(s);
- list_add(&s->nameseq_list, &nseq->subscriptions);
+ tipc_sub_get(sub);
+ list_add(&sub->nameseq_list, &nseq->subscriptions);
- if (!status || !sseq)
+ if (no_status || !sseq)
return;
while (sseq != &nseq->sseqs[nseq->first_free]) {
- if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
+ if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) {
struct publication *crs;
struct name_info *info = sseq->info;
int must_report = 1;
- list_for_each_entry(crs, &info->zone_list, zone_list) {
- tipc_subscrp_report_overlap(s, sseq->lower,
- sseq->upper,
- TIPC_PUBLISHED,
- crs->ref, crs->node,
- crs->scope,
- must_report);
+ list_for_each_entry(crs, &info->all_publ, all_publ) {
+ tipc_sub_report_overlap(sub, sseq->lower,
+ sseq->upper,
+ TIPC_PUBLISHED,
+ crs->port,
+ crs->node,
+ crs->scope,
+ must_report);
must_report = 0;
}
}
@@ -470,8 +442,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
struct name_seq *seq = nametbl_find_seq(net, type);
int index = hash(type);
- if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
- (lower > upper)) {
+ if (scope > TIPC_NODE_SCOPE || lower > upper) {
pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
type, lower, upper, scope);
return NULL;
@@ -490,7 +461,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
}
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
- u32 lower, u32 node, u32 ref,
+ u32 lower, u32 node, u32 port,
u32 key)
{
struct publication *publ;
@@ -500,7 +471,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
return NULL;
spin_lock_bh(&seq->lock);
- publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key);
+ publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
@@ -528,15 +499,17 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
u32 *destnode)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
+ bool legacy = tn->legacy_addr_format;
+ u32 self = tipc_own_addr(net);
struct sub_seq *sseq;
struct name_info *info;
struct publication *publ;
struct name_seq *seq;
- u32 ref = 0;
+ u32 port = 0;
u32 node = 0;
- if (!tipc_in_scope(*destnode, tn->own_addr))
+ if (!tipc_in_scope(legacy, *destnode, self))
return 0;
rcu_read_lock();
@@ -550,55 +523,43 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
info = sseq->info;
/* Closest-First Algorithm */
- if (likely(!*destnode)) {
- if (!list_empty(&info->node_list)) {
- publ = list_first_entry(&info->node_list,
+ if (legacy && !*destnode) {
+ if (!list_empty(&info->local_publ)) {
+ publ = list_first_entry(&info->local_publ,
struct publication,
- node_list);
- list_move_tail(&publ->node_list,
- &info->node_list);
- } else if (!list_empty(&info->cluster_list)) {
- publ = list_first_entry(&info->cluster_list,
- struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list,
- &info->cluster_list);
+ local_publ);
+ list_move_tail(&publ->local_publ,
+ &info->local_publ);
} else {
- publ = list_first_entry(&info->zone_list,
+ publ = list_first_entry(&info->all_publ,
struct publication,
- zone_list);
- list_move_tail(&publ->zone_list,
- &info->zone_list);
+ all_publ);
+ list_move_tail(&publ->all_publ,
+ &info->all_publ);
}
}
/* Round-Robin Algorithm */
- else if (*destnode == tn->own_addr) {
- if (list_empty(&info->node_list))
- goto no_match;
- publ = list_first_entry(&info->node_list, struct publication,
- node_list);
- list_move_tail(&publ->node_list, &info->node_list);
- } else if (in_own_cluster_exact(net, *destnode)) {
- if (list_empty(&info->cluster_list))
+ else if (*destnode == tipc_own_addr(net)) {
+ if (list_empty(&info->local_publ))
goto no_match;
- publ = list_first_entry(&info->cluster_list, struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list, &info->cluster_list);
+ publ = list_first_entry(&info->local_publ, struct publication,
+ local_publ);
+ list_move_tail(&publ->local_publ, &info->local_publ);
} else {
- publ = list_first_entry(&info->zone_list, struct publication,
- zone_list);
- list_move_tail(&publ->zone_list, &info->zone_list);
+ publ = list_first_entry(&info->all_publ, struct publication,
+ all_publ);
+ list_move_tail(&publ->all_publ, &info->all_publ);
}
- ref = publ->ref;
+ port = publ->port;
node = publ->node;
no_match:
spin_unlock_bh(&seq->lock);
not_found:
rcu_read_unlock();
*destnode = node;
- return ref;
+ return port;
}
bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
@@ -620,16 +581,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
sseq = nameseq_find_subseq(seq, instance);
if (likely(sseq)) {
info = sseq->info;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
if (publ->scope != scope)
continue;
- if (publ->ref == exclude && publ->node == self)
+ if (publ->port == exclude && publ->node == self)
continue;
- tipc_dest_push(dsts, publ->node, publ->ref);
+ tipc_dest_push(dsts, publ->node, publ->port);
(*dstcnt)++;
if (all)
continue;
- list_move_tail(&publ->zone_list, &info->zone_list);
+ list_move_tail(&publ->all_publ, &info->all_publ);
break;
}
}
@@ -639,15 +600,14 @@ exit:
return !list_empty(dsts);
}
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports)
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports)
{
struct sub_seq *sseq_stop;
struct name_info *info;
struct publication *p;
struct name_seq *seq;
struct sub_seq *sseq;
- int res = 0;
rcu_read_lock();
seq = nametbl_find_seq(net, type);
@@ -661,18 +621,14 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
if (sseq->lower > upper)
break;
info = sseq->info;
- list_for_each_entry(p, &info->node_list, node_list) {
+ list_for_each_entry(p, &info->local_publ, local_publ) {
if (p->scope == scope || (!exact && p->scope < scope))
- tipc_dest_push(dports, 0, p->ref);
+ tipc_dest_push(dports, 0, p->port);
}
-
- if (info->cluster_list_size != info->node_list_size)
- res = 1;
}
spin_unlock_bh(&seq->lock);
exit:
rcu_read_unlock();
- return res;
}
/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
@@ -697,7 +653,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
stop = seq->sseqs + seq->first_free;
for (; sseq != stop && sseq->lower <= upper; sseq++) {
info = sseq->info;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
tipc_nlist_add(nodes, publ->node);
}
}
@@ -726,10 +682,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
stop = seq->sseqs + seq->first_free;
for (; sseq != stop; sseq++) {
info = sseq->info;
- list_for_each_entry(p, &info->zone_list, zone_list) {
+ list_for_each_entry(p, &info->all_publ, all_publ) {
if (p->scope != scope)
continue;
- tipc_group_add_member(grp, p->node, p->ref, p->lower);
+ tipc_group_add_member(grp, p->node, p->port, p->lower);
}
}
spin_unlock_bh(&seq->lock);
@@ -757,7 +713,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
}
publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
- tn->own_addr, port_ref, key);
+ tipc_own_addr(net), port_ref, key);
if (likely(publ)) {
tn->nametbl->local_publ_count++;
buf = tipc_named_publish(net, publ);
@@ -774,7 +730,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
/**
* tipc_nametbl_withdraw - withdraw name publication from network name tables
*/
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port,
u32 key)
{
struct publication *publ;
@@ -782,19 +738,19 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
struct tipc_net *tn = net_generic(net, tipc_net_id);
spin_lock_bh(&tn->nametbl_lock);
- publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr,
- ref, key);
+ publ = tipc_nametbl_remove_publ(net, type, lower, tipc_own_addr(net),
+ port, key);
if (likely(publ)) {
tn->nametbl->local_publ_count--;
skb = tipc_named_withdraw(net, publ);
/* Any pending external events? */
tipc_named_process_backlog(net);
- list_del_init(&publ->pport_list);
+ list_del_init(&publ->binding_sock);
kfree_rcu(publ, rcu);
} else {
pr_err("Unable to remove local publication\n"
- "(type=%u, lower=%u, ref=%u, key=%u)\n",
- type, lower, ref, key);
+ "(type=%u, lower=%u, port=%u, key=%u)\n",
+ type, lower, port, key);
}
spin_unlock_bh(&tn->nametbl_lock);
@@ -808,24 +764,27 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
+void tipc_nametbl_subscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(s->net, tipc_net_id);
- u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+ struct tipc_net *tn = tipc_net(sub->net);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 type = tipc_sub_read(s, seq.type);
int index = hash(type);
struct name_seq *seq;
struct tipc_name_seq ns;
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, type);
+ seq = nametbl_find_seq(sub->net, type);
if (!seq)
seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
if (seq) {
spin_lock_bh(&seq->lock);
- tipc_nameseq_subscribe(seq, s, status);
+ tipc_nameseq_subscribe(seq, sub);
spin_unlock_bh(&seq->lock);
} else {
- tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ ns.type = tipc_sub_read(s, seq.type);
+ ns.lower = tipc_sub_read(s, seq.lower);
+ ns.upper = tipc_sub_read(s, seq.upper);
pr_warn("Failed to create subscription for {%u,%u,%u}\n",
ns.type, ns.lower, ns.upper);
}
@@ -835,18 +794,19 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
/**
* tipc_nametbl_unsubscribe - remove a subscription object from name table
*/
-void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
+void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(s->net, tipc_net_id);
+ struct tipc_subscr *s = &sub->evt.s;
+ struct tipc_net *tn = tipc_net(sub->net);
struct name_seq *seq;
- u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+ u32 type = tipc_sub_read(s, seq.type);
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, type);
+ seq = nametbl_find_seq(sub->net, type);
if (seq != NULL) {
spin_lock_bh(&seq->lock);
- list_del_init(&s->nameseq_list);
- tipc_subscrp_put(s);
+ list_del_init(&sub->nameseq_list);
+ tipc_sub_put(sub);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
@@ -872,9 +832,8 @@ int tipc_nametbl_init(struct net *net)
for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
+ INIT_LIST_HEAD(&tipc_nametbl->node_scope);
+ INIT_LIST_HEAD(&tipc_nametbl->cluster_scope);
tn->nametbl = tipc_nametbl;
spin_lock_init(&tn->nametbl_lock);
return 0;
@@ -894,9 +853,9 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq)
spin_lock_bh(&seq->lock);
sseq = seq->sseqs;
info = sseq->info;
- list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
+ list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) {
tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
- publ->ref, publ->key);
+ publ->port, publ->key);
kfree_rcu(publ, rcu);
}
hlist_del_init_rcu(&seq->ns_list);
@@ -943,17 +902,17 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
struct publication *p;
if (*last_publ) {
- list_for_each_entry(p, &sseq->info->zone_list, zone_list)
+ list_for_each_entry(p, &sseq->info->all_publ, all_publ)
if (p->key == *last_publ)
break;
if (p->key != *last_publ)
return -EPIPE;
} else {
- p = list_first_entry(&sseq->info->zone_list, struct publication,
- zone_list);
+ p = list_first_entry(&sseq->info->all_publ, struct publication,
+ all_publ);
}
- list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) {
+ list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) {
*last_publ = p->key;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
@@ -980,7 +939,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
goto publ_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
goto publ_msg_full;
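
tipc_nametbl_translate() above implements both lookup algorithms the same way: take the first publication on the relevant list, then rotate it to the tail with list_move_tail() so the next lookup returns the next entry. A minimal userspace sketch of that rotation over an all_publ-style circular list (struct publ and pick_round_robin are illustrative names, not kernel API):

	#include <stdio.h>

	struct publ {
		unsigned int port;
		struct publ *prev, *next;        /* circular, like struct list_head */
	};

	static void list_move_tail(struct publ *head, struct publ *e)
	{
		/* unlink e */
		e->prev->next = e->next;
		e->next->prev = e->prev;
		/* re-insert just before head, i.e. at the tail */
		e->prev = head->prev;
		e->next = head;
		head->prev->next = e;
		head->prev = e;
	}

	static unsigned int pick_round_robin(struct publ *head)
	{
		struct publ *first = head->next;     /* list_first_entry() */

		list_move_tail(head, first);         /* rotate for next caller */
		return first->port;
	}

	int main(void)
	{
		struct publ head = { 0, &head, &head };
		struct publ a = { 1001 }, b = { 1002 }, c = { 1003 };
		struct publ *all[] = { &a, &b, &c };

		for (int i = 0; i < 3; i++) {        /* list_add_tail() each publ */
			all[i]->prev = head.prev;
			all[i]->next = &head;
			head.prev->next = all[i];
			head.prev = all[i];
		}
		for (int i = 0; i < 5; i++)
			printf("lookup %d -> port %u\n", i, pick_round_robin(&head));
		return 0;
	}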
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index f56e7cb3d436..34a4ccb907aa 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -1,7 +1,7 @@
/*
* net/tipc/name_table.h: Include file for TIPC name table code
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -54,19 +54,22 @@ struct tipc_group;
* @type: name sequence type
* @lower: name sequence lower bound
* @upper: name sequence upper bound
- * @scope: scope of publication
- * @node: network address of publishing port's node
- * @ref: publishing port
- * @key: publication key
- * @nodesub_list: subscription to "node down" event (off-node publication only)
- * @local_list: adjacent entries in list of publications made by this node
- * @pport_list: adjacent entries in list of publications made by this port
- * @node_list: adjacent matching name seq publications with >= node scope
- * @cluster_list: adjacent matching name seq publications with >= cluster scope
- * @zone_list: adjacent matching name seq publications with >= zone scope
+ * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
+ * @node: network address of publishing socket's node
+ * @port: publishing port
+ * @key: publication key, unique across the cluster
+ * @binding_node: all publications from the same node which bound this one
+ * - Remote publications: in node->publ_list
+ * Used by node/name distr to withdraw publications when node is lost
+ * - Local/node scope publications: in name_table->node_scope list
+ * - Local/cluster scope publications: in name_table->cluster_scope list
+ * @binding_sock: all publications from the same socket which bound this one
+ * Used by socket to withdraw publications when socket is unbound/released
+ * @local_publ: list of identical publications made from this node
+ * Used by closest_first and multicast receive lookup algorithms
+ * @all_publ: all publications identical to this one, whatever node and scope
+ * Used by round-robin lookup algorithm
* @rcu: RCU callback head used for deferred freeing
- *
- * Note that the node list, cluster list, and zone list are circular lists.
*/
struct publication {
u32 type;
@@ -74,34 +77,37 @@ struct publication {
u32 upper;
u32 scope;
u32 node;
- u32 ref;
+ u32 port;
u32 key;
- struct list_head nodesub_list;
- struct list_head local_list;
- struct list_head pport_list;
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
+ struct list_head binding_node;
+ struct list_head binding_sock;
+ struct list_head local_publ;
+ struct list_head all_publ;
struct rcu_head rcu;
};
/**
* struct name_table - table containing all existing port name publications
* @seq_hlist: name sequence hash lists
- * @publ_list: publication lists
+ * @node_scope: all local publications with node scope
+ * - used by name_distr during re-init of name table
+ * @cluster_scope: all local publications with cluster scope
+ * - used by name_distr to send bulk updates to new nodes
+ * - used by name_distr during re-init of name table
* @local_publ_count: number of publications issued by this node
*/
struct name_table {
struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
- struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+ struct list_head node_scope;
+ struct list_head cluster_scope;
u32 local_publ_count;
};
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports);
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports);
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
@@ -120,7 +126,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
u32 lower, u32 node, u32 ref,
u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
+void tipc_nametbl_subscribe(struct tipc_subscription *s);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
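
struct publication in the header above now threads through four lists at once via embedded list_head fields (binding_node, binding_sock, local_publ, all_publ), and each list walker recovers the enclosing publication with container_of(). A small userspace sketch of two such memberships, assuming a hand-rolled list type in place of the kernel's:

	#include <stddef.h>
	#include <stdio.h>

	struct link { struct link *prev, *next; };

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	static void link_init(struct link *h) { h->prev = h->next = h; }

	static void link_add(struct link *h, struct link *e)
	{
		e->prev = h->prev; e->next = h;
		h->prev->next = e; h->prev = e;
	}

	struct publication {
		unsigned int port;
		struct link binding_sock;   /* one chain per owning socket */
		struct link all_publ;       /* one chain per published name */
	};

	int main(void)
	{
		struct link sock_head, name_head;
		struct publication p = { .port = 42 };

		link_init(&sock_head);
		link_init(&name_head);
		link_add(&sock_head, &p.binding_sock);
		link_add(&name_head, &p.all_publ);

		/* container_of() recovers the publication from whichever
		 * embedded link the list happens to thread through. */
		for (struct link *l = sock_head.next; l != &sock_head; l = l->next)
			printf("via socket list: port %u\n",
			       container_of(l, struct publication, binding_sock)->port);
		for (struct link *l = name_head.next; l != &name_head; l = l->next)
			printf("via name list:   port %u\n",
			       container_of(l, struct publication, all_publ)->port);
		return 0;
	}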
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 719c5924b638..29538dc00857 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -104,38 +104,39 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
-int tipc_net_start(struct net *net, u32 addr)
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- char addr_string[16];
+ if (tipc_own_id(net)) {
+ pr_info("Cannot configure node identity twice\n");
+ return -1;
+ }
+ pr_info("Started in network mode\n");
- tn->own_addr = addr;
+ if (node_id)
+ tipc_set_node_id(net, node_id);
+ if (addr)
+ tipc_net_finalize(net, addr);
+ return 0;
+}
- /* Ensure that the new address is visible before we reinit. */
+void tipc_net_finalize(struct net *net, u32 addr)
+{
+ tipc_set_node_addr(net, addr);
smp_mb();
-
tipc_named_reinit(net);
tipc_sk_reinit(net);
-
- tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
- TIPC_ZONE_SCOPE, 0, tn->own_addr);
-
- pr_info("Started in network mode\n");
- pr_info("Own node address %s, network identity %u\n",
- tipc_addr_string_fill(addr_string, tn->own_addr),
- tn->net_id);
- return 0;
+ tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+ TIPC_CLUSTER_SCOPE, 0, addr);
}
void tipc_net_stop(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ u32 self = tipc_own_addr(net);
- if (!tn->own_addr)
+ if (!self)
return;
- tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0,
- tn->own_addr);
+ tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, 0, self);
rtnl_lock();
tipc_bearer_stop(net);
tipc_node_stop(net);
@@ -147,8 +148,10 @@ void tipc_net_stop(struct net *net)
static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- void *hdr;
+ u64 *w0 = (u64 *)&tn->node_id[0];
+ u64 *w1 = (u64 *)&tn->node_id[8];
struct nlattr *attrs;
+ void *hdr;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
NLM_F_MULTI, TIPC_NL_NET_GET);
@@ -161,7 +164,10 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg)
if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id))
goto attr_msg_full;
-
+ if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID, *w0, 0))
+ goto attr_msg_full;
+ if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID_W1, *w1, 0))
+ goto attr_msg_full;
nla_nest_end(msg->skb, attrs);
genlmsg_end(msg->skb, hdr);
@@ -200,11 +206,11 @@ out:
return skb->len;
}
-int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
{
- struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
int err;
if (!info->attrs[TIPC_NLA_NET])
@@ -213,16 +219,17 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
info->extack);
+
if (err)
return err;
+ /* Can't change net id once TIPC has joined a network */
+ if (tipc_own_addr(net))
+ return -EPERM;
+
if (attrs[TIPC_NLA_NET_ID]) {
u32 val;
- /* Can't change net id once TIPC has joined a network */
- if (tn->own_addr)
- return -EPERM;
-
val = nla_get_u32(attrs[TIPC_NLA_NET_ID]);
if (val < 1 || val > 9999)
return -EINVAL;
@@ -233,18 +240,32 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
if (attrs[TIPC_NLA_NET_ADDR]) {
u32 addr;
- /* Can't change net addr once TIPC has joined a network */
- if (tn->own_addr)
- return -EPERM;
-
addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
- if (!tipc_addr_node_valid(addr))
+ if (!addr)
return -EINVAL;
-
- rtnl_lock();
- tipc_net_start(net, addr);
- rtnl_unlock();
+ tn->legacy_addr_format = true;
+ tipc_net_init(net, NULL, addr);
}
+ if (attrs[TIPC_NLA_NET_NODEID]) {
+ u8 node_id[NODE_ID_LEN];
+ u64 *w0 = (u64 *)&node_id[0];
+ u64 *w1 = (u64 *)&node_id[8];
+
+ *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
+ *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
+ tipc_net_init(net, node_id, 0);
+ }
return 0;
}
+
+int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_net_set(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
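
The netlink code above moves the 128-bit node identity as two u64 attributes (TIPC_NLA_NET_NODEID and TIPC_NLA_NET_NODEID_W1), reading them straight out of the 16-byte id buffer. A userspace sketch of the same split and reassembly, using memcpy() instead of the kernel's pointer casts to stay aliasing-clean:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define NODE_ID_LEN 16

	int main(void)
	{
		uint8_t id[NODE_ID_LEN] = "tipc-node-id-01"; /* 15 chars + NUL */
		uint64_t w0, w1;
		uint8_t out[NODE_ID_LEN];

		/* split into the two netlink words, as the dump side does */
		memcpy(&w0, &id[0], 8);
		memcpy(&w1, &id[8], 8);

		/* reassemble, as the NODEID/NODEID_W1 receiver would */
		memcpy(&out[0], &w0, 8);
		memcpy(&out[8], &w1, 8);

		printf("round-trip %s\n",
		       memcmp(id, out, NODE_ID_LEN) ? "failed" : "ok");
		return 0;
	}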
diff --git a/net/tipc/net.h b/net/tipc/net.h
index c7c254902873..09ad02b50bb1 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -41,11 +41,11 @@
extern const struct nla_policy tipc_nl_net_policy[];
-int tipc_net_start(struct net *net, u32 addr);
-
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
+void tipc_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
-
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
#endif
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index e48f0b2c01b9..4492cda45566 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -285,10 +285,6 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
if (!trans_buf)
return -ENOMEM;
- err = (*cmd->transcode)(cmd, trans_buf, msg);
- if (err)
- goto trans_out;
-
attrbuf = kmalloc((tipc_genl_family.maxattr + 1) *
sizeof(struct nlattr *), GFP_KERNEL);
if (!attrbuf) {
@@ -296,27 +292,34 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
goto trans_out;
}
- err = nla_parse(attrbuf, tipc_genl_family.maxattr,
- (const struct nlattr *)trans_buf->data,
- trans_buf->len, NULL, NULL);
- if (err)
- goto parse_out;
-
doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!doit_buf) {
err = -ENOMEM;
- goto parse_out;
+ goto attrbuf_out;
}
- doit_buf->sk = msg->dst_sk;
-
memset(&info, 0, sizeof(info));
info.attrs = attrbuf;
+ rtnl_lock();
+ err = (*cmd->transcode)(cmd, trans_buf, msg);
+ if (err)
+ goto doit_out;
+
+ err = nla_parse(attrbuf, tipc_genl_family.maxattr,
+ (const struct nlattr *)trans_buf->data,
+ trans_buf->len, NULL, NULL);
+ if (err)
+ goto doit_out;
+
+ doit_buf->sk = msg->dst_sk;
+
err = (*cmd->doit)(doit_buf, &info);
+doit_out:
+ rtnl_unlock();
kfree_skb(doit_buf);
-parse_out:
+attrbuf_out:
kfree(attrbuf);
trans_out:
kfree_skb(trans_buf);
@@ -722,13 +725,13 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
media = tipc_media_find(lc->name);
if (media) {
- cmd->doit = &tipc_nl_media_set;
+ cmd->doit = &__tipc_nl_media_set;
return tipc_nl_compat_media_set(skb, msg);
}
bearer = tipc_bearer_find(msg->net, lc->name);
if (bearer) {
- cmd->doit = &tipc_nl_bearer_set;
+ cmd->doit = &__tipc_nl_bearer_set;
return tipc_nl_compat_bearer_set(skb, msg);
}
@@ -1089,12 +1092,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
return tipc_nl_compat_dumpit(&dump, msg);
case TIPC_CMD_ENABLE_BEARER:
msg->req_type = TIPC_TLV_BEARER_CONFIG;
- doit.doit = tipc_nl_bearer_enable;
+ doit.doit = __tipc_nl_bearer_enable;
doit.transcode = tipc_nl_compat_bearer_enable;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_DISABLE_BEARER:
msg->req_type = TIPC_TLV_BEARER_NAME;
- doit.doit = tipc_nl_bearer_disable;
+ doit.doit = __tipc_nl_bearer_disable;
doit.transcode = tipc_nl_compat_bearer_disable;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_SHOW_LINK_STATS:
@@ -1148,12 +1151,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
return tipc_nl_compat_dumpit(&dump, msg);
case TIPC_CMD_SET_NODE_ADDR:
msg->req_type = TIPC_TLV_NET_ADDR;
- doit.doit = tipc_nl_net_set;
+ doit.doit = __tipc_nl_net_set;
doit.transcode = tipc_nl_compat_net_set;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_SET_NETID:
msg->req_type = TIPC_TLV_UNSIGNED;
- doit.doit = tipc_nl_net_set;
+ doit.doit = __tipc_nl_net_set;
doit.transcode = tipc_nl_compat_net_set;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_GET_NETID:
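
The compat changes above follow the common locked/unlocked split: tipc_nl_net_set() takes rtnl_lock and calls __tipc_nl_net_set(), while the compat path, which already holds rtnl_lock around transcode, parse and doit, dispatches to the __-prefixed variant directly. A pthread-based sketch of the pattern (names are illustrative; the double-underscore prefix merely mirrors kernel convention):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

	/* __-prefixed worker: caller must hold big_lock (rtnl_lock above) */
	static int __net_set(int val)
	{
		printf("applying %d under lock\n", val);
		return 0;
	}

	/* public entry point: takes the lock itself */
	static int net_set(int val)
	{
		pthread_mutex_lock(&big_lock);
		int err = __net_set(val);
		pthread_mutex_unlock(&big_lock);
		return err;
	}

	int main(void)
	{
		net_set(1);                     /* normal genl path */

		pthread_mutex_lock(&big_lock);  /* compat path: one lock spans
						 * transcode, parse and doit, so
						 * it calls the unlocked worker */
		__net_set(2);
		pthread_mutex_unlock(&big_lock);
		return 0;
	}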
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9036d8756e73..4fb4327311bb 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -115,6 +115,7 @@ struct tipc_node {
u16 capabilities;
u32 signature;
u32 link_id;
+ u8 peer_id[16];
struct list_head publ_list;
struct list_head conn_sks;
unsigned long keepalive_intv;
@@ -156,6 +157,7 @@ static void tipc_node_delete(struct tipc_node *node);
static void tipc_node_timeout(struct timer_list *t);
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
static void tipc_node_put(struct tipc_node *node);
static bool node_is_up(struct tipc_node *n);
@@ -233,9 +235,6 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
struct tipc_node *node;
unsigned int thash = tipc_hashfn(addr);
- if (unlikely(!in_own_cluster_exact(net, addr)))
- return NULL;
-
rcu_read_lock();
hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
if (node->addr != addr)
@@ -248,6 +247,30 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
return node;
}
+/* tipc_node_find_by_id - locate specified node object by its 128-bit id
+ * Note: this function is called only when a discovery request failed
+ * to find the node by its 32-bit id, and is not time critical
+ */
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n;
+ bool found = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ read_lock_bh(&n->lock);
+ if (!memcmp(id, n->peer_id, 16) &&
+ kref_get_unless_zero(&n->kref))
+ found = true;
+ read_unlock_bh(&n->lock);
+ if (found)
+ break;
+ }
+ rcu_read_unlock();
+ return found ? n : NULL;
+}
+
static void tipc_node_read_lock(struct tipc_node *n)
{
read_lock_bh(&n->lock);
@@ -310,7 +333,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
}
}
-struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
+static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
+ u8 *peer_id, u16 capabilities)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *n, *temp_node;
@@ -329,6 +353,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
goto exit;
}
n->addr = addr;
+ memcpy(&n->peer_id, peer_id, 16);
n->net = net;
n->capabilities = capabilities;
kref_init(&n->kref);
@@ -347,8 +372,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr,
- U16_MAX,
+ if (!tipc_link_bc_create(net, tipc_own_addr(net),
+ addr, U16_MAX,
tipc_link_window(tipc_bc_sndlink(net)),
n->capabilities,
&n->bc_entry.inputq1,
@@ -738,8 +763,51 @@ bool tipc_node_is_up(struct net *net, u32 addr)
return retval;
}
-void tipc_node_check_dest(struct net *net, u32 onode,
- struct tipc_bearer *b,
+static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+
+ addr ^= tipc_net(net)->random;
+ while ((n = tipc_node_find(net, addr))) {
+ tipc_node_put(n);
+ addr++;
+ }
+ return addr;
+}
+
+/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ */
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n;
+
+ /* Suggest new address if some other peer is using this one */
+ n = tipc_node_find(net, addr);
+ if (n) {
+ if (!memcmp(n->peer_id, id, NODE_ID_LEN))
+ addr = 0;
+ tipc_node_put(n);
+ if (!addr)
+ return 0;
+ return tipc_node_suggest_addr(net, addr);
+ }
+
+ /* Suggest previously used address if peer is known */
+ n = tipc_node_find_by_id(net, id);
+ if (n) {
+ addr = n->addr;
+ tipc_node_put(n);
+ }
+ /* Even this node may be in trial phase */
+ if (tn->trial_addr == addr)
+ return tipc_node_suggest_addr(net, addr);
+
+ return addr;
+}
+
+void tipc_node_check_dest(struct net *net, u32 addr,
+ u8 *peer_id, struct tipc_bearer *b,
u16 capabilities, u32 signature,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr)
@@ -758,7 +826,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
*dupl_addr = false;
*respond = false;
- n = tipc_node_create(net, onode, capabilities);
+ n = tipc_node_create(net, addr, peer_id, capabilities);
if (!n)
return;
@@ -836,15 +904,14 @@ void tipc_node_check_dest(struct net *net, u32 onode,
/* Now create new link if not already existing */
if (!l) {
- if (n->link_cnt == 2) {
- pr_warn("Cannot establish 3rd link to %x\n", n->addr);
+ if (n->link_cnt == 2)
goto exit;
- }
+
if_name = strchr(b->name, ':') + 1;
if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
b->net_plane, b->mtu, b->priority,
b->window, mod(tipc_net(net)->random),
- tipc_own_addr(net), onode,
+ tipc_own_addr(net), addr, peer_id,
n->capabilities,
tipc_bc_sndlink(n->net), n->bc_entry.link,
&le->inputq,
@@ -887,11 +954,9 @@ void tipc_node_delete_links(struct net *net, int bearer_id)
static void tipc_node_reset_links(struct tipc_node *n)
{
- char addr_string[16];
int i;
- pr_warn("Resetting all links to %s\n",
- tipc_addr_string_fill(addr_string, n->addr));
+ pr_warn("Resetting all links to %x\n", n->addr);
for (i = 0; i < MAX_BEARERS; i++) {
tipc_node_link_down(n, i, false);
@@ -1078,15 +1143,13 @@ illegal_evt:
static void node_lost_contact(struct tipc_node *n,
struct sk_buff_head *inputq)
{
- char addr_string[16];
struct tipc_sock_conn *conn, *safe;
struct tipc_link *l;
struct list_head *conns = &n->conn_sks;
struct sk_buff *skb;
uint i;
- pr_debug("Lost contact with %s\n",
- tipc_addr_string_fill(addr_string, n->addr));
+ pr_debug("Lost contact with %x\n", n->addr);
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
@@ -1618,6 +1681,30 @@ discard:
kfree_skb(skb);
}
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+{
+ struct tipc_net *tn = tipc_net(net);
+ int bearer_id = b->identity;
+ struct sk_buff_head xmitq;
+ struct tipc_link_entry *e;
+ struct tipc_node *n;
+
+ __skb_queue_head_init(&xmitq);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_write_lock(n);
+ e = &n->links[bearer_id];
+ if (e->link)
+ tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+ tipc_node_write_unlock(n);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
+ }
+
+ rcu_read_unlock();
+}
+
int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
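
tipc_node_suggest_addr() above perturbs a colliding address with the per-net random value and then probes linearly until it finds one no known node is using. A standalone sketch of that probe, with addr_taken() standing in for the tipc_node_find() lookup:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define TBL 16

	static uint32_t used[TBL];
	static int nused;

	static bool addr_taken(uint32_t addr)
	{
		for (int i = 0; i < nused; i++)
			if (used[i] == addr)
				return true;
		return false;
	}

	/* mirrors tipc_node_suggest_addr(): XOR in a per-net random value,
	 * then walk forward until an unused address turns up */
	static uint32_t suggest_addr(uint32_t addr, uint32_t net_random)
	{
		addr ^= net_random;
		while (addr_taken(addr))
			addr++;
		return addr;
	}

	int main(void)
	{
		used[nused++] = 0x1001;
		used[nused++] = 0x1002;
		/* 0x2003 ^ 0x3002 = 0x1001 (taken), 0x1002 (taken) -> 0x1003 */
		printf("suggested: %#x\n", suggest_addr(0x2003, 0x3002));
		return 0;
	}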
diff --git a/net/tipc/node.h b/net/tipc/node.h
index acd58d23a70e..f24b83500df1 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -49,22 +49,25 @@ enum {
TIPC_BCAST_STATE_NACK = (1 << 2),
TIPC_BLOCK_FLOWCTL = (1 << 3),
TIPC_BCAST_RCAST = (1 << 4),
- TIPC_MCAST_GROUPS = (1 << 5)
+ TIPC_NODE_ID128 = (1 << 5)
};
#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
TIPC_BCAST_STATE_NACK | \
TIPC_BCAST_RCAST | \
- TIPC_BLOCK_FLOWCTL)
+ TIPC_BLOCK_FLOWCTL | \
+ TIPC_NODE_ID128)
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
-void tipc_node_check_dest(struct net *net, u32 onode,
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
+void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
struct tipc_bearer *bearer,
u16 capabilities, u32 signature,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
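
The node.h hunk above advertises 128-bit id support as a capability bit (TIPC_NODE_ID128) OR-ed into TIPC_NODE_CAPABILITIES; peers intersect their masks to find common features. A toy sketch of that negotiation, using a reduced illustrative flag set:

	#include <stdio.h>

	enum {
		BCAST_SYNCH   = 1 << 0,
		BLOCK_FLOWCTL = 1 << 3,
		NODE_ID128    = 1 << 5,
	};

	#define OUR_CAPABILITIES (BCAST_SYNCH | BLOCK_FLOWCTL | NODE_ID128)

	int main(void)
	{
		unsigned int peer = BCAST_SYNCH | BLOCK_FLOWCTL; /* older peer */
		unsigned int common = OUR_CAPABILITIES & peer;

		printf("peer %s 128-bit node ids\n",
		       (common & NODE_ID128) ? "supports" : "lacks");
		return 0;
	}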
diff --git a/net/tipc/server.c b/net/tipc/server.c
deleted file mode 100644
index df0c563c90cd..000000000000
--- a/net/tipc/server.c
+++ /dev/null
@@ -1,710 +0,0 @@
-/*
- * net/tipc/server.c: TIPC server infrastructure
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "server.h"
-#include "core.h"
-#include "socket.h"
-#include "addr.h"
-#include "msg.h"
-#include <net/sock.h>
-#include <linux/module.h>
-
-/* Number of messages to send before rescheduling */
-#define MAX_SEND_MSG_COUNT 25
-#define MAX_RECV_MSG_COUNT 25
-#define CF_CONNECTED 1
-#define CF_SERVER 2
-
-#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
-
-/**
- * struct tipc_conn - TIPC connection structure
- * @kref: reference counter to connection object
- * @conid: connection identifier
- * @sock: socket handler associated with connection
- * @flags: indicates connection state
- * @server: pointer to connected server
- * @rwork: receive work item
- * @usr_data: user-specified field
- * @rx_action: what to do when connection socket is active
- * @outqueue: pointer to first outbound message in queue
- * @outqueue_lock: control access to the outqueue
- * @outqueue: list of connection objects for its server
- * @swork: send work item
- */
-struct tipc_conn {
- struct kref kref;
- int conid;
- struct socket *sock;
- unsigned long flags;
- struct tipc_server *server;
- struct work_struct rwork;
- int (*rx_action) (struct tipc_conn *con);
- void *usr_data;
- struct list_head outqueue;
- spinlock_t outqueue_lock;
- struct work_struct swork;
-};
-
-/* An entry waiting to be sent */
-struct outqueue_entry {
- struct list_head list;
- struct kvec iov;
- struct sockaddr_tipc dest;
-};
-
-static void tipc_recv_work(struct work_struct *work);
-static void tipc_send_work(struct work_struct *work);
-static void tipc_clean_outqueues(struct tipc_conn *con);
-
-static void tipc_conn_kref_release(struct kref *kref)
-{
- struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct tipc_server *s = con->server;
- struct sockaddr_tipc *saddr = s->saddr;
- struct socket *sock = con->sock;
- struct sock *sk;
-
- if (sock) {
- sk = sock->sk;
- if (test_bit(CF_SERVER, &con->flags)) {
- __module_get(sock->ops->owner);
- __module_get(sk->sk_prot_creator->owner);
- }
- saddr->scope = -TIPC_NODE_SCOPE;
- kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
- sock_release(sock);
- con->sock = NULL;
- }
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
- tipc_clean_outqueues(con);
- kfree(con);
-}
-
-static void conn_put(struct tipc_conn *con)
-{
- kref_put(&con->kref, tipc_conn_kref_release);
-}
-
-static void conn_get(struct tipc_conn *con)
-{
- kref_get(&con->kref);
-}
-
-static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- spin_lock_bh(&s->idr_lock);
- con = idr_find(&s->conn_idr, conid);
- if (con) {
- if (!test_bit(CF_CONNECTED, &con->flags) ||
- !kref_get_unless_zero(&con->kref))
- con = NULL;
- }
- spin_unlock_bh(&s->idr_lock);
- return con;
-}
-
-static void sock_data_ready(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock_bh(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->rcv_wq, &con->rwork))
- conn_put(con);
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void sock_write_space(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock_bh(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->send_wq, &con->swork))
- conn_put(con);
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
-{
- struct sock *sk = sock->sk;
-
- write_lock_bh(&sk->sk_callback_lock);
-
- sk->sk_data_ready = sock_data_ready;
- sk->sk_write_space = sock_write_space;
- sk->sk_user_data = con;
-
- con->sock = sock;
-
- write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_close_conn(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct sock *sk = con->sock->sk;
- bool disconnect = false;
-
- write_lock_bh(&sk->sk_callback_lock);
- disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
- if (disconnect) {
- sk->sk_user_data = NULL;
- if (con->conid)
- s->tipc_conn_release(con->conid, con->usr_data);
- }
- write_unlock_bh(&sk->sk_callback_lock);
-
- /* Handle concurrent calls from sending and receiving threads */
- if (!disconnect)
- return;
-
- /* Don't flush pending works, just let them expire */
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
- conn_put(con);
-}
-
-static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int ret;
-
- con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
- if (!con)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&con->kref);
- INIT_LIST_HEAD(&con->outqueue);
- spin_lock_init(&con->outqueue_lock);
- INIT_WORK(&con->swork, tipc_send_work);
- INIT_WORK(&con->rwork, tipc_recv_work);
-
- spin_lock_bh(&s->idr_lock);
- ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
- if (ret < 0) {
- kfree(con);
- spin_unlock_bh(&s->idr_lock);
- return ERR_PTR(-ENOMEM);
- }
- con->conid = ret;
- s->idr_in_use++;
- spin_unlock_bh(&s->idr_lock);
-
- set_bit(CF_CONNECTED, &con->flags);
- con->server = s;
-
- return con;
-}
-
-static int tipc_receive_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct sock *sk = con->sock->sk;
- struct sockaddr_tipc addr;
- struct msghdr msg = {};
- struct kvec iov;
- void *buf;
- int ret;
-
- buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
- if (!buf) {
- ret = -ENOMEM;
- goto out_close;
- }
-
- iov.iov_base = buf;
- iov.iov_len = s->max_rcvbuf_size;
- msg.msg_name = &addr;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
- ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
- if (ret <= 0) {
- kmem_cache_free(s->rcvbuf_cache, buf);
- goto out_close;
- }
-
- read_lock_bh(&sk->sk_callback_lock);
- if (test_bit(CF_CONNECTED, &con->flags))
- ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
- &addr, con->usr_data, buf, ret);
- read_unlock_bh(&sk->sk_callback_lock);
- kmem_cache_free(s->rcvbuf_cache, buf);
- if (ret < 0)
- tipc_conn_terminate(s, con->conid);
- return ret;
-
-out_close:
- if (ret != -EWOULDBLOCK)
- tipc_close_conn(con);
- else if (ret == 0)
- /* Don't return success if we really got EOF */
- ret = -EAGAIN;
-
- return ret;
-}
-
-static int tipc_accept_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = con->sock;
- struct socket *newsock;
- struct tipc_conn *newcon;
- int ret;
-
- ret = kernel_accept(sock, &newsock, O_NONBLOCK);
- if (ret < 0)
- return ret;
-
- newcon = tipc_alloc_conn(con->server);
- if (IS_ERR(newcon)) {
- ret = PTR_ERR(newcon);
- sock_release(newsock);
- return ret;
- }
-
- newcon->rx_action = tipc_receive_from_sock;
- tipc_register_callbacks(newsock, newcon);
-
- /* Notify that new connection is incoming */
- newcon->usr_data = s->tipc_conn_new(newcon->conid);
- if (!newcon->usr_data) {
- sock_release(newsock);
- conn_put(newcon);
- return -ENOMEM;
- }
-
- /* Wake up receive process in case of 'SYN+' message */
- newsock->sk->sk_data_ready(newsock->sk);
- return ret;
-}
-
-static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = NULL;
- int ret;
-
- ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
- if (ret < 0)
- return NULL;
- ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&s->imp, sizeof(s->imp));
- if (ret < 0)
- goto create_err;
- ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
- if (ret < 0)
- goto create_err;
-
- switch (s->type) {
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- con->rx_action = tipc_accept_from_sock;
-
- ret = kernel_listen(sock, 0);
- if (ret < 0)
- goto create_err;
- break;
- case SOCK_DGRAM:
- case SOCK_RDM:
- con->rx_action = tipc_receive_from_sock;
- break;
- default:
- pr_err("Unknown socket type %d\n", s->type);
- goto create_err;
- }
-
- /* As server's listening socket owner and creator is the same module,
- * we have to decrease TIPC module reference count to guarantee that
- * it remains zero after the server socket is created, otherwise,
- * executing "rmmod" command is unable to make TIPC module deleted
- * after TIPC module is inserted successfully.
- *
- * However, the reference count is ever increased twice in
- * sock_create_kern(): one is to increase the reference count of owner
- * of TIPC socket's proto_ops struct; another is to increment the
- * reference count of owner of TIPC proto struct. Therefore, we must
- * decrement the module reference count twice to ensure that it keeps
- * zero after server's listening socket is created. Of course, we
- * must bump the module reference count twice as well before the socket
- * is closed.
- */
- module_put(sock->ops->owner);
- module_put(sock->sk->sk_prot_creator->owner);
- set_bit(CF_SERVER, &con->flags);
-
- return sock;
-
-create_err:
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sock_release(sock);
- return NULL;
-}
-
-static int tipc_open_listening_sock(struct tipc_server *s)
-{
- struct socket *sock;
- struct tipc_conn *con;
-
- con = tipc_alloc_conn(s);
- if (IS_ERR(con))
- return PTR_ERR(con);
-
- sock = tipc_create_listen_sock(con);
- if (!sock) {
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- kfree(con);
- return -EINVAL;
- }
-
- tipc_register_callbacks(sock, con);
- return 0;
-}
-
-static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
-{
- struct outqueue_entry *entry;
- void *buf;
-
- entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
- if (!entry)
- return NULL;
-
- buf = kmemdup(data, len, GFP_ATOMIC);
- if (!buf) {
- kfree(entry);
- return NULL;
- }
-
- entry->iov.iov_base = buf;
- entry->iov.iov_len = len;
-
- return entry;
-}
-
-static void tipc_free_entry(struct outqueue_entry *e)
-{
- kfree(e->iov.iov_base);
- kfree(e);
-}
-
-static void tipc_clean_outqueues(struct tipc_conn *con)
-{
- struct outqueue_entry *e, *safe;
-
- spin_lock_bh(&con->outqueue_lock);
- list_for_each_entry_safe(e, safe, &con->outqueue, list) {
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-}
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len)
-{
- struct outqueue_entry *e;
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (!con)
- return -EINVAL;
-
- if (!test_bit(CF_CONNECTED, &con->flags)) {
- conn_put(con);
- return 0;
- }
-
- e = tipc_alloc_entry(data, len);
- if (!e) {
- conn_put(con);
- return -ENOMEM;
- }
-
- if (addr)
- memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
-
- spin_lock_bh(&con->outqueue_lock);
- list_add_tail(&e->list, &con->outqueue);
- spin_unlock_bh(&con->outqueue_lock);
-
- if (!queue_work(s->send_wq, &con->swork))
- conn_put(con);
- return 0;
-}
-
-void tipc_conn_terminate(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (con) {
- tipc_close_conn(con);
- conn_put(con);
- }
-}
-
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
- u32 upper, u32 filter, int *conid)
-{
- struct tipc_subscriber *scbr;
- struct tipc_subscr sub;
- struct tipc_server *s;
- struct tipc_conn *con;
-
- sub.seq.type = type;
- sub.seq.lower = lower;
- sub.seq.upper = upper;
- sub.timeout = TIPC_WAIT_FOREVER;
- sub.filter = filter;
- *(u32 *)&sub.usr_handle = port;
-
- con = tipc_alloc_conn(tipc_topsrv(net));
- if (IS_ERR(con))
- return false;
-
- *conid = con->conid;
- s = con->server;
- scbr = s->tipc_conn_new(*conid);
- if (!scbr) {
- conn_put(con);
- return false;
- }
-
- con->usr_data = scbr;
- con->sock = NULL;
- s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
- return true;
-}
-
-void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
-{
- struct tipc_conn *con;
- struct tipc_server *srv;
-
- con = tipc_conn_lookup(tipc_topsrv(net), conid);
- if (!con)
- return;
-
- test_and_clear_bit(CF_CONNECTED, &con->flags);
- srv = con->server;
- if (con->conid)
- srv->tipc_conn_release(con->conid, con->usr_data);
- conn_put(con);
- conn_put(con);
-}
-
-static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
-{
- u32 port = *(u32 *)&evt->s.usr_handle;
- u32 self = tipc_own_addr(net);
- struct sk_buff_head evtq;
- struct sk_buff *skb;
-
- skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
- self, self, port, port, 0);
- if (!skb)
- return;
- msg_set_dest_droppable(buf_msg(skb), true);
- memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
- skb_queue_head_init(&evtq);
- __skb_queue_tail(&evtq, skb);
- tipc_sk_rcv(net, &evtq);
-}
-
-static void tipc_send_to_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct outqueue_entry *e;
- struct tipc_event *evt;
- struct msghdr msg;
- int count = 0;
- int ret;
-
- spin_lock_bh(&con->outqueue_lock);
- while (test_bit(CF_CONNECTED, &con->flags)) {
- e = list_entry(con->outqueue.next, struct outqueue_entry, list);
- if ((struct list_head *) e == &con->outqueue)
- break;
-
- spin_unlock_bh(&con->outqueue_lock);
-
- if (con->sock) {
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
- if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
- msg.msg_name = &e->dest;
- msg.msg_namelen = sizeof(struct sockaddr_tipc);
- }
- ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
- e->iov.iov_len);
- if (ret == -EWOULDBLOCK || ret == 0) {
- cond_resched();
- goto out;
- } else if (ret < 0) {
- goto send_err;
- }
- } else {
- evt = e->iov.iov_base;
- tipc_send_kern_top_evt(s->net, evt);
- }
- /* Don't starve users filling buffers */
- if (++count >= MAX_SEND_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
-
- spin_lock_bh(&con->outqueue_lock);
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-out:
- return;
-
-send_err:
- tipc_close_conn(con);
-}
-
-static void tipc_recv_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
- int count = 0;
-
- while (test_bit(CF_CONNECTED, &con->flags)) {
- if (con->rx_action(con))
- break;
-
- /* Don't flood Rx machine */
- if (++count >= MAX_RECV_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
- }
- conn_put(con);
-}
-
-static void tipc_send_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
-
- if (test_bit(CF_CONNECTED, &con->flags))
- tipc_send_to_sock(con);
-
- conn_put(con);
-}
-
-static void tipc_work_stop(struct tipc_server *s)
-{
- destroy_workqueue(s->rcv_wq);
- destroy_workqueue(s->send_wq);
-}
-
-static int tipc_work_start(struct tipc_server *s)
-{
- s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
- if (!s->rcv_wq) {
- pr_err("can't start tipc receive workqueue\n");
- return -ENOMEM;
- }
-
- s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
- if (!s->send_wq) {
- pr_err("can't start tipc send workqueue\n");
- destroy_workqueue(s->rcv_wq);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-int tipc_server_start(struct tipc_server *s)
-{
- int ret;
-
- spin_lock_init(&s->idr_lock);
- idr_init(&s->conn_idr);
- s->idr_in_use = 0;
-
- s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!s->rcvbuf_cache)
- return -ENOMEM;
-
- ret = tipc_work_start(s);
- if (ret < 0) {
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- ret = tipc_open_listening_sock(s);
- if (ret < 0) {
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- return ret;
-}
-
-void tipc_server_stop(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int id;
-
- spin_lock_bh(&s->idr_lock);
- for (id = 0; s->idr_in_use; id++) {
- con = idr_find(&s->conn_idr, id);
- if (con) {
- spin_unlock_bh(&s->idr_lock);
- tipc_close_conn(con);
- spin_lock_bh(&s->idr_lock);
- }
- }
- spin_unlock_bh(&s->idr_lock);
-
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- idr_destroy(&s->conn_idr);
-}
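
The deleted server.c guarded its connection lookup with kref_get_unless_zero(), taking a new reference only while the object was still live so a dying connection could not be resurrected. A C11 atomics sketch of that idiom (struct conn here is illustrative, not the removed kernel struct):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct conn {
		atomic_int kref;
		bool connected;
	};

	/* mirrors kref_get_unless_zero(): succeed only while someone else
	 * still holds a reference */
	static bool get_unless_zero(struct conn *c)
	{
		int v = atomic_load(&c->kref);

		while (v != 0)
			if (atomic_compare_exchange_weak(&c->kref, &v, v + 1))
				return true;
		return false;
	}

	static void put(struct conn *c)
	{
		if (atomic_fetch_sub(&c->kref, 1) == 1)
			printf("last ref dropped: release conn\n");
	}

	int main(void)
	{
		struct conn c = { .kref = 1, .connected = true };

		if (c.connected && get_unless_zero(&c))
			printf("lookup succeeded, kref=%d\n",
			       atomic_load(&c.kref));
		put(&c);   /* lookup's reference */
		put(&c);   /* original reference -> release */
		return 0;
	}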
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f93477187a90..275b666f6231 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -289,10 +289,9 @@ static bool tipc_sk_type_connectionless(struct sock *sk)
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
struct sock *sk = &tsk->sk;
- struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
+ u32 self = tipc_own_addr(sock_net(sk));
u32 peer_port = tsk_peer_port(tsk);
- u32 orig_node;
- u32 peer_node;
+ u32 orig_node, peer_node;
if (unlikely(!tipc_sk_connected(sk)))
return false;
@@ -306,10 +305,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
if (likely(orig_node == peer_node))
return true;
- if (!orig_node && (peer_node == tn->own_addr))
+ if (!orig_node && peer_node == self)
return true;
- if (!peer_node && (orig_node == tn->own_addr))
+ if (!peer_node && orig_node == self)
return true;
return false;
@@ -461,8 +460,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
/* Ensure tsk is visible before we read own_addr. */
smp_mb();
- tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
- NAMED_H_SIZE, 0);
+ tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
+ TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
msg_set_origport(msg, tsk->portid);
timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
@@ -473,6 +472,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
+ tsk->group_is_open = true;
atomic_set(&tsk->dupl_rcvcnt, 0);
/* Start out with safe limits until we receive an advertised window */
@@ -643,7 +643,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
goto exit;
}
- res = (addr->scope > 0) ?
+ res = (addr->scope >= 0) ?
tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
@@ -670,7 +670,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
memset(addr, 0, sizeof(*addr));
if (peer) {
@@ -681,7 +680,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
addr->addr.id.node = tsk_peer_node(tsk);
} else {
addr->addr.id.ref = tsk->portid;
- addr->addr.id.node = tn->own_addr;
+ addr->addr.id.node = tipc_own_addr(sock_net(sk));
}
addr->addrtype = TIPC_ADDR_ID;
@@ -1279,8 +1278,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
- u32 type, inst, domain;
u32 dnode, dport;
+ u32 type, inst;
int mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
@@ -1331,13 +1330,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (dest->addrtype == TIPC_ADDR_NAME) {
type = dest->addr.name.name.type;
inst = dest->addr.name.name.instance;
- domain = dest->addr.name.domain;
- dnode = domain;
+ dnode = dest->addr.name.domain;
msg_set_type(hdr, TIPC_NAMED_MSG);
msg_set_hdr_sz(hdr, NAMED_H_SIZE);
msg_set_nametype(hdr, type);
msg_set_nameinst(hdr, inst);
- msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
+ msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
dport = tipc_nametbl_translate(net, type, inst, &dnode);
msg_set_destnode(hdr, dnode);
msg_set_destport(hdr, dport);
@@ -2122,8 +2120,10 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
(!sk_conn && msg_connected(hdr)) ||
(!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
- else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+ else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
+ atomic_inc(&sk->sk_drops);
err = TIPC_ERR_OVERLOAD;
+ }
if (unlikely(err)) {
tipc_skb_reject(net, err, skb, xmitq);
@@ -2202,6 +2202,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/* Overload => reject message back to sender */
onode = tipc_own_addr(sock_net(sk));
+ atomic_inc(&sk->sk_drops);
if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
__skb_queue_tail(xmitq, skb);
break;
@@ -2591,6 +2592,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct publication *publ;
u32 key;
+ if (scope != TIPC_NODE_SCOPE)
+ scope = TIPC_CLUSTER_SCOPE;
+
if (tipc_sk_connected(sk))
return -EINVAL;
key = tsk->portid + tsk->pub_count + 1;
@@ -2602,7 +2606,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
if (unlikely(!publ))
return -EINVAL;
- list_add(&publ->pport_list, &tsk->publications);
+ list_add(&publ->binding_sock, &tsk->publications);
tsk->pub_count++;
tsk->published = 1;
return 0;
@@ -2616,7 +2620,10 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
struct publication *safe;
int rc = -EINVAL;
- list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
+ if (scope != TIPC_NODE_SCOPE)
+ scope = TIPC_CLUSTER_SCOPE;
+
+ list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
if (seq) {
if (publ->scope != scope)
continue;
@@ -2627,12 +2634,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
if (publ->upper != seq->upper)
break;
tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->ref, publ->key);
+ publ->port, publ->key);
rc = 0;
break;
}
tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->ref, publ->key);
+ publ->port, publ->key);
rc = 0;
}
if (list_empty(&tsk->publications))
@@ -2658,8 +2665,8 @@ void tipc_sk_reinit(struct net *net)
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
spin_lock_bh(&tsk->sk.sk_lock.slock);
msg = &tsk->phdr;
- msg_set_prevnode(msg, tn->own_addr);
- msg_set_orignode(msg, tn->own_addr);
+ msg_set_prevnode(msg, tipc_own_addr(net));
+ msg_set_orignode(msg, tipc_own_addr(net));
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
@@ -3154,16 +3161,32 @@ msg_full:
return -EMSGSIZE;
}
+static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock *tsk)
+{
+ struct net *net = sock_net(skb->sk);
+ struct sock *sk = &tsk->sk;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net)))
+ return -EMSGSIZE;
+
+ if (tipc_sk_connected(sk)) {
+ if (__tipc_nl_add_sk_con(skb, tsk))
+ return -EMSGSIZE;
+ } else if (!list_empty(&tsk->publications)) {
+ if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
struct tipc_sock *tsk)
{
- int err;
- void *hdr;
struct nlattr *attrs;
- struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sock *sk = &tsk->sk;
+ void *hdr;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
@@ -3173,19 +3196,10 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
if (!attrs)
goto genlmsg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
- goto attr_msg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
+
+ if (__tipc_nl_add_sk_info(skb, tsk))
goto attr_msg_cancel;
- if (tipc_sk_connected(sk)) {
- err = __tipc_nl_add_sk_con(skb, tsk);
- if (err)
- goto attr_msg_cancel;
- } else if (!list_empty(&tsk->publications)) {
- if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
- goto attr_msg_cancel;
- }
nla_nest_end(skb, attrs);
genlmsg_end(skb, hdr);
@@ -3199,16 +3213,19 @@ msg_cancel:
return -EMSGSIZE;
}
-int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*skb_handler)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk))
{
- int err;
- struct tipc_sock *tsk;
- const struct bucket_table *tbl;
- struct rhash_head *pos;
struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- u32 tbl_id = cb->args[0];
+ struct tipc_net *tn = tipc_net(net);
+ const struct bucket_table *tbl;
u32 prev_portid = cb->args[1];
+ u32 tbl_id = cb->args[0];
+ struct rhash_head *pos;
+ struct tipc_sock *tsk;
+ int err;
rcu_read_lock();
tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
@@ -3220,12 +3237,13 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
- err = __tipc_nl_add_sk(skb, cb, tsk);
+ err = skb_handler(skb, cb, tsk);
if (err) {
prev_portid = tsk->portid;
spin_unlock_bh(&tsk->sk.sk_lock.slock);
goto out;
}
+
prev_portid = 0;
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
@@ -3237,6 +3255,75 @@ out:
return skb->len;
}
+EXPORT_SYMBOL(tipc_nl_sk_walk);
+
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
+ u32 sk_filter_state,
+ u64 (*tipc_diag_gen_cookie)(struct sock *sk))
+{
+ struct sock *sk = &tsk->sk;
+ struct nlattr *attrs;
+ struct nlattr *stat;
+
+	/* Filter response w.r.t. sk_state */
+ if (!(sk_filter_state & (1 << sk->sk_state)))
+ return 0;
+
+ attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+ if (!attrs)
+ goto msg_cancel;
+
+ if (__tipc_nl_add_sk_info(skb, tsk))
+ goto attr_msg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_UID,
+ from_kuid_munged(sk_user_ns(sk), sock_i_uid(sk))) ||
+ nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
+ tipc_diag_gen_cookie(sk),
+ TIPC_NLA_SOCK_PAD))
+ goto attr_msg_cancel;
+
+ stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
+ if (!stat)
+ goto attr_msg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
+ skb_queue_len(&sk->sk_receive_queue)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
+ skb_queue_len(&sk->sk_write_queue)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
+ atomic_read(&sk->sk_drops)))
+ goto stat_msg_cancel;
+
+ if (tsk->cong_link_cnt &&
+ nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
+ goto stat_msg_cancel;
+
+ if (tsk_conn_cong(tsk) &&
+ nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
+ goto stat_msg_cancel;
+
+ nla_nest_end(skb, stat);
+ nla_nest_end(skb, attrs);
+
+ return 0;
+
+stat_msg_cancel:
+ nla_nest_cancel(skb, stat);
+attr_msg_cancel:
+ nla_nest_cancel(skb, attrs);
+msg_cancel:
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
+
+int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
+}
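
[Editor's sketch] The tipc_nl_sk_walk()/tipc_sk_fill_sock_diag() split above lets an external dumper (e.g. a sock-diag module) reuse the socket iteration while supplying its own per-socket handler. A minimal sketch of such a caller follows; the function names, the all-states mask and the cookie helper are illustrative assumptions, not part of this patch:

/* Sketch only: a diag-style dump built on tipc_nl_sk_walk(). A real
 * caller would take the state mask from the netlink request instead
 * of dumping every state.
 */
static u64 example_gen_cookie(struct sock *sk)
{
	u32 res[2];

	sock_diag_save_cookie(sk, res);	/* from <linux/sock_diag.h> */
	return *((u64 *)res);
}

static int example_sk_handler(struct sk_buff *skb,
			      struct netlink_callback *cb,
			      struct tipc_sock *tsk)
{
	u32 sk_states = ~0U;	/* assumed: match sockets in every state */

	return tipc_sk_fill_sock_diag(skb, tsk, sk_states,
				      example_gen_cookie);
}

static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return tipc_nl_sk_walk(skb, cb, example_sk_handler);
}
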
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
@@ -3286,7 +3373,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
struct publication *p;
if (*last_publ) {
- list_for_each_entry(p, &tsk->publications, pport_list) {
+ list_for_each_entry(p, &tsk->publications, binding_sock) {
if (p->key == *last_publ)
break;
}
@@ -3303,10 +3390,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
}
} else {
p = list_first_entry(&tsk->publications, struct publication,
- pport_list);
+ binding_sock);
}
- list_for_each_entry_from(p, &tsk->publications, pport_list) {
+ list_for_each_entry_from(p, &tsk->publications, binding_sock) {
err = __tipc_nl_add_sk_publ(skb, cb, p);
if (err) {
*last_publ = p->key;
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 06fb5944cf76..aae3fd4cd06c 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -49,6 +49,8 @@
#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2)
#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16)
+struct tipc_sock;
+
int tipc_socket_init(void);
void tipc_socket_stop(void);
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
@@ -59,5 +61,11 @@ int tipc_sk_rht_init(struct net *net);
void tipc_sk_rht_destroy(struct net *net);
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
+ u32 sk_filter_state,
+ u64 (*tipc_diag_gen_cookie)(struct sock *sk));
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*skb_handler)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk));
#endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 68e26470c516..6925a989569b 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -1,7 +1,7 @@
/*
* net/tipc/subscr.c: TIPC network topology service
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2017, Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -38,61 +38,30 @@
#include "name_table.h"
#include "subscr.h"
-/**
- * struct tipc_subscriber - TIPC network topology subscriber
- * @kref: reference counter to tipc_subscription object
- * @conid: connection identifier to server connecting to subscriber
- * @lock: control access to subscriber
- * @subscrp_list: list of subscription objects for this subscriber
- */
-struct tipc_subscriber {
- struct kref kref;
- int conid;
- spinlock_t lock;
- struct list_head subscrp_list;
-};
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-static u32 htohl(u32 in, int swap)
-{
- return swap ? swab32(in) : in;
-}
-
-static void tipc_subscrp_send_event(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper,
- u32 event, u32 port_ref, u32 node)
+static void tipc_sub_send_event(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node)
{
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- struct tipc_subscriber *subscriber = sub->subscriber;
- struct kvec msg_sect;
+ struct tipc_event *evt = &sub->evt;
- msg_sect.iov_base = (void *)&sub->evt;
- msg_sect.iov_len = sizeof(struct tipc_event);
- sub->evt.event = htohl(event, sub->swap);
- sub->evt.found_lower = htohl(found_lower, sub->swap);
- sub->evt.found_upper = htohl(found_upper, sub->swap);
- sub->evt.port.ref = htohl(port_ref, sub->swap);
- sub->evt.port.node = htohl(node, sub->swap);
- tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL,
- msg_sect.iov_base, msg_sect.iov_len);
+ if (sub->inactive)
+ return;
+ tipc_evt_write(evt, event, event);
+ tipc_evt_write(evt, found_lower, found_lower);
+ tipc_evt_write(evt, found_upper, found_upper);
+ tipc_evt_write(evt, port.ref, port);
+ tipc_evt_write(evt, port.node, node);
+ tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
}
/**
- * tipc_subscrp_check_overlap - test for subscription overlap with the
+ * tipc_sub_check_overlap - test for subscription overlap with the
* given values
*
* Returns 1 if there is overlap, otherwise 0.
*/
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper)
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+ u32 found_upper)
{
if (found_lower < seq->lower)
found_lower = seq->lower;
@@ -103,298 +72,98 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
return 1;
}
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
-{
- return htohl(type, swap);
-}
-
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
- struct tipc_name_seq *out)
-{
- out->type = htohl(in->type, swap);
- out->lower = htohl(in->lower, swap);
- out->upper = htohl(in->upper, swap);
-}
-
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node, u32 scope, int must)
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node,
+ u32 scope, int must)
{
- u32 filter = htohl(sub->evt.s.filter, sub->swap);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_name_seq seq;
- tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
- if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
+ seq.type = tipc_sub_read(s, seq.type);
+ seq.lower = tipc_sub_read(s, seq.lower);
+ seq.upper = tipc_sub_read(s, seq.upper);
+
+ if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
return;
+
if (!must && !(filter & TIPC_SUB_PORTS))
return;
if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
return;
if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
return;
-
- tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
- node);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, found_lower, found_upper,
+ event, port, node);
+ spin_unlock(&sub->lock);
}
-static void tipc_subscrp_timeout(struct timer_list *t)
+static void tipc_sub_timeout(struct timer_list *t)
{
struct tipc_subscription *sub = from_timer(sub, t, timer);
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- spin_lock_bh(&subscriber->lock);
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- spin_unlock_bh(&subscriber->lock);
-
- /* Notify subscriber of timeout */
- tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
- TIPC_SUBSCR_TIMEOUT, 0, 0);
-
- tipc_subscrp_put(sub);
-}
-
-static void tipc_subscrb_kref_release(struct kref *kref)
-{
- kfree(container_of(kref,struct tipc_subscriber, kref));
-}
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
-{
- kref_put(&subscriber->kref, tipc_subscrb_kref_release);
-}
+ struct tipc_subscr *s = &sub->evt.s;
-static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
-{
- kref_get(&subscriber->kref);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
+ TIPC_SUBSCR_TIMEOUT, 0, 0);
+ sub->inactive = true;
+ spin_unlock(&sub->lock);
}
-static void tipc_subscrp_kref_release(struct kref *kref)
+static void tipc_sub_kref_release(struct kref *kref)
{
- struct tipc_subscription *sub = container_of(kref,
- struct tipc_subscription,
- kref);
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- atomic_dec(&tn->subscription_count);
- kfree(sub);
- tipc_subscrb_put(subscriber);
+ kfree(container_of(kref, struct tipc_subscription, kref));
}
-void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_sub_put(struct tipc_subscription *subscription)
{
- kref_put(&subscription->kref, tipc_subscrp_kref_release);
+ kref_put(&subscription->kref, tipc_sub_kref_release);
}
-void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_sub_get(struct tipc_subscription *subscription)
{
kref_get(&subscription->kref);
}
-/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
- * subscriptions for a given subscriber.
- */
-static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
- struct tipc_subscr *s)
-{
- struct list_head *subscription_list = &subscriber->subscrp_list;
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
- if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
- continue;
-
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- tipc_subscrp_put(sub);
- }
-
- if (s)
- break;
- }
- spin_unlock_bh(&subscriber->lock);
-}
-
-static struct tipc_subscriber *tipc_subscrb_create(int conid)
-{
- struct tipc_subscriber *subscriber;
-
- subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
- if (!subscriber) {
- pr_warn("Subscriber rejected, no memory\n");
- return NULL;
- }
- INIT_LIST_HEAD(&subscriber->subscrp_list);
- kref_init(&subscriber->kref);
- subscriber->conid = conid;
- spin_lock_init(&subscriber->lock);
-
- return subscriber;
-}
-
-static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
-{
- tipc_subscrb_subscrp_delete(subscriber, NULL);
- tipc_subscrb_put(subscriber);
-}
-
-static void tipc_subscrp_cancel(struct tipc_subscr *s,
- struct tipc_subscriber *subscriber)
-{
- tipc_subscrb_get(subscriber);
- tipc_subscrb_subscrp_delete(subscriber, s);
- tipc_subscrb_put(subscriber);
-}
-
-static struct tipc_subscription *tipc_subscrp_create(struct net *net,
- struct tipc_subscr *s,
- int swap)
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_subscription *sub;
- u32 filter = htohl(s->filter, swap);
+ u32 timeout;
- /* Refuse subscription if global limit exceeded */
- if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
- pr_warn("Subscription rejected, limit reached (%u)\n",
- TIPC_MAX_SUBSCRIPTIONS);
+ if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
+ (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+ pr_warn("Subscription rejected, illegal request\n");
return NULL;
}
-
- /* Allocate subscription object */
sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
if (!sub) {
pr_warn("Subscription rejected, no memory\n");
return NULL;
}
-
- /* Initialize subscription object */
sub->net = net;
- if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
- (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
- pr_warn("Subscription rejected, illegal request\n");
- kfree(sub);
- return NULL;
- }
-
- sub->swap = swap;
+ sub->conid = conid;
+ sub->inactive = false;
memcpy(&sub->evt.s, s, sizeof(*s));
- atomic_inc(&tn->subscription_count);
+ spin_lock_init(&sub->lock);
kref_init(&sub->kref);
- return sub;
-}
-
-static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
- struct tipc_subscriber *subscriber, int swap,
- bool status)
-{
- struct tipc_subscription *sub = NULL;
- u32 timeout;
-
- sub = tipc_subscrp_create(net, s, swap);
- if (!sub)
- return -1;
-
- spin_lock_bh(&subscriber->lock);
- list_add(&sub->subscrp_list, &subscriber->subscrp_list);
- sub->subscriber = subscriber;
- tipc_nametbl_subscribe(sub, status);
- tipc_subscrb_get(subscriber);
- spin_unlock_bh(&subscriber->lock);
-
- timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
- timeout = htohl(sub->evt.s.timeout, swap);
-
+ tipc_nametbl_subscribe(sub);
+ timer_setup(&sub->timer, tipc_sub_timeout, 0);
+ timeout = tipc_sub_read(&sub->evt.s, timeout);
if (timeout != TIPC_WAIT_FOREVER)
mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
- return 0;
-}
-
-/* Handle one termination request for the subscriber */
-static void tipc_subscrb_release_cb(int conid, void *usr_data)
-{
- tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
-}
-
-/* Handle one request to create a new subscription for the subscriber */
-static int tipc_subscrb_rcv_cb(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len)
-{
- struct tipc_subscriber *subscriber = usr_data;
- struct tipc_subscr *s = (struct tipc_subscr *)buf;
- bool status;
- int swap;
-
- /* Determine subscriber's endianness */
- swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
- TIPC_SUB_CANCEL));
-
- /* Detect & process a subscription cancellation request */
- if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
- s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
- tipc_subscrp_cancel(s, subscriber);
- return 0;
- }
- status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
- return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
-}
-
-/* Handle one request to establish a new subscriber */
-static void *tipc_subscrb_connect_cb(int conid)
-{
- return (void *)tipc_subscrb_create(conid);
-}
-
-int tipc_topsrv_start(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- const char name[] = "topology_server";
- struct tipc_server *topsrv;
- struct sockaddr_tipc *saddr;
-
- saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
- if (!saddr)
- return -ENOMEM;
- saddr->family = AF_TIPC;
- saddr->addrtype = TIPC_ADDR_NAMESEQ;
- saddr->addr.nameseq.type = TIPC_TOP_SRV;
- saddr->addr.nameseq.lower = TIPC_TOP_SRV;
- saddr->addr.nameseq.upper = TIPC_TOP_SRV;
- saddr->scope = TIPC_NODE_SCOPE;
-
- topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC);
- if (!topsrv) {
- kfree(saddr);
- return -ENOMEM;
- }
- topsrv->net = net;
- topsrv->saddr = saddr;
- topsrv->imp = TIPC_CRITICAL_IMPORTANCE;
- topsrv->type = SOCK_SEQPACKET;
- topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr);
- topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb;
- topsrv->tipc_conn_new = tipc_subscrb_connect_cb;
- topsrv->tipc_conn_release = tipc_subscrb_release_cb;
-
- strncpy(topsrv->name, name, strlen(name) + 1);
- tn->topsrv = topsrv;
- atomic_set(&tn->subscription_count, 0);
-
- return tipc_server_start(topsrv);
+ return sub;
}
-void tipc_topsrv_stop(struct net *net)
+void tipc_sub_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_server *topsrv = tn->topsrv;
-
- tipc_server_stop(topsrv);
- kfree(topsrv->saddr);
- kfree(topsrv);
+ tipc_nametbl_unsubscribe(sub);
+ if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
+ del_timer_sync(&sub->timer);
+ list_del(&sub->sub_list);
+ tipc_sub_put(sub);
}
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index f3edca775d9f..8b2d22b18f22 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -1,7 +1,7 @@
/*
* net/tipc/subscr.h: Include file for TIPC network topology service
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2017, Ericsson AB
* Copyright (c) 2005-2007, 2012-2013, Wind River Systems
* All rights reserved.
*
@@ -37,48 +37,72 @@
#ifndef _TIPC_SUBSCR_H
#define _TIPC_SUBSCR_H
-#include "server.h"
+#include "topsrv.h"
-#define TIPC_MAX_SUBSCRIPTIONS 65535
-#define TIPC_MAX_PUBLICATIONS 65535
+#define TIPC_MAX_SUBSCR 65535
+#define TIPC_MAX_PUBLICATIONS 65535
struct tipc_subscription;
-struct tipc_subscriber;
+struct tipc_conn;
/**
* struct tipc_subscription - TIPC network topology subscription object
* @subscriber: pointer to its subscriber
* @seq: name sequence associated with subscription
- * @net: point to network namespace
* @timer: timer governing subscription duration (optional)
* @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @subscrp_list: adjacent subscriptions in subscriber's subscription list
- * @swap: indicates if subscriber uses opposite endianness in its messages
+ * @sub_list: adjacent subscriptions in subscriber's subscription list
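+ * @conid: identifier of the topology server connection owning this subscription
+ * @inactive: true if the subscription is no longer active
+ * @lock: serializes subscription up/down and timer events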
* @evt: template for events generated by subscription
*/
struct tipc_subscription {
struct kref kref;
- struct tipc_subscriber *subscriber;
struct net *net;
struct timer_list timer;
struct list_head nameseq_list;
- struct list_head subscrp_list;
- int swap;
+ struct list_head sub_list;
struct tipc_event evt;
+ int conid;
+ bool inactive;
+ spinlock_t lock; /* serialize up/down and timer events */
};
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper);
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper, u32 event,
- u32 port_ref, u32 node, u32 scope, int must);
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
- struct tipc_name_seq *out);
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid);
+void tipc_sub_unsubscribe(struct tipc_subscription *sub);
+
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+ u32 found_upper);
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node,
+ u32 scope, int must);
int tipc_topsrv_start(struct net *net);
void tipc_topsrv_stop(struct net *net);
-void tipc_subscrp_put(struct tipc_subscription *subscription);
-void tipc_subscrp_get(struct tipc_subscription *subscription);
+void tipc_sub_put(struct tipc_subscription *subscription);
+void tipc_sub_get(struct tipc_subscription *subscription);
+
+#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
+
+/* tipc_sub_read - return field_ of struct sub_ in host endian format
+ */
+#define tipc_sub_read(sub_, field_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = (sub__)->field_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (swap_ ? swab32(val__) : val__); \
+ })
+
+/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
+ */
+#define tipc_evt_write(evt_, field_, val_) \
+ ({ \
+ struct tipc_event *evt__ = evt_; \
+ u32 val__ = val_; \
+ int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \
+ (evt__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
#endif
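
[Editor's sketch] Every valid subscription request has at least one TIPC_FILTER_MASK bit set in the subscriber's own byte order, so a masked value of zero tells the host that the peer uses opposite endianness. A small sketch of how tipc_sub_read() resolves this; the values are illustrative:

/* Sketch only: a little-endian peer's request as seen on a big-endian
 * host. swab32() stands in for the peer having written the fields in
 * its native byte order.
 */
static u32 example_read_type(void)
{
	struct tipc_subscr s = {
		.seq.type = swab32(42),            /* peer wrote host-order 42 */
		.filter = swab32(TIPC_SUB_PORTS),  /* mask bits land elsewhere */
	};

	/* No TIPC_FILTER_MASK bit is set in host order, so tipc_sub_read()
	 * swaps the value back and returns 42.
	 */
	return tipc_sub_read(&s, seq.type);
}
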
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
new file mode 100644
index 000000000000..c8e34ef22c30
--- /dev/null
+++ b/net/tipc/topsrv.c
@@ -0,0 +1,703 @@
+/*
+ * net/tipc/topsrv.c: TIPC server infrastructure
+ *
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017-2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "subscr.h"
+#include "topsrv.h"
+#include "core.h"
+#include "socket.h"
+#include "addr.h"
+#include "msg.h"
+#include <net/sock.h>
+#include <linux/module.h>
+
+/* Number of messages to send before rescheduling */
+#define MAX_SEND_MSG_COUNT 25
+#define MAX_RECV_MSG_COUNT 25
+#define CF_CONNECTED 1
+#define CF_SERVER 2
+
+#define TIPC_SERVER_NAME_LEN 32
+
+/**
+ * struct tipc_topsrv - TIPC network topology service server structure
+ * @conn_idr: identifier set of connections
+ * @idr_lock: protect the connection identifier set
+ * @idr_in_use: number of allocated identifier entries
+ * @net: network namespace instance
+ * @awork: accept work item for the listener socket
+ * @rcv_wq: receive workqueue
+ * @send_wq: send workqueue
+ * @max_rcvbuf_size: maximum permitted receive message length
+ * @listener: topology server listener socket
+ * @name: server name
+ */
+struct tipc_topsrv {
+ struct idr conn_idr;
+ spinlock_t idr_lock; /* for idr list */
+ int idr_in_use;
+ struct net *net;
+ struct work_struct awork;
+ struct workqueue_struct *rcv_wq;
+ struct workqueue_struct *send_wq;
+ int max_rcvbuf_size;
+ struct socket *listener;
+ char name[TIPC_SERVER_NAME_LEN];
+};
+
+/**
+ * struct tipc_conn - TIPC connection structure
+ * @kref: reference counter to connection object
+ * @conid: connection identifier
+ * @sock: socket handler associated with connection
+ * @flags: indicates connection state
+ * @server: pointer to connected server
+ * @sub_list: list of all subscriptions pertaining to this connection
+ * @sub_lock: lock protecting the subscription list
+ * @rwork: receive work item
+ * @outqueue: list of outbound messages waiting to be sent
+ * @outqueue_lock: control access to the outqueue
+ * @swork: send work item
+ */
+struct tipc_conn {
+ struct kref kref;
+ int conid;
+ struct socket *sock;
+ unsigned long flags;
+ struct tipc_topsrv *server;
+ struct list_head sub_list;
+ spinlock_t sub_lock; /* for subscription list */
+ struct work_struct rwork;
+ struct list_head outqueue;
+ spinlock_t outqueue_lock; /* for outqueue */
+ struct work_struct swork;
+};
+
+/* An entry waiting to be sent */
+struct outqueue_entry {
+ bool inactive;
+ struct tipc_event evt;
+ struct list_head list;
+};
+
+static void tipc_conn_recv_work(struct work_struct *work);
+static void tipc_conn_send_work(struct work_struct *work);
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt);
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s);
+
+static bool connected(struct tipc_conn *con)
+{
+ return con && test_bit(CF_CONNECTED, &con->flags);
+}
+
+static void tipc_conn_kref_release(struct kref *kref)
+{
+ struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
+ struct tipc_topsrv *s = con->server;
+ struct outqueue_entry *e, *safe;
+
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
+ if (con->sock)
+ sock_release(con->sock);
+
+ spin_lock_bh(&con->outqueue_lock);
+ list_for_each_entry_safe(e, safe, &con->outqueue, list) {
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+ kfree(con);
+}
+
+static void conn_put(struct tipc_conn *con)
+{
+ kref_put(&con->kref, tipc_conn_kref_release);
+}
+
+static void conn_get(struct tipc_conn *con)
+{
+ kref_get(&con->kref);
+}
+
+static void tipc_conn_close(struct tipc_conn *con)
+{
+ struct sock *sk = con->sock->sk;
+ bool disconnect = false;
+
+ write_lock_bh(&sk->sk_callback_lock);
+ disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+
+ if (disconnect) {
+ sk->sk_user_data = NULL;
+ tipc_conn_delete_sub(con, NULL);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ /* Handle concurrent calls from sending and receiving threads */
+ if (!disconnect)
+ return;
+
+	/* Don't flush pending works; just let them expire */
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
+
+ conn_put(con);
+}
+
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+{
+ struct tipc_conn *con;
+ int ret;
+
+ con = kzalloc(sizeof(*con), GFP_ATOMIC);
+ if (!con)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&con->kref);
+ INIT_LIST_HEAD(&con->outqueue);
+ INIT_LIST_HEAD(&con->sub_list);
+ spin_lock_init(&con->outqueue_lock);
+ spin_lock_init(&con->sub_lock);
+ INIT_WORK(&con->swork, tipc_conn_send_work);
+ INIT_WORK(&con->rwork, tipc_conn_recv_work);
+
+ spin_lock_bh(&s->idr_lock);
+ ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
+ if (ret < 0) {
+ kfree(con);
+ spin_unlock_bh(&s->idr_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+ con->conid = ret;
+ s->idr_in_use++;
+ spin_unlock_bh(&s->idr_lock);
+
+ set_bit(CF_CONNECTED, &con->flags);
+ con->server = s;
+
+ return con;
+}
+
+static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid)
+{
+ struct tipc_conn *con;
+
+ spin_lock_bh(&s->idr_lock);
+ con = idr_find(&s->conn_idr, conid);
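+	/* Hand out the connection only if it is still alive and a
+	 * reference to it could be taken
+	 */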
+ if (!connected(con) || !kref_get_unless_zero(&con->kref))
+ con = NULL;
+ spin_unlock_bh(&s->idr_lock);
+ return con;
+}
+
+/* tipc_conn_delete_sub - delete a specific or all subscriptions
+ * for a given subscriber
+ */
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(con->server->net);
+ struct list_head *sub_list = &con->sub_list;
+ struct tipc_subscription *sub, *tmp;
+
+ spin_lock_bh(&con->sub_lock);
+ list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
+ if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
+ tipc_sub_unsubscribe(sub);
+ atomic_dec(&tn->subscription_count);
+ } else if (s) {
+ break;
+ }
+ }
+ spin_unlock_bh(&con->sub_lock);
+}
+
+static void tipc_conn_send_to_sock(struct tipc_conn *con)
+{
+ struct list_head *queue = &con->outqueue;
+ struct tipc_topsrv *srv = con->server;
+ struct outqueue_entry *e;
+ struct tipc_event *evt;
+ struct msghdr msg;
+ struct kvec iov;
+ int count = 0;
+ int ret;
+
+ spin_lock_bh(&con->outqueue_lock);
+
+ while (!list_empty(queue)) {
+ e = list_first_entry(queue, struct outqueue_entry, list);
+ evt = &e->evt;
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (e->inactive)
+ tipc_conn_delete_sub(con, &evt->s);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ iov.iov_base = evt;
+ iov.iov_len = sizeof(*evt);
+ msg.msg_name = NULL;
+
+ if (con->sock) {
+ ret = kernel_sendmsg(con->sock, &msg, &iov,
+ 1, sizeof(*evt));
+ if (ret == -EWOULDBLOCK || ret == 0) {
+ cond_resched();
+ return;
+ } else if (ret < 0) {
+ return tipc_conn_close(con);
+ }
+ } else {
+ tipc_topsrv_kern_evt(srv->net, evt);
+ }
+
+ /* Don't starve users filling buffers */
+ if (++count >= MAX_SEND_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ spin_lock_bh(&con->outqueue_lock);
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+}
+
+static void tipc_conn_send_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
+
+ if (connected(con))
+ tipc_conn_send_to_sock(con);
+
+ conn_put(con);
+}
+
+/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
+ */
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct outqueue_entry *e;
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(srv, conid);
+ if (!con)
+ return;
+
+ if (!connected(con))
+ goto err;
+
+ e = kmalloc(sizeof(*e), GFP_ATOMIC);
+ if (!e)
+ goto err;
+ e->inactive = (event == TIPC_SUBSCR_TIMEOUT);
+ memcpy(&e->evt, evt, sizeof(*evt));
+ spin_lock_bh(&con->outqueue_lock);
+ list_add_tail(&e->list, &con->outqueue);
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (queue_work(srv->send_wq, &con->swork))
+ return;
+err:
+ conn_put(con);
+}
+
+/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN
+ * Indicates that there now is more space in the send buffer
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
+ */
+static void tipc_conn_write_space(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->send_wq, &con->swork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
+ struct tipc_conn *con,
+ struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(srv->net);
+ struct tipc_subscription *sub;
+
+ if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+ tipc_conn_delete_sub(con, s);
+ return 0;
+ }
+ if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+ pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+ return -1;
+ }
+ sub = tipc_sub_subscribe(srv->net, s, con->conid);
+ if (!sub)
+ return -1;
+ atomic_inc(&tn->subscription_count);
+ spin_lock_bh(&con->sub_lock);
+ list_add(&sub->sub_list, &con->sub_list);
+ spin_unlock_bh(&con->sub_lock);
+ return 0;
+}
+
+static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
+{
+ struct tipc_topsrv *srv = con->server;
+ struct sock *sk = con->sock->sk;
+ struct msghdr msg = {};
+ struct tipc_subscr s;
+ struct kvec iov;
+ int ret;
+
+ iov.iov_base = &s;
+ iov.iov_len = sizeof(s);
+ msg.msg_name = NULL;
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+ ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
+ if (ret == -EWOULDBLOCK)
+ return -EWOULDBLOCK;
+ if (ret > 0) {
+ read_lock_bh(&sk->sk_callback_lock);
+ ret = tipc_conn_rcv_sub(srv, con, &s);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+ if (ret < 0)
+ tipc_conn_close(con);
+
+ return ret;
+}
+
+static void tipc_conn_recv_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
+ int count = 0;
+
+ while (connected(con)) {
+ if (tipc_conn_rcv_from_sock(con))
+ break;
+
+ /* Don't flood Rx machine */
+ if (++count >= MAX_RECV_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ }
+ conn_put(con);
+}
+
+/* tipc_conn_data_ready - interrupt callback indicating the socket has data
+ * The queued work is launched into tipc_conn_recv_work()->tipc_conn_rcv_from_sock()
+ */
+static void tipc_conn_data_ready(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->rcv_wq, &con->rwork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void tipc_topsrv_accept(struct work_struct *work)
+{
+ struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
+ struct socket *lsock = srv->listener;
+ struct socket *newsock;
+ struct tipc_conn *con;
+ struct sock *newsk;
+ int ret;
+
+ while (1) {
+ ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
+ if (ret < 0)
+ return;
+ con = tipc_conn_alloc(srv);
+ if (IS_ERR(con)) {
+ ret = PTR_ERR(con);
+ sock_release(newsock);
+ return;
+ }
+ /* Register callbacks */
+ newsk = newsock->sk;
+ write_lock_bh(&newsk->sk_callback_lock);
+ newsk->sk_data_ready = tipc_conn_data_ready;
+ newsk->sk_write_space = tipc_conn_write_space;
+ newsk->sk_user_data = con;
+ con->sock = newsock;
+ write_unlock_bh(&newsk->sk_callback_lock);
+
+ /* Wake up receive process in case of 'SYN+' message */
+ newsk->sk_data_ready(newsk);
+ }
+}
+
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
+ * The queued job is launched into tipc_topsrv_accept()
+ */
+static void tipc_topsrv_listener_data_ready(struct sock *sk)
+{
+ struct tipc_topsrv *srv;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ srv = sk->sk_user_data;
+ if (srv->listener)
+ queue_work(srv->rcv_wq, &srv->awork);
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
+{
+ int imp = TIPC_CRITICAL_IMPORTANCE;
+ struct socket *lsock = NULL;
+ struct sockaddr_tipc saddr;
+ struct sock *sk;
+ int rc;
+
+ rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock);
+ if (rc < 0)
+ return rc;
+
+ srv->listener = lsock;
+ sk = lsock->sk;
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = tipc_topsrv_listener_data_ready;
+ sk->sk_user_data = srv;
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
+ (char *)&imp, sizeof(imp));
+ if (rc < 0)
+ goto err;
+
+ saddr.family = AF_TIPC;
+ saddr.addrtype = TIPC_ADDR_NAMESEQ;
+ saddr.addr.nameseq.type = TIPC_TOP_SRV;
+ saddr.addr.nameseq.lower = TIPC_TOP_SRV;
+ saddr.addr.nameseq.upper = TIPC_TOP_SRV;
+ saddr.scope = TIPC_NODE_SCOPE;
+
+ rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (rc < 0)
+ goto err;
+ rc = kernel_listen(lsock, 0);
+ if (rc < 0)
+ goto err;
+
+	/* Since the server's listening socket and the TIPC module have the
+	 * same owner, we have to decrease the TIPC module reference count to
+	 * guarantee that it remains zero after the listening socket is
+	 * created; otherwise, "rmmod" cannot remove the TIPC module once it
+	 * has been inserted successfully.
+	 *
+	 * However, sock_create_kern() increments the reference count twice:
+	 * once for the owner of the TIPC socket's proto_ops struct, and once
+	 * for the owner of the TIPC proto struct. Therefore, we must
+	 * decrement the module reference count twice to ensure that it stays
+	 * zero after the server's listening socket is created. Of course, we
+	 * must bump the module reference count twice as well before the
+	 * socket is closed.
+ */
+ module_put(lsock->ops->owner);
+ module_put(sk->sk_prot_creator->owner);
+
+ return 0;
+err:
+ sock_release(lsock);
+ return -EINVAL;
+}
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid)
+{
+ struct tipc_subscr sub;
+ struct tipc_conn *con;
+ int rc;
+
+ sub.seq.type = type;
+ sub.seq.lower = lower;
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = filter;
+ *(u32 *)&sub.usr_handle = port;
+
+ con = tipc_conn_alloc(tipc_topsrv(net));
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+ con->sock = NULL;
+ rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
+ if (rc >= 0)
+ return true;
+ conn_put(con);
+ return false;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(tipc_topsrv(net), conid);
+ if (!con)
+ return;
+
+ test_and_clear_bit(CF_CONNECTED, &con->flags);
+ tipc_conn_delete_sub(con, NULL);
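+	/* Drop both the reference taken by tipc_conn_lookup() and the
+	 * connection's own reference, since the connection is now defunct
+	 */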
+ conn_put(con);
+ conn_put(con);
+}
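
[Editor's sketch] tipc_topsrv_kern_subscr() gives in-kernel users a socket-less subscription over the same connection machinery. A minimal usage sketch; the net, portid and service range values are assumptions:

/* Sketch only: an in-kernel subscriber to a service range. */
static int example_kern_subscribe(struct net *net, u32 portid,
				  u32 service_type, u32 lower, u32 upper,
				  int *conid)
{
	if (!tipc_topsrv_kern_subscr(net, portid, service_type, lower,
				     upper, TIPC_SUB_PORTS, conid))
		return -ENOMEM;	/* assumed error policy */

	/* ... later, when the subscriber goes away: */
	tipc_topsrv_kern_unsubscr(net, *conid);
	return 0;
}
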
+
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
+{
+ u32 port = *(u32 *)&evt->s.usr_handle;
+ u32 self = tipc_own_addr(net);
+ struct sk_buff_head evtq;
+ struct sk_buff *skb;
+
+ skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+ self, self, port, port, 0);
+ if (!skb)
+ return;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+ skb_queue_head_init(&evtq);
+ __skb_queue_tail(&evtq, skb);
+ tipc_sk_rcv(net, &evtq);
+}
+
+static int tipc_topsrv_work_start(struct tipc_topsrv *s)
+{
+ s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
+ if (!s->rcv_wq) {
+ pr_err("can't start tipc receive workqueue\n");
+ return -ENOMEM;
+ }
+
+ s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
+ if (!s->send_wq) {
+ pr_err("can't start tipc send workqueue\n");
+ destroy_workqueue(s->rcv_wq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
+{
+ destroy_workqueue(s->rcv_wq);
+ destroy_workqueue(s->send_wq);
+}
+
+int tipc_topsrv_start(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+ const char name[] = "topology_server";
+ struct tipc_topsrv *srv;
+ int ret;
+
+ srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
+ if (!srv)
+ return -ENOMEM;
+
+ srv->net = net;
+ srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
+ INIT_WORK(&srv->awork, tipc_topsrv_accept);
+
+ strncpy(srv->name, name, strlen(name) + 1);
+ tn->topsrv = srv;
+ atomic_set(&tn->subscription_count, 0);
+
+ spin_lock_init(&srv->idr_lock);
+ idr_init(&srv->conn_idr);
+ srv->idr_in_use = 0;
+
+ ret = tipc_topsrv_work_start(srv);
+ if (ret < 0)
+ return ret;
+
+ ret = tipc_topsrv_create_listener(srv);
+ if (ret < 0)
+ tipc_topsrv_work_stop(srv);
+
+ return ret;
+}
+
+void tipc_topsrv_stop(struct net *net)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct socket *lsock = srv->listener;
+ struct tipc_conn *con;
+ int id;
+
+ spin_lock_bh(&srv->idr_lock);
+ for (id = 0; srv->idr_in_use; id++) {
+ con = idr_find(&srv->conn_idr, id);
+ if (con) {
+ spin_unlock_bh(&srv->idr_lock);
+ tipc_conn_close(con);
+ spin_lock_bh(&srv->idr_lock);
+ }
+ }
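+	/* Restore the module refs dropped in tipc_topsrv_create_listener() */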
+ __module_get(lsock->ops->owner);
+ __module_get(lsock->sk->sk_prot_creator->owner);
+ srv->listener = NULL;
+ spin_unlock_bh(&srv->idr_lock);
+ sock_release(lsock);
+ tipc_topsrv_work_stop(srv);
+ idr_destroy(&srv->conn_idr);
+ kfree(srv);
+}
diff --git a/net/tipc/server.h b/net/tipc/topsrv.h
index 64df7513cd70..c7ea71293748 100644
--- a/net/tipc/server.h
+++ b/net/tipc/topsrv.h
@@ -2,6 +2,7 @@
* net/tipc/server.h: Include file for TIPC server code
*
* Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,68 +37,18 @@
#ifndef _TIPC_SERVER_H
#define _TIPC_SERVER_H
-#include <linux/idr.h>
-#include <linux/tipc.h>
-#include <net/net_namespace.h>
+#include "core.h"
#define TIPC_SERVER_NAME_LEN 32
#define TIPC_SUB_CLUSTER_SCOPE 0x20
#define TIPC_SUB_NODE_SCOPE 0x40
#define TIPC_SUB_NO_STATUS 0x80
-/**
- * struct tipc_server - TIPC server structure
- * @conn_idr: identifier set of connection
- * @idr_lock: protect the connection identifier set
- * @idr_in_use: amount of allocated identifier entry
- * @net: network namspace instance
- * @rcvbuf_cache: memory cache of server receive buffer
- * @rcv_wq: receive workqueue
- * @send_wq: send workqueue
- * @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_release: callback will be called before releasing the connection
- * @tipc_conn_recvmsg: callback will be called when message arrives
- * @saddr: TIPC server address
- * @name: server name
- * @imp: message importance
- * @type: socket type
- */
-struct tipc_server {
- struct idr conn_idr;
- spinlock_t idr_lock;
- int idr_in_use;
- struct net *net;
- struct kmem_cache *rcvbuf_cache;
- struct workqueue_struct *rcv_wq;
- struct workqueue_struct *send_wq;
- int max_rcvbuf_size;
- void *(*tipc_conn_new)(int conid);
- void (*tipc_conn_release)(int conid, void *usr_data);
- int (*tipc_conn_recvmsg)(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len);
- struct sockaddr_tipc *saddr;
- char name[TIPC_SERVER_NAME_LEN];
- int imp;
- int type;
-};
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len);
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt);
bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
u32 upper, u32 filter, int *conid);
void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
-/**
- * tipc_conn_terminate - terminate connection with server
- *
- * Note: Must call it in process context since it might sleep
- */
-void tipc_conn_terminate(struct tipc_server *s, int conid);
-int tipc_server_start(struct tipc_server *s);
-
-void tipc_server_stop(struct tipc_server *s);
-
#endif
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 3deabcab4882..e7d91f5d5cae 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -47,6 +47,8 @@
#include <net/addrconf.h>
#include <linux/tipc_netlink.h>
#include "core.h"
+#include "addr.h"
+#include "net.h"
#include "bearer.h"
#include "netlink.h"
#include "msg.h"
@@ -647,6 +649,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
struct udp_port_cfg udp_conf = {0};
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+ u8 node_id[NODE_ID_LEN] = {0,};
ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
if (!ub)
@@ -677,6 +680,17 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
if (err)
goto err;
+ /* Autoconfigure own node identity if needed */
+ if (!tipc_own_id(net)) {
+ memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16);
+ tipc_net_init(net, node_id, 0);
+ }
+ if (!tipc_own_id(net)) {
+ pr_warn("Failed to set node id, please configure manually\n");
+ err = -EINVAL;
+ goto err;
+ }
+
b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;
b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
rcu_assign_pointer(b->media_ptr, ub);
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index eb583038c67e..89b8745a986f 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -7,6 +7,7 @@ config TLS
select CRYPTO
select CRYPTO_AES
select CRYPTO_GCM
+ select STREAM_PARSER
default n
---help---
Enable kernel support for TLS protocol. This allows symmetric
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b0d5fcea47e7..6f5c1146da4a 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -46,16 +46,29 @@ MODULE_DESCRIPTION("Transport Layer Security Support");
MODULE_LICENSE("Dual BSD/GPL");
enum {
- TLS_BASE_TX,
+ TLSV4,
+ TLSV6,
+ TLS_NUM_PROTS,
+};
+
+enum {
+ TLS_BASE,
TLS_SW_TX,
+ TLS_SW_RX,
+ TLS_SW_RXTX,
TLS_NUM_CONFIG,
};
-static struct proto tls_prots[TLS_NUM_CONFIG];
+static struct proto *saved_tcpv6_prot;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
+static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
+static struct proto_ops tls_sw_proto_ops;
static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
{
- sk->sk_prot = &tls_prots[ctx->tx_conf];
+ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
+
+ sk->sk_prot = &tls_prots[ip_ver][ctx->conf];
}
int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -228,7 +241,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
lock_sock(sk);
sk_proto_close = ctx->sk_proto_close;
- if (ctx->tx_conf == TLS_BASE_TX) {
+ if (ctx->conf == TLS_BASE) {
kfree(ctx);
goto skip_tx_cleanup;
}
@@ -249,11 +262,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
}
}
- kfree(ctx->rec_seq);
- kfree(ctx->iv);
+ kfree(ctx->tx.rec_seq);
+ kfree(ctx->tx.iv);
+ kfree(ctx->rx.rec_seq);
+ kfree(ctx->rx.iv);
- if (ctx->tx_conf == TLS_SW_TX)
- tls_sw_free_tx_resources(sk);
+ if (ctx->conf == TLS_SW_TX ||
+ ctx->conf == TLS_SW_RX ||
+ ctx->conf == TLS_SW_RXTX) {
+ tls_sw_free_resources(sk);
+ }
skip_tx_cleanup:
release_sock(sk);
@@ -308,8 +326,11 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
goto out;
}
lock_sock(sk);
- memcpy(crypto_info_aes_gcm_128->iv, ctx->iv,
+ memcpy(crypto_info_aes_gcm_128->iv,
+ ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->tx.rec_seq,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
release_sock(sk);
if (copy_to_user(optval,
crypto_info_aes_gcm_128,
@@ -352,20 +373,24 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
return do_tls_getsockopt(sk, optname, optval, optlen);
}
-static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
- unsigned int optlen)
+static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
+ unsigned int optlen, int tx)
{
struct tls_crypto_info *crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
int rc = 0;
- int tx_conf;
+ int conf;
if (!optval || (optlen < sizeof(*crypto_info))) {
rc = -EINVAL;
goto out;
}
- crypto_info = &ctx->crypto_send;
+ if (tx)
+ crypto_info = &ctx->crypto_send;
+ else
+ crypto_info = &ctx->crypto_recv;
+
/* Currently we don't support set crypto info more than one time */
if (TLS_CRYPTO_INFO_READY(crypto_info)) {
rc = -EBUSY;
@@ -375,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
- goto out;
+ goto err_crypto_info;
}
/* check version */
@@ -404,15 +429,31 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
}
/* currently SW is default, we will have ethtool in future */
- rc = tls_set_sw_offload(sk, ctx);
- tx_conf = TLS_SW_TX;
+ if (tx) {
+ rc = tls_set_sw_offload(sk, ctx, 1);
+ if (ctx->conf == TLS_SW_RX)
+ conf = TLS_SW_RXTX;
+ else
+ conf = TLS_SW_TX;
+ } else {
+ rc = tls_set_sw_offload(sk, ctx, 0);
+ if (ctx->conf == TLS_SW_TX)
+ conf = TLS_SW_RXTX;
+ else
+ conf = TLS_SW_RX;
+ }
+
if (rc)
goto err_crypto_info;
- ctx->tx_conf = tx_conf;
+ ctx->conf = conf;
update_sk_prot(sk, ctx);
- ctx->sk_write_space = sk->sk_write_space;
- sk->sk_write_space = tls_write_space;
+ if (tx) {
+ ctx->sk_write_space = sk->sk_write_space;
+ sk->sk_write_space = tls_write_space;
+ } else {
+ sk->sk_socket->ops = &tls_sw_proto_ops;
+ }
goto out;
err_crypto_info:
@@ -428,8 +469,10 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
switch (optname) {
case TLS_TX:
+ case TLS_RX:
lock_sock(sk);
- rc = do_tls_setsockopt_tx(sk, optval, optlen);
+ rc = do_tls_setsockopt_conf(sk, optval, optlen,
+ optname == TLS_TX);
release_sock(sk);
break;
default:
@@ -450,8 +493,29 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
return do_tls_setsockopt(sk, optname, optval, optlen);
}
+static void build_protos(struct proto *prot, struct proto *base)
+{
+ prot[TLS_BASE] = *base;
+ prot[TLS_BASE].setsockopt = tls_setsockopt;
+ prot[TLS_BASE].getsockopt = tls_getsockopt;
+ prot[TLS_BASE].close = tls_sk_proto_close;
+
+ prot[TLS_SW_TX] = prot[TLS_BASE];
+ prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
+ prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
+
+ prot[TLS_SW_RX] = prot[TLS_BASE];
+ prot[TLS_SW_RX].recvmsg = tls_sw_recvmsg;
+ prot[TLS_SW_RX].close = tls_sk_proto_close;
+
+ prot[TLS_SW_RXTX] = prot[TLS_SW_TX];
+ prot[TLS_SW_RXTX].recvmsg = tls_sw_recvmsg;
+ prot[TLS_SW_RXTX].close = tls_sk_proto_close;
+}
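
[Editor's sketch] With the per-family proto matrix built above, enabling kernel TLS from userspace is a per-direction setsockopt() sequence, and the conf transitions (TLS_SW_TX, TLS_SW_RX, TLS_SW_RXTX) follow from which directions were configured. A hedged userspace sketch; the fallback defines cover headers that may lack the constants, and the crypto material is left zeroed as a placeholder, not real key material:

/* Sketch only: enable SW TLS in both directions on an established
 * TCP socket. Real code must fill key/iv/salt/rec_seq from the
 * handshake.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282	/* from include/linux/socket.h */
#endif
#ifndef TCP_ULP
#define TCP_ULP 31	/* from include/uapi/linux/tcp.h */
#endif

static int enable_tls_both_ways(int fd)
{
	struct tls12_crypto_info_aes_gcm_128 ci = {
		.info.version = TLS_1_2_VERSION,
		.info.cipher_type = TLS_CIPHER_AES_GCM_128,
	};	/* key/iv/salt/rec_seq left zeroed: placeholders only */

	if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")))
		return -1;
	if (setsockopt(fd, SOL_TLS, TLS_TX, &ci, sizeof(ci)))	/* conf -> TLS_SW_TX */
		return -1;
	return setsockopt(fd, SOL_TLS, TLS_RX, &ci, sizeof(ci)); /* conf -> TLS_SW_RXTX */
}
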
+
static int tls_init(struct sock *sk)
{
+ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
struct inet_connection_sock *icsk = inet_csk(sk);
struct tls_context *ctx;
int rc = 0;
@@ -476,7 +540,18 @@ static int tls_init(struct sock *sk)
ctx->getsockopt = sk->sk_prot->getsockopt;
ctx->sk_proto_close = sk->sk_prot->close;
- ctx->tx_conf = TLS_BASE_TX;
+ /* Build IPv6 TLS whenever the address of tcpv6_prot changes */
+ if (ip_ver == TLSV6 &&
+ unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+ mutex_lock(&tcpv6_prot_mutex);
+ if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+ build_protos(tls_prots[TLSV6], sk->sk_prot);
+ smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+ }
+ mutex_unlock(&tcpv6_prot_mutex);
+ }
+
+ ctx->conf = TLS_BASE;
update_sk_prot(sk, ctx);
out:
return rc;
@@ -490,21 +565,13 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.init = tls_init,
};
-static void build_protos(struct proto *prot, struct proto *base)
-{
- prot[TLS_BASE_TX] = *base;
- prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
- prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
- prot[TLS_BASE_TX].close = tls_sk_proto_close;
-
- prot[TLS_SW_TX] = prot[TLS_BASE_TX];
- prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
- prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
-}
-
static int __init tls_register(void)
{
- build_protos(tls_prots, &tcp_prot);
+ build_protos(tls_prots[TLSV4], &tcp_prot);
+
+ tls_sw_proto_ops = inet_stream_ops;
+ tls_sw_proto_ops.poll = tls_sw_poll;
+ tls_sw_proto_ops.splice_read = tls_sw_splice_read;
tcp_register_ulp(&tcp_tls_ulp_ops);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index f26376e954ae..4dc766b03f00 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -34,11 +34,60 @@
* SOFTWARE.
*/
+#include <linux/sched/signal.h>
#include <linux/module.h>
#include <crypto/aead.h>
+#include <net/strparser.h>
#include <net/tls.h>
+static int tls_do_decryption(struct sock *sk,
+ struct scatterlist *sgin,
+ struct scatterlist *sgout,
+ char *iv_recv,
+ size_t data_len,
+ struct sk_buff *skb,
+ gfp_t flags)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct strp_msg *rxm = strp_msg(skb);
+ struct aead_request *aead_req;
+
+ int ret;
+ unsigned int req_size = sizeof(struct aead_request) +
+ crypto_aead_reqsize(ctx->aead_recv);
+
+ aead_req = kzalloc(req_size, flags);
+ if (!aead_req)
+ return -ENOMEM;
+
+ aead_request_set_tfm(aead_req, ctx->aead_recv);
+ aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+ aead_request_set_crypt(aead_req, sgin, sgout,
+ data_len + tls_ctx->rx.tag_size,
+ (u8 *)iv_recv);
+ aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &ctx->async_wait);
+
+ ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
+
+ if (ret < 0)
+ goto out;
+
+ rxm->offset += tls_ctx->rx.prepend_size;
+ rxm->full_len -= tls_ctx->rx.overhead_size;
+ tls_advance_record_sn(sk, &tls_ctx->rx);
+
+ ctx->decrypted = true;
+
+ ctx->saved_data_ready(sk);
+
+out:
+ kfree(aead_req);
+ return ret;
+}
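
The decrypt call above relies on the crypto API's async-to-sync helper: the request completes via crypto_req_done(), and crypto_wait_req() sleeps until then, also handling the -EINPROGRESS/-EBUSY backlog cases. The idiom in isolation, as a sketch with error handling trimmed ('req' is assumed to be a prepared aead_request):

/* Minimal sketch of the synchronous-wait idiom used by
 * tls_do_decryption().
 */
DECLARE_CRYPTO_WAIT(wait);
int err;

aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
			  crypto_req_done, &wait);
err = crypto_wait_req(crypto_aead_decrypt(req), &wait);
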
+
static void trim_sg(struct sock *sk, struct scatterlist *sg,
int *sg_num_elem, unsigned int *sg_size, int target_size)
{
@@ -79,7 +128,7 @@ static void trim_both_sgl(struct sock *sk, int target_size)
target_size);
if (target_size > 0)
- target_size += tls_ctx->overhead_size;
+ target_size += tls_ctx->tx.overhead_size;
trim_sg(sk, ctx->sg_encrypted_data,
&ctx->sg_encrypted_num_elem,
@@ -87,71 +136,16 @@ static void trim_both_sgl(struct sock *sk, int target_size)
target_size);
}
-static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
- int *sg_num_elem, unsigned int *sg_size,
- int first_coalesce)
-{
- struct page_frag *pfrag;
- unsigned int size = *sg_size;
- int num_elem = *sg_num_elem, use = 0, rc = 0;
- struct scatterlist *sge;
- unsigned int orig_offset;
-
- len -= size;
- pfrag = sk_page_frag(sk);
-
- while (len > 0) {
- if (!sk_page_frag_refill(sk, pfrag)) {
- rc = -ENOMEM;
- goto out;
- }
-
- use = min_t(int, len, pfrag->size - pfrag->offset);
-
- if (!sk_wmem_schedule(sk, use)) {
- rc = -ENOMEM;
- goto out;
- }
-
- sk_mem_charge(sk, use);
- size += use;
- orig_offset = pfrag->offset;
- pfrag->offset += use;
-
- sge = sg + num_elem - 1;
- if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
- sg->offset + sg->length == orig_offset) {
- sg->length += use;
- } else {
- sge++;
- sg_unmark_end(sge);
- sg_set_page(sge, pfrag->page, use, orig_offset);
- get_page(pfrag->page);
- ++num_elem;
- if (num_elem == MAX_SKB_FRAGS) {
- rc = -ENOSPC;
- break;
- }
- }
-
- len -= use;
- }
- goto out;
-
-out:
- *sg_size = size;
- *sg_num_elem = num_elem;
- return rc;
-}
-
static int alloc_encrypted_sg(struct sock *sk, int len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
int rc = 0;
- rc = alloc_sg(sk, len, ctx->sg_encrypted_data,
- &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0);
+ rc = sk_alloc_sg(sk, len,
+ ctx->sg_encrypted_data, 0,
+ &ctx->sg_encrypted_num_elem,
+ &ctx->sg_encrypted_size, 0);
return rc;
}
@@ -162,9 +156,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len)
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
int rc = 0;
- rc = alloc_sg(sk, len, ctx->sg_plaintext_data,
- &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
- tls_ctx->pending_open_record_frags);
+ rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
+ &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
+ tls_ctx->pending_open_record_frags);
return rc;
}
@@ -207,21 +201,21 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
if (!aead_req)
return -ENOMEM;
- ctx->sg_encrypted_data[0].offset += tls_ctx->prepend_size;
- ctx->sg_encrypted_data[0].length -= tls_ctx->prepend_size;
+ ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size;
+ ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size;
aead_request_set_tfm(aead_req, ctx->aead_send);
aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
- data_len, tls_ctx->iv);
+ data_len, tls_ctx->tx.iv);
aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &ctx->async_wait);
rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait);
- ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size;
- ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size;
+ ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size;
+ ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size;
kfree(aead_req);
return rc;
@@ -238,7 +232,7 @@ static int tls_push_record(struct sock *sk, int flags,
sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
- tls_ctx->rec_seq, tls_ctx->rec_seq_size,
+ tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
record_type);
tls_fill_prepend(tls_ctx,
@@ -269,9 +263,9 @@ static int tls_push_record(struct sock *sk, int flags,
/* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */
rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags);
if (rc < 0 && rc != -EAGAIN)
- tls_err_abort(sk);
+ tls_err_abort(sk, EBADMSG);
- tls_advance_record_sn(sk, tls_ctx);
+ tls_advance_record_sn(sk, &tls_ctx->tx);
return rc;
}
@@ -281,23 +275,24 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags)
}
static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
- int length)
+ int length, int *pages_used,
+ unsigned int *size_used,
+ struct scatterlist *to, int to_max_pages,
+ bool charge)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
struct page *pages[MAX_SKB_FRAGS];
size_t offset;
ssize_t copied, use;
int i = 0;
- unsigned int size = ctx->sg_plaintext_size;
- int num_elem = ctx->sg_plaintext_num_elem;
+ unsigned int size = *size_used;
+ int num_elem = *pages_used;
int rc = 0;
int maxpages;
while (length > 0) {
i = 0;
- maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem;
+ maxpages = to_max_pages - num_elem;
if (maxpages == 0) {
rc = -EFAULT;
goto out;
@@ -317,10 +312,11 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
while (copied) {
use = min_t(int, copied, PAGE_SIZE - offset);
- sg_set_page(&ctx->sg_plaintext_data[num_elem],
+ sg_set_page(&to[num_elem],
pages[i], use, offset);
- sg_unmark_end(&ctx->sg_plaintext_data[num_elem]);
- sk_mem_charge(sk, use);
+ sg_unmark_end(&to[num_elem]);
+ if (charge)
+ sk_mem_charge(sk, use);
offset = 0;
copied -= use;
@@ -331,8 +327,9 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
}
out:
- ctx->sg_plaintext_size = size;
- ctx->sg_plaintext_num_elem = num_elem;
+ *size_used = size;
+ *pages_used = num_elem;
+
return rc;
}
@@ -409,7 +406,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
required_size = ctx->sg_plaintext_size + try_to_copy +
- tls_ctx->overhead_size;
+ tls_ctx->tx.overhead_size;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -429,7 +426,11 @@ alloc_encrypted:
if (full_record || eor) {
ret = zerocopy_from_iter(sk, &msg->msg_iter,
- try_to_copy);
+ try_to_copy, &ctx->sg_plaintext_num_elem,
+ &ctx->sg_plaintext_size,
+ ctx->sg_plaintext_data,
+ ARRAY_SIZE(ctx->sg_plaintext_data),
+ true);
if (ret)
goto fallback_to_reg_send;
@@ -468,7 +469,7 @@ alloc_plaintext:
&ctx->sg_encrypted_num_elem,
&ctx->sg_encrypted_size,
ctx->sg_plaintext_size +
- tls_ctx->overhead_size);
+ tls_ctx->tx.overhead_size);
}
ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
@@ -560,7 +561,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
full_record = true;
}
required_size = ctx->sg_plaintext_size + copy +
- tls_ctx->overhead_size;
+ tls_ctx->tx.overhead_size;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -629,13 +630,404 @@ sendpage_end:
return ret;
}
-void tls_sw_free_tx_resources(struct sock *sk)
+static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
+ long timeo, int *err)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct sk_buff *skb;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+ while (!(skb = ctx->recv_pkt)) {
+ if (sk->sk_err) {
+ *err = sock_error(sk);
+ return NULL;
+ }
+
+ if (sock_flag(sk, SOCK_DONE))
+ return NULL;
+
+ if ((flags & MSG_DONTWAIT) || !timeo) {
+ *err = -EAGAIN;
+ return NULL;
+ }
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+
+ /* Handle signals */
+ if (signal_pending(current)) {
+ *err = sock_intr_errno(timeo);
+ return NULL;
+ }
+ }
+
+ return skb;
+}
+
+static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+ struct scatterlist *sgout)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + tls_ctx->rx.iv_size];
+ struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
+ struct scatterlist *sgin = &sgin_arr[0];
+ struct strp_msg *rxm = strp_msg(skb);
+ int ret, nsg = ARRAY_SIZE(sgin_arr);
+ char aad_recv[TLS_AAD_SPACE_SIZE];
+ struct sk_buff *unused;
+
+ ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+ iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+ tls_ctx->rx.iv_size);
+ if (ret < 0)
+ return ret;
+
+ memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	if (!sgout) {
+		nsg = skb_cow_data(skb, 0, &unused) + 1;
+		sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation);
+		if (!sgin)
+			return -ENOMEM;
+		sgout = sgin;
+	}
+
+ sg_init_table(sgin, nsg);
+ sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv));
+
+ nsg = skb_to_sgvec(skb, &sgin[1],
+ rxm->offset + tls_ctx->rx.prepend_size,
+ rxm->full_len - tls_ctx->rx.prepend_size);
+
+ tls_make_aad(aad_recv,
+ rxm->full_len - tls_ctx->rx.overhead_size,
+ tls_ctx->rx.rec_seq,
+ tls_ctx->rx.rec_seq_size,
+ ctx->control);
+
+ ret = tls_do_decryption(sk, sgin, sgout, iv,
+ rxm->full_len - tls_ctx->rx.overhead_size,
+ skb, sk->sk_allocation);
+
+ if (sgin != &sgin_arr[0])
+ kfree(sgin);
+
+ return ret;
+}
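
For reference, the scatterlist that decrypt_skb() hands to tls_do_decryption() is laid out as follows — an illustration of the code above, not new behaviour:

/* sgin[0]   -> aad_recv   (TLS_AAD_SPACE_SIZE bytes of AAD)
 * sgin[1..] -> skb payload past the record header, including
 *              the trailing authentication tag
 *
 * With no caller-supplied sgout the record is decrypted in
 * place (sgout == sgin), after skb_cow_data() has made the
 * skb safely writable.
 */
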
+
+static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
+ unsigned int len)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct strp_msg *rxm = strp_msg(skb);
+
+ if (len < rxm->full_len) {
+ rxm->offset += len;
+ rxm->full_len -= len;
+
+ return false;
+ }
+
+ /* Finished with message */
+ ctx->recv_pkt = NULL;
+ kfree_skb(skb);
+ strp_unpause(&ctx->strp);
+
+ return true;
+}
+
+int tls_sw_recvmsg(struct sock *sk,
+ struct msghdr *msg,
+ size_t len,
+ int nonblock,
+ int flags,
+ int *addr_len)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ unsigned char control;
+ struct strp_msg *rxm;
+ struct sk_buff *skb;
+ ssize_t copied = 0;
+ bool cmsg = false;
+ int err = 0;
+ long timeo;
+
+ flags |= nonblock;
+
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ do {
+ bool zc = false;
+ int chunk = 0;
+
+ skb = tls_wait_data(sk, flags, timeo, &err);
+ if (!skb)
+ goto recv_end;
+
+ rxm = strp_msg(skb);
+ if (!cmsg) {
+ int cerr;
+
+ cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+ sizeof(ctx->control), &ctx->control);
+ cmsg = true;
+ control = ctx->control;
+ if (ctx->control != TLS_RECORD_TYPE_DATA) {
+ if (cerr || msg->msg_flags & MSG_CTRUNC) {
+ err = -EIO;
+ goto recv_end;
+ }
+ }
+ } else if (control != ctx->control) {
+ goto recv_end;
+ }
+
+ if (!ctx->decrypted) {
+ int page_count;
+ int to_copy;
+
+ page_count = iov_iter_npages(&msg->msg_iter,
+ MAX_SKB_FRAGS);
+ to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
+ if (to_copy <= len && page_count < MAX_SKB_FRAGS &&
+ likely(!(flags & MSG_PEEK))) {
+ struct scatterlist sgin[MAX_SKB_FRAGS + 1];
+ char unused[21];
+ int pages = 0;
+
+ zc = true;
+ sg_init_table(sgin, MAX_SKB_FRAGS + 1);
+ sg_set_buf(&sgin[0], unused, 13);
+
+ err = zerocopy_from_iter(sk, &msg->msg_iter,
+ to_copy, &pages,
+ &chunk, &sgin[1],
+ MAX_SKB_FRAGS, false);
+ if (err < 0)
+ goto fallback_to_reg_recv;
+
+ err = decrypt_skb(sk, skb, sgin);
+ for (; pages > 0; pages--)
+ put_page(sg_page(&sgin[pages]));
+ if (err < 0) {
+ tls_err_abort(sk, EBADMSG);
+ goto recv_end;
+ }
+ } else {
+fallback_to_reg_recv:
+ err = decrypt_skb(sk, skb, NULL);
+ if (err < 0) {
+ tls_err_abort(sk, EBADMSG);
+ goto recv_end;
+ }
+ }
+ ctx->decrypted = true;
+ }
+
+ if (!zc) {
+ chunk = min_t(unsigned int, rxm->full_len, len);
+ err = skb_copy_datagram_msg(skb, rxm->offset, msg,
+ chunk);
+ if (err < 0)
+ goto recv_end;
+ }
+
+ copied += chunk;
+ len -= chunk;
+ if (likely(!(flags & MSG_PEEK))) {
+ u8 control = ctx->control;
+
+ if (tls_sw_advance_skb(sk, skb, chunk)) {
+ /* Return full control message to
+ * userspace before trying to parse
+ * another message type
+ */
+ msg->msg_flags |= MSG_EOR;
+ if (control != TLS_RECORD_TYPE_DATA)
+ goto recv_end;
+ }
+ }
+ } while (len);
+
+recv_end:
+ release_sock(sk);
+ return copied ? : err;
+}
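
For context, the receive path added above is driven from userspace roughly as follows. This is a sketch, not part of the patch: it assumes a connected TCP socket whose TLS 1.2 handshake already completed in userspace, and it defines the constants locally in case installed uapi headers predate this series (the fallback values are assumptions matching the kernel side).

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282	/* assumption: kernel's SOL_TLS value */
#endif
#ifndef TCP_ULP
#define TCP_ULP 31
#endif
#ifndef TLS_RX
#define TLS_RX 2	/* assumption: optname added by this series */
#endif

/* Sketch: enable kernel TLS receive offload on socket 'fd'. */
static int enable_ktls_rx(int fd,
			  const struct tls12_crypto_info_aes_gcm_128 *info)
{
	if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")))
		return -1;
	/* TLS_RX is the new optname routed to do_tls_setsockopt_conf() */
	return setsockopt(fd, SOL_TLS, TLS_RX, info, sizeof(*info));
}

Non-data records then surface through the TLS_GET_RECORD_TYPE control message on recvmsg(), exactly as tls_sw_recvmsg() constructs it above.
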
+
+ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sock->sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct strp_msg *rxm = NULL;
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ ssize_t copied = 0;
+ int err = 0;
+ long timeo;
+ int chunk;
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ skb = tls_wait_data(sk, flags, timeo, &err);
+ if (!skb)
+ goto splice_read_end;
+
+ /* splice does not support reading control messages */
+ if (ctx->control != TLS_RECORD_TYPE_DATA) {
+ err = -ENOTSUPP;
+ goto splice_read_end;
+ }
+
+ if (!ctx->decrypted) {
+ err = decrypt_skb(sk, skb, NULL);
+
+ if (err < 0) {
+ tls_err_abort(sk, EBADMSG);
+ goto splice_read_end;
+ }
+ ctx->decrypted = true;
+ }
+ rxm = strp_msg(skb);
+
+ chunk = min_t(unsigned int, rxm->full_len, len);
+ copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
+ if (copied < 0)
+ goto splice_read_end;
+
+ if (likely(!(flags & MSG_PEEK)))
+ tls_sw_advance_skb(sk, skb, copied);
+
+splice_read_end:
+ release_sock(sk);
+ return copied ? : err;
+}
+
+unsigned int tls_sw_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait)
+{
+ unsigned int ret;
+ struct sock *sk = sock->sk;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+ /* Grab POLLOUT and POLLHUP from the underlying socket */
+ ret = ctx->sk_poll(file, sock, wait);
+
+ /* Clear POLLIN bits, and set based on recv_pkt */
+ ret &= ~(POLLIN | POLLRDNORM);
+ if (ctx->recv_pkt)
+ ret |= POLLIN | POLLRDNORM;
+
+ return ret;
+}
+
+static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ char header[tls_ctx->rx.prepend_size];
+ struct strp_msg *rxm = strp_msg(skb);
+ size_t cipher_overhead;
+ size_t data_len = 0;
+ int ret;
+
+ /* Verify that we have a full TLS header, or wait for more data */
+ if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
+ return 0;
+
+ /* Linearize header to local buffer */
+ ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
+
+ if (ret < 0)
+ goto read_failure;
+
+ ctx->control = header[0];
+
+ data_len = ((header[4] & 0xFF) | (header[3] << 8));
+
+ cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size;
+
+ if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) {
+ ret = -EMSGSIZE;
+ goto read_failure;
+ }
+ if (data_len < cipher_overhead) {
+ ret = -EBADMSG;
+ goto read_failure;
+ }
+
+ if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.version) ||
+ header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.version)) {
+ ret = -EINVAL;
+ goto read_failure;
+ }
+
+ return data_len + TLS_HEADER_SIZE;
+
+read_failure:
+ tls_err_abort(strp->sk, ret);
+
+ return ret;
+}
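
The five bytes parsed here follow the TLS 1.2 record header layout; note that for TLS 1.2 the major and minor version octets are both 3, so the order of the two per-byte version comparisons above is immaterial in practice:

/* Wire layout of the TLS record header (TLS 1.2, 5 bytes):
 *
 *   header[0]     ContentType            -> ctx->control
 *   header[1..2]  ProtocolVersion        -> checked per byte
 *   header[3..4]  length (big-endian)    -> data_len
 */
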
+
+static void tls_queue(struct strparser *strp, struct sk_buff *skb)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct strp_msg *rxm;
+
+ rxm = strp_msg(skb);
+
+ ctx->decrypted = false;
+
+ ctx->recv_pkt = skb;
+ strp_pause(strp);
+
+ strp->sk->sk_state_change(strp->sk);
+}
+
+static void tls_data_ready(struct sock *sk)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+ strp_data_ready(&ctx->strp);
+}
+
+void tls_sw_free_resources(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
if (ctx->aead_send)
crypto_free_aead(ctx->aead_send);
+ if (ctx->aead_recv) {
+ if (ctx->recv_pkt) {
+ kfree_skb(ctx->recv_pkt);
+ ctx->recv_pkt = NULL;
+ }
+ crypto_free_aead(ctx->aead_recv);
+ strp_stop(&ctx->strp);
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = ctx->saved_data_ready;
+ write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
+ strp_done(&ctx->strp);
+ lock_sock(sk);
+ }
tls_free_both_sg(sk);
@@ -643,12 +1035,15 @@ void tls_sw_free_tx_resources(struct sock *sk)
kfree(tls_ctx);
}
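
The lock juggling around strp_done() above is deliberate: the strparser's receive work itself takes the socket lock, so flushing it while holding the lock could deadlock. The ordering, spelled out as an illustration:

/* Teardown ordering in tls_sw_free_resources() (illustration):
 *
 *   strp_stop(&ctx->strp);        stop feeding new records
 *   ...restore sk_data_ready under sk_callback_lock...
 *   release_sock(sk);             drop the lock so strparser work
 *   strp_done(&ctx->strp);        that takes it can be flushed
 *   lock_sock(sk);                reacquire for the caller
 */
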
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
struct tls_crypto_info *crypto_info;
struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
struct tls_sw_context *sw_ctx;
+ struct cipher_context *cctx;
+ struct crypto_aead **aead;
+ struct strp_callbacks cb;
u16 nonce_size, tag_size, iv_size, rec_seq_size;
char *iv, *rec_seq;
int rc = 0;
@@ -658,22 +1053,29 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
goto out;
}
- if (ctx->priv_ctx) {
- rc = -EEXIST;
- goto out;
- }
-
- sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
- if (!sw_ctx) {
- rc = -ENOMEM;
- goto out;
+ if (!ctx->priv_ctx) {
+ sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
+ if (!sw_ctx) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ crypto_init_wait(&sw_ctx->async_wait);
+ } else {
+ sw_ctx = ctx->priv_ctx;
}
- crypto_init_wait(&sw_ctx->async_wait);
-
ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
- crypto_info = &ctx->crypto_send;
+ if (tx) {
+ crypto_info = &ctx->crypto_send;
+ cctx = &ctx->tx;
+ aead = &sw_ctx->aead_send;
+ } else {
+ crypto_info = &ctx->crypto_recv;
+ cctx = &ctx->rx;
+ aead = &sw_ctx->aead_recv;
+ }
+
switch (crypto_info->cipher_type) {
case TLS_CIPHER_AES_GCM_128: {
nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
@@ -692,46 +1094,49 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
goto free_priv;
}
- ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
- ctx->tag_size = tag_size;
- ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
- ctx->iv_size = iv_size;
- ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
- if (!ctx->iv) {
+ cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
+ cctx->tag_size = tag_size;
+ cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
+ cctx->iv_size = iv_size;
+ cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+ GFP_KERNEL);
+ if (!cctx->iv) {
rc = -ENOMEM;
goto free_priv;
}
- memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
- memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
- ctx->rec_seq_size = rec_seq_size;
- ctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
- if (!ctx->rec_seq) {
+ memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+ cctx->rec_seq_size = rec_seq_size;
+ cctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+ if (!cctx->rec_seq) {
rc = -ENOMEM;
goto free_iv;
}
- memcpy(ctx->rec_seq, rec_seq, rec_seq_size);
-
- sg_init_table(sw_ctx->sg_encrypted_data,
- ARRAY_SIZE(sw_ctx->sg_encrypted_data));
- sg_init_table(sw_ctx->sg_plaintext_data,
- ARRAY_SIZE(sw_ctx->sg_plaintext_data));
-
- sg_init_table(sw_ctx->sg_aead_in, 2);
- sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
- sizeof(sw_ctx->aad_space));
- sg_unmark_end(&sw_ctx->sg_aead_in[1]);
- sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
- sg_init_table(sw_ctx->sg_aead_out, 2);
- sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
- sizeof(sw_ctx->aad_space));
- sg_unmark_end(&sw_ctx->sg_aead_out[1]);
- sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
-
- if (!sw_ctx->aead_send) {
- sw_ctx->aead_send = crypto_alloc_aead("gcm(aes)", 0, 0);
- if (IS_ERR(sw_ctx->aead_send)) {
- rc = PTR_ERR(sw_ctx->aead_send);
- sw_ctx->aead_send = NULL;
+ memcpy(cctx->rec_seq, rec_seq, rec_seq_size);
+
+ if (tx) {
+ sg_init_table(sw_ctx->sg_encrypted_data,
+ ARRAY_SIZE(sw_ctx->sg_encrypted_data));
+ sg_init_table(sw_ctx->sg_plaintext_data,
+ ARRAY_SIZE(sw_ctx->sg_plaintext_data));
+
+ sg_init_table(sw_ctx->sg_aead_in, 2);
+ sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
+ sizeof(sw_ctx->aad_space));
+ sg_unmark_end(&sw_ctx->sg_aead_in[1]);
+ sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
+ sg_init_table(sw_ctx->sg_aead_out, 2);
+ sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
+ sizeof(sw_ctx->aad_space));
+ sg_unmark_end(&sw_ctx->sg_aead_out[1]);
+ sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
+ }
+
+ if (!*aead) {
+ *aead = crypto_alloc_aead("gcm(aes)", 0, 0);
+ if (IS_ERR(*aead)) {
+ rc = PTR_ERR(*aead);
+ *aead = NULL;
goto free_rec_seq;
}
}
@@ -740,24 +1145,44 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
- rc = crypto_aead_setkey(sw_ctx->aead_send, keyval,
+ rc = crypto_aead_setkey(*aead, keyval,
TLS_CIPHER_AES_GCM_128_KEY_SIZE);
if (rc)
goto free_aead;
- rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
- if (!rc)
- return 0;
+ rc = crypto_aead_setauthsize(*aead, cctx->tag_size);
+ if (rc)
+ goto free_aead;
+
+ if (!tx) {
+ /* Set up strparser */
+ memset(&cb, 0, sizeof(cb));
+ cb.rcv_msg = tls_queue;
+ cb.parse_msg = tls_read_size;
+
+ strp_init(&sw_ctx->strp, sk, &cb);
+
+ write_lock_bh(&sk->sk_callback_lock);
+ sw_ctx->saved_data_ready = sk->sk_data_ready;
+ sk->sk_data_ready = tls_data_ready;
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ sw_ctx->sk_poll = sk->sk_socket->ops->poll;
+
+ strp_check_rcv(&sw_ctx->strp);
+ }
+
+ goto out;
free_aead:
- crypto_free_aead(sw_ctx->aead_send);
- sw_ctx->aead_send = NULL;
+ crypto_free_aead(*aead);
+ *aead = NULL;
free_rec_seq:
- kfree(ctx->rec_seq);
- ctx->rec_seq = NULL;
+ kfree(cctx->rec_seq);
+ cctx->rec_seq = NULL;
free_iv:
- kfree(ctx->iv);
- ctx->iv = NULL;
+	kfree(cctx->iv);
+	cctx->iv = NULL;
free_priv:
kfree(ctx->priv_ctx);
ctx->priv_ctx = NULL;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 723698416242..aded82da1aea 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1825,7 +1825,7 @@ out:
}
/* We use paged skbs for stream sockets, and limit occupancy to 32768
- * bytes, and a minimun of a full page.
+ * bytes, and a minimum of a full page.
*/
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 1abcc4fc4df1..41722046b937 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -34,9 +34,10 @@ config CFG80211
When built as a module it will be called cfg80211.
+if CFG80211
+
config NL80211_TESTMODE
bool "nl80211 testmode command"
- depends on CFG80211
help
The nl80211 testmode command helps implementing things like
factory calibration or validation tools for wireless chips.
@@ -51,7 +52,6 @@ config NL80211_TESTMODE
config CFG80211_DEVELOPER_WARNINGS
bool "enable developer warnings"
- depends on CFG80211
default n
help
This option enables some additional warnings that help
@@ -68,7 +68,7 @@ config CFG80211_DEVELOPER_WARNINGS
config CFG80211_CERTIFICATION_ONUS
bool "cfg80211 certification onus"
- depends on CFG80211 && EXPERT
+ depends on EXPERT
default n
---help---
You should disable this option unless you are both capable
@@ -159,7 +159,6 @@ config CFG80211_REG_RELAX_NO_IR
config CFG80211_DEFAULT_PS
bool "enable powersave by default"
- depends on CFG80211
default y
help
This option enables powersave mode by default.
@@ -170,7 +169,6 @@ config CFG80211_DEFAULT_PS
config CFG80211_DEBUGFS
bool "cfg80211 DebugFS entries"
- depends on CFG80211
depends on DEBUG_FS
---help---
You can enable this if you want debugfs entries for cfg80211.
@@ -180,7 +178,6 @@ config CFG80211_DEBUGFS
config CFG80211_CRDA_SUPPORT
bool "support CRDA" if EXPERT
default y
- depends on CFG80211
help
You should enable this option unless you know for sure you have no
need for it, for example when using internal regdb (above) or the
@@ -190,7 +187,6 @@ config CFG80211_CRDA_SUPPORT
config CFG80211_WEXT
bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT
- depends on CFG80211
select WEXT_CORE
default y if CFG80211_WEXT_EXPORT
help
@@ -199,11 +195,12 @@ config CFG80211_WEXT
config CFG80211_WEXT_EXPORT
bool
- depends on CFG80211
help
Drivers should select this option if they require cfg80211's
wext compatibility symbols to be exported.
+endif # CFG80211
+
config LIB80211
tristate
default n
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 51aa55618ef7..b12da6ef3c12 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -170,9 +170,28 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
enum nl80211_bss_scan_width scan_width;
struct ieee80211_supported_band *sband =
rdev->wiphy.bands[setup->chandef.chan->band];
- scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
- setup->basic_rates = ieee80211_mandatory_rates(sband,
- scan_width);
+
+ if (setup->chandef.chan->band == NL80211_BAND_2GHZ) {
+ int i;
+
+ /*
+ * Older versions selected the mandatory rates for
+ * 2.4 GHz as well, but were broken in that only
+ * 1 Mbps was regarded as a mandatory rate. Keep
+ * using just 1 Mbps as the default basic rate for
+ * mesh to be interoperable with older versions.
+ */
+ for (i = 0; i < sband->n_bitrates; i++) {
+ if (sband->bitrates[i].bitrate == 10) {
+ setup->basic_rates = BIT(i);
+ break;
+ }
+ }
+ } else {
+ scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
+ setup->basic_rates = ieee80211_mandatory_rates(sband,
+ scan_width);
+ }
}
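
The magic 10 above is in the units of struct ieee80211_rate.bitrate, which are 100 kbit/s, so the loop picks out exactly the 1 Mbps legacy rate:

/* struct ieee80211_rate.bitrate is in units of 100 kbit/s:
 *   bitrate == 10  ->  1 Mbps (the historic 2.4 GHz basic rate)
 */
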
err = cfg80211_chandef_dfs_required(&rdev->wiphy,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9c0dcc8324b0..a910150f8169 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -421,6 +421,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
+ [NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -3923,9 +3924,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
return false;
return true;
case NL80211_CMD_CONNECT:
- /* SAE not supported yet */
- if (auth_type == NL80211_AUTHTYPE_SAE)
+ if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
+ auth_type == NL80211_AUTHTYPE_SAE)
return false;
+
/* FILS with SK PFS or PK not supported yet */
if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
auth_type == NL80211_AUTHTYPE_FILS_PK)
@@ -4487,6 +4489,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
PUT_SINFO_U64(BEACON_RX, rx_beacon);
PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
+ PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
#undef PUT_SINFO
#undef PUT_SINFO_U64
@@ -5848,7 +5851,6 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
return genlmsg_reply(msg, info);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
out:
nlmsg_free(msg);
return -ENOBUFS;
@@ -6329,7 +6331,6 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
nla_put_failure_rcu:
rcu_read_unlock();
nla_put_failure:
- genlmsg_cancel(msg, hdr);
put_failure:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -6718,8 +6719,17 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+ if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+ !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_LOW_POWER) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN)))
return -EOPNOTSUPP;
if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
@@ -9155,6 +9165,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
+ if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+ GENL_SET_ERR_MSG(info,
+ "external auth requires connection ownership");
+ return -EINVAL;
+ }
+ connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT;
+ }
+
wdev_lock(dev->ieee80211_ptr);
err = cfg80211_connect(rdev, dev, &connect, connkeys,
@@ -12463,6 +12482,41 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
return ret;
}
+static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct cfg80211_external_auth_params params;
+
+ if (!rdev->ops->external_auth)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_SSID])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_BSSID])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_STATUS_CODE])
+ return -EINVAL;
+
+ memset(&params, 0, sizeof(params));
+
+ params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+ if (params.ssid.ssid_len == 0 ||
+ params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+ return -EINVAL;
+ memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]),
+ params.ssid.ssid_len);
+
+ memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
+ ETH_ALEN);
+
+ params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
+
+ return rdev_external_auth(rdev, dev, &params);
+}
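
This handler closes the loop of the external-authentication (SAE offload) flow: the driver asks for help via cfg80211_external_auth_request() (added later in this patch), userspace performs the SAE exchange, then answers with NL80211_CMD_EXTERNAL_AUTH, which lands here and reaches the driver through rdev_external_auth(). A hypothetical driver-side callback, purely illustrative — the parameter struct is from this patch, but the body and the drv_resume_assoc() helper are invented:

static int drv_external_auth(struct wiphy *wiphy, struct net_device *dev,
			     struct cfg80211_external_auth_params *params)
{
	/* Userspace reported the SAE result for params->bssid/ssid. */
	if (params->status != WLAN_STATUS_SUCCESS)
		return -ECONNABORTED;	/* abort the pending connection */

	return drv_resume_assoc(dev, params->bssid); /* hypothetical */
}
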
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -13358,6 +13412,14 @@ static const struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_EXTERNAL_AUTH,
+ .doit = nl80211_external_auth,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
@@ -13672,7 +13734,6 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13720,7 +13781,6 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13808,7 +13868,6 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13884,7 +13943,6 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13924,7 +13982,6 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13954,7 +14011,6 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13991,7 +14047,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14024,7 +14079,6 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14065,7 +14119,6 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
@@ -14104,7 +14157,6 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14159,7 +14211,6 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14205,7 +14256,6 @@ static void nl80211_send_remain_on_chan_event(
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14319,7 +14369,6 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_conn_failed);
@@ -14356,7 +14405,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
return true;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
return true;
}
@@ -14440,7 +14488,6 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
return -ENOBUFS;
}
@@ -14484,7 +14531,6 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
@@ -14693,7 +14739,6 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14751,7 +14796,6 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14804,7 +14848,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14886,12 +14929,67 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+ struct sta_opmode_info *sta_opmode,
+ gfp_t gfp)
+{
+ struct sk_buff *msg;
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ void *hdr;
+
+ if (WARN_ON(!mac))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_MLME, gfp);
+
+ return;
+
+nla_put_failure:
+ nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);
+
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
- u64 cookie, bool acked, gfp_t gfp)
+ u64 cookie, bool acked, s32 ack_signal,
+ bool is_valid_ack_signal, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -14916,7 +15014,9 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
NL80211_ATTR_PAD) ||
- (acked && nla_put_flag(msg, NL80211_ATTR_ACK)))
+ (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
+ (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL,
+ ack_signal)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -14926,7 +15026,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_probe_status);
@@ -14971,8 +15070,6 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
nla_put_failure:
spin_unlock_bh(&rdev->beacon_registrations_lock);
- if (hdr)
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_report_obss_beacon);
@@ -15188,7 +15285,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_tdls_oper_request);
@@ -15333,8 +15429,6 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
return;
nla_put_failure:
- if (hdr)
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
@@ -15369,6 +15463,47 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
nlmsg_free(msg);
}
+int cfg80211_external_auth_request(struct net_device *dev,
+ struct cfg80211_external_auth_params *params,
+ gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+
+ if (!wdev->conn_owner_nlportid)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+ nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) ||
+ nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ params->action) ||
+ nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
+ nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len,
+ params->ssid.ssid))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+ wdev->conn_owner_nlportid);
+ return 0;
+
+ nla_put_failure:
+ nlmsg_free(msg);
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL(cfg80211_external_auth_request);
+
/* initialisation/exit functions */
int __init nl80211_init(void)
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 0c06240d25af..84f23ae015fc 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1190,4 +1190,19 @@ static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev,
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
+
+static inline int
+rdev_external_auth(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_external_auth_params *params)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_external_auth(&rdev->wiphy, dev, params);
+ if (rdev->ops->external_auth)
+ ret = rdev->ops->external_auth(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index fdb3646274a5..701cfd7acc1b 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1032,6 +1032,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
wdev->current_bss = NULL;
wdev->ssid_len = 0;
wdev->conn_owner_nlportid = 0;
+ kzfree(wdev->connect_keys);
+ wdev->connect_keys = NULL;
nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index bcfedd39e7a3..5152938b358d 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2319,6 +2319,29 @@ TRACE_EVENT(rdev_del_pmk,
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
);
+TRACE_EVENT(rdev_external_auth,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_external_auth_params *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(bssid)
+ __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1)
+ __field(u16, status)
+ ),
+ TP_fast_assign(WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(bssid, params->bssid);
+ memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1);
+ memcpy(__entry->ssid, params->ssid.ssid,
+ params->ssid.ssid_len);
+ __entry->status = params->status;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
+ ", ssid: %s, status: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->bssid, __entry->ssid, __entry->status)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
diff --git a/net/wireless/util.c b/net/wireless/util.c
index c69160694b6c..d112e9a89364 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -420,7 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype)
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct {
@@ -434,7 +435,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return -1;
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
if (skb->len < hdrlen + 8)
return -1;
diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c
index 5511f989ef47..b4c464594a5e 100644
--- a/net/wireless/wext-proc.c
+++ b/net/wireless/wext-proc.c
@@ -142,7 +142,7 @@ static const struct file_operations wireless_seq_fops = {
int __net_init wext_proc_init(struct net *net)
{
/* Create /proc/net/wireless entry */
- if (!proc_create("wireless", S_IRUGO, net->proc_net,
+ if (!proc_create("wireless", 0444, net->proc_net,
&wireless_seq_fops))
return -ENOMEM;
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 0917f047f2cf..64b415e93f6a 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -212,16 +212,16 @@ int __init x25_proc_init(void)
if (!proc_mkdir("x25", init_net.proc_net))
return -ENOMEM;
- if (!proc_create("x25/route", S_IRUGO, init_net.proc_net,
- &x25_seq_route_fops))
+ if (!proc_create("x25/route", 0444, init_net.proc_net,
+ &x25_seq_route_fops))
goto out;
- if (!proc_create("x25/socket", S_IRUGO, init_net.proc_net,
- &x25_seq_socket_fops))
+ if (!proc_create("x25/socket", 0444, init_net.proc_net,
+ &x25_seq_socket_fops))
goto out;
- if (!proc_create("x25/forward", S_IRUGO, init_net.proc_net,
- &x25_seq_forward_fops))
+ if (!proc_create("x25/forward", 0444, init_net.proc_net,
+ &x25_seq_forward_fops))
goto out;
return 0;
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index db0b1315d577..9c214ec681ac 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -335,8 +335,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
}
}
- pr_debug("invalid PLP frame %02X %02X %02X\n",
- frame[0], frame[1], frame[2]);
+ pr_debug("invalid PLP frame %3ph\n", frame);
return X25_ILLEGAL;
}
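
%*ph is the printk extension for dumping a small buffer as space-separated hex bytes, which is what the open-coded three-%02X format string was doing by hand. A usage sketch:

u8 frame[3] = { 0x10, 0x00, 0x03 };

pr_debug("invalid PLP frame %3ph\n", frame);	/* prints "10 00 03" */
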
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 8e70291e586a..e87d6c4dd5b6 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -217,7 +217,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
if (skb->len <= mtu)
goto ok;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
goto ok;
}
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index ccfdc7115a83..a00ec715aa46 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -283,7 +283,7 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name)
struct crypto_comp *tfm;
/* This can be any valid CPU ID so we don't need locking. */
- tfm = __this_cpu_read(*pos->tfms);
+ tfm = this_cpu_read(*pos->tfms);
if (!strcmp(crypto_comp_name(tfm), alg_name)) {
pos->users++;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2b7babb66175..0e065db6c7c0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1458,10 +1458,13 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
static int xfrm_get_tos(const struct flowi *fl, int family)
{
const struct xfrm_policy_afinfo *afinfo;
- int tos = 0;
+ int tos;
afinfo = xfrm_policy_get_afinfo(family);
- tos = afinfo ? afinfo->get_tos(fl) : 0;
+ if (!afinfo)
+ return 0;
+
+ tos = afinfo->get_tos(fl);
rcu_read_unlock();
@@ -1891,7 +1894,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
spin_unlock(&pq->hold_queue.lock);
dst_hold(xfrm_dst_path(dst));
- dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
+ dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst))
goto purge_queue;
@@ -2729,14 +2732,14 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
while (dst->xfrm) {
const struct xfrm_state *xfrm = dst->xfrm;
+ dst = xfrm_dst_child(dst);
+
if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
continue;
if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR)
daddr = xfrm->coaddr;
else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
daddr = &xfrm->id.daddr;
-
- dst = xfrm_dst_child(dst);
}
return daddr;
}
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 6d5f85f4e672..ed06903cd84d 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -79,7 +79,7 @@ static const struct file_operations xfrm_statistics_seq_fops = {
int __net_init xfrm_proc_init(struct net *net)
{
- if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net,
+ if (!proc_create("xfrm_stat", 0444, net->proc_net,
&xfrm_statistics_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 1d38c6acf8af..9e3a5e85f828 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -660,7 +660,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
} else {
XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
- xo->seq.low = oseq = oseq + 1;
+ xo->seq.low = oseq + 1;
xo->seq.hi = oseq_hi;
oseq += skb_shinfo(skb)->gso_segs;
}
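
The dropped self-assignment matters because oseq is incremented again just below. A worked example of the off-by-one, assuming oseq == 100 and gso_segs == 3:

/* Before the fix:
 *   xo->seq.low = oseq = 101;   oseq += 3;   -> oseq == 104
 *   i.e. one replay-counter slot is burnt per GSO packet.
 * After the fix:
 *   xo->seq.low = 101;          oseq += 3;   -> oseq == 103
 *   matching the three segments actually emitted (101..103).
 */
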
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 54e21f19d722..f9d2f2233f09 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2056,6 +2056,11 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
struct xfrm_mgr *km;
struct xfrm_policy *pol = NULL;
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
+#endif
+
if (!optval && !optlen) {
xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7f52b8eb177d..080035f056d9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -121,22 +121,17 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
struct xfrm_replay_state_esn *rs;
- if (p->flags & XFRM_STATE_ESN) {
- if (!rt)
- return -EINVAL;
+ if (!rt)
+ return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0;
- rs = nla_data(rt);
+ rs = nla_data(rt);
- if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
- return -EINVAL;
-
- if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
- nla_len(rt) != sizeof(*rs))
- return -EINVAL;
- }
+ if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
+ return -EINVAL;
- if (!rt)
- return 0;
+ if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
+ nla_len(rt) != sizeof(*rs))
+ return -EINVAL;
/* As only ESP and AH support ESN feature. */
if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index ec3fc8d88e87..2c2a587e0942 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
+hostprogs-y += cpustat
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -144,6 +146,7 @@ always += xdp_monitor_kern.o
always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
+always += cpustat_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -188,6 +191,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 69806d74fa53..b1a310c3ae89 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -67,6 +67,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
+ bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -96,6 +97,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_SOCK_OPS;
} else if (is_sk_skb) {
prog_type = BPF_PROG_TYPE_SK_SKB;
+ } else if (is_sk_msg) {
+ prog_type = BPF_PROG_TYPE_SK_MSG;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -113,7 +116,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket || is_sockops || is_sk_skb) {
+ if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
if (is_socket)
event += 6;
else
@@ -589,7 +592,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
memcmp(shname, "socket", 6) == 0 ||
memcmp(shname, "cgroup/", 7) == 0 ||
memcmp(shname, "sockops", 7) == 0 ||
- memcmp(shname, "sk_skb", 6) == 0) {
+ memcmp(shname, "sk_skb", 6) == 0 ||
+ memcmp(shname, "sk_msg", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644
index 000000000000..68c84da065b1
--- /dev/null
+++ b/samples/bpf/cpustat_kern.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on the 96boards HiKey with its octa Cortex-A53 CPUs.
+ *
+ * Every CPU has three idle states (cstates):
+ * WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU has five operating points (pstates):
+ * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumptions; other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * The my_map structure records the cstate/pstate index and
+ * timestamp (Idx, Ts); when a new event comes in, we update
+ * the pair to the new state index and timestamp (Idx`, Ts`).
+ *
+ * From (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one array as below for recording state index
+ * and timestamp, with separate records for cstate and pstate:
+ *
+ * +--------------------------+
+ * | cstate timestamp |
+ * +--------------------------+
+ * | cstate index |
+ * +--------------------------+
+ * | pstate timestamp |
+ * +--------------------------+
+ * | pstate index |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME 0
+#define MAP_OFF_CSTATE_IDX 1
+#define MAP_OFF_PSTATE_TIME 2
+#define MAP_OFF_PSTATE_IDX 3
+#define MAP_OFF_NUM 4
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+ u64 pad;
+ u32 state;
+ u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+ u32 i;
+
+ for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+ if (frequency == cpu_opps[i])
+ return i;
+ }
+
+ return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+ u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+	if (ctx->cpu_id >= MAX_CPU)
+ return 0;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+ cts = bpf_map_lookup_elem(&my_map, &key);
+ if (!cts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ prev_state = *cstate;
+ *cstate = ctx->state;
+
+ if (!*cts) {
+ *cts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *cts;
+ *cts = cur_ts;
+
+ /*
+ * When state is not (u32)-1, the CPU is entering an idle
+ * state; in this case we need to record the interval for
+ * the pstate.
+ *
+ * OPP2
+ * +---------------------+
+ * OPP1 | |
+ * ---------+ |
+ * | Idle state
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ if (ctx->state != (u32)-1) {
+
+		/* record pstate only after the first cpu_frequency event */
+ if (!*pts)
+ return 0;
+
+ delta = cur_ts - *pts;
+
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ /*
+ * When state equals (u32)-1, the CPU has just exited a
+ * specific idle state; in this case we need to record
+ * the interval for the cstate.
+ *
+ * OPP2
+ * -----------+
+ * | OPP1
+ * | +-----------
+ * | Idle state |
+ * +---------------------+
+ *
+ * |<- cstate duration ->|
+ * ^ ^
+ * cts cur_ts
+ */
+ } else {
+
+ key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+ val = bpf_map_lookup_elem(&cstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+ }
+
+ /* Update the pstate timestamp as the new start time */
+ if (*pts)
+ *pts = cur_ts;
+
+ return 0;
+}
+
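+/*
+ * cpu_frequency handler: accounts the interval spent at the previous
+ * OPP (unless the CPU is idle) and restarts the pstate timestamp.
+ */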
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+ u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ prev_state = *pstate;
+ *pstate = ctx->state;
+
+ if (!*pts) {
+ *pts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *pts;
+ *pts = cur_ts;
+
+ /* When the CPU is idle, bail out and skip the pstate statistics */
+ if (*cstate != (u32)(-1))
+ return 0;
+
+ /*
+ * The cpu changes to a different OPP (in the diagram below the
+ * frequency changes from OPP3 to OPP1); we need to record the
+ * interval for the previous frequency OPP3 and update the
+ * timestamp as the start time for the new frequency OPP1.
+ *
+ * OPP3
+ * +---------------------+
+ * OPP2 | |
+ * ---------+ |
+ * | OPP1
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644
index 000000000000..2b4cd1ae57c5
--- /dev/null
+++ b/samples/bpf/cpustat_user.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+#define MAX_STARS 40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ "208000"
+#define CPUFREQ_HIGHEST_FREQ "12000000"
+
+struct cpu_stat_data {
+ unsigned long cstate[MAX_CSTATE_ENTRIES];
+ unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+ int i, j;
+ char state_str[sizeof("cstate-9")];
+ struct cpu_stat_data *data;
+
+ /* Clear screen */
+ printf("\033[2J");
+
+ /* Header */
+ printf("\nCPU states statistics:\n");
+ printf("%-10s ", "state(ms)");
+
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ sprintf(state_str, "cstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ sprintf(state_str, "pstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ printf("\n");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ data = &stat_data[j];
+
+ printf("CPU-%-6d ", j);
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->cstate[i] / 1000000);
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->pstate[i] / 1000000);
+
+ printf("\n");
+ }
+}
+
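+/* Pull the accumulated per-CPU duration counters out of the BPF maps */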
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+ unsigned long key, value;
+ int c, i;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ key = c * MAX_CSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(cstate_fd, &key, &value);
+ stat_data[c].cstate[i] = value;
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ key = c * MAX_PSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(pstate_fd, &key, &value);
+ stat_data[c].pstate[i] = value;
+ }
+ }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the running task's affinity to each cpu in turn so the
+ * scheduler has to wake that cpu; as a result every cpu is woken up
+ * once and produces the ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+ int rcpu, i, ret;
+ cpu_set_t cpumask;
+ cpu_set_t original_cpumask;
+
+ ret = sysconf(_SC_NPROCESSORS_CONF);
+ if (ret < 0)
+ return -1;
+
+ rcpu = sched_getcpu();
+ if (rcpu < 0)
+ return -1;
+
+ /* Keep track of the CPUs we will run on */
+ sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+ for (i = 0; i < ret; i++) {
+
+ /* Pointless to wake up ourself */
+ if (i == rcpu)
+ continue;
+
+ /* Pointless to wake CPUs we will not run on */
+ if (!CPU_ISSET(i, &original_cpumask))
+ continue;
+
+ CPU_ZERO(&cpumask);
+ CPU_SET(i, &cpumask);
+
+ sched_setaffinity(0, sizeof(cpumask), &cpumask);
+ }
+
+ /* Enable all the CPUs of the original mask */
+ sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+ return 0;
+}
+
+/*
+ * It's possible that the frequency doesn't change for a long time, so
+ * no 'trace_cpu_frequency' ftrace event is seen for a long period; this
+ * introduces a big deviation into the pstate statistics.
+ *
+ * To work around this, the code below forces 'scaling_max_freq' down to
+ * 208MHz to trigger a 'trace_cpu_frequency' event, then restores it to a
+ * high value so the policy's real maximum frequency applies again.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+ int len, fd;
+
+ fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+ if (fd < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ return fd;
+ }
+
+ len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+ len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+err:
+ close(fd);
+ return len;
+}
+
+static void int_exit(int sig)
+{
+ cpu_stat_inject_cpu_idle_event();
+ cpu_stat_inject_cpu_frequency_event();
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ exit(0);
+}
+
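+/*
+ * Load the BPF programs, inject one idle and one frequency event so the
+ * maps start from fresh timestamps, then dump statistics every 5 seconds
+ * until SIGINT/SIGTERM triggers a final update and exit.
+ */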
+int main(int argc, char **argv)
+{
+ char filename[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ ret = cpu_stat_inject_cpu_idle_event();
+ if (ret < 0)
+ return 1;
+
+ ret = cpu_stat_inject_cpu_frequency_event();
+ if (ret < 0)
+ return 1;
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ while (1) {
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index efdc16d195ff..9a8db7bd6db4 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -52,7 +52,8 @@ int _gre_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -92,7 +93,8 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
key.tunnel_label = 0xabcde;
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
index 8ee0371a100a..9f6174236856 100755
--- a/samples/bpf/test_cgrp2_sock.sh
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -61,6 +61,7 @@ cleanup_and_exit()
[ -n "$msg" ] && echo "ERROR: $msg"
+ test_cgrp2_sock -d ${CGRP_MNT}/sockopts
ip li del cgrp2_sock
umount ${CGRP_MNT}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
index fc4e64d00cb3..0f396a86e0cb 100755
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -28,6 +28,9 @@ function attach_bpf {
}
function cleanup {
+ if [ -d /tmp/cgroupv2/foo ]; then
+ test_cgrp2_sock -d /tmp/cgroupv2/foo
+ fi
ip link del veth0b
ip netns delete at_ns0
umount /tmp/cgroupv2
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 43ce049996ee..c265863ccdf9 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -23,7 +23,8 @@ function config_device {
function add_gre_tunnel {
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
@@ -43,7 +44,7 @@ function add_ip6gretap_tunnel {
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+ ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
local ::11 remote ::22
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index a77a583d94d4..7068fbdde951 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
{
char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+ char addr_fmt[] = "Address recorded on event: %llx";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
struct bpf_perf_event_value value_buf;
@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
else
bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
+ if (ctx->addr != 0)
+ bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index bf4f1b6d9a52..56f7a259a7c9 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
/* Intel Instruction Retired */
.config = 0xc0,
};
+ struct perf_event_attr attr_type_raw_lock_load = {
+ .sample_freq = SAMPLE_FREQ,
+ .freq = 1,
+ .type = PERF_TYPE_RAW,
+ /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+ .config = 0x21d0,
+ /* Request to record lock address from PEBS */
+ .sample_type = PERF_SAMPLE_ADDR,
+ /* Record address value requires precise event */
+ .precise_ip = 2,
+ };
printf("Test HW_CPU_CYCLES\n");
test_perf_event_all_cpu(&attr_type_hw);
@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
test_perf_event_all_cpu(&attr_type_raw);
test_perf_event_task(&attr_type_raw);
+ printf("Test Lock Load\n");
+ test_perf_event_all_cpu(&attr_type_raw_lock_load);
+ test_perf_event_task(&attr_type_raw_lock_load);
+
printf("*** PASS ***\n");
}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index d54e91eb6cbf..b701b5c21342 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -20,6 +20,7 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
+#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
@@ -75,6 +76,7 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "SN";
char filename[256];
int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
ifindex_in = strtoul(argv[optind], NULL, 0);
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index 0e349b80686e..ba942e3ead89 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct
HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
@@ -16,7 +17,6 @@ HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
bpf-direct-objs := bpf-direct.o
# Try to match the kernel target.
-ifndef CROSS_COMPILE
ifndef CONFIG_64BIT
# s390 has -m31 flag to build 31 bit binaries
@@ -35,12 +35,4 @@ HOSTLOADLIBES_bpf-fancy += $(MFLAG)
HOSTLOADLIBES_dropper += $(MFLAG)
endif
always := $(hostprogs-m)
-else
-# MIPS system calls are defined based on the -mabi that is passed
-# to the toolchain which may or may not be a valid option
-# for the host toolchain. So disable tests if target architecture
-# is MIPS but the host isn't.
-ifndef CONFIG_MIPS
-always := $(hostprogs-m)
-endif
endif
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
index 73f1da4d116c..9bf2881bd11b 100644
--- a/samples/sockmap/Makefile
+++ b/samples/sockmap/Makefile
@@ -2,7 +2,7 @@
hostprogs-y := sockmap
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 52b0053274f4..9ad5ba79c85a 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -43,6 +43,42 @@ struct bpf_map_def SEC("maps") sock_map = {
.max_entries = 20,
};
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+};
+
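+/*
+ * Arrays used by user space to configure the sk_msg programs: bytes to
+ * apply the verdict to, bytes to cork, and the [start, end) range
+ * handed to bpf_msg_pull_data().
+ */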
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2
+};
+
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
@@ -105,4 +141,165 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
return 0;
}
+
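+/* sk_msg1: quiet pass-through exercising the apply/cork/pull helpers */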
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ return SK_PASS;
+}
+
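+/*
+ * sk_msg2: noisy variant of sk_msg1, printing helper errors and the
+ * data length before/after bpf_msg_pull_data()
+ */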
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+ int err1 = -1, err2 = -1, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+ len1, err1, err2);
+ return SK_PASS;
+}
+
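+/* sk_msg3: like sk_msg1, but redirects the msg via sock_map_redir */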
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+}
+
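+/* sk_msg4: noisy variant of sk_msg3 */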
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+ int err1 = 0, err2 = 0, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n",
+ len1, err1, err2);
+ return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+}
+
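+/* sk_msg5: pass only if an apply_bytes limit is set and accepted, else drop */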
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes) {
+ ret = bpf_msg_apply_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ } else {
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
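+
+/* sk_msg6: cork only when fewer than cork_bytes are queued so far */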
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes) {
+ if (((__u64)data_end - (__u64)data) >= *bytes)
+ return SK_PASS;
+ ret = bpf_msg_cork_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+
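+/* sk_msg7: exercise the helpers, then unconditionally drop */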
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+
+ return SK_DROP;
+}
+
+
char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
new file mode 100755
index 000000000000..6d8cc40cca22
--- /dev/null
+++ b/samples/sockmap/sockmap_test.sh
@@ -0,0 +1,450 @@
+# Test a bunch of positive cases to verify basic functionality
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+ for i in 1 10 100; do
+ for l in 1 10 100; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+ done
+ done
+done
+done
+done
+
+#Test max iov
+t="sendmsg"
+r=1
+i=1024
+l=1
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+
+# Test max iov with 1k send
+
+t="sendmsg"
+r=1
+i=1024
+l=1024
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+
+# Test apply with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply and redirect with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply and redirect with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply and redirect with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with 1B; not really useful, but test it anyway
+r=1
+i=1024
+l=1024
+prog="--txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
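+# Test cork with 1B and redirect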
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_redir --txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Mix and match cork and apply; not really useful, but valid programs
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Try again with larger sizes so we hit the overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Again with larger sizes so we hit the overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Try again with larger sizes so we hit the overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Again with larger sizes so we hit the overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Tests for bpf_msg_pull_data()
+for i in `seq 99 100 1600`; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+for i in `seq 199 100 1600`; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
+echo $TEST
+$TEST
+sleep 2
+
+# Run through the gamut again with start and end
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+ for i in 1 10 100; do
+ for l in 1 10 100; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
+ echo $TEST
+ $TEST
+ sleep 2
+ done
+ done
+done
+done
+done
+
+# A few targeted tests to cover specific code paths
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 7c25c0c112bc..07aa237221d1 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -29,6 +29,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
+#include <sys/sendfile.h>
#include <linux/netlink.h>
#include <linux/socket.h>
@@ -54,6 +55,16 @@ void running_handler(int a);
/* global sockets */
int s1, s2, c1, c2, p1, p2;
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"cgroup", required_argument, NULL, 'c' },
@@ -62,6 +73,16 @@ static const struct option long_options[] = {
{"iov_count", required_argument, NULL, 'i' },
{"length", required_argument, NULL, 'l' },
{"test", required_argument, NULL, 't' },
+ {"data_test", no_argument, NULL, 'd' },
+ {"txmsg", no_argument, &txmsg_pass, 1 },
+ {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
+ {"txmsg_redir", no_argument, &txmsg_redir, 1 },
+ {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
+ {"txmsg_drop", no_argument, &txmsg_drop, 1 },
+ {"txmsg_apply", required_argument, NULL, 'a'},
+ {"txmsg_cork", required_argument, NULL, 'k'},
+ {"txmsg_start", required_argument, NULL, 's'},
+ {"txmsg_end", required_argument, NULL, 'e'},
{0, 0, NULL, 0 }
};
@@ -195,19 +216,71 @@ struct msg_stats {
struct timespec end;
};
+struct sockmap_options {
+ int verbose;
+ bool base;
+ bool sendpage;
+ bool data_test;
+ bool drop_expected;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+ struct msg_stats *s,
+ struct sockmap_options *opt)
+{
+ bool drop = opt->drop_expected;
+ unsigned char k = 0;
+ FILE *file;
+ int i, fp;
+
+ file = fopen(".sendpage_tst.tmp", "w+");
+ if (!file) {
+ perror("open sendpage tmp file failed");
+ return -errno;
+ }
+ for (i = 0; i < iov_length * cnt; i++, k++)
+ fwrite(&k, sizeof(char), 1, file);
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+ fclose(file);
+
+ fp = open(".sendpage_tst.tmp", O_RDONLY);
+ if (fp < 0) {
+ perror("open sendpage tmp file for read failed");
+ return -errno;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendfile(fd, fp, NULL, iov_length);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ close(fp);
+ return sent;
+ } else if (drop && sent >= 0) {
+ printf("sendpage loop error expected: %i\n", sent);
+ close(fp);
+ return -EIO;
+ }
+
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ close(fp);
+ return 0;
+}
+
static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
- struct msg_stats *s, bool tx)
+ struct msg_stats *s, bool tx,
+ struct sockmap_options *opt)
{
struct msghdr msg = {0};
int err, i, flags = MSG_NOSIGNAL;
struct iovec *iov;
+ unsigned char k;
+ bool data_test = opt->data_test;
+ bool drop = opt->drop_expected;
iov = calloc(iov_count, sizeof(struct iovec));
if (!iov)
return errno;
+ k = 0;
for (i = 0; i < iov_count; i++) {
- char *d = calloc(iov_length, sizeof(char));
+ unsigned char *d = calloc(iov_length, sizeof(char));
if (!d) {
fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
@@ -215,21 +288,34 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
iov[i].iov_base = d;
iov[i].iov_len = iov_length;
+
+ if (data_test && tx) {
+ int j;
+
+ for (j = 0; j < iov_length; j++)
+ d[j] = k++;
+ }
}
msg.msg_iov = iov;
msg.msg_iovlen = iov_count;
+ k = 0;
if (tx) {
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
int sent = sendmsg(fd, &msg, flags);
- if (sent < 0) {
+ if (!drop && sent < 0) {
perror("send loop error:");
goto out_errno;
+ } else if (drop && sent >= 0) {
+ printf("send loop error expected: %i\n", sent);
+ errno = -EIO;
+ goto out_errno;
}
- s->bytes_sent += sent;
+ if (sent > 0)
+ s->bytes_sent += sent;
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
} else {
@@ -272,6 +358,26 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
s->bytes_recvd += recv;
+
+ if (data_test) {
+ int j;
+
+ for (i = 0; i < msg.msg_iovlen; i++) {
+ unsigned char *d = iov[i].iov_base;
+
+ for (j = 0;
+ j < iov[i].iov_len && recv; j++) {
+ if (d[j] != k++) {
+ errno = -EIO;
+ fprintf(stderr,
+ "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+ i, j, d[j], k - 1, d[j+1], k + 1);
+ goto out_errno;
+ }
+ recv--;
+ }
+ }
+ }
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
}
@@ -300,7 +406,7 @@ static inline float recvdBps(struct msg_stats s)
}
static int sendmsg_test(int iov_count, int iov_buf, int cnt,
- int verbose, bool base)
+ struct sockmap_options *opt)
{
float sent_Bps = 0, recvd_Bps = 0;
int rx_fd, txpid, rxpid, err = 0;
@@ -309,14 +415,20 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
errno = 0;
- if (base)
+ if (opt->base)
rx_fd = p1;
else
rx_fd = p2;
rxpid = fork();
if (rxpid == 0) {
- err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
+ if (opt->drop_expected)
+ exit(1);
+
+ if (opt->sendpage)
+ iov_count = 1;
+ err = msg_loop(rx_fd, iov_count, iov_buf,
+ cnt, &s, false, opt);
if (err)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -339,7 +451,12 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
txpid = fork();
if (txpid == 0) {
- err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+ if (opt->sendpage)
+ err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+ else
+ err = msg_loop(c1, iov_count, iov_buf,
+ cnt, &s, true, opt);
+
if (err)
fprintf(stderr,
"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -364,7 +481,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
return err;
}
-static int forever_ping_pong(int rate, int verbose)
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
{
struct timeval timeout;
char buf[1024] = {0};
@@ -429,7 +546,7 @@ static int forever_ping_pong(int rate, int verbose)
if (rate)
sleep(rate);
- if (verbose) {
+ if (opt->verbose) {
printf(".");
fflush(stdout);
@@ -443,20 +560,34 @@ enum {
PING_PONG,
SENDMSG,
BASE,
+ BASE_SENDPAGE,
+ SENDPAGE,
};
int main(int argc, char **argv)
{
- int iov_count = 1, length = 1024, rate = 1, verbose = 0;
+ int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
int opt, longindex, err, cg_fd = 0;
+ struct sockmap_options options = {0};
int test = PING_PONG;
char filename[256];
- while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
long_options, &longindex)) != -1) {
switch (opt) {
- /* Cgroup configuration */
+ case 's':
+ txmsg_start = atoi(optarg);
+ break;
+ case 'e':
+ txmsg_end = atoi(optarg);
+ break;
+ case 'a':
+ txmsg_apply = atoi(optarg);
+ break;
+ case 'k':
+ txmsg_cork = atoi(optarg);
+ break;
case 'c':
cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
if (cg_fd < 0) {
@@ -470,7 +601,7 @@ int main(int argc, char **argv)
rate = atoi(optarg);
break;
case 'v':
- verbose = 1;
+ options.verbose = 1;
break;
case 'i':
iov_count = atoi(optarg);
@@ -478,6 +609,9 @@ int main(int argc, char **argv)
case 'l':
length = atoi(optarg);
break;
+ case 'd':
+ options.data_test = true;
+ break;
case 't':
if (strcmp(optarg, "ping") == 0) {
test = PING_PONG;
@@ -485,11 +619,17 @@ int main(int argc, char **argv)
test = SENDMSG;
} else if (strcmp(optarg, "base") == 0) {
test = BASE;
+ } else if (strcmp(optarg, "base_sendpage") == 0) {
+ test = BASE_SENDPAGE;
+ } else if (strcmp(optarg, "sendpage") == 0) {
+ test = SENDPAGE;
} else {
usage(argv);
return -1;
}
break;
+ case 0:
+ break;
case 'h':
default:
usage(argv);
@@ -515,16 +655,16 @@ int main(int argc, char **argv)
/* catch SIGINT */
signal(SIGINT, running_handler);
- /* If base test skip BPF setup */
- if (test == BASE)
- goto run;
-
if (load_bpf_file(filename)) {
fprintf(stderr, "load_bpf_file: (%s) %s\n",
filename, strerror(errno));
return 1;
}
+ /* If base test skip BPF setup */
+ if (test == BASE || test == BASE_SENDPAGE)
+ goto run;
+
/* Attach programs to sockmap */
err = bpf_prog_attach(prog_fd[0], map_fd[0],
BPF_SK_SKB_STREAM_PARSER, 0);
@@ -557,15 +697,129 @@ run:
goto out;
}
- if (test == PING_PONG)
- err = forever_ping_pong(rate, verbose);
- else if (test == SENDMSG)
- err = sendmsg_test(iov_count, length, rate, verbose, false);
- else if (test == BASE)
- err = sendmsg_test(iov_count, length, rate, verbose, true);
+ /* Attach txmsg program to sockmap */
+ if (txmsg_pass)
+ tx_prog_fd = prog_fd[3];
+ else if (txmsg_noisy)
+ tx_prog_fd = prog_fd[4];
+ else if (txmsg_redir)
+ tx_prog_fd = prog_fd[5];
+ else if (txmsg_redir_noisy)
+ tx_prog_fd = prog_fd[6];
+ else if (txmsg_drop)
+ tx_prog_fd = prog_fd[9];
+ /* apply and cork must be last */
+ else if (txmsg_apply)
+ tx_prog_fd = prog_fd[7];
+ else if (txmsg_cork)
+ tx_prog_fd = prog_fd[8];
else
+ tx_prog_fd = 0;
+
+ if (tx_prog_fd) {
+ int redir_fd, i = 0;
+
+ err = bpf_prog_attach(tx_prog_fd,
+ map_fd[1], BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ if (txmsg_redir || txmsg_redir_noisy)
+ redir_fd = c2;
+ else
+ redir_fd = c1;
+
+ err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ if (txmsg_apply) {
+ err = bpf_map_update_elem(map_fd[3],
+ &i, &txmsg_apply, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_cork) {
+ err = bpf_map_update_elem(map_fd[4],
+ &i, &txmsg_cork, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_start) {
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_start, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_end) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_end, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+ }
+
+ if (txmsg_drop)
+ options.drop_expected = true;
+
+ if (test == PING_PONG)
+ err = forever_ping_pong(rate, &options);
+ else if (test == SENDMSG) {
+ options.base = false;
+ options.sendpage = false;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == SENDPAGE) {
+ options.base = false;
+ options.sendpage = true;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == BASE) {
+ options.base = true;
+ options.sendpage = false;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == BASE_SENDPAGE) {
+ options.base = true;
+ options.sendpage = true;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else
fprintf(stderr, "unknown test\n");
out:
+ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
close(s1);
close(s2);
close(p1);
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 47cddf32aeba..4f2b25d43ec9 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool
objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
+objtool_args += $(if $(part-of-module), --module,)
+
ifndef CONFIG_FRAME_POINTER
objtool_args += --no-fp
endif
@@ -264,6 +266,12 @@ objtool_args += --no-unreachable
else
objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
endif
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_CFLAGS),)
+ objtool_args += --retpoline
+endif
+endif
+
ifdef CONFIG_MODVERSIONS
objtool_o = $(@D)/.tmp_$(@F)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 5589bae34af6..a6f538b31ad6 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -297,11 +297,11 @@ cmd_dt_S_dtb= \
echo '\#include <asm-generic/vmlinux.lds.h>'; \
echo '.section .dtb.init.rodata,"a"'; \
echo '.balign STRUCT_ALIGNMENT'; \
- echo '.global __dtb_$(*F)_begin'; \
- echo '__dtb_$(*F)_begin:'; \
+ echo '.global __dtb_$(subst -,_,$(*F))_begin'; \
+ echo '__dtb_$(subst -,_,$(*F))_begin:'; \
echo '.incbin "$<" '; \
- echo '__dtb_$(*F)_end:'; \
- echo '.global __dtb_$(*F)_end'; \
+ echo '__dtb_$(subst -,_,$(*F))_end:'; \
+ echo '.global __dtb_$(subst -,_,$(*F))_end'; \
echo '.balign STRUCT_ALIGNMENT'; \
) > $@
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index fa3d39b6f23b..449b68c4c90c 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -93,14 +93,6 @@
* (Note: it'd be easy to port over the complete mkdep state machine,
* but I don't think the added complexity is worth it)
*/
-/*
- * Note 2: if somebody writes HELLO_CONFIG_BOOM in a file, it will depend onto
- * CONFIG_BOOM. This could seem a bug (not too hard to fix), but please do not
- * fix it! Some UserModeLinux files (look at arch/um/) call CONFIG_BOOM as
- * UML_CONFIG_BOOM, to avoid conflicts with /usr/include/linux/autoconf.h,
- * through arch/um/include/uml-config.h; this fixdep "bug" makes sure that
- * those files will have correct dependencies.
- */
#include <sys/types.h>
#include <sys/stat.h>
@@ -233,8 +225,13 @@ static int str_ends_with(const char *s, int slen, const char *sub)
static void parse_config_file(const char *p)
{
const char *q, *r;
+ const char *start = p;
while ((p = strstr(p, "CONFIG_"))) {
+ if (p > start && (isalnum(p[-1]) || p[-1] == '_')) {
+ p += 7;
+ continue;
+ }
p += 7;
q = p;
while (*q && (isalnum(*q) || *q == '_'))
@@ -286,8 +283,6 @@ static int is_ignored_file(const char *s, int len)
{
return str_ends_with(s, len, "include/generated/autoconf.h") ||
str_ends_with(s, len, "include/generated/autoksyms.h") ||
- str_ends_with(s, len, "arch/um/include/uml-config.h") ||
- str_ends_with(s, len, "include/linux/kconfig.h") ||
str_ends_with(s, len, ".ver");
}
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter
index 94b664817ad9..d84a5674e95e 100755
--- a/scripts/bloat-o-meter
+++ b/scripts/bloat-o-meter
@@ -15,7 +15,7 @@ signal(SIGPIPE, SIG_DFL)
if len(sys.argv) < 3:
sys.stderr.write("usage: %s [option] file1 file2\n" % sys.argv[0])
sys.stderr.write("The options are:\n")
- sys.stderr.write("-c cateogrize output based on symbole type\n")
+ sys.stderr.write("-c categorize output based on symbol type\n")
sys.stderr.write("-d Show delta of Data Section\n")
sys.stderr.write("-t Show delta of text Section\n")
sys.exit(-1)
diff --git a/scripts/coccinelle/api/memdup.cocci b/scripts/coccinelle/api/memdup.cocci
index 1249b727644b..8fd6437beda8 100644
--- a/scripts/coccinelle/api/memdup.cocci
+++ b/scripts/coccinelle/api/memdup.cocci
@@ -56,10 +56,10 @@ statement S;
p << r.p;
@@
-coccilib.org.print_todo(p[0], "WARNING opportunity for kmemdep")
+coccilib.org.print_todo(p[0], "WARNING opportunity for kmemdup")
@script:python depends on report@
p << r.p;
@@
-coccilib.report.print_report(p[0], "WARNING opportunity for kmemdep")
+coccilib.report.print_report(p[0], "WARNING opportunity for kmemdup")
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 9ee9bf7fd1a2..65792650c630 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -595,7 +595,7 @@ static void optimize_result(void)
* original char code */
if (!best_table_len[i]) {
- /* find the token with the breates profit value */
+ /* find the token with the best profit value */
best = find_best_token();
if (token_profit[best] == 0)
break;
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 5c12dc91ef34..df26c7b0fe13 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -178,7 +178,7 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p)
case S_HEX:
done:
if (sym_string_valid(sym, p)) {
- sym->def[def].val = strdup(p);
+ sym->def[def].val = xstrdup(p);
sym->flags |= def_flags;
} else {
if (def != S_DEF_AUTO)
diff --git a/scripts/kconfig/kxgettext.c b/scripts/kconfig/kxgettext.c
index 2858738b22d5..240880a89111 100644
--- a/scripts/kconfig/kxgettext.c
+++ b/scripts/kconfig/kxgettext.c
@@ -101,7 +101,7 @@ static struct message *message__new(const char *msg, char *option,
if (self->files == NULL)
goto out_fail;
- self->msg = strdup(msg);
+ self->msg = xstrdup(msg);
if (self->msg == NULL)
goto out_fail_msg;
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index 4e23febbe4b2..2d5ec2d0e952 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -115,6 +115,7 @@ int file_write_dep(const char *name);
void *xmalloc(size_t size);
void *xcalloc(size_t nmemb, size_t size);
void *xrealloc(void *p, size_t size);
+char *xstrdup(const char *s);
struct gstr {
size_t len;
diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh
index a10bd9d6fafd..6c0bcd9c472d 100755
--- a/scripts/kconfig/lxdialog/check-lxdialog.sh
+++ b/scripts/kconfig/lxdialog/check-lxdialog.sh
@@ -55,7 +55,8 @@ EOF
echo " *** required header files." 1>&2
echo " *** 'make menuconfig' requires the ncurses libraries." 1>&2
echo " *** " 1>&2
- echo " *** Install ncurses (ncurses-devel) and try again." 1>&2
+ echo " *** Install ncurses (ncurses-devel or libncurses-dev " 1>&2
+ echo " *** depending on your distribution) and try again." 1>&2
echo " *** " 1>&2
exit 1
fi
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 99222855544c..36cd3e1f1c28 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -212,6 +212,7 @@ void menu_add_option(int token, char *arg)
sym_defconfig_list = current_entry->sym;
else if (sym_defconfig_list != current_entry->sym)
zconf_error("trying to redefine defconfig symbol");
+ sym_defconfig_list->flags |= SYMBOL_AUTO;
break;
case T_OPT_ENV:
prop_add_env(arg);
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index cca9663be5dd..2220bc4b051b 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -183,7 +183,7 @@ static void sym_validate_range(struct symbol *sym)
sprintf(str, "%lld", val2);
else
sprintf(str, "0x%llx", val2);
- sym->curr.val = strdup(str);
+ sym->curr.val = xstrdup(str);
}
static void sym_set_changed(struct symbol *sym)
@@ -849,7 +849,7 @@ struct symbol *sym_lookup(const char *name, int flags)
: !(symbol->flags & (SYMBOL_CONST|SYMBOL_CHOICE))))
return symbol;
}
- new_name = strdup(name);
+ new_name = xstrdup(name);
} else {
new_name = NULL;
hash = 0;
diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c
index b98a79e30e04..c6f6e21b809f 100644
--- a/scripts/kconfig/util.c
+++ b/scripts/kconfig/util.c
@@ -154,3 +154,14 @@ void *xrealloc(void *p, size_t size)
fprintf(stderr, "Out of memory.\n");
exit(1);
}
+
+char *xstrdup(const char *s)
+{
+ char *p;
+
+ p = strdup(s);
+ if (p)
+ return p;
+ fprintf(stderr, "Out of memory.\n");
+ exit(1);
+}
diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l
index 02de6fe302a9..88b650eb9cc9 100644
--- a/scripts/kconfig/zconf.l
+++ b/scripts/kconfig/zconf.l
@@ -332,16 +332,12 @@ void zconf_nextfile(const char *name)
"Inclusion path:\n current file : '%s'\n",
zconf_curname(), zconf_lineno(),
zconf_curname());
- iter = current_file->parent;
- while (iter && \
- strcmp(iter->name,current_file->name)) {
- fprintf(stderr, " included from: '%s:%d'\n",
- iter->name, iter->lineno-1);
+ iter = current_file;
+ do {
iter = iter->parent;
- }
- if (iter)
fprintf(stderr, " included from: '%s:%d'\n",
- iter->name, iter->lineno+1);
+ iter->name, iter->lineno - 1);
+ } while (strcmp(iter->name, current_file->name));
exit(1);
}
}
diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y
index 4be98050b961..ad6305b0f40c 100644
--- a/scripts/kconfig/zconf.y
+++ b/scripts/kconfig/zconf.y
@@ -127,7 +127,7 @@ no_mainmenu_stmt: /* empty */
* later regardless of whether it comes from the 'prompt' in
* mainmenu_stmt or here
*/
- menu_add_prompt(P_MENU, strdup("Linux Kernel Configuration"), NULL);
+ menu_add_prompt(P_MENU, xstrdup("Linux Kernel Configuration"), NULL);
};
@@ -276,6 +276,7 @@ choice: T_CHOICE word_opt T_EOL
sym->flags |= SYMBOL_AUTO;
menu_add_entry(sym);
menu_add_expr(P_CHOICE, NULL, NULL);
+ free($2);
printd(DEBUG_PARSE, "%s:%d:choice\n", zconf_curname(), zconf_lineno());
};
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index c0d129d7f430..be56a1153014 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -246,7 +246,7 @@ else
fi;
# final build of init/
-${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init GCC_PLUGINS_CFLAGS="${GCC_PLUGINS_CFLAGS}"
+${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
archive_builtin
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 6f9e4ce568cd..9bb0a7f2863e 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -18,6 +18,7 @@
#include <linux/cred.h>
#include <linux/key-type.h>
#include <linux/digsig.h>
+#include <linux/vmalloc.h>
#include <crypto/public_key.h>
#include <keys/system_keyring.h>
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 929e14978c42..fa728f662a6f 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -22,6 +22,13 @@
#include <keys/big_key-type.h>
#include <crypto/aead.h>
+struct big_key_buf {
+ unsigned int nr_pages;
+ void *virt;
+ struct scatterlist *sg;
+ struct page *pages[];
+};
+
/*
* Layout of key payload words.
*/
@@ -91,10 +98,9 @@ static DEFINE_MUTEX(big_key_aead_lock);
/*
* Encrypt/decrypt big_key data
*/
-static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
+static int big_key_crypt(enum big_key_op op, struct big_key_buf *buf, size_t datalen, u8 *key)
{
int ret;
- struct scatterlist sgio;
struct aead_request *aead_req;
/* We always use a zero nonce. The reason we can get away with this is
* because we're using a different randomly generated key for every
@@ -109,8 +115,7 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
return -ENOMEM;
memset(zero_nonce, 0, sizeof(zero_nonce));
- sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0));
- aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce);
+ aead_request_set_crypt(aead_req, buf->sg, buf->sg, datalen, zero_nonce);
aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
aead_request_set_ad(aead_req, 0);
@@ -130,21 +135,81 @@ error:
}
/*
+ * Free up the buffer.
+ */
+static void big_key_free_buffer(struct big_key_buf *buf)
+{
+ unsigned int i;
+
+ if (buf->virt) {
+ memset(buf->virt, 0, buf->nr_pages * PAGE_SIZE);
+ vunmap(buf->virt);
+ }
+
+ for (i = 0; i < buf->nr_pages; i++)
+ if (buf->pages[i])
+ __free_page(buf->pages[i]);
+
+ kfree(buf);
+}
+
+/*
+ * Allocate a buffer consisting of a set of pages with a virtual mapping
+ * applied over them.
+ */
+static void *big_key_alloc_buffer(size_t len)
+{
+ struct big_key_buf *buf;
+ unsigned int npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned int i, l;
+
+ buf = kzalloc(sizeof(struct big_key_buf) +
+ sizeof(struct page) * npg +
+ sizeof(struct scatterlist) * npg,
+ GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ buf->nr_pages = npg;
+ buf->sg = (void *)(buf->pages + npg);
+ sg_init_table(buf->sg, npg);
+
+ for (i = 0; i < buf->nr_pages; i++) {
+ buf->pages[i] = alloc_page(GFP_KERNEL);
+ if (!buf->pages[i])
+ goto nomem;
+
+ l = min_t(size_t, len, PAGE_SIZE);
+ sg_set_page(&buf->sg[i], buf->pages[i], l, 0);
+ len -= l;
+ }
+
+ buf->virt = vmap(buf->pages, buf->nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!buf->virt)
+ goto nomem;
+
+ return buf;
+
+nomem:
+ big_key_free_buffer(buf);
+ return NULL;
+}
+
+/*
* Preparse a big key
*/
int big_key_preparse(struct key_preparsed_payload *prep)
{
+ struct big_key_buf *buf;
struct path *path = (struct path *)&prep->payload.data[big_key_path];
struct file *file;
u8 *enckey;
- u8 *data = NULL;
ssize_t written;
- size_t datalen = prep->datalen;
+ size_t datalen = prep->datalen, enclen = datalen + ENC_AUTHTAG_SIZE;
int ret;
- ret = -EINVAL;
if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
- goto error;
+ return -EINVAL;
/* Set an arbitrary quota */
prep->quotalen = 16;
@@ -157,13 +222,12 @@ int big_key_preparse(struct key_preparsed_payload *prep)
*
* File content is stored encrypted with randomly generated key.
*/
- size_t enclen = datalen + ENC_AUTHTAG_SIZE;
loff_t pos = 0;
- data = kmalloc(enclen, GFP_KERNEL);
- if (!data)
+ buf = big_key_alloc_buffer(enclen);
+ if (!buf)
return -ENOMEM;
- memcpy(data, prep->data, datalen);
+ memcpy(buf->virt, prep->data, datalen);
/* generate random key */
enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL);
@@ -176,7 +240,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
goto err_enckey;
/* encrypt aligned data */
- ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey);
+ ret = big_key_crypt(BIG_KEY_ENC, buf, datalen, enckey);
if (ret)
goto err_enckey;
@@ -187,7 +251,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
goto err_enckey;
}
- written = kernel_write(file, data, enclen, &pos);
+ written = kernel_write(file, buf->virt, enclen, &pos);
if (written != enclen) {
ret = written;
if (written >= 0)
@@ -202,7 +266,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
*path = file->f_path;
path_get(path);
fput(file);
- kzfree(data);
+ big_key_free_buffer(buf);
} else {
/* Just store the data in a buffer */
void *data = kmalloc(datalen, GFP_KERNEL);
@@ -220,7 +284,7 @@ err_fput:
err_enckey:
kzfree(enckey);
error:
- kzfree(data);
+ big_key_free_buffer(buf);
return ret;
}
@@ -298,15 +362,15 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
return datalen;
if (datalen > BIG_KEY_FILE_THRESHOLD) {
+ struct big_key_buf *buf;
struct path *path = (struct path *)&key->payload.data[big_key_path];
struct file *file;
- u8 *data;
u8 *enckey = (u8 *)key->payload.data[big_key_data];
size_t enclen = datalen + ENC_AUTHTAG_SIZE;
loff_t pos = 0;
- data = kmalloc(enclen, GFP_KERNEL);
- if (!data)
+ buf = big_key_alloc_buffer(enclen);
+ if (!buf)
return -ENOMEM;
file = dentry_open(path, O_RDONLY, current_cred());
@@ -316,26 +380,26 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
}
/* read file to kernel and decrypt */
- ret = kernel_read(file, data, enclen, &pos);
+ ret = kernel_read(file, buf->virt, enclen, &pos);
if (ret >= 0 && ret != enclen) {
ret = -EIO;
goto err_fput;
}
- ret = big_key_crypt(BIG_KEY_DEC, data, enclen, enckey);
+ ret = big_key_crypt(BIG_KEY_DEC, buf, enclen, enckey);
if (ret)
goto err_fput;
ret = datalen;
/* copy decrypted data to user */
- if (copy_to_user(buffer, data, datalen) != 0)
+ if (copy_to_user(buffer, buf->virt, datalen) != 0)
ret = -EFAULT;
err_fput:
fput(file);
error:
- kzfree(data);
+ big_key_free_buffer(buf);
} else {
ret = datalen;
if (copy_to_user(buffer, key->payload.data[big_key_data],
diff --git a/sound/ac97/Kconfig b/sound/ac97/Kconfig
index f8a64e15e5bf..baa5f8ef89d2 100644
--- a/sound/ac97/Kconfig
+++ b/sound/ac97/Kconfig
@@ -5,7 +5,6 @@
config AC97_BUS_NEW
tristate
- select AC97
help
This is the new AC97 bus type, successor of AC97_BUS. The ported
drivers which benefit from the AC97 automatic probing should "select"
diff --git a/sound/core/control.c b/sound/core/control.c
index 0b3026d937b1..8a77620a3854 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -889,7 +889,7 @@ static int snd_ctl_elem_read(struct snd_card *card,
index_offset = snd_ctl_get_ioff(kctl, &control->id);
vd = &kctl->vd[index_offset];
- if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) && kctl->get == NULL)
+ if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL)
return -EPERM;
snd_ctl_build_ioff(&control->id, kctl, index_offset);
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index b044c0a5a674..02298c9c6020 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1762,10 +1762,9 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file)
return -ENOMEM;
_snd_pcm_hw_params_any(params);
err = snd_pcm_hw_refine(substream, params);
- format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT);
- kfree(params);
if (err < 0)
- return err;
+ goto error;
+ format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT);
for (fmt = 0; fmt < 32; ++fmt) {
if (snd_mask_test(format_mask, fmt)) {
int f = snd_pcm_oss_format_to(fmt);
@@ -1773,7 +1772,10 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file)
formats |= f;
}
}
- return formats;
+
+ error:
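+	/* params is freed on both the success and the error path */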
+ kfree(params);
+ return err < 0 ? err : formats;
}
static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int format)
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 60db32785f62..61a07fe34cd2 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -255,12 +255,12 @@ static int seq_free_client1(struct snd_seq_client *client)
if (!client)
return 0;
- snd_seq_delete_all_ports(client);
- snd_seq_queue_client_leave(client->number);
spin_lock_irqsave(&clients_lock, flags);
clienttablock[client->number] = 1;
clienttab[client->number] = NULL;
spin_unlock_irqrestore(&clients_lock, flags);
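+	/* the client is now unreachable via the table, so its ports and
+	 * queues can be torn down without new references being taken */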
+ snd_seq_delete_all_ports(client);
+ snd_seq_queue_client_leave(client->number);
snd_use_lock_sync(&client->use_lock);
snd_seq_queue_client_termination(client->number);
if (client->pool)
@@ -910,7 +910,8 @@ int snd_seq_dispatch_event(struct snd_seq_event_cell *cell, int atomic, int hop)
static int snd_seq_client_enqueue_event(struct snd_seq_client *client,
struct snd_seq_event *event,
struct file *file, int blocking,
- int atomic, int hop)
+ int atomic, int hop,
+ struct mutex *mutexp)
{
struct snd_seq_event_cell *cell;
int err;
@@ -948,7 +949,8 @@ static int snd_seq_client_enqueue_event(struct snd_seq_client *client,
return -ENXIO; /* queue is not allocated */
/* allocate an event cell */
- err = snd_seq_event_dup(client->pool, event, &cell, !blocking || atomic, file);
+ err = snd_seq_event_dup(client->pool, event, &cell, !blocking || atomic,
+ file, mutexp);
if (err < 0)
return err;
@@ -1003,7 +1005,7 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
{
struct snd_seq_client *client = file->private_data;
int written = 0, len;
- int err = -EINVAL;
+ int err;
struct snd_seq_event event;
if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT))
@@ -1017,12 +1019,15 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
return -ENXIO;
/* allocate the pool now if the pool is not allocated yet */
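+	/* hold ioctl_mutex across the write so that pool setup and use
+	 * cannot race with concurrent ioctls resizing or freeing the pool */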
+ mutex_lock(&client->ioctl_mutex);
if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) {
- if (snd_seq_pool_init(client->pool) < 0)
- return -ENOMEM;
+ err = snd_seq_pool_init(client->pool);
+ if (err < 0)
+ goto out;
}
/* only process whole events */
+ err = -EINVAL;
while (count >= sizeof(struct snd_seq_event)) {
/* Read in the event header from the user */
len = sizeof(event);
@@ -1069,7 +1074,7 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
/* ok, enqueue it */
err = snd_seq_client_enqueue_event(client, &event, file,
!(file->f_flags & O_NONBLOCK),
- 0, 0);
+ 0, 0, &client->ioctl_mutex);
if (err < 0)
break;
@@ -1080,6 +1085,8 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
written += len;
}
+ out:
+ mutex_unlock(&client->ioctl_mutex);
return written ? written : err;
}
@@ -1834,9 +1841,11 @@ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client,
(! snd_seq_write_pool_allocated(client) ||
info->output_pool != client->pool->size)) {
if (snd_seq_write_pool_allocated(client)) {
+ /* is the pool in use? */
+ if (atomic_read(&client->pool->counter))
+ return -EBUSY;
/* remove all existing cells */
snd_seq_pool_mark_closing(client->pool);
- snd_seq_queue_client_leave_cells(client->number);
snd_seq_pool_done(client->pool);
}
client->pool->size = info->output_pool;
@@ -2256,7 +2265,8 @@ static int kernel_client_enqueue(int client, struct snd_seq_event *ev,
if (! cptr->accept_output)
result = -EPERM;
else /* send it */
- result = snd_seq_client_enqueue_event(cptr, ev, file, blocking, atomic, hop);
+ result = snd_seq_client_enqueue_event(cptr, ev, file, blocking,
+ atomic, hop, NULL);
snd_seq_client_unlock(cptr);
return result;
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index a8c2822e0198..72c0302a55d2 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -125,7 +125,7 @@ int snd_seq_fifo_event_in(struct snd_seq_fifo *f,
return -EINVAL;
snd_use_lock_use(&f->use_lock);
- err = snd_seq_event_dup(f->pool, event, &cell, 1, NULL); /* always non-blocking */
+ err = snd_seq_event_dup(f->pool, event, &cell, 1, NULL, NULL); /* always non-blocking */
if (err < 0) {
if ((err == -ENOMEM) || (err == -EAGAIN))
atomic_inc(&f->overflow);
diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
index f763682584a8..ab1112e90f88 100644
--- a/sound/core/seq/seq_memory.c
+++ b/sound/core/seq/seq_memory.c
@@ -220,7 +220,8 @@ void snd_seq_cell_free(struct snd_seq_event_cell * cell)
*/
static int snd_seq_cell_alloc(struct snd_seq_pool *pool,
struct snd_seq_event_cell **cellp,
- int nonblock, struct file *file)
+ int nonblock, struct file *file,
+ struct mutex *mutexp)
{
struct snd_seq_event_cell *cell;
unsigned long flags;
@@ -244,7 +245,11 @@ static int snd_seq_cell_alloc(struct snd_seq_pool *pool,
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&pool->output_sleep, &wait);
spin_unlock_irq(&pool->lock);
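+			/* drop the caller's mutex (if any) while sleeping so
+			 * that concurrent ioctls are not blocked for the
+			 * whole wait */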
+ if (mutexp)
+ mutex_unlock(mutexp);
schedule();
+ if (mutexp)
+ mutex_lock(mutexp);
spin_lock_irq(&pool->lock);
remove_wait_queue(&pool->output_sleep, &wait);
/* interrupted? */
@@ -287,7 +292,7 @@ __error:
*/
int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event,
struct snd_seq_event_cell **cellp, int nonblock,
- struct file *file)
+ struct file *file, struct mutex *mutexp)
{
int ncells, err;
unsigned int extlen;
@@ -304,7 +309,7 @@ int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event,
if (ncells >= pool->total_elements)
return -ENOMEM;
- err = snd_seq_cell_alloc(pool, &cell, nonblock, file);
+ err = snd_seq_cell_alloc(pool, &cell, nonblock, file, mutexp);
if (err < 0)
return err;
@@ -330,7 +335,8 @@ int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event,
int size = sizeof(struct snd_seq_event);
if (len < size)
size = len;
- err = snd_seq_cell_alloc(pool, &tmp, nonblock, file);
+ err = snd_seq_cell_alloc(pool, &tmp, nonblock, file,
+ mutexp);
if (err < 0)
goto __error;
if (cell->event.data.ext.ptr == NULL)
diff --git a/sound/core/seq/seq_memory.h b/sound/core/seq/seq_memory.h
index 32f959c17786..3abe306c394a 100644
--- a/sound/core/seq/seq_memory.h
+++ b/sound/core/seq/seq_memory.h
@@ -66,7 +66,8 @@ struct snd_seq_pool {
void snd_seq_cell_free(struct snd_seq_event_cell *cell);
int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event,
- struct snd_seq_event_cell **cellp, int nonblock, struct file *file);
+ struct snd_seq_event_cell **cellp, int nonblock,
+ struct file *file, struct mutex *mutexp);
/* return number of unused (free) cells */
static inline int snd_seq_unused_cells(struct snd_seq_pool *pool)
diff --git a/sound/core/seq/seq_prioq.c b/sound/core/seq/seq_prioq.c
index bc1c8488fc2a..2bc6759e4adc 100644
--- a/sound/core/seq/seq_prioq.c
+++ b/sound/core/seq/seq_prioq.c
@@ -87,7 +87,7 @@ void snd_seq_prioq_delete(struct snd_seq_prioq **fifo)
if (f->cells > 0) {
/* drain prioQ */
while (f->cells > 0)
- snd_seq_cell_free(snd_seq_prioq_cell_out(f));
+ snd_seq_cell_free(snd_seq_prioq_cell_out(f, NULL));
}
kfree(f);
@@ -214,8 +214,18 @@ int snd_seq_prioq_cell_in(struct snd_seq_prioq * f,
return 0;
}
+/* return 1 if the current time >= event timestamp */
+static int event_is_ready(struct snd_seq_event *ev, void *current_time)
+{
+ if ((ev->flags & SNDRV_SEQ_TIME_STAMP_MASK) == SNDRV_SEQ_TIME_STAMP_TICK)
+ return snd_seq_compare_tick_time(current_time, &ev->time.tick);
+ else
+ return snd_seq_compare_real_time(current_time, &ev->time.time);
+}
+
/* dequeue cell from prioq */
-struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f)
+struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f,
+ void *current_time)
{
struct snd_seq_event_cell *cell;
unsigned long flags;
@@ -227,6 +237,8 @@ struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f)
spin_lock_irqsave(&f->lock, flags);
cell = f->head;
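+	/* a head cell whose timestamp is still in the future stays queued */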
+ if (cell && current_time && !event_is_ready(&cell->event, current_time))
+ cell = NULL;
if (cell) {
f->head = cell->next;
@@ -252,18 +264,6 @@ int snd_seq_prioq_avail(struct snd_seq_prioq * f)
return f->cells;
}
-
-/* peek at cell at the head of the prioq */
-struct snd_seq_event_cell *snd_seq_prioq_cell_peek(struct snd_seq_prioq * f)
-{
- if (f == NULL) {
- pr_debug("ALSA: seq: snd_seq_prioq_cell_in() called with NULL prioq\n");
- return NULL;
- }
- return f->head;
-}
-
-
static inline int prioq_match(struct snd_seq_event_cell *cell,
int client, int timestamp)
{
diff --git a/sound/core/seq/seq_prioq.h b/sound/core/seq/seq_prioq.h
index d38bb78d9345..2c315ca10fc4 100644
--- a/sound/core/seq/seq_prioq.h
+++ b/sound/core/seq/seq_prioq.h
@@ -44,14 +44,12 @@ void snd_seq_prioq_delete(struct snd_seq_prioq **fifo);
int snd_seq_prioq_cell_in(struct snd_seq_prioq *f, struct snd_seq_event_cell *cell);
/* dequeue cell from prioq */
-struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f);
+struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f,
+ void *current_time);
/* return number of events available in prioq */
int snd_seq_prioq_avail(struct snd_seq_prioq *f);
-/* peek at cell at the head of the prioq */
-struct snd_seq_event_cell *snd_seq_prioq_cell_peek(struct snd_seq_prioq *f);
-
/* client left queue */
void snd_seq_prioq_leave(struct snd_seq_prioq *f, int client, int timestamp);
diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c
index 0428e9061b47..b377f5048352 100644
--- a/sound/core/seq/seq_queue.c
+++ b/sound/core/seq/seq_queue.c
@@ -277,30 +277,20 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
__again:
/* Process tick queue... */
- while ((cell = snd_seq_prioq_cell_peek(q->tickq)) != NULL) {
- if (snd_seq_compare_tick_time(&q->timer->tick.cur_tick,
- &cell->event.time.tick)) {
- cell = snd_seq_prioq_cell_out(q->tickq);
- if (cell)
- snd_seq_dispatch_event(cell, atomic, hop);
- } else {
- /* event remains in the queue */
+ for (;;) {
+ cell = snd_seq_prioq_cell_out(q->tickq,
+ &q->timer->tick.cur_tick);
+ if (!cell)
break;
- }
+ snd_seq_dispatch_event(cell, atomic, hop);
}
-
/* Process time queue... */
- while ((cell = snd_seq_prioq_cell_peek(q->timeq)) != NULL) {
- if (snd_seq_compare_real_time(&q->timer->cur_time,
- &cell->event.time.time)) {
- cell = snd_seq_prioq_cell_out(q->timeq);
- if (cell)
- snd_seq_dispatch_event(cell, atomic, hop);
- } else {
- /* event remains in the queue */
+ for (;;) {
+ cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time);
+ if (!cell)
break;
- }
+ snd_seq_dispatch_event(cell, atomic, hop);
}
/* free lock */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index c71dcacea807..d5017adf9feb 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -186,6 +186,10 @@ module_param(power_save, xint, 0644);
MODULE_PARM_DESC(power_save, "Automatic power-saving timeout "
"(in second, 0 = disable).");
+static bool pm_blacklist = true;
+module_param(pm_blacklist, bool, 0644);
+MODULE_PARM_DESC(pm_blacklist, "Enable power-management blacklist");
+
/* reset the HD-audio controller in power save mode.
* this may give more power-saving, but will take longer time to
* wake up.
@@ -2186,6 +2190,24 @@ out_free:
return err;
}
+#ifdef CONFIG_PM
+/* On some boards, setting power_save to a non-zero value leads to clicking/
+ * popping sounds whenever we enter or leave power-saving mode. Ideally we
+ * would figure out how to avoid these sounds, but that is not always
+ * feasible. So we keep a list of devices where we disable power saving, as
+ * it is known to cause problems on these devices.
+ */
+static struct snd_pci_quirk power_save_blacklist[] = {
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+ SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+ SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
+ SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0),
+ {}
+};
+#endif /* CONFIG_PM */
+
/* number of codec slots for each chipset: 0 = default slots (i.e. 4) */
static unsigned int azx_max_codecs[AZX_NUM_DRIVERS] = {
[AZX_DRIVER_NVIDIA] = 8,
@@ -2198,6 +2220,7 @@ static int azx_probe_continue(struct azx *chip)
struct hdac_bus *bus = azx_bus(chip);
struct pci_dev *pci = chip->pci;
int dev = chip->dev_index;
+ int val;
int err;
hda->probe_continued = 1;
@@ -2278,7 +2301,21 @@ static int azx_probe_continue(struct azx *chip)
chip->running = 1;
azx_add_card_list(chip);
- snd_hda_set_power_save(&chip->bus, power_save * 1000);
+
+ val = power_save;
+#ifdef CONFIG_PM
+ if (pm_blacklist) {
+ const struct snd_pci_quirk *q;
+
+ q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist);
+ if (q && val) {
+ dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n",
+ q->subvendor, q->subdevice);
+ val = 0;
+ }
+ }
+#endif /* CONFIG_PM */
+ snd_hda_set_power_save(&chip->bus, val * 1000);
if (azx_has_pm_runtime(chip) || hda->use_vga_switcheroo)
pm_runtime_put_autosuspend(&pci->dev);
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 37e1cf8218ff..5b4dbcec6de8 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -957,6 +957,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
+ SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK),
+ SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 23475888192b..9af301c6bba2 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -3465,6 +3465,19 @@ static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec,
spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
}
+static void alc269_fixup_pincfg_U7x7_headset_mic(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ unsigned int cfg_headphone = snd_hda_codec_get_pincfg(codec, 0x21);
+ unsigned int cfg_headset_mic = snd_hda_codec_get_pincfg(codec, 0x19);
+
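+	/* 0x411111f0 is the "not connected" pin default; when the headphone
+	 * pin is wired up, re-type pin 0x19 as a mic-in jack instead */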
+ if (cfg_headphone && cfg_headset_mic == 0x411111f0)
+ snd_hda_codec_set_pincfg(codec, 0x19,
+ (cfg_headphone & ~AC_DEFCFG_DEVICE) |
+ (AC_JACK_MIC_IN << AC_DEFCFG_DEVICE_SHIFT));
+}
+
static void alc269_fixup_hweq(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -4972,6 +4985,29 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec,
}
}
+static void alc_fixup_tpt470_dock(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ static const struct hda_pintbl pincfgs[] = {
+ { 0x17, 0x21211010 }, /* dock headphone */
+ { 0x19, 0x21a11010 }, /* dock mic */
+ { }
+ };
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
+ snd_hda_apply_pincfgs(codec, pincfgs);
+ } else if (action == HDA_FIXUP_ACT_INIT) {
+		/* Enable DOCK headphone pin (0x17) */
+ snd_hda_codec_write(codec, 0x17, 0,
+ AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
+		/* Enable DOCK mic pin (0x19) */
+ snd_hda_codec_write(codec, 0x19, 0,
+ AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
+ }
+}
+
static void alc_shutup_dell_xps13(struct hda_codec *codec)
{
struct alc_spec *spec = codec->spec;
@@ -5238,6 +5274,16 @@ static void alc298_fixup_speaker_volume(struct hda_codec *codec,
}
}
+/* disable DAC3 (0x06) selection on NID 0x17 as it has no volume amp control */
+static void alc295_fixup_disable_dac3(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ hda_nid_t conn[2] = { 0x02, 0x03 };
+ snd_hda_override_conn_list(codec, 0x17, 2, conn);
+ }
+}
+
/* Hook to update amp GPIO4 for automute */
static void alc280_hp_gpio4_automute_hook(struct hda_codec *codec,
struct hda_jack_callback *jack)
@@ -5351,6 +5397,7 @@ enum {
ALC269_FIXUP_LIFEBOOK_EXTMIC,
ALC269_FIXUP_LIFEBOOK_HP_PIN,
ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT,
+ ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC,
ALC269_FIXUP_AMIC,
ALC269_FIXUP_DMIC,
ALC269VB_FIXUP_AMIC,
@@ -5429,6 +5476,7 @@ enum {
ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
ALC255_FIXUP_DELL_SPK_NOISE,
ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC295_FIXUP_DISABLE_DAC3,
ALC280_FIXUP_HP_HEADSET_MIC,
ALC221_FIXUP_HP_FRONT_MIC,
ALC292_FIXUP_TPT460,
@@ -5443,9 +5491,12 @@ enum {
ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE,
ALC233_FIXUP_LENOVO_MULTI_CODECS,
ALC294_FIXUP_LENOVO_MIC_LOCATION,
+ ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE,
ALC700_FIXUP_INTEL_REFERENCE,
ALC274_FIXUP_DELL_BIND_DACS,
ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
+ ALC298_FIXUP_TPT470_DOCK,
+ ALC255_FIXUP_DUMMY_LINEOUT_VERB,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -5556,6 +5607,10 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc269_fixup_pincfg_no_hp_to_lineout,
},
+ [ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_pincfg_U7x7_headset_mic,
+ },
[ALC269_FIXUP_AMIC] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -6156,6 +6211,10 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
},
+ [ALC295_FIXUP_DISABLE_DAC3] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc295_fixup_disable_dac3,
+ },
[ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -6241,6 +6300,18 @@ static const struct hda_fixup alc269_fixups[] = {
{ }
},
},
+ [ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x16, 0x0101102f }, /* Rear Headset HP */
+ { 0x19, 0x02a1913c }, /* use as Front headset mic, without its own jack detect */
+ { 0x1a, 0x01a19030 }, /* Rear Headset MIC */
+ { 0x1b, 0x02011020 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+ },
[ALC700_FIXUP_INTEL_REFERENCE] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
@@ -6271,6 +6342,21 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC274_FIXUP_DELL_BIND_DACS
},
+ [ALC298_FIXUP_TPT470_DOCK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_tpt470_dock,
+ .chained = true,
+ .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE
+ },
+ [ALC255_FIXUP_DUMMY_LINEOUT_VERB] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x14, 0x0201101f },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6319,8 +6405,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
+ SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3),
SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
+ SND_PCI_QUIRK(0x1028, 0x080c, "Dell WYSE", ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
+ SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
+ SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -6422,6 +6513,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT),
SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
+ SND_PCI_QUIRK(0x10cf, 0x1629, "Lifebook U7x7", ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC),
SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
@@ -6450,8 +6542,18 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x222d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x2249, "Thinkpad", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x224b, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
@@ -6472,7 +6574,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
@@ -6735,6 +6842,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x14, 0x90170110},
{0x21, 0x02211020}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x14, 0x01011020},
+ {0x21, 0x0221101f}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC256_STANDARD_PINS),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC,
{0x14, 0x90170110},
@@ -6803,6 +6915,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x12, 0x90a60120},
{0x14, 0x90170110},
{0x21, 0x0321101f}),
+ SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+ {0x12, 0xb7a60130},
+ {0x14, 0x90170110},
+ {0x21, 0x04211020}),
SND_HDA_PIN_QUIRK(0x10ec0290, 0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1,
ALC290_STANDARD_PINS,
{0x15, 0x04211040},
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index c33a512283a4..9fb356db3ab2 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -579,13 +579,6 @@ static int acp_init(void __iomem *acp_mmio, u32 asic_type)
for (bank = 1; bank < 48; bank++)
acp_set_sram_bank_state(acp_mmio, bank, false);
}
-
- /* Stoney supports 16bit resolution */
- if (asic_type == CHIP_STONEY) {
- val = acp_reg_read(acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
- val |= 0x03;
- acp_reg_write(val, acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
- }
return 0;
}
@@ -774,6 +767,7 @@ static int acp_dma_hw_params(struct snd_pcm_substream *substream,
{
int status;
uint64_t size;
+ u32 val = 0;
struct page *pg;
struct snd_pcm_runtime *runtime;
struct audio_substream_data *rtd;
@@ -786,6 +780,14 @@ static int acp_dma_hw_params(struct snd_pcm_substream *substream,
if (WARN_ON(!rtd))
return -EINVAL;
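+	/* Stoney supports 16-bit resolution; enable it only for the
+	 * direction of the current stream */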
+ if (adata->asic_type == CHIP_STONEY) {
+ val = acp_reg_read(adata->acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+ val |= ACP_I2S_SP_16BIT_RESOLUTION_EN;
+ else
+ val |= ACP_I2S_MIC_16BIT_RESOLUTION_EN;
+ acp_reg_write(val, adata->acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
+ }
size = params_buffer_bytes(params);
status = snd_pcm_lib_malloc_pages(substream, size);
if (status < 0)
diff --git a/sound/soc/amd/acp.h b/sound/soc/amd/acp.h
index ecb458935d1e..9293f179f272 100644
--- a/sound/soc/amd/acp.h
+++ b/sound/soc/amd/acp.h
@@ -70,6 +70,8 @@
#define CAPTURE_END_DMA_DESCR_CH15 7
#define mmACP_I2S_16BIT_RESOLUTION_EN 0x5209
+#define ACP_I2S_MIC_16BIT_RESOLUTION_EN 0x01
+#define ACP_I2S_SP_16BIT_RESOLUTION_EN 0x02
enum acp_dma_priority_level {
/* 0x0 Specifies the DMA channel is given normal priority */
ACP_DMA_PRIORITY_LEVEL_NORMAL = 0x0,
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index 5672e516bec3..c1830ccd3bb8 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -798,12 +798,7 @@ static int hdmi_codec_probe(struct platform_device *pdev)
static int hdmi_codec_remove(struct platform_device *pdev)
{
- struct device *dev = &pdev->dev;
- struct hdmi_codec_priv *hcp;
-
- hcp = dev_get_drvdata(dev);
- kfree(hcp->chmap_info);
- snd_soc_unregister_codec(dev);
+ snd_soc_unregister_codec(&pdev->dev);
return 0;
}
diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c
index 831b297978a4..45a73049cf64 100644
--- a/sound/soc/codecs/rt5651.c
+++ b/sound/soc/codecs/rt5651.c
@@ -1722,6 +1722,7 @@ static const struct regmap_config rt5651_regmap = {
.num_reg_defaults = ARRAY_SIZE(rt5651_reg),
.ranges = rt5651_ranges,
.num_ranges = ARRAY_SIZE(rt5651_ranges),
+ .use_single_rw = true,
};
#if defined(CONFIG_OF)
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index e1ab5537d27a..c5c76ab8ccf1 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -529,10 +529,15 @@ static const struct snd_kcontrol_new sgtl5000_snd_controls[] = {
static int sgtl5000_digital_mute(struct snd_soc_dai *codec_dai, int mute)
{
struct snd_soc_codec *codec = codec_dai->codec;
- u16 adcdac_ctrl = SGTL5000_DAC_MUTE_LEFT | SGTL5000_DAC_MUTE_RIGHT;
+ u16 i2s_pwr = SGTL5000_I2S_IN_POWERUP;
- snd_soc_update_bits(codec, SGTL5000_CHIP_ADCDAC_CTRL,
- adcdac_ctrl, mute ? adcdac_ctrl : 0);
+ /*
+	 * During 'digital mute' do not mute the DAC, because LINE_IN
+	 * would be muted as well. We want to mute only the I2S block;
+	 * this can be done by powering it off.
+ */
+ snd_soc_update_bits(codec, SGTL5000_CHIP_DIG_POWER,
+ i2s_pwr, mute ? 0 : i2s_pwr);
return 0;
}
@@ -871,15 +876,26 @@ static int sgtl5000_pcm_hw_params(struct snd_pcm_substream *substream,
static int sgtl5000_set_bias_level(struct snd_soc_codec *codec,
enum snd_soc_bias_level level)
{
+ struct sgtl5000_priv *sgtl = snd_soc_codec_get_drvdata(codec);
+ int ret;
+
switch (level) {
case SND_SOC_BIAS_ON:
case SND_SOC_BIAS_PREPARE:
case SND_SOC_BIAS_STANDBY:
+ regcache_cache_only(sgtl->regmap, false);
+ ret = regcache_sync(sgtl->regmap);
+ if (ret) {
+ regcache_cache_only(sgtl->regmap, true);
+ return ret;
+ }
+
snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER,
SGTL5000_REFTOP_POWERUP,
SGTL5000_REFTOP_POWERUP);
break;
case SND_SOC_BIAS_OFF:
+ regcache_cache_only(sgtl->regmap, true);
snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER,
SGTL5000_REFTOP_POWERUP, 0);
break;
@@ -1237,6 +1253,10 @@ static int sgtl5000_probe(struct snd_soc_codec *codec)
*/
snd_soc_write(codec, SGTL5000_DAP_CTRL, 0);
+ /* Unmute DAC after start */
+ snd_soc_update_bits(codec, SGTL5000_CHIP_ADCDAC_CTRL,
+ SGTL5000_DAC_MUTE_LEFT | SGTL5000_DAC_MUTE_RIGHT, 0);
+
return 0;
err:
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 66e32f5d2917..989d093abda7 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1204,12 +1204,14 @@ static int wmfw_add_ctl(struct wm_adsp *dsp, struct wm_coeff_ctl *ctl)
kcontrol->put = wm_coeff_put_acked;
break;
default:
- kcontrol->get = wm_coeff_get;
- kcontrol->put = wm_coeff_put;
-
- ctl->bytes_ext.max = ctl->len;
- ctl->bytes_ext.get = wm_coeff_tlv_get;
- ctl->bytes_ext.put = wm_coeff_tlv_put;
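+			/* use the TLV byte interface only when the control
+			 * was created with TLV callback access; otherwise
+			 * fall back to the plain get/put handlers */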
+ if (kcontrol->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) {
+ ctl->bytes_ext.max = ctl->len;
+ ctl->bytes_ext.get = wm_coeff_tlv_get;
+ ctl->bytes_ext.put = wm_coeff_tlv_put;
+ } else {
+ kcontrol->get = wm_coeff_get;
+ kcontrol->put = wm_coeff_put;
+ }
break;
}
diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c
index dca1143c1150..a4aa931ebfae 100644
--- a/sound/soc/sunxi/sun4i-i2s.c
+++ b/sound/soc/sunxi/sun4i-i2s.c
@@ -104,7 +104,7 @@
#define SUN8I_I2S_CHAN_CFG_REG 0x30
#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM_MASK GENMASK(6, 4)
-#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM(chan) (chan - 1)
+#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM(chan) ((chan - 1) << 4)
#define SUN8I_I2S_CHAN_CFG_TX_SLOT_NUM_MASK GENMASK(2, 0)
#define SUN8I_I2S_CHAN_CFG_TX_SLOT_NUM(chan) (chan - 1)
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 9afb8ab524c7..06b22624ab7a 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -347,17 +347,20 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request,
int validx, int *value_ret)
{
struct snd_usb_audio *chip = cval->head.mixer->chip;
- unsigned char buf[4 + 3 * sizeof(__u32)]; /* enough space for one range */
+ /* enough space for one range */
+ unsigned char buf[sizeof(__u16) + 3 * sizeof(__u32)];
unsigned char *val;
- int idx = 0, ret, size;
+ int idx = 0, ret, val_size, size;
__u8 bRequest;
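+	/* a UAC2 RANGE reply is wNumSubRanges followed by MIN/MAX/RES fields
+	 * of the control's native size, so the offsets below scale with
+	 * val_size */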
+ val_size = uac2_ctl_value_size(cval->val_type);
+
if (request == UAC_GET_CUR) {
bRequest = UAC2_CS_CUR;
- size = uac2_ctl_value_size(cval->val_type);
+ size = val_size;
} else {
bRequest = UAC2_CS_RANGE;
- size = sizeof(buf);
+ size = sizeof(__u16) + 3 * val_size;
}
memset(buf, 0, sizeof(buf));
@@ -390,16 +393,17 @@ error:
val = buf + sizeof(__u16);
break;
case UAC_GET_MAX:
- val = buf + sizeof(__u16) * 2;
+ val = buf + sizeof(__u16) + val_size;
break;
case UAC_GET_RES:
- val = buf + sizeof(__u16) * 3;
+ val = buf + sizeof(__u16) + val_size * 2;
break;
default:
return -EINVAL;
}
- *value_ret = convert_signed_value(cval, snd_usb_combine_bytes(val, sizeof(__u16)));
+ *value_ret = convert_signed_value(cval,
+ snd_usb_combine_bytes(val, val_size));
return 0;
}
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index b9c9a19f9588..3cbfae6604f9 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -357,6 +357,15 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs,
alts = &iface->altsetting[1];
goto add_sync_ep;
+ case USB_ID(0x1397, 0x0002):
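+		/* sync on the capture endpoint (0x81) of interface 1 */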
+ ep = 0x81;
+ iface = usb_ifnum_to_if(dev, 1);
+
+ if (!iface || iface->num_altsetting == 0)
+ return -EINVAL;
+
+ alts = &iface->altsetting[1];
+ goto add_sync_ep;
}
if (attr == USB_ENDPOINT_SYNC_ASYNC &&
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 50252046b01d..754e632a27bd 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -3325,4 +3325,51 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"),
}
},
+{
+ /*
+	 * Bowers & Wilkins PX headphones only support the 48 kHz sample rate
+	 * even though they advertise more. The capture interface does not
+	 * work even on Windows.
+ */
+ USB_DEVICE(0x19b5, 0x0021),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = (const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 0,
+ .type = QUIRK_AUDIO_STANDARD_MIXER,
+ },
+ /* Capture */
+ {
+ .ifnum = 1,
+ .type = QUIRK_IGNORE_INTERFACE,
+ },
+ /* Playback */
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
+ .channels = 2,
+ .iface = 2,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .attributes = UAC_EP_CS_ATTR_FILL_MAX |
+ UAC_EP_CS_ATTR_SAMPLE_RATE,
+ .endpoint = 0x03,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC,
+ .rates = SNDRV_PCM_RATE_48000,
+ .rate_min = 48000,
+ .rate_max = 48000,
+ .nr_rates = 1,
+ .rate_table = (unsigned int[]) {
+ 48000
+ }
+ }
+ },
+ }
+ }
+},
+
#undef USB_DEVICE_VENDOR_SPEC
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index a66ef5777887..ea8f3de92fa4 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1363,8 +1363,11 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
return SNDRV_PCM_FMTBIT_DSD_U32_BE;
break;
- /* Amanero Combo384 USB interface with native DSD support */
- case USB_ID(0x16d0, 0x071a):
+ /* Amanero Combo384 USB based DACs with native DSD support */
+ case USB_ID(0x16d0, 0x071a): /* Amanero - Combo384 */
+ case USB_ID(0x2ab6, 0x0004): /* T+A DAC8DSD-V2.0, MP1000E-V2.0, MP2000R-V2.0, MP2500R-V2.0, MP3100HV-V2.0 */
+ case USB_ID(0x2ab6, 0x0005): /* T+A USB HD Audio 1 */
+ case USB_ID(0x2ab6, 0x0006): /* T+A USB HD Audio 2 */
if (fp->altsetting == 2) {
switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
case 0x199:
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index a0951505c7f5..4ed9d0c41843 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -50,6 +50,7 @@
/* standard module options for ALSA. This module supports only one card */
static int hdmi_card_index = SNDRV_DEFAULT_IDX1;
static char *hdmi_card_id = SNDRV_DEFAULT_STR1;
+static bool single_port;
module_param_named(index, hdmi_card_index, int, 0444);
MODULE_PARM_DESC(index,
@@ -57,6 +58,9 @@ MODULE_PARM_DESC(index,
module_param_named(id, hdmi_card_id, charp, 0444);
MODULE_PARM_DESC(id,
"ID string for INTEL Intel HDMI Audio controller.");
+module_param(single_port, bool, 0444);
+MODULE_PARM_DESC(single_port,
+ "Single-port mode (for compatibility)");
/*
* ELD SA bits in the CEA Speaker Allocation data block
@@ -1579,7 +1583,11 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id)
static void notify_audio_lpe(struct platform_device *pdev, int port)
{
struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev);
- struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+ struct snd_intelhad *ctx;
+
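+	/* in single-port mode every port shares pcm_ctx[0]; record which
+	 * physical port raised this notification */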
+ ctx = &card_ctx->pcm_ctx[single_port ? 0 : port];
+ if (single_port)
+ ctx->port = port;
schedule_work(&ctx->hdmi_audio_wq);
}
@@ -1743,6 +1751,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
{
struct snd_card *card;
struct snd_intelhad_card *card_ctx;
+ struct snd_intelhad *ctx;
struct snd_pcm *pcm;
struct intel_hdmi_lpe_audio_pdata *pdata;
int irq;
@@ -1787,6 +1796,21 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, card_ctx);
+ card_ctx->num_pipes = pdata->num_pipes;
+ card_ctx->num_ports = single_port ? 1 : pdata->num_ports;
+
+ for_each_port(card_ctx, port) {
+ ctx = &card_ctx->pcm_ctx[port];
+ ctx->card_ctx = card_ctx;
+ ctx->dev = card_ctx->dev;
+ ctx->port = single_port ? -1 : port;
+ ctx->pipe = -1;
+
+ spin_lock_init(&ctx->had_spinlock);
+ mutex_init(&ctx->mutex);
+ INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
+ }
+
dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n",
__func__, (unsigned int)res_mmio->start,
(unsigned int)res_mmio->end);
@@ -1816,19 +1840,12 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
init_channel_allocations();
card_ctx->num_pipes = pdata->num_pipes;
- card_ctx->num_ports = pdata->num_ports;
+ card_ctx->num_ports = single_port ? 1 : pdata->num_ports;
for_each_port(card_ctx, port) {
- struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
int i;
- ctx->card_ctx = card_ctx;
- ctx->dev = card_ctx->dev;
- ctx->port = port;
- ctx->pipe = -1;
-
- INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
-
+ ctx = &card_ctx->pcm_ctx[port];
ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS,
MAX_CAP_STREAMS, &pcm);
if (ret)
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h
deleted file mode 100644
index 725120939051..000000000000
--- a/tools/arch/s390/include/uapi/asm/unistd.h
+++ /dev/null
@@ -1,412 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/unistd.h"
- */
-
-#ifndef _UAPI_ASM_S390_UNISTD_H_
-#define _UAPI_ASM_S390_UNISTD_H_
-
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_exit 1
-#define __NR_fork 2
-#define __NR_read 3
-#define __NR_write 4
-#define __NR_open 5
-#define __NR_close 6
-#define __NR_restart_syscall 7
-#define __NR_creat 8
-#define __NR_link 9
-#define __NR_unlink 10
-#define __NR_execve 11
-#define __NR_chdir 12
-#define __NR_mknod 14
-#define __NR_chmod 15
-#define __NR_lseek 19
-#define __NR_getpid 20
-#define __NR_mount 21
-#define __NR_umount 22
-#define __NR_ptrace 26
-#define __NR_alarm 27
-#define __NR_pause 29
-#define __NR_utime 30
-#define __NR_access 33
-#define __NR_nice 34
-#define __NR_sync 36
-#define __NR_kill 37
-#define __NR_rename 38
-#define __NR_mkdir 39
-#define __NR_rmdir 40
-#define __NR_dup 41
-#define __NR_pipe 42
-#define __NR_times 43
-#define __NR_brk 45
-#define __NR_signal 48
-#define __NR_acct 51
-#define __NR_umount2 52
-#define __NR_ioctl 54
-#define __NR_fcntl 55
-#define __NR_setpgid 57
-#define __NR_umask 60
-#define __NR_chroot 61
-#define __NR_ustat 62
-#define __NR_dup2 63
-#define __NR_getppid 64
-#define __NR_getpgrp 65
-#define __NR_setsid 66
-#define __NR_sigaction 67
-#define __NR_sigsuspend 72
-#define __NR_sigpending 73
-#define __NR_sethostname 74
-#define __NR_setrlimit 75
-#define __NR_getrusage 77
-#define __NR_gettimeofday 78
-#define __NR_settimeofday 79
-#define __NR_symlink 83
-#define __NR_readlink 85
-#define __NR_uselib 86
-#define __NR_swapon 87
-#define __NR_reboot 88
-#define __NR_readdir 89
-#define __NR_mmap 90
-#define __NR_munmap 91
-#define __NR_truncate 92
-#define __NR_ftruncate 93
-#define __NR_fchmod 94
-#define __NR_getpriority 96
-#define __NR_setpriority 97
-#define __NR_statfs 99
-#define __NR_fstatfs 100
-#define __NR_socketcall 102
-#define __NR_syslog 103
-#define __NR_setitimer 104
-#define __NR_getitimer 105
-#define __NR_stat 106
-#define __NR_lstat 107
-#define __NR_fstat 108
-#define __NR_lookup_dcookie 110
-#define __NR_vhangup 111
-#define __NR_idle 112
-#define __NR_wait4 114
-#define __NR_swapoff 115
-#define __NR_sysinfo 116
-#define __NR_ipc 117
-#define __NR_fsync 118
-#define __NR_sigreturn 119
-#define __NR_clone 120
-#define __NR_setdomainname 121
-#define __NR_uname 122
-#define __NR_adjtimex 124
-#define __NR_mprotect 125
-#define __NR_sigprocmask 126
-#define __NR_create_module 127
-#define __NR_init_module 128
-#define __NR_delete_module 129
-#define __NR_get_kernel_syms 130
-#define __NR_quotactl 131
-#define __NR_getpgid 132
-#define __NR_fchdir 133
-#define __NR_bdflush 134
-#define __NR_sysfs 135
-#define __NR_personality 136
-#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
-#define __NR_getdents 141
-#define __NR_flock 143
-#define __NR_msync 144
-#define __NR_readv 145
-#define __NR_writev 146
-#define __NR_getsid 147
-#define __NR_fdatasync 148
-#define __NR__sysctl 149
-#define __NR_mlock 150
-#define __NR_munlock 151
-#define __NR_mlockall 152
-#define __NR_munlockall 153
-#define __NR_sched_setparam 154
-#define __NR_sched_getparam 155
-#define __NR_sched_setscheduler 156
-#define __NR_sched_getscheduler 157
-#define __NR_sched_yield 158
-#define __NR_sched_get_priority_max 159
-#define __NR_sched_get_priority_min 160
-#define __NR_sched_rr_get_interval 161
-#define __NR_nanosleep 162
-#define __NR_mremap 163
-#define __NR_query_module 167
-#define __NR_poll 168
-#define __NR_nfsservctl 169
-#define __NR_prctl 172
-#define __NR_rt_sigreturn 173
-#define __NR_rt_sigaction 174
-#define __NR_rt_sigprocmask 175
-#define __NR_rt_sigpending 176
-#define __NR_rt_sigtimedwait 177
-#define __NR_rt_sigqueueinfo 178
-#define __NR_rt_sigsuspend 179
-#define __NR_pread64 180
-#define __NR_pwrite64 181
-#define __NR_getcwd 183
-#define __NR_capget 184
-#define __NR_capset 185
-#define __NR_sigaltstack 186
-#define __NR_sendfile 187
-#define __NR_getpmsg 188
-#define __NR_putpmsg 189
-#define __NR_vfork 190
-#define __NR_pivot_root 217
-#define __NR_mincore 218
-#define __NR_madvise 219
-#define __NR_getdents64 220
-#define __NR_readahead 222
-#define __NR_setxattr 224
-#define __NR_lsetxattr 225
-#define __NR_fsetxattr 226
-#define __NR_getxattr 227
-#define __NR_lgetxattr 228
-#define __NR_fgetxattr 229
-#define __NR_listxattr 230
-#define __NR_llistxattr 231
-#define __NR_flistxattr 232
-#define __NR_removexattr 233
-#define __NR_lremovexattr 234
-#define __NR_fremovexattr 235
-#define __NR_gettid 236
-#define __NR_tkill 237
-#define __NR_futex 238
-#define __NR_sched_setaffinity 239
-#define __NR_sched_getaffinity 240
-#define __NR_tgkill 241
-/* Number 242 is reserved for tux */
-#define __NR_io_setup 243
-#define __NR_io_destroy 244
-#define __NR_io_getevents 245
-#define __NR_io_submit 246
-#define __NR_io_cancel 247
-#define __NR_exit_group 248
-#define __NR_epoll_create 249
-#define __NR_epoll_ctl 250
-#define __NR_epoll_wait 251
-#define __NR_set_tid_address 252
-#define __NR_fadvise64 253
-#define __NR_timer_create 254
-#define __NR_timer_settime 255
-#define __NR_timer_gettime 256
-#define __NR_timer_getoverrun 257
-#define __NR_timer_delete 258
-#define __NR_clock_settime 259
-#define __NR_clock_gettime 260
-#define __NR_clock_getres 261
-#define __NR_clock_nanosleep 262
-/* Number 263 is reserved for vserver */
-#define __NR_statfs64 265
-#define __NR_fstatfs64 266
-#define __NR_remap_file_pages 267
-#define __NR_mbind 268
-#define __NR_get_mempolicy 269
-#define __NR_set_mempolicy 270
-#define __NR_mq_open 271
-#define __NR_mq_unlink 272
-#define __NR_mq_timedsend 273
-#define __NR_mq_timedreceive 274
-#define __NR_mq_notify 275
-#define __NR_mq_getsetattr 276
-#define __NR_kexec_load 277
-#define __NR_add_key 278
-#define __NR_request_key 279
-#define __NR_keyctl 280
-#define __NR_waitid 281
-#define __NR_ioprio_set 282
-#define __NR_ioprio_get 283
-#define __NR_inotify_init 284
-#define __NR_inotify_add_watch 285
-#define __NR_inotify_rm_watch 286
-#define __NR_migrate_pages 287
-#define __NR_openat 288
-#define __NR_mkdirat 289
-#define __NR_mknodat 290
-#define __NR_fchownat 291
-#define __NR_futimesat 292
-#define __NR_unlinkat 294
-#define __NR_renameat 295
-#define __NR_linkat 296
-#define __NR_symlinkat 297
-#define __NR_readlinkat 298
-#define __NR_fchmodat 299
-#define __NR_faccessat 300
-#define __NR_pselect6 301
-#define __NR_ppoll 302
-#define __NR_unshare 303
-#define __NR_set_robust_list 304
-#define __NR_get_robust_list 305
-#define __NR_splice 306
-#define __NR_sync_file_range 307
-#define __NR_tee 308
-#define __NR_vmsplice 309
-#define __NR_move_pages 310
-#define __NR_getcpu 311
-#define __NR_epoll_pwait 312
-#define __NR_utimes 313
-#define __NR_fallocate 314
-#define __NR_utimensat 315
-#define __NR_signalfd 316
-#define __NR_timerfd 317
-#define __NR_eventfd 318
-#define __NR_timerfd_create 319
-#define __NR_timerfd_settime 320
-#define __NR_timerfd_gettime 321
-#define __NR_signalfd4 322
-#define __NR_eventfd2 323
-#define __NR_inotify_init1 324
-#define __NR_pipe2 325
-#define __NR_dup3 326
-#define __NR_epoll_create1 327
-#define __NR_preadv 328
-#define __NR_pwritev 329
-#define __NR_rt_tgsigqueueinfo 330
-#define __NR_perf_event_open 331
-#define __NR_fanotify_init 332
-#define __NR_fanotify_mark 333
-#define __NR_prlimit64 334
-#define __NR_name_to_handle_at 335
-#define __NR_open_by_handle_at 336
-#define __NR_clock_adjtime 337
-#define __NR_syncfs 338
-#define __NR_setns 339
-#define __NR_process_vm_readv 340
-#define __NR_process_vm_writev 341
-#define __NR_s390_runtime_instr 342
-#define __NR_kcmp 343
-#define __NR_finit_module 344
-#define __NR_sched_setattr 345
-#define __NR_sched_getattr 346
-#define __NR_renameat2 347
-#define __NR_seccomp 348
-#define __NR_getrandom 349
-#define __NR_memfd_create 350
-#define __NR_bpf 351
-#define __NR_s390_pci_mmio_write 352
-#define __NR_s390_pci_mmio_read 353
-#define __NR_execveat 354
-#define __NR_userfaultfd 355
-#define __NR_membarrier 356
-#define __NR_recvmmsg 357
-#define __NR_sendmmsg 358
-#define __NR_socket 359
-#define __NR_socketpair 360
-#define __NR_bind 361
-#define __NR_connect 362
-#define __NR_listen 363
-#define __NR_accept4 364
-#define __NR_getsockopt 365
-#define __NR_setsockopt 366
-#define __NR_getsockname 367
-#define __NR_getpeername 368
-#define __NR_sendto 369
-#define __NR_sendmsg 370
-#define __NR_recvfrom 371
-#define __NR_recvmsg 372
-#define __NR_shutdown 373
-#define __NR_mlock2 374
-#define __NR_copy_file_range 375
-#define __NR_preadv2 376
-#define __NR_pwritev2 377
-#define __NR_s390_guarded_storage 378
-#define __NR_statx 379
-#define __NR_s390_sthyi 380
-#define NR_syscalls 381
-
-/*
- * There are some system calls that are not present on 64 bit, some
- * have a different name although they do the same (e.g. __NR_chown32
- * is __NR_chown on 64 bit).
- */
-#ifndef __s390x__
-
-#define __NR_time 13
-#define __NR_lchown 16
-#define __NR_setuid 23
-#define __NR_getuid 24
-#define __NR_stime 25
-#define __NR_setgid 46
-#define __NR_getgid 47
-#define __NR_geteuid 49
-#define __NR_getegid 50
-#define __NR_setreuid 70
-#define __NR_setregid 71
-#define __NR_getrlimit 76
-#define __NR_getgroups 80
-#define __NR_setgroups 81
-#define __NR_fchown 95
-#define __NR_ioperm 101
-#define __NR_setfsuid 138
-#define __NR_setfsgid 139
-#define __NR__llseek 140
-#define __NR__newselect 142
-#define __NR_setresuid 164
-#define __NR_getresuid 165
-#define __NR_setresgid 170
-#define __NR_getresgid 171
-#define __NR_chown 182
-#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
-#define __NR_mmap2 192
-#define __NR_truncate64 193
-#define __NR_ftruncate64 194
-#define __NR_stat64 195
-#define __NR_lstat64 196
-#define __NR_fstat64 197
-#define __NR_lchown32 198
-#define __NR_getuid32 199
-#define __NR_getgid32 200
-#define __NR_geteuid32 201
-#define __NR_getegid32 202
-#define __NR_setreuid32 203
-#define __NR_setregid32 204
-#define __NR_getgroups32 205
-#define __NR_setgroups32 206
-#define __NR_fchown32 207
-#define __NR_setresuid32 208
-#define __NR_getresuid32 209
-#define __NR_setresgid32 210
-#define __NR_getresgid32 211
-#define __NR_chown32 212
-#define __NR_setuid32 213
-#define __NR_setgid32 214
-#define __NR_setfsuid32 215
-#define __NR_setfsgid32 216
-#define __NR_fcntl64 221
-#define __NR_sendfile64 223
-#define __NR_fadvise64_64 264
-#define __NR_fstatat64 293
-
-#else
-
-#define __NR_select 142
-#define __NR_getrlimit 191 /* SuS compliant getrlimit */
-#define __NR_lchown 198
-#define __NR_getuid 199
-#define __NR_getgid 200
-#define __NR_geteuid 201
-#define __NR_getegid 202
-#define __NR_setreuid 203
-#define __NR_setregid 204
-#define __NR_getgroups 205
-#define __NR_setgroups 206
-#define __NR_fchown 207
-#define __NR_setresuid 208
-#define __NR_getresuid 209
-#define __NR_setresgid 210
-#define __NR_getresgid 211
-#define __NR_chown 212
-#define __NR_setuid 213
-#define __NR_setgid 214
-#define __NR_setfsuid 215
-#define __NR_setfsgid 216
-#define __NR_newfstatat 293
-
-#endif
-
-#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1d9199e1c2ad..f41079da38c5 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -210,8 +210,10 @@
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index c8ec0ae16bf0..1ea545965ee3 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -1,19 +1,28 @@
# SPDX-License-Identifier: GPL-2.0
-prefix = /usr
+include ../scripts/Makefile.include
+
+prefix ?= /usr/local
CC = gcc
LEX = flex
YACC = bison
MAKE = make
+INSTALL ?= install
CFLAGS += -Wall -O2
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
FEATURE_USER = .bpf
FEATURE_TESTS = libbfd disassembler-four-args
FEATURE_DISPLAY = libbfd disassembler-four-args
@@ -38,40 +47,59 @@ ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
-%.yacc.c: %.y
- $(YACC) -o $@ -d $<
+$(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
+ $(QUIET_BISON)$(YACC) -o $@ -d $<
-%.lex.c: %.l
- $(LEX) -o $@ $<
+$(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
+ $(QUIET_FLEX)$(LEX) -o $@ $<
-all: bpf_jit_disasm bpf_dbg bpf_asm bpftool
+$(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
-bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
-bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
-bpf_jit_disasm : bpf_jit_disasm.o
+$(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
+$(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
-bpf_dbg : LDLIBS = -lreadline
-bpf_dbg : bpf_dbg.o
+PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
-bpf_asm : LDLIBS =
-bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
-bpf_exp.lex.o : bpf_exp.yacc.c
+all: $(PROGS) bpftool
-clean: bpftool_clean
- rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.*
+$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
+$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lopcodes -lbfd -ldl
-install: bpftool_install
- install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
- install bpf_dbg $(prefix)/bin/bpf_dbg
- install bpf_asm $(prefix)/bin/bpf_asm
+$(OUTPUT)bpf_dbg: $(OUTPUT)bpf_dbg.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lreadline
+
+$(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.lex.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
+
+$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+
+clean: bpftool_clean
+ $(call QUIET_CLEAN, bpf-progs)
+ $(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
+
+install: $(PROGS) bpftool_install
+ $(call QUIET_INSTALL, bpf_jit_disasm)
+ $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
+ $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
+ $(call QUIET_INSTALL, bpf_dbg)
+ $(Q)$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg
+ $(call QUIET_INSTALL, bpf_asm)
+ $(Q)$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
bpftool:
- $(MAKE) -C bpftool
+ $(call descend,bpftool)
bpftool_install:
- $(MAKE) -C bpftool install
+ $(call descend,bpftool,install)
bpftool_clean:
- $(MAKE) -C bpftool clean
+ $(call descend,bpftool,clean)
-.PHONY: bpftool FORCE
+.PHONY: all install clean bpftool bpftool_install bpftool_clean
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index e4ceee7f2dff..67ca6c69376c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -21,7 +21,7 @@ MAP COMMANDS
=============
| **bpftool** **prog { show | list }** [*PROG*]
-| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
+| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
| **bpftool** **prog pin** *PROG* *FILE*
| **bpftool** **prog load** *OBJ* *FILE*
@@ -39,12 +39,18 @@ DESCRIPTION
Output will start with program ID followed by program type and
zero or more named attributes (depending on kernel version).
- **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** }]
- Dump eBPF instructions of the program from the kernel.
- If *FILE* is specified image will be written to a file,
- otherwise it will be disassembled and printed to stdout.
+ **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }]
+ Dump eBPF instructions of the program from the kernel. By
+ default, eBPF will be disassembled and printed to standard
+ output in human-readable format. In this case, **opcodes**
+		  controls whether raw opcodes should be printed as well.
- **opcodes** controls if raw opcodes will be printed.
+ If **file** is specified, the binary image will instead be
+ written to *FILE*.
+
+		  If **visual** is specified, a control flow graph (CFG) is
+		  built instead, and the eBPF instructions are presented as a
+		  CFG in DOT format on standard output.
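+
+		  For example, assuming the Graphviz **dot** tool is
+		  installed, the output can be rendered with:
+		  **bpftool prog dump xlated** *PROG* **visual** | **dot** -Tsvg > *cfg.svg*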
**bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }]
Dump jited image (host machine code) of the program.
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 26901ec87361..4e69782c4a79 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -38,7 +38,7 @@ bash_compdir ?= /usr/share/bash-completion/completions
CC = gcc
CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
@@ -70,7 +70,7 @@ ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
-include $(wildcard *.d)
+include $(wildcard $(OUTPUT)*.d)
all: $(OUTPUT)bpftool
@@ -89,6 +89,8 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
install: $(OUTPUT)bpftool
$(call QUIET_INSTALL, bpftool)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 08719c54a614..490811b45fa7 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -147,7 +147,7 @@ _bpftool()
# Deal with simplest keywords
case $prev in
- help|key|opcodes)
+ help|key|opcodes|visual)
return 0
;;
tag)
@@ -223,11 +223,16 @@ _bpftool()
return 0
;;
*)
- _bpftool_once_attr 'file'
+ _bpftool_once_attr 'file'
+ if _bpftool_search_list 'xlated'; then
+ COMPREPLY+=( $( compgen -W 'opcodes visual' -- \
+ "$cur" ) )
+ else
COMPREPLY+=( $( compgen -W 'opcodes' -- \
"$cur" ) )
- return 0
- ;;
+ fi
+ return 0
+ ;;
esac
;;
pin)
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
new file mode 100644
index 000000000000..f30b3a4a840b
--- /dev/null
+++ b/tools/bpf/bpftool/cfg.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cfg.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+struct cfg {
+ struct list_head funcs;
+ int func_num;
+};
+
+struct func_node {
+ struct list_head l;
+ struct list_head bbs;
+ struct bpf_insn *start;
+ struct bpf_insn *end;
+ int idx;
+ int bb_num;
+};
+
+struct bb_node {
+ struct list_head l;
+ struct list_head e_prevs;
+ struct list_head e_succs;
+ struct bpf_insn *head;
+ struct bpf_insn *tail;
+ int idx;
+};
+
+#define EDGE_FLAG_EMPTY 0x0
+#define EDGE_FLAG_FALLTHROUGH 0x1
+#define EDGE_FLAG_JUMP 0x2
+struct edge_node {
+ struct list_head l;
+ struct bb_node *src;
+ struct bb_node *dst;
+ int flags;
+};
+
+#define ENTRY_BLOCK_INDEX 0
+#define EXIT_BLOCK_INDEX 1
+#define NUM_FIXED_BLOCKS 2
+#define func_prev(func) list_prev_entry(func, l)
+#define func_next(func) list_next_entry(func, l)
+#define bb_prev(bb) list_prev_entry(bb, l)
+#define bb_next(bb) list_next_entry(bb, l)
+#define entry_bb(func) func_first_bb(func)
+#define exit_bb(func) func_last_bb(func)
+#define cfg_first_func(cfg) \
+ list_first_entry(&cfg->funcs, struct func_node, l)
+#define cfg_last_func(cfg) \
+ list_last_entry(&cfg->funcs, struct func_node, l)
+#define func_first_bb(func) \
+ list_first_entry(&func->bbs, struct bb_node, l)
+#define func_last_bb(func) \
+ list_last_entry(&func->bbs, struct bb_node, l)
+
+static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
+{
+ struct func_node *new_func, *func;
+
+ list_for_each_entry(func, &cfg->funcs, l) {
+ if (func->start == insn)
+ return func;
+ else if (func->start > insn)
+ break;
+ }
+
+ func = func_prev(func);
+ new_func = calloc(1, sizeof(*new_func));
+ if (!new_func) {
+ p_err("OOM when allocating FUNC node");
+ return NULL;
+ }
+ new_func->start = insn;
+ new_func->idx = cfg->func_num;
+ list_add(&new_func->l, &func->l);
+ cfg->func_num++;
+
+ return new_func;
+}
+
+static struct bb_node *func_append_bb(struct func_node *func,
+ struct bpf_insn *insn)
+{
+ struct bb_node *new_bb, *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ if (bb->head == insn)
+ return bb;
+ else if (bb->head > insn)
+ break;
+ }
+
+ bb = bb_prev(bb);
+ new_bb = calloc(1, sizeof(*new_bb));
+ if (!new_bb) {
+ p_err("OOM when allocating BB node");
+ return NULL;
+ }
+ new_bb->head = insn;
+ INIT_LIST_HEAD(&new_bb->e_prevs);
+ INIT_LIST_HEAD(&new_bb->e_succs);
+ list_add(&new_bb->l, &bb->l);
+
+ return new_bb;
+}
+
+static struct bb_node *func_insert_dummy_bb(struct list_head *after)
+{
+ struct bb_node *bb;
+
+ bb = calloc(1, sizeof(*bb));
+ if (!bb) {
+ p_err("OOM when allocating BB node");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&bb->e_prevs);
+ INIT_LIST_HEAD(&bb->e_succs);
+ list_add(&bb->l, after);
+
+ return bb;
+}
+
+static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
+ struct bpf_insn *end)
+{
+ struct func_node *func, *last_func;
+
+ func = cfg_append_func(cfg, cur);
+ if (!func)
+ return true;
+
+ for (; cur < end; cur++) {
+ if (cur->code != (BPF_JMP | BPF_CALL))
+ continue;
+ if (cur->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ func = cfg_append_func(cfg, cur + cur->off + 1);
+ if (!func)
+ return true;
+ }
+
+ last_func = cfg_last_func(cfg);
+ last_func->end = end - 1;
+ func = cfg_first_func(cfg);
+ list_for_each_entry_from(func, &last_func->l, l) {
+ func->end = func_next(func)->start - 1;
+ }
+
+ return false;
+}
+
+static bool func_partition_bb_head(struct func_node *func)
+{
+ struct bpf_insn *cur, *end;
+ struct bb_node *bb;
+
+ cur = func->start;
+ end = func->end;
+ INIT_LIST_HEAD(&func->bbs);
+ bb = func_append_bb(func, cur);
+ if (!bb)
+ return true;
+
+ for (; cur <= end; cur++) {
+ if (BPF_CLASS(cur->code) == BPF_JMP) {
+ u8 opcode = BPF_OP(cur->code);
+
+ if (opcode == BPF_EXIT || opcode == BPF_CALL)
+ continue;
+
+ bb = func_append_bb(func, cur + cur->off + 1);
+ if (!bb)
+ return true;
+
+ if (opcode != BPF_JA) {
+ bb = func_append_bb(func, cur + 1);
+ if (!bb)
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void func_partition_bb_tail(struct func_node *func)
+{
+ unsigned int bb_idx = NUM_FIXED_BLOCKS;
+ struct bb_node *bb, *last;
+
+ last = func_last_bb(func);
+ last->tail = func->end;
+ bb = func_first_bb(func);
+ list_for_each_entry_from(bb, &last->l, l) {
+ bb->tail = bb_next(bb)->head - 1;
+ bb->idx = bb_idx++;
+ }
+
+ last->idx = bb_idx++;
+ func->bb_num = bb_idx;
+}
+
+static bool func_add_special_bb(struct func_node *func)
+{
+ struct bb_node *bb;
+
+ bb = func_insert_dummy_bb(&func->bbs);
+ if (!bb)
+ return true;
+ bb->idx = ENTRY_BLOCK_INDEX;
+
+ bb = func_insert_dummy_bb(&func_last_bb(func)->l);
+ if (!bb)
+ return true;
+ bb->idx = EXIT_BLOCK_INDEX;
+
+ return false;
+}
+
+static bool func_partition_bb(struct func_node *func)
+{
+ if (func_partition_bb_head(func))
+ return true;
+
+ func_partition_bb_tail(func);
+
+ return false;
+}
+
+static struct bb_node *func_search_bb_with_head(struct func_node *func,
+ struct bpf_insn *insn)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ if (bb->head == insn)
+ return bb;
+ }
+
+ return NULL;
+}
+
+static struct edge_node *new_edge(struct bb_node *src, struct bb_node *dst,
+ int flags)
+{
+ struct edge_node *e;
+
+ e = calloc(1, sizeof(*e));
+ if (!e) {
+ p_err("OOM when allocating edge node");
+ return NULL;
+ }
+
+ if (src)
+ e->src = src;
+ if (dst)
+ e->dst = dst;
+
+ e->flags |= flags;
+
+ return e;
+}
+
+static bool func_add_bb_edges(struct func_node *func)
+{
+ struct bpf_insn *insn;
+ struct edge_node *e;
+ struct bb_node *bb;
+
+ bb = entry_bb(func);
+ e = new_edge(bb, bb_next(bb), EDGE_FLAG_FALLTHROUGH);
+ if (!e)
+ return true;
+ list_add_tail(&e->l, &bb->e_succs);
+
+ bb = exit_bb(func);
+ e = new_edge(bb_prev(bb), bb, EDGE_FLAG_FALLTHROUGH);
+ if (!e)
+ return true;
+ list_add_tail(&e->l, &bb->e_prevs);
+
+ bb = entry_bb(func);
+ bb = bb_next(bb);
+ list_for_each_entry_from(bb, &exit_bb(func)->l, l) {
+ e = new_edge(bb, NULL, EDGE_FLAG_EMPTY);
+ if (!e)
+ return true;
+ e->src = bb;
+
+ insn = bb->tail;
+ if (BPF_CLASS(insn->code) != BPF_JMP ||
+ BPF_OP(insn->code) == BPF_EXIT) {
+ e->dst = bb_next(bb);
+ e->flags |= EDGE_FLAG_FALLTHROUGH;
+ list_add_tail(&e->l, &bb->e_succs);
+ continue;
+ } else if (BPF_OP(insn->code) == BPF_JA) {
+ e->dst = func_search_bb_with_head(func,
+ insn + insn->off + 1);
+ e->flags |= EDGE_FLAG_JUMP;
+ list_add_tail(&e->l, &bb->e_succs);
+ continue;
+ }
+
+ e->dst = bb_next(bb);
+ e->flags |= EDGE_FLAG_FALLTHROUGH;
+ list_add_tail(&e->l, &bb->e_succs);
+
+ e = new_edge(bb, NULL, EDGE_FLAG_JUMP);
+ if (!e)
+ return true;
+ e->src = bb;
+ e->dst = func_search_bb_with_head(func, insn + insn->off + 1);
+ list_add_tail(&e->l, &bb->e_succs);
+ }
+
+ return false;
+}
+
+static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
+{
+ int cnt = len / sizeof(*insn);
+ struct func_node *func;
+
+ INIT_LIST_HEAD(&cfg->funcs);
+
+ if (cfg_partition_funcs(cfg, insn, insn + cnt))
+ return true;
+
+ list_for_each_entry(func, &cfg->funcs, l) {
+ if (func_partition_bb(func) || func_add_special_bb(func))
+ return true;
+
+ if (func_add_bb_edges(func))
+ return true;
+ }
+
+ return false;
+}
+
+static void cfg_destroy(struct cfg *cfg)
+{
+ struct func_node *func, *func2;
+
+ list_for_each_entry_safe(func, func2, &cfg->funcs, l) {
+ struct bb_node *bb, *bb2;
+
+ list_for_each_entry_safe(bb, bb2, &func->bbs, l) {
+ struct edge_node *e, *e2;
+
+ list_for_each_entry_safe(e, e2, &bb->e_prevs, l) {
+ list_del(&e->l);
+ free(e);
+ }
+
+ list_for_each_entry_safe(e, e2, &bb->e_succs, l) {
+ list_del(&e->l);
+ free(e);
+ }
+
+ list_del(&bb->l);
+ free(bb);
+ }
+
+ list_del(&func->l);
+ free(func);
+ }
+}
+
+static void draw_bb_node(struct func_node *func, struct bb_node *bb)
+{
+ const char *shape;
+
+ if (bb->idx == ENTRY_BLOCK_INDEX || bb->idx == EXIT_BLOCK_INDEX)
+ shape = "Mdiamond";
+ else
+ shape = "record";
+
+ printf("\tfn_%d_bb_%d [shape=%s,style=filled,label=\"",
+ func->idx, bb->idx, shape);
+
+ if (bb->idx == ENTRY_BLOCK_INDEX) {
+ printf("ENTRY");
+ } else if (bb->idx == EXIT_BLOCK_INDEX) {
+ printf("EXIT");
+ } else {
+ unsigned int start_idx;
+ struct dump_data dd = {};
+
+ printf("{");
+ kernel_syms_load(&dd);
+ start_idx = bb->head - func->start;
+ dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx);
+ kernel_syms_destroy(&dd);
+ printf("}");
+ }
+
+ printf("\"];\n\n");
+}
+
+static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb)
+{
+ const char *style = "\"solid,bold\"";
+ const char *color = "black";
+ int func_idx = func->idx;
+ struct edge_node *e;
+ int weight = 10;
+
+ if (list_empty(&bb->e_succs))
+ return;
+
+ list_for_each_entry(e, &bb->e_succs, l) {
+ printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=%s, color=%s, weight=%d, constraint=true",
+ func_idx, e->src->idx, func_idx, e->dst->idx,
+ style, color, weight);
+ printf("];\n");
+ }
+}
+
+static void func_output_bb_def(struct func_node *func)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ draw_bb_node(func, bb);
+ }
+}
+
+static void func_output_edges(struct func_node *func)
+{
+ int func_idx = func->idx;
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ draw_bb_succ_edges(func, bb);
+ }
+
+ /* Add an invisible edge from ENTRY to EXIT; this improves
+ * the graph layout.
+ */
+ printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=\"invis\", constraint=true];\n",
+ func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX);
+}
+
+static void cfg_dump(struct cfg *cfg)
+{
+ struct func_node *func;
+
+ printf("digraph \"DOT graph for eBPF program\" {\n");
+ list_for_each_entry(func, &cfg->funcs, l) {
+ printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n",
+ func->idx, func->idx);
+ func_output_bb_def(func);
+ func_output_edges(func);
+ printf("}\n");
+ }
+ printf("}\n");
+}
+
+void dump_xlated_cfg(void *buf, unsigned int len)
+{
+ struct bpf_insn *insn = buf;
+ struct cfg cfg;
+
+ memset(&cfg, 0, sizeof(cfg));
+ if (cfg_build(&cfg, insn, len))
+ return;
+
+ cfg_dump(&cfg);
+
+ cfg_destroy(&cfg);
+}
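[For orientation, the printf calls above produce output of roughly this shape for a trivial single-function program. This is an illustrative reconstruction from the format strings, not captured output, with the record label abbreviated:

    digraph "DOT graph for eBPF program" {
    subgraph "cluster_0" {
            style="dashed";
            color="black";
            label="func_0 ()";
            fn_0_bb_0 [shape=Mdiamond,style=filled,label="ENTRY"];
            fn_0_bb_2 [shape=record,style=filled,label="{   0: (b7) r0 = 0\l\ ...}"];
            fn_0_bb_1 [shape=Mdiamond,style=filled,label="EXIT"];
            fn_0_bb_0:s -> fn_0_bb_2:n [style="solid,bold", color=black, weight=10, constraint=true];
            fn_0_bb_2:s -> fn_0_bb_1:n [style="solid,bold", color=black, weight=10, constraint=true];
            fn_0_bb_0:s -> fn_0_bb_1:n [style="invis", constraint=true];
    }
    }]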
diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h
new file mode 100644
index 000000000000..2cc9bd990b13
--- /dev/null
+++ b/tools/bpf/bpftool/cfg.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_CFG_H
+#define __BPF_TOOL_CFG_H
+
+void dump_xlated_cfg(void *buf, unsigned int len);
+
+#endif /* __BPF_TOOL_CFG_H */
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 0b482c0070e0..465995281dcd 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -55,6 +55,10 @@
#include "main.h"
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
void p_err(const char *fmt, ...)
{
va_list ap;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 3a0396d87c42..1ec852d21d44 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -46,6 +46,9 @@
#include "main.h"
+#define BATCH_LINE_LEN_MAX 65536
+#define BATCH_ARG_NB_MAX 4096
+
const char *bin_name;
static int last_argc;
static char **last_argv;
@@ -157,6 +160,54 @@ void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
}
}
+/* Split command line into argument vector. */
+static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb)
+{
+ static const char ws[] = " \t\r\n";
+ char *cp = line;
+ int n_argc = 0;
+
+ while (*cp) {
+ /* Skip leading whitespace. */
+ cp += strspn(cp, ws);
+
+ if (*cp == '\0')
+ break;
+
+ if (n_argc >= (maxargs - 1)) {
+ p_err("too many arguments to command %d", cmd_nb);
+ return -1;
+ }
+
+ /* Word begins with quote. */
+ if (*cp == '\'' || *cp == '"') {
+ char quote = *cp++;
+
+ n_argv[n_argc++] = cp;
+ /* Find ending quote. */
+ cp = strchr(cp, quote);
+ if (!cp) {
+ p_err("unterminated quoted string in command %d",
+ cmd_nb);
+ return -1;
+ }
+ } else {
+ n_argv[n_argc++] = cp;
+
+ /* Find end of word. */
+ cp += strcspn(cp, ws);
+ if (*cp == '\0')
+ break;
+ }
+
+ /* Separate words. */
+ *cp++ = 0;
+ }
+ n_argv[n_argc] = NULL;
+
+ return n_argc;
+}
+
static int do_batch(int argc, char **argv);
static const struct cmd cmds[] = {
@@ -171,11 +222,12 @@ static const struct cmd cmds[] = {
static int do_batch(int argc, char **argv)
{
+ char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX];
+ char *n_argv[BATCH_ARG_NB_MAX];
unsigned int lines = 0;
- char *n_argv[4096];
- char buf[65536];
int n_argc;
FILE *fp;
+ char *cp;
int err;
int i;
@@ -191,7 +243,10 @@ static int do_batch(int argc, char **argv)
}
NEXT_ARG();
- fp = fopen(*argv, "r");
+ if (!strcmp(*argv, "-"))
+ fp = stdin;
+ else
+ fp = fopen(*argv, "r");
if (!fp) {
p_err("Can't open file (%s): %s", *argv, strerror(errno));
return -1;
@@ -200,27 +255,45 @@ static int do_batch(int argc, char **argv)
if (json_output)
jsonw_start_array(json_wtr);
while (fgets(buf, sizeof(buf), fp)) {
+ cp = strchr(buf, '#');
+ if (cp)
+ *cp = '\0';
+
if (strlen(buf) == sizeof(buf) - 1) {
errno = E2BIG;
break;
}
- n_argc = 0;
- n_argv[n_argc] = strtok(buf, " \t\n");
-
- while (n_argv[n_argc]) {
- n_argc++;
- if (n_argc == ARRAY_SIZE(n_argv)) {
- p_err("line %d has too many arguments, skip",
+ /* Append continuation lines if any (coming after a line ending
+ * with '\' in the batch file).
+ */
+ while ((cp = strstr(buf, "\\\n")) != NULL) {
+ if (!fgets(contline, sizeof(contline), fp) ||
+ strlen(contline) == 0) {
+ p_err("missing continuation line on command %d",
lines);
- n_argc = 0;
- break;
+ err = -1;
+ goto err_close;
+ }
+
+ cp = strchr(contline, '#');
+ if (cp)
+ *cp = '\0';
+
+ if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
+ p_err("command %d is too long", lines);
+ err = -1;
+ goto err_close;
}
- n_argv[n_argc] = strtok(NULL, " \t\n");
+ buf[strlen(buf) - 2] = '\0';
+ strcat(buf, contline);
}
+ n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
if (!n_argc)
continue;
+ if (n_argc < 0)
+ goto err_close;
if (json_output) {
jsonw_start_object(json_wtr);
@@ -244,14 +317,15 @@ static int do_batch(int argc, char **argv)
}
if (errno && errno != ENOENT) {
- perror("reading batch file failed");
+ p_err("reading batch file failed: %s", strerror(errno));
err = -1;
} else {
- p_info("processed %d lines", lines);
+ p_info("processed %d commands", lines);
err = 0;
}
err_close:
- fclose(fp);
+ if (fp != stdin)
+ fclose(fp);
if (json_output)
jsonw_end_array(json_wtr);
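[Taken together, the batch parser above supports '#' comments, quoted arguments, '\'-continued lines, and reading commands from standard input via "-". An illustrative batch file; the program ID and pin path are hypothetical:

    # comments run to the end of the line
    prog show
    prog dump xlated id 7 \
        opcodes            # continuation of the previous command
    prog pin id 7 "/sys/fs/bpf/my prog"

The same commands can now be piped in with "bpftool batch file -".]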
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e8e2baaf93c2..f7a810897eac 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -47,8 +47,9 @@
#include <bpf.h>
#include <libbpf.h>
+#include "cfg.h"
#include "main.h"
-#include "disasm.h"
+#include "xlated_dumper.h"
static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_UNSPEC] = "unspec",
@@ -407,259 +408,6 @@ static int do_show(int argc, char **argv)
return err;
}
-#define SYM_MAX_NAME 256
-
-struct kernel_sym {
- unsigned long address;
- char name[SYM_MAX_NAME];
-};
-
-struct dump_data {
- unsigned long address_call_base;
- struct kernel_sym *sym_mapping;
- __u32 sym_count;
- char scratch_buff[SYM_MAX_NAME];
-};
-
-static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
-{
- return ((struct kernel_sym *)sym_a)->address -
- ((struct kernel_sym *)sym_b)->address;
-}
-
-static void kernel_syms_load(struct dump_data *dd)
-{
- struct kernel_sym *sym;
- char buff[256];
- void *tmp, *address;
- FILE *fp;
-
- fp = fopen("/proc/kallsyms", "r");
- if (!fp)
- return;
-
- while (!feof(fp)) {
- if (!fgets(buff, sizeof(buff), fp))
- break;
- tmp = realloc(dd->sym_mapping,
- (dd->sym_count + 1) *
- sizeof(*dd->sym_mapping));
- if (!tmp) {
-out:
- free(dd->sym_mapping);
- dd->sym_mapping = NULL;
- fclose(fp);
- return;
- }
- dd->sym_mapping = tmp;
- sym = &dd->sym_mapping[dd->sym_count];
- if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
- continue;
- sym->address = (unsigned long)address;
- if (!strcmp(sym->name, "__bpf_call_base")) {
- dd->address_call_base = sym->address;
- /* sysctl kernel.kptr_restrict was set */
- if (!sym->address)
- goto out;
- }
- if (sym->address)
- dd->sym_count++;
- }
-
- fclose(fp);
-
- qsort(dd->sym_mapping, dd->sym_count,
- sizeof(*dd->sym_mapping), kernel_syms_cmp);
-}
-
-static void kernel_syms_destroy(struct dump_data *dd)
-{
- free(dd->sym_mapping);
-}
-
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
- unsigned long key)
-{
- struct kernel_sym sym = {
- .address = key,
- };
-
- return dd->sym_mapping ?
- bsearch(&sym, dd->sym_mapping, dd->sym_count,
- sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
-}
-
-static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
-{
- va_list args;
-
- va_start(args, fmt);
- vprintf(fmt, args);
- va_end(args);
-}
-
-static const char *print_call_pcrel(struct dump_data *dd,
- struct kernel_sym *sym,
- unsigned long address,
- const struct bpf_insn *insn)
-{
- if (sym)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%+d#%s", insn->off, sym->name);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%+d#0x%lx", insn->off, address);
- return dd->scratch_buff;
-}
-
-static const char *print_call_helper(struct dump_data *dd,
- struct kernel_sym *sym,
- unsigned long address)
-{
- if (sym)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%s", sym->name);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "0x%lx", address);
- return dd->scratch_buff;
-}
-
-static const char *print_call(void *private_data,
- const struct bpf_insn *insn)
-{
- struct dump_data *dd = private_data;
- unsigned long address = dd->address_call_base + insn->imm;
- struct kernel_sym *sym;
-
- sym = kernel_syms_search(dd, address);
- if (insn->src_reg == BPF_PSEUDO_CALL)
- return print_call_pcrel(dd, sym, address, insn);
- else
- return print_call_helper(dd, sym, address);
-}
-
-static const char *print_imm(void *private_data,
- const struct bpf_insn *insn,
- __u64 full_imm)
-{
- struct dump_data *dd = private_data;
-
- if (insn->src_reg == BPF_PSEUDO_MAP_FD)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[id:%u]", insn->imm);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "0x%llx", (unsigned long long)full_imm);
- return dd->scratch_buff;
-}
-
-static void dump_xlated_plain(struct dump_data *dd, void *buf,
- unsigned int len, bool opcodes)
-{
- const struct bpf_insn_cbs cbs = {
- .cb_print = print_insn,
- .cb_call = print_call,
- .cb_imm = print_imm,
- .private_data = dd,
- };
- struct bpf_insn *insn = buf;
- bool double_insn = false;
- unsigned int i;
-
- for (i = 0; i < len / sizeof(*insn); i++) {
- if (double_insn) {
- double_insn = false;
- continue;
- }
-
- double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
- printf("% 4d: ", i);
- print_bpf_insn(&cbs, NULL, insn + i, true);
-
- if (opcodes) {
- printf(" ");
- fprint_hex(stdout, insn + i, 8, " ");
- if (double_insn && i < len - 1) {
- printf(" ");
- fprint_hex(stdout, insn + i + 1, 8, " ");
- }
- printf("\n");
- }
- }
-}
-
-static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
-{
- unsigned int l = strlen(fmt);
- char chomped_fmt[l];
- va_list args;
-
- va_start(args, fmt);
- if (l > 0) {
- strncpy(chomped_fmt, fmt, l - 1);
- chomped_fmt[l - 1] = '\0';
- }
- jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
- va_end(args);
-}
-
-static void dump_xlated_json(struct dump_data *dd, void *buf,
- unsigned int len, bool opcodes)
-{
- const struct bpf_insn_cbs cbs = {
- .cb_print = print_insn_json,
- .cb_call = print_call,
- .cb_imm = print_imm,
- .private_data = dd,
- };
- struct bpf_insn *insn = buf;
- bool double_insn = false;
- unsigned int i;
-
- jsonw_start_array(json_wtr);
- for (i = 0; i < len / sizeof(*insn); i++) {
- if (double_insn) {
- double_insn = false;
- continue;
- }
- double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
- jsonw_start_object(json_wtr);
- jsonw_name(json_wtr, "disasm");
- print_bpf_insn(&cbs, NULL, insn + i, true);
-
- if (opcodes) {
- jsonw_name(json_wtr, "opcodes");
- jsonw_start_object(json_wtr);
-
- jsonw_name(json_wtr, "code");
- jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
-
- jsonw_name(json_wtr, "src_reg");
- jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
-
- jsonw_name(json_wtr, "dst_reg");
- jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
-
- jsonw_name(json_wtr, "off");
- print_hex_data_json((uint8_t *)(&insn[i].off), 2);
-
- jsonw_name(json_wtr, "imm");
- if (double_insn && i < len - 1)
- print_hex_data_json((uint8_t *)(&insn[i].imm),
- 12);
- else
- print_hex_data_json((uint8_t *)(&insn[i].imm),
- 4);
- jsonw_end_object(json_wtr);
- }
- jsonw_end_object(json_wtr);
- }
- jsonw_end_array(json_wtr);
-}
-
static int do_dump(int argc, char **argv)
{
struct bpf_prog_info info = {};
@@ -668,6 +416,7 @@ static int do_dump(int argc, char **argv)
unsigned int buf_size;
char *filepath = NULL;
bool opcodes = false;
+ bool visual = false;
unsigned char *buf;
__u32 *member_len;
__u64 *member_ptr;
@@ -706,6 +455,9 @@ static int do_dump(int argc, char **argv)
} else if (is_prefix(*argv, "opcodes")) {
opcodes = true;
NEXT_ARG();
+ } else if (is_prefix(*argv, "visual")) {
+ visual = true;
+ NEXT_ARG();
}
if (argc) {
@@ -774,27 +526,33 @@ static int do_dump(int argc, char **argv)
n < 0 ? strerror(errno) : "short write");
goto err_free;
}
- } else {
- if (member_len == &info.jited_prog_len) {
- const char *name = NULL;
-
- if (info.ifindex) {
- name = ifindex_to_bfd_name_ns(info.ifindex,
- info.netns_dev,
- info.netns_ino);
- if (!name)
- goto err_free;
- }
- disasm_print_insn(buf, *member_len, opcodes, name);
- } else {
- kernel_syms_load(&dd);
- if (json_output)
- dump_xlated_json(&dd, buf, *member_len, opcodes);
- else
- dump_xlated_plain(&dd, buf, *member_len, opcodes);
- kernel_syms_destroy(&dd);
+ if (json_output)
+ jsonw_null(json_wtr);
+ } else if (member_len == &info.jited_prog_len) {
+ const char *name = NULL;
+
+ if (info.ifindex) {
+ name = ifindex_to_bfd_name_ns(info.ifindex,
+ info.netns_dev,
+ info.netns_ino);
+ if (!name)
+ goto err_free;
}
+
+ disasm_print_insn(buf, *member_len, opcodes, name);
+ } else if (visual) {
+ if (json_output)
+ jsonw_null(json_wtr);
+ else
+ dump_xlated_cfg(buf, *member_len);
+ } else {
+ kernel_syms_load(&dd);
+ if (json_output)
+ dump_xlated_json(&dd, buf, *member_len, opcodes);
+ else
+ dump_xlated_plain(&dd, buf, *member_len, opcodes);
+ kernel_syms_destroy(&dd);
}
free(buf);
@@ -848,7 +606,7 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %s %s { show | list } [PROG]\n"
- " %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
+ " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
" %s %s dump jited PROG [{ file FILE | opcodes }]\n"
" %s %s pin PROG FILE\n"
" %s %s load OBJ FILE\n"
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
new file mode 100644
index 000000000000..20da835e9e38
--- /dev/null
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "disasm.h"
+#include "json_writer.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
+{
+ return ((struct kernel_sym *)sym_a)->address -
+ ((struct kernel_sym *)sym_b)->address;
+}
+
+void kernel_syms_load(struct dump_data *dd)
+{
+ struct kernel_sym *sym;
+ char buff[256];
+ void *tmp, *address;
+ FILE *fp;
+
+ fp = fopen("/proc/kallsyms", "r");
+ if (!fp)
+ return;
+
+ while (!feof(fp)) {
+ if (!fgets(buff, sizeof(buff), fp))
+ break;
+ tmp = realloc(dd->sym_mapping,
+ (dd->sym_count + 1) *
+ sizeof(*dd->sym_mapping));
+ if (!tmp) {
+out:
+ free(dd->sym_mapping);
+ dd->sym_mapping = NULL;
+ fclose(fp);
+ return;
+ }
+ dd->sym_mapping = tmp;
+ sym = &dd->sym_mapping[dd->sym_count];
+ if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+ continue;
+ sym->address = (unsigned long)address;
+ if (!strcmp(sym->name, "__bpf_call_base")) {
+ dd->address_call_base = sym->address;
+ /* sysctl kernel.kptr_restrict was set */
+ if (!sym->address)
+ goto out;
+ }
+ if (sym->address)
+ dd->sym_count++;
+ }
+
+ fclose(fp);
+
+ qsort(dd->sym_mapping, dd->sym_count,
+ sizeof(*dd->sym_mapping), kernel_syms_cmp);
+}
+
+void kernel_syms_destroy(struct dump_data *dd)
+{
+ free(dd->sym_mapping);
+}
+
+static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+ unsigned long key)
+{
+ struct kernel_sym sym = {
+ .address = key,
+ };
+
+ return dd->sym_mapping ?
+ bsearch(&sym, dd->sym_mapping, dd->sym_count,
+ sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
+}
+
+static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+}
+
+static void
+print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ char buf[64], *p;
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ p = buf;
+ while (*p != '\0') {
+ if (*p == '\n') {
+ memmove(p + 3, p, strlen(buf) + 1 - (p - buf));
+ /* Align each instruction dump row left. */
+ *p++ = '\\';
+ *p++ = 'l';
+ /* Output multiline concatenation. */
+ *p++ = '\\';
+ } else if (*p == '<' || *p == '>' || *p == '|' || *p == '&') {
+ memmove(p + 1, p, strlen(buf) + 1 - (p - buf));
+ /* Escape special character. */
+ *p++ = '\\';
+ }
+
+ p++;
+ }
+
+ printf("%s", buf);
+}
+
+static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ unsigned int l = strlen(fmt);
+ char chomped_fmt[l];
+ va_list args;
+
+ va_start(args, fmt);
+ if (l > 0) {
+ strncpy(chomped_fmt, fmt, l - 1);
+ chomped_fmt[l - 1] = '\0';
+ }
+ jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
+ va_end(args);
+}
+
+static const char *print_call_pcrel(struct dump_data *dd,
+ struct kernel_sym *sym,
+ unsigned long address,
+ const struct bpf_insn *insn)
+{
+ if (sym)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d#%s", insn->off, sym->name);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d#0x%lx", insn->off, address);
+ return dd->scratch_buff;
+}
+
+static const char *print_call_helper(struct dump_data *dd,
+ struct kernel_sym *sym,
+ unsigned long address)
+{
+ if (sym)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%s", sym->name);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "0x%lx", address);
+ return dd->scratch_buff;
+}
+
+static const char *print_call(void *private_data,
+ const struct bpf_insn *insn)
+{
+ struct dump_data *dd = private_data;
+ unsigned long address = dd->address_call_base + insn->imm;
+ struct kernel_sym *sym;
+
+ sym = kernel_syms_search(dd, address);
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return print_call_pcrel(dd, sym, address, insn);
+ else
+ return print_call_helper(dd, sym, address);
+}
+
+static const char *print_imm(void *private_data,
+ const struct bpf_insn *insn,
+ __u64 full_imm)
+{
+ struct dump_data *dd = private_data;
+
+ if (insn->src_reg == BPF_PSEUDO_MAP_FD)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "map[id:%u]", insn->imm);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "0x%llx", (unsigned long long)full_imm);
+ return dd->scratch_buff;
+}
+
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_json,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "disasm");
+ print_bpf_insn(&cbs, NULL, insn + i, true);
+
+ if (opcodes) {
+ jsonw_name(json_wtr, "opcodes");
+ jsonw_start_object(json_wtr);
+
+ jsonw_name(json_wtr, "code");
+ jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
+
+ jsonw_name(json_wtr, "src_reg");
+ jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
+
+ jsonw_name(json_wtr, "dst_reg");
+ jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
+
+ jsonw_name(json_wtr, "off");
+ print_hex_data_json((uint8_t *)(&insn[i].off), 2);
+
+ jsonw_name(json_wtr, "imm");
+ if (double_insn && i < len - 1)
+ print_hex_data_json((uint8_t *)(&insn[i].imm),
+ 12);
+ else
+ print_hex_data_json((uint8_t *)(&insn[i].imm),
+ 4);
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ printf("% 4d: ", i);
+ print_bpf_insn(&cbs, NULL, insn + i, true);
+
+ if (opcodes) {
+ printf(" ");
+ fprint_hex(stdout, insn + i, 8, " ");
+ if (double_insn && i < len - 1) {
+ printf(" ");
+ fprint_hex(stdout, insn + i + 1, 8, " ");
+ }
+ printf("\n");
+ }
+ }
+}
+
+void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
+ unsigned int start_idx)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_for_graph,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn_start = buf_start;
+ struct bpf_insn *insn_end = buf_end;
+ struct bpf_insn *cur = insn_start;
+
+ for (; cur <= insn_end; cur++) {
+ printf("% 4d: ", (int)(cur - insn_start + start_idx));
+ print_bpf_insn(&cbs, NULL, cur, true);
+ if (cur != insn_end)
+ printf(" | ");
+ }
+}
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
new file mode 100644
index 000000000000..b34affa7ef2d
--- /dev/null
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_XLATED_DUMPER_H
+#define __BPF_TOOL_XLATED_DUMPER_H
+
+#define SYM_MAX_NAME 256
+
+struct kernel_sym {
+ unsigned long address;
+ char name[SYM_MAX_NAME];
+};
+
+struct dump_data {
+ unsigned long address_call_base;
+ struct kernel_sym *sym_mapping;
+ __u32 sym_count;
+ char scratch_buff[SYM_MAX_NAME + 8];
+};
+
+void kernel_syms_load(struct dump_data *dd);
+void kernel_syms_destroy(struct dump_data *dd);
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes);
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes);
+void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end,
+ unsigned int start_index);
+
+#endif
diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile
index 860fa151640a..ffca068e4a76 100644
--- a/tools/cgroup/Makefile
+++ b/tools/cgroup/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for cgroup tools
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra
all: cgroup_event_listener
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index 805a2c0cf4cd..240eda014b37 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -12,8 +12,6 @@ endif
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 1139d71fa0cf..5db5e62cebda 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for Hyper-V tools
-CC = $(CROSS_COMPILE)gcc
WARNINGS = -Wall -Wextra
CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
diff --git a/tools/iio/Makefile b/tools/iio/Makefile
index a08e7a47d6a3..332ed2f6c2c2 100644
--- a/tools/iio/Makefile
+++ b/tools/iio/Makefile
@@ -12,8 +12,6 @@ endif
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
ALL_TARGETS := iio_event_monitor lsiio iio_generic_buffer
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index ac3c6503ca27..536ee4febd74 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -86,6 +86,62 @@ enum i915_mocs_table_index {
I915_MOCS_CACHED,
};
+/*
+ * Different engines serve different roles, and there may be more than one
+ * engine serving each role. enum drm_i915_gem_engine_class provides a
+ * classification of the role of the engine, which may be used when requesting
+ * operations to be performed on a certain subset of engines, or for providing
+ * information about that group.
+ */
+enum drm_i915_gem_engine_class {
+ I915_ENGINE_CLASS_RENDER = 0,
+ I915_ENGINE_CLASS_COPY = 1,
+ I915_ENGINE_CLASS_VIDEO = 2,
+ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
+
+ I915_ENGINE_CLASS_INVALID = -1
+};
+
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+ I915_SAMPLE_BUSY = 0,
+ I915_SAMPLE_WAIT = 1,
+ I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+ (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+ ((class) << I915_PMU_CLASS_SHIFT | \
+ (instance) << I915_PMU_SAMPLE_BITS | \
+ (sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
@@ -450,6 +506,27 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
+/*
+ * Query whether every context (both per-file default and user created) is
+ * isolated (insofar as HW supports). If this parameter is not true, then
+ * freshly created contexts may inherit values from an existing context,
+ * rather than default HW values. If true, it also ensures (insofar as HW
+ * supports) that all state set by this context will not leak to any other
+ * context.
+ *
+ * As not every engine across every gen supports contexts, the returned
+ * value reports the support of context isolation for individual engines by
+ * returning a bitmask of each engine class set to true if that class supports
+ * isolation.
+ */
+#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
+
+/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
+ * registers. This used to be fixed per platform but from CNL onwards, this
+ * might vary depending on the parts.
+ */
+#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
+
typedef struct drm_i915_getparam {
__s32 param;
/*
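[The PMU macros above pack (class, instance, sample) into a single event config: 4 sample bits, then 8 instance bits, then the class. A small user-space sketch to check the arithmetic; it assumes this UAPI header is reachable as <drm/i915_drm.h>:

    #include <assert.h>
    #include <stdio.h>
    #include <drm/i915_drm.h>   /* assumed include path for the UAPI header */

    int main(void)
    {
            /* RENDER class 0, instance 0, BUSY sample 0 packs to 0 */
            assert(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0) == 0x0);
            /* VIDEO class 2 lands above the 8 + 4 low bits: 2 << 12 */
            assert(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0) == 0x2000);
            /* "other" events start one past the largest engine encoding */
            assert(I915_PMU_ACTUAL_FREQUENCY == 0x100000);
            printf("i915 PMU encodings check out\n");
            return 0;
    }]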
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db6bdc375126..d245c41213ac 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -133,6 +133,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
};
enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -231,6 +233,28 @@ enum bpf_attach_type {
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+enum bpf_stack_build_id_status {
+ /* user space needs an empty entry to identify the end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fall back to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -696,6 +720,15 @@ union bpf_attr {
* int bpf_override_return(pt_regs, rc)
* @pt_regs: pointer to struct pt_regs
* @rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ * Redirect msg to a socket in map, using key to look up the
+ * target socket.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_PASS
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -757,7 +790,11 @@ union bpf_attr {
FN(perf_prog_read_value), \
FN(getsockopt), \
FN(override_return), \
- FN(sock_ops_cb_flags_set),
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -919,6 +956,14 @@ enum sk_action {
SK_PASS,
};
+/* User-accessible metadata for the SK_MSG packet hook; new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ void *data;
+ void *data_end;
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {
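[The new SK_MSG pieces above (program type, attach type, struct sk_msg_md and the msg_* helper IDs) combine into verdict programs of roughly the following shape. A minimal sketch in restricted C for the clang BPF target; the map name, the SEC() macro, the bpf_map_def layout and the helper stub are local assumptions standing in for this era's bpf_helpers.h:

    #include <linux/bpf.h>

    #define SEC(name) __attribute__((section(name), used))

    /* Minimal map definition, as loaders of this era expected it. */
    struct bpf_map_def {
            unsigned int type;
            unsigned int key_size;
            unsigned int value_size;
            unsigned int max_entries;
            unsigned int map_flags;
    };

    /* Helper stub following the usual BPF helper-call idiom;
     * BPF_FUNC_msg_redirect_map is the ID added in this patch.
     */
    static int (*bpf_msg_redirect_map)(struct sk_msg_md *msg, void *map,
                                       __u32 key, __u64 flags) =
            (void *) BPF_FUNC_msg_redirect_map;

    struct bpf_map_def SEC("maps") sock_map = {
            .type           = BPF_MAP_TYPE_SOCKMAP,
            .key_size       = sizeof(int),
            .value_size     = sizeof(int),
            .max_entries    = 2,
    };

    SEC("sk_msg")
    int msg_verdict(struct sk_msg_md *msg)
    {
            /* Redirect every message to the socket stored at key 0. */
            return bpf_msg_redirect_map(msg, &sock_map, 0, 0);
    }]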
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 8616131e2c61..6d9447700e18 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -163,6 +163,7 @@ enum {
IFLA_IF_NETNSID,
IFLA_CARRIER_UP_COUNT,
IFLA_CARRIER_DOWN_COUNT,
+ IFLA_NEW_IFINDEX,
__IFLA_MAX
};
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 8fb90a0819c3..7b26d4b0b052 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -761,6 +761,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
+#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
/*
* Extension capability list.
@@ -934,6 +935,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_AIS_MIGRATION 150
#define KVM_CAP_PPC_GET_CPU_CHAR 151
#define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1362,6 +1364,96 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_S390_CMMA_MIGRATION */
#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+/* Memory Encryption Commands */
+#define KVM_MEMORY_ENCRYPT_OP _IOWR(KVMIO, 0xba, unsigned long)
+
+struct kvm_enc_region {
+ __u64 addr;
+ __u64 size;
+};
+
+#define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+#define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u64 session_uaddr;
+ __u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+};
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index a5684d0968b4..5898c22ba310 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -33,7 +33,7 @@ import resource
import struct
import re
import subprocess
-from collections import defaultdict
+from collections import defaultdict, namedtuple
VMX_EXIT_REASONS = {
'EXCEPTION_NMI': 0,
@@ -228,6 +228,7 @@ IOCTL_NUMBERS = {
}
ENCODING = locale.getpreferredencoding(False)
+TRACE_FILTER = re.compile(r'^[^\(]*$')
class Arch(object):
@@ -260,6 +261,11 @@ class Arch(object):
return ArchX86(SVM_EXIT_REASONS)
return
+ def tracepoint_is_child(self, field):
+ if (TRACE_FILTER.match(field)):
+ return None
+ return field.split('(', 1)[0]
+
class ArchX86(Arch):
def __init__(self, exit_reasons):
@@ -267,6 +273,10 @@ class ArchX86(Arch):
self.ioctl_numbers = IOCTL_NUMBERS
self.exit_reasons = exit_reasons
+ def debugfs_is_child(self, field):
+ """ Returns name of parent if 'field' is a child, None otherwise """
+ return None
+
class ArchPPC(Arch):
def __init__(self):
@@ -282,6 +292,10 @@ class ArchPPC(Arch):
self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
self.exit_reasons = {}
+ def debugfs_is_child(self, field):
+ """ Returns name of parent if 'field' is a child, None otherwise """
+ return None
+
class ArchA64(Arch):
def __init__(self):
@@ -289,6 +303,10 @@ class ArchA64(Arch):
self.ioctl_numbers = IOCTL_NUMBERS
self.exit_reasons = AARCH64_EXIT_REASONS
+ def debugfs_is_child(self, field):
+ """ Returns name of parent if 'field' is a child, None otherwise """
+ return None
+
class ArchS390(Arch):
def __init__(self):
@@ -296,6 +314,12 @@ class ArchS390(Arch):
self.ioctl_numbers = IOCTL_NUMBERS
self.exit_reasons = None
+ def debugfs_is_child(self, field):
+ """ Returns name of parent if 'field' is a child, None otherwise """
+ if field.startswith('instruction_'):
+ return 'exit_instruction'
+
+
ARCH = Arch.get_arch()
@@ -331,9 +355,6 @@ class perf_event_attr(ctypes.Structure):
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3
-PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
-PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
-
class Group(object):
"""Represents a perf event group."""
@@ -376,8 +397,8 @@ class Event(object):
self.syscall = self.libc.syscall
self.name = name
self.fd = None
- self.setup_event(group, trace_cpu, trace_pid, trace_point,
- trace_filter, trace_set)
+ self._setup_event(group, trace_cpu, trace_pid, trace_point,
+ trace_filter, trace_set)
def __del__(self):
"""Closes the event's file descriptor.
@@ -390,7 +411,7 @@ class Event(object):
if self.fd:
os.close(self.fd)
- def perf_event_open(self, attr, pid, cpu, group_fd, flags):
+ def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
"""Wrapper for the sys_perf_evt_open() syscall.
Used to set up performance events, returns a file descriptor or -1
@@ -409,7 +430,7 @@ class Event(object):
ctypes.c_int(pid), ctypes.c_int(cpu),
ctypes.c_int(group_fd), ctypes.c_long(flags))
- def setup_event_attribute(self, trace_set, trace_point):
+ def _setup_event_attribute(self, trace_set, trace_point):
"""Returns an initialized ctype perf_event_attr struct."""
id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
@@ -419,8 +440,8 @@ class Event(object):
event_attr.config = int(open(id_path).read())
return event_attr
- def setup_event(self, group, trace_cpu, trace_pid, trace_point,
- trace_filter, trace_set):
+ def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
+ trace_filter, trace_set):
"""Sets up the perf event in Linux.
Issues the syscall to register the event in the kernel and
@@ -428,7 +449,7 @@ class Event(object):
"""
- event_attr = self.setup_event_attribute(trace_set, trace_point)
+ event_attr = self._setup_event_attribute(trace_set, trace_point)
# First event will be group leader.
group_leader = -1
@@ -437,8 +458,8 @@ class Event(object):
if group.events:
group_leader = group.events[0].fd
- fd = self.perf_event_open(event_attr, trace_pid,
- trace_cpu, group_leader, 0)
+ fd = self._perf_event_open(event_attr, trace_pid,
+ trace_cpu, group_leader, 0)
if fd == -1:
err = ctypes.get_errno()
raise OSError(err, os.strerror(err),
@@ -475,6 +496,10 @@ class Event(object):
class Provider(object):
"""Encapsulates functionalities used by all providers."""
+ def __init__(self, pid):
+ self.child_events = False
+ self.pid = pid
+
@staticmethod
def is_field_wanted(fields_filter, field):
"""Indicate whether field is valid according to fields_filter."""
@@ -500,12 +525,12 @@ class TracepointProvider(Provider):
"""
def __init__(self, pid, fields_filter):
self.group_leaders = []
- self.filters = self.get_filters()
+ self.filters = self._get_filters()
self.update_fields(fields_filter)
- self.pid = pid
+ super(TracepointProvider, self).__init__(pid)
@staticmethod
- def get_filters():
+ def _get_filters():
"""Returns a dict of trace events, their filter ids and
the values that can be filtered.
@@ -521,8 +546,8 @@ class TracepointProvider(Provider):
filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
return filters
- def get_available_fields(self):
- """Returns a list of available event's of format 'event name(filter
+ def _get_available_fields(self):
+ """Returns a list of available events of format 'event name(filter
name)'.
All available events have directories under
@@ -549,11 +574,12 @@ class TracepointProvider(Provider):
def update_fields(self, fields_filter):
"""Refresh fields, applying fields_filter"""
- self.fields = [field for field in self.get_available_fields()
- if self.is_field_wanted(fields_filter, field)]
+ self.fields = [field for field in self._get_available_fields()
+ if self.is_field_wanted(fields_filter, field) or
+ ARCH.tracepoint_is_child(field)]
@staticmethod
- def get_online_cpus():
+ def _get_online_cpus():
"""Returns a list of cpu id integers."""
def parse_int_list(list_string):
"""Returns an int list from a string of comma separated integers and
@@ -575,17 +601,17 @@ class TracepointProvider(Provider):
cpu_string = cpu_list.readline()
return parse_int_list(cpu_string)
- def setup_traces(self):
+ def _setup_traces(self):
"""Creates all event and group objects needed to be able to retrieve
data."""
- fields = self.get_available_fields()
+ fields = self._get_available_fields()
if self._pid > 0:
# Fetch list of all threads of the monitored pid, as qemu
# starts a thread for each vcpu.
path = os.path.join('/proc', str(self._pid), 'task')
groupids = self.walkdir(path)[1]
else:
- groupids = self.get_online_cpus()
+ groupids = self._get_online_cpus()
# The constant is needed as a buffer for python libs, std
# streams and other files that the script opens.
@@ -663,7 +689,7 @@ class TracepointProvider(Provider):
# The garbage collector will get rid of all Event/Group
# objects and open files after removing the references.
self.group_leaders = []
- self.setup_traces()
+ self._setup_traces()
self.fields = self._fields
def read(self, by_guest=0):
@@ -671,8 +697,12 @@ class TracepointProvider(Provider):
ret = defaultdict(int)
for group in self.group_leaders:
for name, val in group.read().items():
- if name in self._fields:
- ret[name] += val
+ if name not in self._fields:
+ continue
+ parent = ARCH.tracepoint_is_child(name)
+ if parent:
+ name += ' ' + parent
+ ret[name] += val
return ret
def reset(self):
@@ -690,11 +720,11 @@ class DebugfsProvider(Provider):
self._baseline = {}
self.do_read = True
self.paths = []
- self.pid = pid
+ super(DebugfsProvider, self).__init__(pid)
if include_past:
- self.restore()
+ self._restore()
- def get_available_fields(self):
+ def _get_available_fields(self):
""""Returns a list of available fields.
The fields are all available KVM debugfs files
@@ -704,8 +734,9 @@ class DebugfsProvider(Provider):
def update_fields(self, fields_filter):
"""Refresh fields, applying fields_filter"""
- self._fields = [field for field in self.get_available_fields()
- if self.is_field_wanted(fields_filter, field)]
+ self._fields = [field for field in self._get_available_fields()
+ if self.is_field_wanted(fields_filter, field) or
+ ARCH.debugfs_is_child(field)]
@property
def fields(self):
@@ -758,7 +789,7 @@ class DebugfsProvider(Provider):
paths.append(dir)
for path in paths:
for field in self._fields:
- value = self.read_field(field, path)
+ value = self._read_field(field, path)
key = path + field
if reset == 1:
self._baseline[key] = value
@@ -766,20 +797,21 @@ class DebugfsProvider(Provider):
self._baseline[key] = 0
if self._baseline.get(key, -1) == -1:
self._baseline[key] = value
- increment = (results.get(field, 0) + value -
- self._baseline.get(key, 0))
- if by_guest:
- pid = key.split('-')[0]
- if pid in results:
- results[pid] += increment
- else:
- results[pid] = increment
+ parent = ARCH.debugfs_is_child(field)
+ if parent:
+ field = field + ' ' + parent
+ else:
+ if by_guest:
+ field = key.split('-')[0] # set 'field' to 'pid'
+ increment = value - self._baseline.get(key, 0)
+ if field in results:
+ results[field] += increment
else:
results[field] = increment
return results
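A hypothetical illustration of the two branches above (names and numbers
invented). Per-VM debugfs directories are named '<pid>-<fd>', so
key.split('-')[0] recovers the guest pid:

    results = {}
    key, field, increment = '12345-11/exits', 'exits', 42
    parent = None                    # what ARCH.debugfs_is_child(field) returns
    if parent:                       # child event: roll up as 'child parent'
        field = field + ' ' + parent
    else:                            # by_guest: aggregate under the guest pid
        field = key.split('-')[0]
    results[field] = results.get(field, 0) + increment
    print(results)                   # {'12345': 42}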
- def read_field(self, field, path):
+ def _read_field(self, field, path):
"""Returns the value of a single field from a specific VM."""
try:
return int(open(os.path.join(PATH_DEBUGFS_KVM,
@@ -794,12 +826,15 @@ class DebugfsProvider(Provider):
self._baseline = {}
self.read(1)
- def restore(self):
+ def _restore(self):
"""Reset field counters"""
self._baseline = {}
self.read(2)
+EventStat = namedtuple('EventStat', ['value', 'delta'])
+
+
class Stats(object):
"""Manages the data providers and the data they provide.
@@ -808,13 +843,13 @@ class Stats(object):
"""
def __init__(self, options):
- self.providers = self.get_providers(options)
+ self.providers = self._get_providers(options)
self._pid_filter = options.pid
self._fields_filter = options.fields
self.values = {}
+ self._child_events = False
- @staticmethod
- def get_providers(options):
+ def _get_providers(self, options):
"""Returns a list of data providers depending on the passed options."""
providers = []
@@ -826,7 +861,7 @@ class Stats(object):
return providers
- def update_provider_filters(self):
+ def _update_provider_filters(self):
"""Propagates fields filters to providers."""
# As we reset the counters when updating the fields we can
# also clear the cache of old values.
@@ -847,7 +882,7 @@ class Stats(object):
def fields_filter(self, fields_filter):
if fields_filter != self._fields_filter:
self._fields_filter = fields_filter
- self.update_provider_filters()
+ self._update_provider_filters()
@property
def pid_filter(self):
@@ -861,16 +896,33 @@ class Stats(object):
for provider in self.providers:
provider.pid = self._pid_filter
+ @property
+ def child_events(self):
+ return self._child_events
+
+ @child_events.setter
+ def child_events(self, val):
+ self._child_events = val
+ for provider in self.providers:
+ provider.child_events = val
+
def get(self, by_guest=0):
"""Returns a dict with field -> (value, delta to last value) of all
- provider data."""
+ provider data.
+ Key formats:
+ * plain: 'key' is event name
+ * child-parent: 'key' is in format '<child> <parent>'
+ * pid: 'key' is the pid of the guest, and the record contains the
+ aggregated event data
+ These formats are generated by the providers, and handled in class TUI.
+ """
for provider in self.providers:
new = provider.read(by_guest=by_guest)
- for key in new if by_guest else provider.fields:
- oldval = self.values.get(key, (0, 0))[0]
+ for key in new:
+ oldval = self.values.get(key, EventStat(0, 0)).value
newval = new.get(key, 0)
newdelta = newval - oldval
- self.values[key] = (newval, newdelta)
+ self.values[key] = EventStat(newval, newdelta)
return self.values
def toggle_display_guests(self, to_pid):
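A toy illustration of the EventStat bookkeeping in get(), with invented
values for two successive reads of one event:

    from collections import namedtuple

    EventStat = namedtuple('EventStat', ['value', 'delta'])

    values = {}
    for newval in (100, 160):        # two successive provider reads
        oldval = values.get('kvm_exit', EventStat(0, 0)).value
        values['kvm_exit'] = EventStat(newval, newval - oldval)
    print(values['kvm_exit'])        # EventStat(value=160, delta=60)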
@@ -899,10 +951,10 @@ class Stats(object):
self.get(to_pid)
return 0
+
DELAY_DEFAULT = 3.0
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
-DEFAULT_REGEX = r'^[^\(]*$'
SORT_DEFAULT = 0
@@ -969,7 +1021,7 @@ class Tui(object):
return res
- def print_all_gnames(self, row):
+ def _print_all_gnames(self, row):
"""Print a list of all running guests along with their pids."""
self.screen.addstr(row, 2, '%8s %-60s' %
('Pid', 'Guest Name (fuzzy list, might be '
@@ -1032,19 +1084,13 @@ class Tui(object):
return name
- def update_drilldown(self):
- """Sets or removes a filter that only allows fields without braces."""
- if not self.stats.fields_filter:
- self.stats.fields_filter = DEFAULT_REGEX
-
- elif self.stats.fields_filter == DEFAULT_REGEX:
- self.stats.fields_filter = None
-
- def update_pid(self, pid):
+ def _update_pid(self, pid):
"""Propagates pid selection to stats object."""
+ self.screen.addstr(4, 1, 'Updating pid filter...')
+ self.screen.refresh()
self.stats.pid_filter = pid
- def refresh_header(self, pid=None):
+ def _refresh_header(self, pid=None):
"""Refreshes the header."""
if pid is None:
pid = self.stats.pid_filter
@@ -1059,8 +1105,7 @@ class Tui(object):
.format(pid, gname), curses.A_BOLD)
else:
self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
- if self.stats.fields_filter and self.stats.fields_filter \
- != DEFAULT_REGEX:
+ if self.stats.fields_filter:
regex = self.stats.fields_filter
if len(regex) > MAX_REGEX_LEN:
regex = regex[:MAX_REGEX_LEN] + '...'
@@ -1075,56 +1120,99 @@ class Tui(object):
self.screen.addstr(4, 1, 'Collecting data...')
self.screen.refresh()
- def refresh_body(self, sleeptime):
+ def _refresh_body(self, sleeptime):
+ def is_child_field(field):
+ return field.find('(') != -1
+
+ def insert_child(sorted_items, child, values, parent):
+ num = len(sorted_items)
+ for i in range(0, num):
+ # only add child if parent is present
+ if parent.startswith(sorted_items[i][0]):
+ sorted_items.insert(i + 1, (' ' + child, values))
+
+ def get_sorted_events(self, stats):
+ """ separate parent and child events """
+ if self._sorting == SORT_DEFAULT:
+ def sortkey((_k, v)):
+ # sort by (delta value, overall value)
+ return (v.delta, v.value)
+ else:
+ def sortkey((_k, v)):
+ # sort by overall value
+ return v.value
+
+ childs = []
+ sorted_items = []
+            # we can't rule out child events appearing before their parents
+            # even when sorted - separate out all children first, add them
+            # in later
+ for key, values in sorted(stats.items(), key=sortkey,
+ reverse=True):
+ if values == (0, 0):
+ continue
+ if key.find(' ') != -1:
+ if not self.stats.child_events:
+ continue
+ childs.insert(0, (key, values))
+ else:
+ sorted_items.append((key, values))
+ if self.stats.child_events:
+ for key, values in childs:
+ (child, parent) = key.split(' ')
+ insert_child(sorted_items, child, values, parent)
+
+ return sorted_items
+
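How the sort/insert pass above arranges children directly under their
parents, sketched with invented stats (the real insert_child() scans the
whole list; this sketch stops at the first match):

    sorted_items = [('kvm_exit', (90, 10)), ('kvm_entry', (80, 5))]
    child, parent = 'kvm_exit(HLT)', 'kvm_exit'
    for i, (key, _) in enumerate(list(sorted_items)):
        if parent.startswith(key):
            # children get a leading space so the TUI indents them
            sorted_items.insert(i + 1, (' ' + child, (30, 4)))
            break
    # -> [('kvm_exit', ...), (' kvm_exit(HLT)', ...), ('kvm_entry', ...)]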
row = 3
self.screen.move(row, 0)
self.screen.clrtobot()
stats = self.stats.get(self._display_guests)
-
- def sortCurAvg(x):
- # sort by current events if available
- if stats[x][1]:
- return (-stats[x][1], -stats[x][0])
+ total = 0.
+ ctotal = 0.
+ for key, values in stats.items():
+ if self._display_guests:
+ if self.get_gname_from_pid(key):
+ total += values.value
+ continue
if ' ' not in key:
+ total += values.value
else:
- return (0, -stats[x][0])
+ ctotal += values.value
+ if total == 0.:
+ # we don't have any fields, or all non-child events are filtered
+ total = ctotal
- def sortTotal(x):
- # sort by totals
- return (0, -stats[x][0])
- total = 0.
- for key in stats.keys():
- if key.find('(') is -1:
- total += stats[key][0]
- if self._sorting == SORT_DEFAULT:
- sortkey = sortCurAvg
- else:
- sortkey = sortTotal
+ # print events
tavg = 0
- for key in sorted(stats.keys(), key=sortkey):
- if row >= self.screen.getmaxyx()[0] - 1:
- break
- values = stats[key]
- if not values[0] and not values[1]:
+ tcur = 0
+ for key, values in get_sorted_events(self, stats):
+ if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
break
- if values[0] is not None:
- cur = int(round(values[1] / sleeptime)) if values[1] else ''
- if self._display_guests:
- key = self.get_gname_from_pid(key)
- self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
- (key, values[0], values[0] * 100 / total,
- cur))
- if cur is not '' and key.find('(') is -1:
- tavg += cur
+ if self._display_guests:
+ key = self.get_gname_from_pid(key)
+ if not key:
+ continue
+ cur = int(round(values.delta / sleeptime)) if values.delta else ''
+ if key[0] != ' ':
+ if values.delta:
+ tcur += values.delta
+ ptotal = values.value
+ ltotal = total
+ else:
+ ltotal = ptotal
+ self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
+ values.value,
+ values.value * 100 / float(ltotal), cur))
row += 1
if row == 3:
self.screen.addstr(4, 1, 'No matching events reported yet')
- else:
+ if row > 4:
+ tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
self.screen.addstr(row, 1, '%-40s %10d %8s' %
- ('Total', total, tavg if tavg else ''),
- curses.A_BOLD)
+ ('Total', total, tavg), curses.A_BOLD)
self.screen.refresh()
- def show_msg(self, text):
+ def _show_msg(self, text):
"""Display message centered text and exit on key press"""
hint = 'Press any key to continue'
curses.cbreak()
@@ -1139,16 +1227,16 @@ class Tui(object):
curses.A_STANDOUT)
self.screen.getkey()
- def show_help_interactive(self):
+ def _show_help_interactive(self):
"""Display help with list of interactive commands"""
msg = (' b toggle events by guests (debugfs only, honors'
' filters)',
' c clear filter',
' f filter by regular expression',
- ' g filter by guest name',
+ ' g filter by guest name/PID',
' h display interactive commands reference',
' o toggle sorting order (Total vs CurAvg/s)',
- ' p filter by PID',
+ ' p filter by guest name/PID',
' q quit',
' r reset stats',
' s set update interval',
@@ -1165,14 +1253,15 @@ class Tui(object):
self.screen.addstr(row, 0, line)
row += 1
self.screen.getkey()
- self.refresh_header()
+ self._refresh_header()
- def show_filter_selection(self):
+ def _show_filter_selection(self):
"""Draws filter selection mask.
Asks for a valid regex and sets the fields filter accordingly.
"""
+ msg = ''
while True:
self.screen.erase()
self.screen.addstr(0, 0,
@@ -1181,61 +1270,25 @@ class Tui(object):
self.screen.addstr(2, 0,
"Current regex: {0}"
.format(self.stats.fields_filter))
+ self.screen.addstr(5, 0, msg)
self.screen.addstr(3, 0, "New regex: ")
curses.echo()
regex = self.screen.getstr().decode(ENCODING)
curses.noecho()
if len(regex) == 0:
- self.stats.fields_filter = DEFAULT_REGEX
- self.refresh_header()
+ self.stats.fields_filter = ''
+ self._refresh_header()
return
try:
re.compile(regex)
self.stats.fields_filter = regex
- self.refresh_header()
+ self._refresh_header()
return
except re.error:
+ msg = '"' + regex + '": Not a valid regular expression'
continue
- def show_vm_selection_by_pid(self):
- """Draws PID selection mask.
-
- Asks for a pid until a valid pid or 0 has been entered.
-
- """
- msg = ''
- while True:
- self.screen.erase()
- self.screen.addstr(0, 0,
- 'Show statistics for specific pid.',
- curses.A_BOLD)
- self.screen.addstr(1, 0,
- 'This might limit the shown data to the trace '
- 'statistics.')
- self.screen.addstr(5, 0, msg)
- self.print_all_gnames(7)
-
- curses.echo()
- self.screen.addstr(3, 0, "Pid [0 or pid]: ")
- pid = self.screen.getstr().decode(ENCODING)
- curses.noecho()
-
- try:
- if len(pid) > 0:
- pid = int(pid)
- if pid != 0 and not os.path.isdir(os.path.join('/proc/',
- str(pid))):
- msg = '"' + str(pid) + '": Not a running process'
- continue
- else:
- pid = 0
- self.refresh_header(pid)
- self.update_pid(pid)
- break
- except ValueError:
- msg = '"' + str(pid) + '": Not a valid pid'
-
- def show_set_update_interval(self):
+ def _show_set_update_interval(self):
"""Draws update interval selection mask."""
msg = ''
while True:
@@ -1265,60 +1318,67 @@ class Tui(object):
except ValueError:
msg = '"' + str(val) + '": Invalid value'
- self.refresh_header()
+ self._refresh_header()
- def show_vm_selection_by_guest_name(self):
+ def _show_vm_selection_by_guest(self):
"""Draws guest selection mask.
- Asks for a guest name until a valid guest name or '' is entered.
+        Asks for a guest name or pid until a valid guest name, a pid or ''
+        is entered.
"""
msg = ''
while True:
self.screen.erase()
self.screen.addstr(0, 0,
- 'Show statistics for specific guest.',
+ 'Show statistics for specific guest or pid.',
curses.A_BOLD)
self.screen.addstr(1, 0,
'This might limit the shown data to the trace '
'statistics.')
self.screen.addstr(5, 0, msg)
- self.print_all_gnames(7)
+ self._print_all_gnames(7)
curses.echo()
- self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
- gname = self.screen.getstr().decode(ENCODING)
+ curses.curs_set(1)
+ self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
+ guest = self.screen.getstr().decode(ENCODING)
curses.noecho()
- if not gname:
- self.refresh_header(0)
- self.update_pid(0)
+ pid = 0
+ if not guest or guest == '0':
break
- else:
- pids = []
- try:
- pids = self.get_pid_from_gname(gname)
- except:
- msg = '"' + gname + '": Internal error while searching, ' \
- 'use pid filter instead'
- continue
- if len(pids) == 0:
- msg = '"' + gname + '": Not an active guest'
+ if guest.isdigit():
+ if not os.path.isdir(os.path.join('/proc/', guest)):
+ msg = '"' + guest + '": Not a running process'
continue
- if len(pids) > 1:
- msg = '"' + gname + '": Multiple matches found, use pid ' \
- 'filter instead'
- continue
- self.refresh_header(pids[0])
- self.update_pid(pids[0])
+ pid = int(guest)
break
+ pids = []
+ try:
+ pids = self.get_pid_from_gname(guest)
+ except:
+ msg = '"' + guest + '": Internal error while searching, ' \
+ 'use pid filter instead'
+ continue
+ if len(pids) == 0:
+ msg = '"' + guest + '": Not an active guest'
+ continue
+ if len(pids) > 1:
+ msg = '"' + guest + '": Multiple matches found, use pid ' \
+ 'filter instead'
+ continue
+ pid = pids[0]
+ break
+ curses.curs_set(0)
+ self._refresh_header(pid)
+ self._update_pid(pid)
def show_stats(self):
"""Refreshes the screen and processes user input."""
sleeptime = self._delay_initial
- self.refresh_header()
+ self._refresh_header()
start = 0.0 # result based on init value never appears on screen
while True:
- self.refresh_body(time.time() - start)
+ self._refresh_body(time.time() - start)
curses.halfdelay(int(sleeptime * 10))
start = time.time()
sleeptime = self._delay_regular
@@ -1327,47 +1387,39 @@ class Tui(object):
if char == 'b':
self._display_guests = not self._display_guests
if self.stats.toggle_display_guests(self._display_guests):
- self.show_msg(['Command not available with tracepoints'
- ' enabled', 'Restart with debugfs only '
- '(see option \'-d\') and try again!'])
+ self._show_msg(['Command not available with '
+ 'tracepoints enabled', 'Restart with '
+ 'debugfs only (see option \'-d\') and '
+ 'try again!'])
self._display_guests = not self._display_guests
- self.refresh_header()
+ self._refresh_header()
if char == 'c':
- self.stats.fields_filter = DEFAULT_REGEX
- self.refresh_header(0)
- self.update_pid(0)
+ self.stats.fields_filter = ''
+ self._refresh_header(0)
+ self._update_pid(0)
if char == 'f':
curses.curs_set(1)
- self.show_filter_selection()
+ self._show_filter_selection()
curses.curs_set(0)
sleeptime = self._delay_initial
- if char == 'g':
- curses.curs_set(1)
- self.show_vm_selection_by_guest_name()
- curses.curs_set(0)
+ if char == 'g' or char == 'p':
+ self._show_vm_selection_by_guest()
sleeptime = self._delay_initial
if char == 'h':
- self.show_help_interactive()
+ self._show_help_interactive()
if char == 'o':
self._sorting = not self._sorting
- if char == 'p':
- curses.curs_set(1)
- self.show_vm_selection_by_pid()
- curses.curs_set(0)
- sleeptime = self._delay_initial
if char == 'q':
break
if char == 'r':
self.stats.reset()
if char == 's':
curses.curs_set(1)
- self.show_set_update_interval()
+ self._show_set_update_interval()
curses.curs_set(0)
sleeptime = self._delay_initial
if char == 'x':
- self.update_drilldown()
- # prevents display of current values on next refresh
- self.stats.get(self._display_guests)
+ self.stats.child_events = not self.stats.child_events
except KeyboardInterrupt:
break
except curses.error:
@@ -1380,9 +1432,9 @@ def batch(stats):
s = stats.get()
time.sleep(1)
s = stats.get()
- for key in sorted(s.keys()):
- values = s[key]
- print('%-42s%10d%10d' % (key, values[0], values[1]))
+ for key, values in sorted(s.items()):
+ print('%-42s%10d%10d' % (key.split(' ')[0], values.value,
+ values.delta))
except KeyboardInterrupt:
pass
@@ -1392,14 +1444,14 @@ def log(stats):
keys = sorted(stats.get().keys())
def banner():
- for k in keys:
- print(k, end=' ')
+ for key in keys:
+ print(key.split(' ')[0], end=' ')
print()
def statline():
s = stats.get()
- for k in keys:
- print(' %9d' % s[k][1], end=' ')
+ for key in keys:
+ print(' %9d' % s[key].delta, end=' ')
print()
line = 0
banner_repeat = 20
@@ -1504,7 +1556,7 @@ Press any other key to refresh statistics immediately.
)
optparser.add_option('-f', '--fields',
action='store',
- default=DEFAULT_REGEX,
+ default='',
dest='fields',
help='''fields to display (regex)
"-f help" for a list of available events''',
@@ -1539,17 +1591,6 @@ Press any other key to refresh statistics immediately.
def check_access(options):
"""Exits if the current user can't access all needed directories."""
- if not os.path.exists('/sys/kernel/debug'):
- sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
- sys.exit(1)
-
- if not os.path.exists(PATH_DEBUGFS_KVM):
- sys.stderr.write("Please make sure, that debugfs is mounted and "
- "readable by the current user:\n"
- "('mount -t debugfs debugfs /sys/kernel/debug')\n"
- "Also ensure, that the kvm modules are loaded.\n")
- sys.exit(1)
-
if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
not options.debugfs):
sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
@@ -1567,7 +1608,33 @@ def check_access(options):
return options
+def assign_globals():
+ global PATH_DEBUGFS_KVM
+ global PATH_DEBUGFS_TRACING
+
+ debugfs = ''
+ for line in file('/proc/mounts'):
+ if line.split(' ')[0] == 'debugfs':
+ debugfs = line.split(' ')[1]
+ break
+ if debugfs == '':
+ sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
+ "your kernel, mounted and\nreadable by the current "
+ "user:\n"
+ "('mount -t debugfs debugfs /sys/kernel/debug')\n")
+ sys.exit(1)
+
+ PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
+ PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')
+
+ if not os.path.exists(PATH_DEBUGFS_KVM):
+ sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
+ "your kernel and that the modules are loaded.\n")
+ sys.exit(1)
+
+
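assign_globals() above uses the Python 2 file() builtin; an equivalent,
self-contained Python 3 sketch of the debugfs lookup (not part of the
patch) would be:

    import os
    import sys

    def find_debugfs():
        """Return the debugfs mount point, or '' if not mounted."""
        with open('/proc/mounts') as mounts:
            for line in mounts:
                fields = line.split(' ')
                if fields[0] == 'debugfs':
                    return fields[1]
        return ''

    debugfs = find_debugfs()          # typically '/sys/kernel/debug'
    if not debugfs:
        sys.exit('debugfs not mounted')
    path_kvm = os.path.join(debugfs, 'kvm')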
def main():
+ assign_globals()
options = get_options()
options = check_access(options)
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index b5b3810c9e94..0811d860fe75 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -35,13 +35,13 @@ INTERACTIVE COMMANDS
*f*:: filter by regular expression
-*g*:: filter by guest name
+*g*:: filter by guest name/PID
*h*:: display interactive commands reference
*o*:: toggle sorting order (Total vs CurAvg/s)
-*p*:: filter by PID
+*p*:: filter by guest name/PID
*q*:: quit
diff --git a/tools/laptop/freefall/Makefile b/tools/laptop/freefall/Makefile
index 5f758c489a20..b572d94255f6 100644
--- a/tools/laptop/freefall/Makefile
+++ b/tools/laptop/freefall/Makefile
@@ -2,7 +2,6 @@
PREFIX ?= /usr
SBINDIR ?= sbin
INSTALL ?= install
-CC = $(CROSS_COMPILE)gcc
TARGET = freefall
diff --git a/tools/leds/Makefile b/tools/leds/Makefile
index c379af003807..7b6bed13daaa 100644
--- a/tools/leds/Makefile
+++ b/tools/leds/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for LEDs tools
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra -g -I../../include/uapi
all: uledmon led_hw_brightness_mon
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 97073d649c1a..64a8fc384186 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1060,11 +1060,12 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
prog->insns = new_insn;
prog->main_prog_cnt = prog->insns_cnt;
prog->insns_cnt = new_cnt;
+ pr_debug("added %zd insn from %s to prog %s\n",
+ text->insns_cnt, text->section_name,
+ prog->section_name);
}
insn = &prog->insns[relo->insn_idx];
insn->imm += prog->main_prog_cnt - relo->insn_idx;
- pr_debug("added %zd insn from %s to prog %s\n",
- text->insns_cnt, text->section_name, prog->section_name);
return 0;
}
@@ -1856,6 +1857,7 @@ static const struct {
BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
+ BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
};
#undef BPF_PROG_SEC
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 57254f5b2779..694abc628e9b 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -29,7 +29,7 @@
#include "builtin.h"
#include "check.h"
-bool no_fp, no_unreachable;
+bool no_fp, no_unreachable, retpoline, module;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -39,6 +39,8 @@ static const char * const check_usage[] = {
const struct option check_options[] = {
OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+ OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
+ OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
OPT_END(),
};
@@ -53,5 +55,5 @@ int cmd_check(int argc, const char **argv)
objname = argv[0];
- return check(objname, no_fp, no_unreachable, false);
+ return check(objname, false);
}
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 91e8e19ff5e0..77ea2b97117d 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -25,7 +25,6 @@
*/
#include <string.h>
-#include <subcmd/parse-options.h>
#include "builtin.h"
#include "check.h"
@@ -36,9 +35,6 @@ static const char *orc_usage[] = {
NULL,
};
-extern const struct option check_options[];
-extern bool no_fp, no_unreachable;
-
int cmd_orc(int argc, const char **argv)
{
const char *objname;
@@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv)
objname = argv[0];
- return check(objname, no_fp, no_unreachable, true);
+ return check(objname, true);
}
if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index dd526067fed5..28ff40e19a14 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -17,6 +17,11 @@
#ifndef _BUILTIN_H
#define _BUILTIN_H
+#include <subcmd/parse-options.h>
+
+extern const struct option check_options[];
+extern bool no_fp, no_unreachable, retpoline, module;
+
extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b00b1896547e..92b6a2c21631 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -18,6 +18,7 @@
#include <string.h>
#include <stdlib.h>
+#include "builtin.h"
#include "check.h"
#include "elf.h"
#include "special.h"
@@ -33,7 +34,6 @@ struct alternative {
};
const char *objname;
-static bool no_fp;
struct cfi_state initial_func_cfi;
struct instruction *find_insn(struct objtool_file *file,
@@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file)
* disguise, so convert them accordingly.
*/
insn->type = INSN_JUMP_DYNAMIC;
+ insn->retpoline_safe = true;
continue;
} else {
/* sibling call */
@@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file)
if (!insn->call_dest && !insn->ignore) {
WARN_FUNC("unsupported intra-function call",
insn->sec, insn->offset);
- WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+ if (retpoline)
+ WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
return -1;
}
@@ -852,8 +854,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
* This is a fairly uncommon pattern which is new for GCC 6. As of this
* writing, there are 11 occurrences of it in the allmodconfig kernel.
*
+ * As of GCC 7 there are quite a few more of these and the 'in between' code
+ * is significant. Esp. with KASAN enabled some of the code between the mov
+ * and jmpq uses .rodata itself, which can confuse things.
+ *
* TODO: Once we have DWARF CFI and smarter instruction decoding logic,
* ensure the same register is used in the mov and jump instructions.
+ *
+ * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
*/
static struct rela *find_switch_table(struct objtool_file *file,
struct symbol *func,
@@ -875,12 +883,25 @@ static struct rela *find_switch_table(struct objtool_file *file,
text_rela->addend + 4);
if (!rodata_rela)
return NULL;
+
file->ignore_unreachables = true;
return rodata_rela;
}
/* case 3 */
- func_for_each_insn_continue_reverse(file, func, insn) {
+ /*
+	 * Backward search using the @first_jump_src links; these help avoid
+	 * much of the 'in between' code, which would otherwise confuse us.
+ */
+ for (insn = list_prev_entry(insn, list);
+
+ &insn->list != &file->insn_list &&
+ insn->sec == func->sec &&
+ insn->offset >= func->offset;
+
+ insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+
if (insn->type == INSN_JUMP_DYNAMIC)
break;
@@ -904,20 +925,42 @@ static struct rela *find_switch_table(struct objtool_file *file,
if (find_symbol_containing(file->rodata, text_rela->addend))
continue;
- return find_rela_by_dest(file->rodata, text_rela->addend);
+ rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
+ if (!rodata_rela)
+ continue;
+
+ return rodata_rela;
}
return NULL;
}
+
static int add_func_switch_tables(struct objtool_file *file,
struct symbol *func)
{
- struct instruction *insn, *prev_jump = NULL;
+ struct instruction *insn, *last = NULL, *prev_jump = NULL;
struct rela *rela, *prev_rela = NULL;
int ret;
func_for_each_insn(file, func, insn) {
+ if (!last)
+ last = insn;
+
+ /*
+ * Store back-pointers for unconditional forward jumps such
+ * that find_switch_table() can back-track using those and
+ * avoid some potentially confusing code.
+ */
+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
+ insn->offset > last->offset &&
+ insn->jump_dest->offset > insn->offset &&
+ !insn->jump_dest->first_jump_src) {
+
+ insn->jump_dest->first_jump_src = insn;
+ last = insn->jump_dest;
+ }
+
if (insn->type != INSN_JUMP_DYNAMIC)
continue;
@@ -1071,6 +1114,41 @@ static int read_unwind_hints(struct objtool_file *file)
return 0;
}
+static int read_retpoline_hints(struct objtool_file *file)
+{
+ struct section *sec;
+ struct instruction *insn;
+ struct rela *rela;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.retpoline_safe entry");
+ return -1;
+ }
+
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC) {
+ WARN_FUNC("retpoline_safe hint not an indirect jump/call",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ insn->retpoline_safe = true;
+ }
+
+ return 0;
+}
+
static int decode_sections(struct objtool_file *file)
{
int ret;
@@ -1109,6 +1187,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_retpoline_hints(file);
+ if (ret)
+ return ret;
+
return 0;
}
@@ -1854,6 +1936,38 @@ static int validate_unwind_hints(struct objtool_file *file)
return warnings;
}
+static int validate_retpoline(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int warnings = 0;
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC)
+ continue;
+
+ if (insn->retpoline_safe)
+ continue;
+
+ /*
+		 * .init.text code is run before userspace and thus doesn't
+		 * strictly need retpolines, except for modules, which are
+		 * loaded late; they very much do need retpolines in their
+		 * .init.text
+ */
+ if (!strcmp(insn->sec->name, ".init.text") && !module)
+ continue;
+
+ WARN_FUNC("indirect %s found in RETPOLINE build",
+ insn->sec, insn->offset,
+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+
+ warnings++;
+ }
+
+ return warnings;
+}
+
static bool is_kasan_insn(struct instruction *insn)
{
return (insn->type == INSN_CALL &&
@@ -1899,13 +2013,19 @@ static bool ignore_unreachable_insn(struct instruction *insn)
if (is_kasan_insn(insn) || is_ubsan_insn(insn))
return true;
- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
- insn = insn->jump_dest;
- continue;
+ if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+ if (insn->jump_dest &&
+ insn->jump_dest->func == insn->func) {
+ insn = insn->jump_dest;
+ continue;
+ }
+
+ break;
}
if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
break;
+
insn = list_next_entry(insn, list);
}
@@ -1979,13 +2099,12 @@ static void cleanup(struct objtool_file *file)
elf_close(file->elf);
}
-int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
+int check(const char *_objname, bool orc)
{
struct objtool_file file;
int ret, warnings = 0;
objname = _objname;
- no_fp = _no_fp;
file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
if (!file.elf)
@@ -2009,6 +2128,13 @@ int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
if (list_empty(&file.insn_list))
goto out;
+ if (retpoline) {
+ ret = validate_retpoline(&file);
+ if (ret < 0)
+ return ret;
+ warnings += ret;
+ }
+
ret = validate_functions(&file);
if (ret < 0)
goto out;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index dbadb304a410..c6b68fcb926f 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -45,8 +45,10 @@ struct instruction {
unsigned char type;
unsigned long immediate;
bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
+ bool retpoline_safe;
struct symbol *call_dest;
struct instruction *jump_dest;
+ struct instruction *first_jump_src;
struct list_head alts;
struct symbol *func;
struct stack_op stack_op;
@@ -62,7 +64,7 @@ struct objtool_file {
bool ignore_unreachables, c_file, hints;
};
-int check(const char *objname, bool no_fp, bool no_unreachable, bool orc);
+int check(const char *objname, bool orc);
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index f0796a47dfa3..90bb4aabe4f8 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -30,6 +30,10 @@ OPTIONS for 'convert'
-i::
Specify input perf data file path.
+-f::
+--force::
+ Don't complain, do it.
+
-v::
--verbose::
Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
index 954ea9e21236..cf9f4040ea5c 100644
--- a/tools/perf/Documentation/perf-kallsyms.txt
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -8,7 +8,7 @@ perf-kallsyms - Searches running kernel for symbols
SYNOPSIS
--------
[verse]
-'perf kallsyms <options> symbol_name[,symbol_name...]'
+'perf kallsyms' [<options>] symbol_name[,symbol_name...]
DESCRIPTION
-----------
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 9b0351d3ce34..012328038594 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -146,12 +146,6 @@ define allow-override
$(eval $(1) = $(2)))
endef
-# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,LD,$(CROSS_COMPILE)ld)
-$(call allow-override,CXX,$(CROSS_COMPILE)g++)
-
LD += $(EXTRA_LDFLAGS)
HOSTCC ?= gcc
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
index 48228de415d0..dfa6e3103437 100644
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -10,15 +10,19 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
out := $(OUTPUT)arch/s390/include/generated/asm
header := $(out)/syscalls_64.c
-sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/
+syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
systbl := $(sysprf)/mksyscalltbl
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
$(header): $(sysdef) $(systbl)
- $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@
+ @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+ (diff -B $(sysdef) $(syskrn) >/dev/null) \
+ || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+ $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
clean::
$(call QUIET_CLEAN, s390) $(RM) $(header)
diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
index 7fa0d0abd419..72ecbb676370 100755
--- a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
@@ -3,25 +3,23 @@
#
# Generate system call table for perf
#
-#
-# Copyright IBM Corp. 2017
+# Copyright IBM Corp. 2017, 2018
# Author(s): Hendrik Brueckner <[email protected]>
#
-gcc=$1
-input=$2
+SYSCALL_TBL=$1
-if ! test -r $input; then
+if ! test -r $SYSCALL_TBL; then
echo "Could not read input file" >&2
exit 1
fi
create_table()
{
- local max_nr
+ local max_nr nr abi sc discard
echo 'static const char *syscalltbl_s390_64[] = {'
- while read sc nr; do
+ while read nr abi sc discard; do
printf '\t[%d] = "%s",\n' $nr $sc
max_nr=$nr
done
@@ -29,8 +27,6 @@ create_table()
echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
}
-
-$gcc -m64 -E -dM -x c $input \
- |sed -ne 's/^#define __NR_//p' \
- |sort -t' ' -k2 -nu \
+grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL \
+ |sort -k1 -n \
|create_table
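The shell pipeline above keeps the 'common' and '64' ABI lines of
syscall.tbl and feeds them to create_table(); the same parse in a short
Python sketch (for illustration only, not part of the patch):

    import re

    def parse_syscall_tbl(path):
        """Yield (number, name) for 64-bit syscalls in a syscall.tbl file."""
        with open(path) as tbl:
            for line in tbl:
                fields = line.split()
                # format: <nr> <abi> <syscall> <entry-64bit> <compat-entry>
                if len(fields) >= 3 and re.match(r'\d+$', fields[0]) \
                        and fields[1] in ('common', '64'):
                    yield int(fields[0]), fields[2]

    # for nr, name in sorted(parse_syscall_tbl('syscall.tbl')):
    #     print('\t[%d] = "%s",' % (nr, name))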
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..b38d48464368
--- /dev/null
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -0,0 +1,390 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1 common exit sys_exit sys_exit
+2 common fork sys_fork sys_fork
+3 common read sys_read compat_sys_s390_read
+4 common write sys_write compat_sys_s390_write
+5 common open sys_open compat_sys_open
+6 common close sys_close sys_close
+7 common restart_syscall sys_restart_syscall sys_restart_syscall
+8 common creat sys_creat compat_sys_creat
+9 common link sys_link compat_sys_link
+10 common unlink sys_unlink compat_sys_unlink
+11 common execve sys_execve compat_sys_execve
+12 common chdir sys_chdir compat_sys_chdir
+13 32 time - compat_sys_time
+14 common mknod sys_mknod compat_sys_mknod
+15 common chmod sys_chmod compat_sys_chmod
+16 32 lchown - compat_sys_s390_lchown16
+19 common lseek sys_lseek compat_sys_lseek
+20 common getpid sys_getpid sys_getpid
+21 common mount sys_mount compat_sys_mount
+22 common umount sys_oldumount compat_sys_oldumount
+23 32 setuid - compat_sys_s390_setuid16
+24 32 getuid - compat_sys_s390_getuid16
+25 32 stime - compat_sys_stime
+26 common ptrace sys_ptrace compat_sys_ptrace
+27 common alarm sys_alarm sys_alarm
+29 common pause sys_pause sys_pause
+30 common utime sys_utime compat_sys_utime
+33 common access sys_access compat_sys_access
+34 common nice sys_nice sys_nice
+36 common sync sys_sync sys_sync
+37 common kill sys_kill sys_kill
+38 common rename sys_rename compat_sys_rename
+39 common mkdir sys_mkdir compat_sys_mkdir
+40 common rmdir sys_rmdir compat_sys_rmdir
+41 common dup sys_dup sys_dup
+42 common pipe sys_pipe compat_sys_pipe
+43 common times sys_times compat_sys_times
+45 common brk sys_brk compat_sys_brk
+46 32 setgid - compat_sys_s390_setgid16
+47 32 getgid - compat_sys_s390_getgid16
+48 common signal sys_signal compat_sys_signal
+49 32 geteuid - compat_sys_s390_geteuid16
+50 32 getegid - compat_sys_s390_getegid16
+51 common acct sys_acct compat_sys_acct
+52 common umount2 sys_umount compat_sys_umount
+54 common ioctl sys_ioctl compat_sys_ioctl
+55 common fcntl sys_fcntl compat_sys_fcntl
+57 common setpgid sys_setpgid sys_setpgid
+60 common umask sys_umask sys_umask
+61 common chroot sys_chroot compat_sys_chroot
+62 common ustat sys_ustat compat_sys_ustat
+63 common dup2 sys_dup2 sys_dup2
+64 common getppid sys_getppid sys_getppid
+65 common getpgrp sys_getpgrp sys_getpgrp
+66 common setsid sys_setsid sys_setsid
+67 common sigaction sys_sigaction compat_sys_sigaction
+70 32 setreuid - compat_sys_s390_setreuid16
+71 32 setregid - compat_sys_s390_setregid16
+72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend
+73 common sigpending sys_sigpending compat_sys_sigpending
+74 common sethostname sys_sethostname compat_sys_sethostname
+75 common setrlimit sys_setrlimit compat_sys_setrlimit
+76 32 getrlimit - compat_sys_old_getrlimit
+77 common getrusage sys_getrusage compat_sys_getrusage
+78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 common settimeofday sys_settimeofday compat_sys_settimeofday
+80 32 getgroups - compat_sys_s390_getgroups16
+81 32 setgroups - compat_sys_s390_setgroups16
+83 common symlink sys_symlink compat_sys_symlink
+85 common readlink sys_readlink compat_sys_readlink
+86 common uselib sys_uselib compat_sys_uselib
+87 common swapon sys_swapon compat_sys_swapon
+88 common reboot sys_reboot compat_sys_reboot
+89 common readdir - compat_sys_old_readdir
+90 common mmap sys_old_mmap compat_sys_s390_old_mmap
+91 common munmap sys_munmap compat_sys_munmap
+92 common truncate sys_truncate compat_sys_truncate
+93 common ftruncate sys_ftruncate compat_sys_ftruncate
+94 common fchmod sys_fchmod sys_fchmod
+95 32 fchown - compat_sys_s390_fchown16
+96 common getpriority sys_getpriority sys_getpriority
+97 common setpriority sys_setpriority sys_setpriority
+99 common statfs sys_statfs compat_sys_statfs
+100 common fstatfs sys_fstatfs compat_sys_fstatfs
+101 32 ioperm - -
+102 common socketcall sys_socketcall compat_sys_socketcall
+103 common syslog sys_syslog compat_sys_syslog
+104 common setitimer sys_setitimer compat_sys_setitimer
+105 common getitimer sys_getitimer compat_sys_getitimer
+106 common stat sys_newstat compat_sys_newstat
+107 common lstat sys_newlstat compat_sys_newlstat
+108 common fstat sys_newfstat compat_sys_newfstat
+110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+111 common vhangup sys_vhangup sys_vhangup
+112 common idle - -
+114 common wait4 sys_wait4 compat_sys_wait4
+115 common swapoff sys_swapoff compat_sys_swapoff
+116 common sysinfo sys_sysinfo compat_sys_sysinfo
+117 common ipc sys_s390_ipc compat_sys_s390_ipc
+118 common fsync sys_fsync sys_fsync
+119 common sigreturn sys_sigreturn compat_sys_sigreturn
+120 common clone sys_clone compat_sys_clone
+121 common setdomainname sys_setdomainname compat_sys_setdomainname
+122 common uname sys_newuname compat_sys_newuname
+124 common adjtimex sys_adjtimex compat_sys_adjtimex
+125 common mprotect sys_mprotect compat_sys_mprotect
+126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask
+127 common create_module - -
+128 common init_module sys_init_module compat_sys_init_module
+129 common delete_module sys_delete_module compat_sys_delete_module
+130 common get_kernel_syms - -
+131 common quotactl sys_quotactl compat_sys_quotactl
+132 common getpgid sys_getpgid sys_getpgid
+133 common fchdir sys_fchdir sys_fchdir
+134 common bdflush sys_bdflush compat_sys_bdflush
+135 common sysfs sys_sysfs compat_sys_sysfs
+136 common personality sys_s390_personality sys_s390_personality
+137 common afs_syscall - -
+138 32 setfsuid - compat_sys_s390_setfsuid16
+139 32 setfsgid - compat_sys_s390_setfsgid16
+140 32 _llseek - compat_sys_llseek
+141 common getdents sys_getdents compat_sys_getdents
+142 32 _newselect - compat_sys_select
+142 64 select sys_select -
+143 common flock sys_flock sys_flock
+144 common msync sys_msync compat_sys_msync
+145 common readv sys_readv compat_sys_readv
+146 common writev sys_writev compat_sys_writev
+147 common getsid sys_getsid sys_getsid
+148 common fdatasync sys_fdatasync sys_fdatasync
+149 common _sysctl sys_sysctl compat_sys_sysctl
+150 common mlock sys_mlock compat_sys_mlock
+151 common munlock sys_munlock compat_sys_munlock
+152 common mlockall sys_mlockall sys_mlockall
+153 common munlockall sys_munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam
+155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min
+161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
+162 common nanosleep sys_nanosleep compat_sys_nanosleep
+163 common mremap sys_mremap compat_sys_mremap
+164 32 setresuid - compat_sys_s390_setresuid16
+165 32 getresuid - compat_sys_s390_getresuid16
+167 common query_module - -
+168 common poll sys_poll compat_sys_poll
+169 common nfsservctl - -
+170 32 setresgid - compat_sys_s390_setresgid16
+171 32 getresgid - compat_sys_s390_getresgid16
+172 common prctl sys_prctl compat_sys_prctl
+173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
+178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+180 common pread64 sys_pread64 compat_sys_s390_pread64
+181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64
+182 32 chown - compat_sys_s390_chown16
+183 common getcwd sys_getcwd compat_sys_getcwd
+184 common capget sys_capget compat_sys_capget
+185 common capset sys_capset compat_sys_capset
+186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack
+187 common sendfile sys_sendfile64 compat_sys_sendfile
+188 common getpmsg - -
+189 common putpmsg - -
+190 common vfork sys_vfork sys_vfork
+191 32 ugetrlimit - compat_sys_getrlimit
+191 64 getrlimit sys_getrlimit -
+192 32 mmap2 - compat_sys_s390_mmap2
+193 32 truncate64 - compat_sys_s390_truncate64
+194 32 ftruncate64 - compat_sys_s390_ftruncate64
+195 32 stat64 - compat_sys_s390_stat64
+196 32 lstat64 - compat_sys_s390_lstat64
+197 32 fstat64 - compat_sys_s390_fstat64
+198 32 lchown32 - compat_sys_lchown
+198 64 lchown sys_lchown -
+199 32 getuid32 - sys_getuid
+199 64 getuid sys_getuid -
+200 32 getgid32 - sys_getgid
+200 64 getgid sys_getgid -
+201 32 geteuid32 - sys_geteuid
+201 64 geteuid sys_geteuid -
+202 32 getegid32 - sys_getegid
+202 64 getegid sys_getegid -
+203 32 setreuid32 - sys_setreuid
+203 64 setreuid sys_setreuid -
+204 32 setregid32 - sys_setregid
+204 64 setregid sys_setregid -
+205 32 getgroups32 - compat_sys_getgroups
+205 64 getgroups sys_getgroups -
+206 32 setgroups32 - compat_sys_setgroups
+206 64 setgroups sys_setgroups -
+207 32 fchown32 - sys_fchown
+207 64 fchown sys_fchown -
+208 32 setresuid32 - sys_setresuid
+208 64 setresuid sys_setresuid -
+209 32 getresuid32 - compat_sys_getresuid
+209 64 getresuid sys_getresuid -
+210 32 setresgid32 - sys_setresgid
+210 64 setresgid sys_setresgid -
+211 32 getresgid32 - compat_sys_getresgid
+211 64 getresgid sys_getresgid -
+212 32 chown32 - compat_sys_chown
+212 64 chown sys_chown -
+213 32 setuid32 - sys_setuid
+213 64 setuid sys_setuid -
+214 32 setgid32 - sys_setgid
+214 64 setgid sys_setgid -
+215 32 setfsuid32 - sys_setfsuid
+215 64 setfsuid sys_setfsuid -
+216 32 setfsgid32 - sys_setfsgid
+216 64 setfsgid sys_setfsgid -
+217 common pivot_root sys_pivot_root compat_sys_pivot_root
+218 common mincore sys_mincore compat_sys_mincore
+219 common madvise sys_madvise compat_sys_madvise
+220 common getdents64 sys_getdents64 compat_sys_getdents64
+221 32 fcntl64 - compat_sys_fcntl64
+222 common readahead sys_readahead compat_sys_s390_readahead
+223 32 sendfile64 - compat_sys_sendfile64
+224 common setxattr sys_setxattr compat_sys_setxattr
+225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr
+226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr
+227 common getxattr sys_getxattr compat_sys_getxattr
+228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr
+229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr
+230 common listxattr sys_listxattr compat_sys_listxattr
+231 common llistxattr sys_llistxattr compat_sys_llistxattr
+232 common flistxattr sys_flistxattr compat_sys_flistxattr
+233 common removexattr sys_removexattr compat_sys_removexattr
+234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr
+235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr
+236 common gettid sys_gettid sys_gettid
+237 common tkill sys_tkill sys_tkill
+238 common futex sys_futex compat_sys_futex
+239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+241 common tgkill sys_tgkill sys_tgkill
+243 common io_setup sys_io_setup compat_sys_io_setup
+244 common io_destroy sys_io_destroy compat_sys_io_destroy
+245 common io_getevents sys_io_getevents compat_sys_io_getevents
+246 common io_submit sys_io_submit compat_sys_io_submit
+247 common io_cancel sys_io_cancel compat_sys_io_cancel
+248 common exit_group sys_exit_group sys_exit_group
+249 common epoll_create sys_epoll_create sys_epoll_create
+250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl
+251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait
+252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address
+253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64
+254 common timer_create sys_timer_create compat_sys_timer_create
+255 common timer_settime sys_timer_settime compat_sys_timer_settime
+256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime
+257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun
+258 common timer_delete sys_timer_delete sys_timer_delete
+259 common clock_settime sys_clock_settime compat_sys_clock_settime
+260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime
+261 common clock_getres sys_clock_getres compat_sys_clock_getres
+262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
+264 32 fadvise64_64 - compat_sys_s390_fadvise64_64
+265 common statfs64 sys_statfs64 compat_sys_statfs64
+266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages
+268 common mbind sys_mbind compat_sys_mbind
+269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
+271 common mq_open sys_mq_open compat_sys_mq_open
+272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink
+273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
+274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
+275 common mq_notify sys_mq_notify compat_sys_mq_notify
+276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+277 common kexec_load sys_kexec_load compat_sys_kexec_load
+278 common add_key sys_add_key compat_sys_add_key
+279 common request_key sys_request_key compat_sys_request_key
+280 common keyctl sys_keyctl compat_sys_keyctl
+281 common waitid sys_waitid compat_sys_waitid
+282 common ioprio_set sys_ioprio_set sys_ioprio_set
+283 common ioprio_get sys_ioprio_get sys_ioprio_get
+284 common inotify_init sys_inotify_init sys_inotify_init
+285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch
+286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
+287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
+288 common openat sys_openat compat_sys_openat
+289 common mkdirat sys_mkdirat compat_sys_mkdirat
+290 common mknodat sys_mknodat compat_sys_mknodat
+291 common fchownat sys_fchownat compat_sys_fchownat
+292 common futimesat sys_futimesat compat_sys_futimesat
+293 32 fstatat64 - compat_sys_s390_fstatat64
+293 64 newfstatat sys_newfstatat -
+294 common unlinkat sys_unlinkat compat_sys_unlinkat
+295 common renameat sys_renameat compat_sys_renameat
+296 common linkat sys_linkat compat_sys_linkat
+297 common symlinkat sys_symlinkat compat_sys_symlinkat
+298 common readlinkat sys_readlinkat compat_sys_readlinkat
+299 common fchmodat sys_fchmodat compat_sys_fchmodat
+300 common faccessat sys_faccessat compat_sys_faccessat
+301 common pselect6 sys_pselect6 compat_sys_pselect6
+302 common ppoll sys_ppoll compat_sys_ppoll
+303 common unshare sys_unshare compat_sys_unshare
+304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
+305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
+306 common splice sys_splice compat_sys_splice
+307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
+308 common tee sys_tee compat_sys_tee
+309 common vmsplice sys_vmsplice compat_sys_vmsplice
+310 common move_pages sys_move_pages compat_sys_move_pages
+311 common getcpu sys_getcpu compat_sys_getcpu
+312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
+313 common utimes sys_utimes compat_sys_utimes
+314 common fallocate sys_fallocate compat_sys_s390_fallocate
+315 common utimensat sys_utimensat compat_sys_utimensat
+316 common signalfd sys_signalfd compat_sys_signalfd
+317 common timerfd - -
+318 common eventfd sys_eventfd sys_eventfd
+319 common timerfd_create sys_timerfd_create sys_timerfd_create
+320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
+321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
+322 common signalfd4 sys_signalfd4 compat_sys_signalfd4
+323 common eventfd2 sys_eventfd2 sys_eventfd2
+324 common inotify_init1 sys_inotify_init1 sys_inotify_init1
+325 common pipe2 sys_pipe2 compat_sys_pipe2
+326 common dup3 sys_dup3 sys_dup3
+327 common epoll_create1 sys_epoll_create1 sys_epoll_create1
+328 common preadv sys_preadv compat_sys_preadv
+329 common pwritev sys_pwritev compat_sys_pwritev
+330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open
+332 common fanotify_init sys_fanotify_init sys_fanotify_init
+333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+334 common prlimit64 sys_prlimit64 compat_sys_prlimit64
+335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at
+336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
+338 common syncfs sys_syncfs sys_syncfs
+339 common setns sys_setns sys_setns
+340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
+343 common kcmp sys_kcmp compat_sys_kcmp
+344 common finit_module sys_finit_module compat_sys_finit_module
+345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr
+346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr
+347 common renameat2 sys_renameat2 compat_sys_renameat2
+348 common seccomp sys_seccomp compat_sys_seccomp
+349 common getrandom sys_getrandom compat_sys_getrandom
+350 common memfd_create sys_memfd_create compat_sys_memfd_create
+351 common bpf sys_bpf compat_sys_bpf
+352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write
+353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read
+354 common execveat sys_execveat compat_sys_execveat
+355 common userfaultfd sys_userfaultfd sys_userfaultfd
+356 common membarrier sys_membarrier sys_membarrier
+357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg
+358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
+359 common socket sys_socket sys_socket
+360 common socketpair sys_socketpair compat_sys_socketpair
+361 common bind sys_bind compat_sys_bind
+362 common connect sys_connect compat_sys_connect
+363 common listen sys_listen sys_listen
+364 common accept4 sys_accept4 compat_sys_accept4
+365 common getsockopt sys_getsockopt compat_sys_getsockopt
+366 common setsockopt sys_setsockopt compat_sys_setsockopt
+367 common getsockname sys_getsockname compat_sys_getsockname
+368 common getpeername sys_getpeername compat_sys_getpeername
+369 common sendto sys_sendto compat_sys_sendto
+370 common sendmsg sys_sendmsg compat_sys_sendmsg
+371 common recvfrom sys_recvfrom compat_sys_recvfrom
+372 common recvmsg sys_recvmsg compat_sys_recvmsg
+373 common shutdown sys_shutdown sys_shutdown
+374 common mlock2 sys_mlock2 compat_sys_mlock2
+375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range
+376 common preadv2 sys_preadv2 compat_sys_preadv2
+377 common pwritev2 sys_pwritev2 compat_sys_pwritev2
+378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage
+379 common statx sys_statx compat_sys_statx
+380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c0815a37fdb5..539c3d460158 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2245,7 +2245,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
c2c_browser__update_nr_entries(browser);
while (1) {
- key = hist_browser__run(browser, "? - help");
+ key = hist_browser__run(browser, "? - help", true);
switch (key) {
case 's':
@@ -2314,7 +2314,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
c2c_browser__update_nr_entries(browser);
while (1) {
- key = hist_browser__run(browser, "? - help");
+ key = hist_browser__run(browser, "? - help", true);
switch (key) {
case 'q':
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bf4ca749d1ac..a217623fec2e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -881,6 +881,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
}
+ /*
+	 * If we have just a single event and are sending data
+	 * through a pipe, we need to force id allocation,
+	 * because we synthesize the event name through the pipe
+	 * and need the id for that.
+ */
+ if (data->is_pipe && rec->evlist->nr_entries == 1)
+ rec->opts.sample_id = true;
+
if (record__open(rec) != 0) {
err = -1;
goto out_child;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 42a52dcc41cd..4ad5dc649716 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -530,7 +530,8 @@ static int report__browse_hists(struct report *rep)
case 1:
ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
rep->min_percent,
- &session->header.env);
+ &session->header.env,
+ true);
/*
* Usually "ret" is the last pressed key, and we only
* care if the key notifies us to switch data file.
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 98bf9d32f222..54a4c152edb3 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -917,7 +917,7 @@ static void print_metric_csv(void *ctx,
char buf[64], *vals, *ends;
if (unit == NULL || fmt == NULL) {
- fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+ fprintf(out, "%s%s", csv_sep, csv_sep);
return;
}
snprintf(buf, sizeof(buf), fmt, val);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c6ccda52117d..35ac016fcb98 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -283,8 +283,9 @@ static void perf_top__print_sym_table(struct perf_top *top)
printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
- if (hists->stats.nr_lost_warned !=
- hists->stats.nr_events[PERF_RECORD_LOST]) {
+ if (!top->record_opts.overwrite &&
+ (hists->stats.nr_lost_warned !=
+ hists->stats.nr_events[PERF_RECORD_LOST])) {
hists->stats.nr_lost_warned =
hists->stats.nr_events[PERF_RECORD_LOST];
color_fprintf(stdout, PERF_COLOR_RED,
@@ -611,7 +612,8 @@ static void *display_thread_tui(void *arg)
perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
top->min_percent,
- &top->session->header.env);
+ &top->session->header.env,
+ !top->record_opts.overwrite);
done = 1;
return NULL;
@@ -807,15 +809,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
struct perf_sample sample;
struct perf_evsel *evsel;
+ struct perf_mmap *md;
struct perf_session *session = top->session;
union perf_event *event;
struct machine *machine;
+ u64 end, start;
int ret;
- while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
- ret = perf_evlist__parse_sample(top->evlist, event, &sample);
+ md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
+ if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
+ return;
+
+ while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
+ ret = perf_evlist__parse_sample(evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
goto next_event;
@@ -869,16 +879,120 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
} else
++session->evlist->stats.nr_unknown_events;
next_event:
- perf_evlist__mmap_consume(top->evlist, idx);
+ perf_mmap__consume(md, opts->overwrite);
}
+
+ perf_mmap__read_done(md);
}
static void perf_top__mmap_read(struct perf_top *top)
{
+ bool overwrite = top->record_opts.overwrite;
+ struct perf_evlist *evlist = top->evlist;
+ unsigned long long start, end;
int i;
+ start = rdclock();
+ if (overwrite)
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+
for (i = 0; i < top->evlist->nr_mmaps; i++)
perf_top__mmap_read_idx(top, i);
+
+ if (overwrite) {
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+ }
+ end = rdclock();
+
+ if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
+ ui__warning("Too slow to read ring buffer.\n"
+ "Please try increasing the period (-c) or\n"
+ "decreasing the freq (-F) or\n"
+ "limiting the number of CPUs (-C)\n");
+}
+
+/*
+ * Check the per-event overwrite term.
+ * perf top requires a consistent term for all events:
+ * - No event has a per-event term
+ * E.g. "cpu/cpu-cycles/,cpu/instructions/"
+ * Nothing changes, return 0.
+ * - All events have the same per-event term
+ * E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/"
+ * Use the per-event setting to replace opts->overwrite if they
+ * differ, then return 0.
+ * - Events have different per-event terms
+ * E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
+ * Return -1.
+ * - Some events set a per-event term, but others do not
+ * E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
+ * Return -1.
+ */
+static int perf_top__overwrite_check(struct perf_top *top)
+{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
+ struct perf_evsel_config_term *term;
+ struct list_head *config_terms;
+ struct perf_evsel *evsel;
+ int set, overwrite = -1;
+
+ evlist__for_each_entry(evlist, evsel) {
+ set = -1;
+ config_terms = &evsel->config_terms;
+ list_for_each_entry(term, config_terms, list) {
+ if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+ set = term->val.overwrite ? 1 : 0;
+ }
+
+ /* no term for current and previous event (likely) */
+ if ((overwrite < 0) && (set < 0))
+ continue;
+
+ /* has term for both current and previous event, compare */
+ if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
+ return -1;
+
+ /* no term for current event but has term for previous one */
+ if ((overwrite >= 0) && (set < 0))
+ return -1;
+
+ /* has term for current event */
+ if ((overwrite < 0) && (set >= 0)) {
+ /* if it's first event, set overwrite */
+ if (evsel == perf_evlist__first(evlist))
+ overwrite = set;
+ else
+ return -1;
+ }
+ }
+
+ if ((overwrite >= 0) && (opts->overwrite != overwrite))
+ opts->overwrite = overwrite;
+
+ return 0;
+}
+
+static int perf_top_overwrite_fallback(struct perf_top *top,
+ struct perf_evsel *evsel)
+{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
+ struct perf_evsel *counter;
+
+ if (!opts->overwrite)
+ return 0;
+
+ /* only fall back when first event fails */
+ if (evsel != perf_evlist__first(evlist))
+ return 0;
+
+ evlist__for_each_entry(evlist, counter)
+ counter->attr.write_backward = false;
+ opts->overwrite = false;
+ pr_debug2("fall back to non-overwrite mode\n");
+ return 1;
}
static int perf_top__start_counters(struct perf_top *top)
@@ -888,12 +1002,33 @@ static int perf_top__start_counters(struct perf_top *top)
struct perf_evlist *evlist = top->evlist;
struct record_opts *opts = &top->record_opts;
+ if (perf_top__overwrite_check(top)) {
+ ui__error("perf top only support consistent per-event "
+ "overwrite setting for all events\n");
+ goto out_err;
+ }
+
perf_evlist__config(evlist, opts, &callchain_param);
evlist__for_each_entry(evlist, counter) {
try_again:
if (perf_evsel__open(counter, top->evlist->cpus,
top->evlist->threads) < 0) {
+
+ /*
+ * Handle the overwrite fallback specially here,
+ * because perf top is the only tool that has
+ * overwrite mode on by default, supports both
+ * overwrite and non-overwrite mode, and requires
+ * a consistent mode for all events.
+ *
+ * This may move to generic code once more tools
+ * have a similar attribute.
+ */
+ if (perf_missing_features.write_backward &&
+ perf_top_overwrite_fallback(top, counter))
+ goto try_again;
+
if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
@@ -1033,7 +1168,7 @@ static int __cmd_top(struct perf_top *top)
perf_top__mmap_read(top);
- if (hits == top->samples)
+ if (opts->overwrite || (hits == top->samples))
ret = perf_evlist__poll(top->evlist, 100);
if (resize) {
@@ -1127,6 +1262,7 @@ int cmd_top(int argc, const char **argv)
.uses_mmap = true,
},
.proc_map_timeout = 500,
+ .overwrite = 1,
},
.max_stack = sysctl_perf_event_max_stack,
.sym_pcnt_filter = 5,
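The consistency rule that perf_top__overwrite_check() enforces can be sketched standalone. The following is an illustration only, using plain ints in place of perf's evsel/config-term structures; check_consistency() and the -1/0/1 encoding are invented for the example, not perf APIs:

#include <stdio.h>

/* per-event term: -1 = unset, 0 = no-overwrite, 1 = overwrite */
static int check_consistency(const int *terms, int n)
{
	int overwrite = -1;	/* term adopted so far, if any */

	for (int i = 0; i < n; i++) {
		int set = terms[i];

		if (overwrite < 0 && set < 0)
			continue;	/* no term so far, none here either */
		if (overwrite >= 0 && set >= 0 && overwrite != set)
			return -1;	/* conflicting per-event terms */
		if (overwrite >= 0 && set < 0)
			return -1;	/* only some events set a term */
		if (overwrite < 0 && set >= 0) {
			if (i == 0)
				overwrite = set;	/* the first event decides */
			else
				return -1;	/* a later event introduced a term */
		}
	}
	return 0;
}

int main(void)
{
	int same[] = { 0, 0 };	/* no-overwrite on both events: accepted */
	int mixed[] = { 1, 0 };	/* overwrite vs no-overwrite: rejected */

	printf("%d %d\n", check_consistency(same, 2),
	       check_consistency(mixed, 2));	/* prints "0 -1" */
	return 0;
}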
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 51abdb0a4047..790ec25919a0 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -33,7 +33,6 @@ arch/s390/include/uapi/asm/kvm.h
arch/s390/include/uapi/asm/kvm_perf.h
arch/s390/include/uapi/asm/ptrace.h
arch/s390/include/uapi/asm/sie.h
-arch/s390/include/uapi/asm/unistd.h
arch/arm/include/uapi/asm/kvm.h
arch/arm64/include/uapi/asm/kvm.h
arch/alpha/include/uapi/asm/errno.h
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cfe46236a5e5..57b9b342d533 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -61,6 +61,7 @@ struct record_opts {
bool tail_synthesize;
bool overwrite;
bool ignore_missing_thread;
+ bool sample_id;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
new file mode 100644
index 000000000000..3b6208763e50
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
@@ -0,0 +1,27 @@
+[
+ {
+ "EventCode": "0x7A",
+ "EventName": "BR_INDIRECT_SPEC",
+ "BriefDescription": "Branch speculatively executed - Indirect branch"
+ },
+ {
+ "EventCode": "0xC9",
+ "EventName": "BR_COND",
+ "BriefDescription": "Conditional branch executed"
+ },
+ {
+ "EventCode": "0xCA",
+ "EventName": "BR_INDIRECT_MISPRED",
+ "BriefDescription": "Indirect branch mispredicted"
+ },
+ {
+ "EventCode": "0xCB",
+ "EventName": "BR_INDIRECT_MISPRED_ADDR",
+ "BriefDescription": "Indirect branch mispredicted because of address miscompare"
+ },
+ {
+ "EventCode": "0xCC",
+ "EventName": "BR_COND_MISPRED",
+ "BriefDescription": "Conditional branch mispredicted"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
new file mode 100644
index 000000000000..480d9f7460ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
@@ -0,0 +1,22 @@
+[
+ {
+ "EventCode": "0x60",
+ "EventName": "BUS_ACCESS_LD",
+ "BriefDescription": "Bus access - Read"
+ },
+ {
+ "EventCode": "0x61",
+ "EventName": "BUS_ACCESS_ST",
+ "BriefDescription": "Bus access - Write"
+ },
+ {
+ "EventCode": "0xC0",
+ "EventName": "EXT_MEM_REQ",
+ "BriefDescription": "External memory request"
+ },
+ {
+ "EventCode": "0xC1",
+ "EventName": "EXT_MEM_REQ_NC",
+ "BriefDescription": "Non-cacheable external memory request"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
new file mode 100644
index 000000000000..11baad6344b9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
@@ -0,0 +1,27 @@
+[
+ {
+ "EventCode": "0xC2",
+ "EventName": "PREFETCH_LINEFILL",
+ "BriefDescription": "Linefill because of prefetch"
+ },
+ {
+ "EventCode": "0xC3",
+ "EventName": "PREFETCH_LINEFILL_DROP",
+ "BriefDescription": "Instruction Cache Throttle occurred"
+ },
+ {
+ "EventCode": "0xC4",
+ "EventName": "READ_ALLOC_ENTER",
+ "BriefDescription": "Entering read allocate mode"
+ },
+ {
+ "EventCode": "0xC5",
+ "EventName": "READ_ALLOC",
+ "BriefDescription": "Read allocate mode"
+ },
+ {
+ "EventCode": "0xC8",
+ "EventName": "EXT_SNOOP",
+ "BriefDescription": "SCU Snooped data from another CPU for this CPU"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
new file mode 100644
index 000000000000..480d9f7460ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
@@ -0,0 +1,22 @@
+[
+ {
+ "EventCode": "0x60",
+ "EventName": "BUS_ACCESS_LD",
+ "BriefDescription": "Bus access - Read"
+ },
+ {
+ "EventCode": "0x61",
+ "EventName": "BUS_ACCESS_ST",
+ "BriefDescription": "Bus access - Write"
+ },
+ {
+ "EventCode": "0xC0",
+ "EventName": "EXT_MEM_REQ",
+ "BriefDescription": "External memory request"
+ },
+ {
+ "EventCode": "0xC1",
+ "EventName": "EXT_MEM_REQ_NC",
+ "BriefDescription": "Non-cacheable external memory request"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
new file mode 100644
index 000000000000..73a22402d003
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
@@ -0,0 +1,32 @@
+[
+ {
+ "EventCode": "0x86",
+ "EventName": "EXC_IRQ",
+ "BriefDescription": "Exception taken, IRQ"
+ },
+ {
+ "EventCode": "0x87",
+ "EventName": "EXC_FIQ",
+ "BriefDescription": "Exception taken, FIQ"
+ },
+ {
+ "EventCode": "0xC6",
+ "EventName": "PRE_DECODE_ERR",
+ "BriefDescription": "Pre-decode error"
+ },
+ {
+ "EventCode": "0xD0",
+ "EventName": "L1I_CACHE_ERR",
+ "BriefDescription": "L1 Instruction Cache (data or tag) memory error"
+ },
+ {
+ "EventCode": "0xD1",
+ "EventName": "L1D_CACHE_ERR",
+ "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable"
+ },
+ {
+ "EventCode": "0xD2",
+ "EventName": "TLB_ERR",
+ "BriefDescription": "TLB memory error"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
new file mode 100644
index 000000000000..3149fb90555a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
@@ -0,0 +1,52 @@
+[
+ {
+ "EventCode": "0xC7",
+ "EventName": "STALL_SB_FULL",
+ "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full"
+ },
+ {
+ "EventCode": "0xE0",
+ "EventName": "OTHER_IQ_DEP_STALL",
+ "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error"
+ },
+ {
+ "EventCode": "0xE1",
+ "EventName": "IC_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed"
+ },
+ {
+ "EventCode": "0xE2",
+ "EventName": "IUTLB_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed"
+ },
+ {
+ "EventCode": "0xE3",
+ "EventName": "DECODE_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed"
+ },
+ {
+ "EventCode": "0xE4",
+ "EventName": "OTHER_INTERLOCK_STALL",
+ "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction"
+ },
+ {
+ "EventCode": "0xE5",
+ "EventName": "AGU_DEP_STALL",
+ "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU"
+ },
+ {
+ "EventCode": "0xE6",
+ "EventName": "SIMD_DEP_STALL",
+ "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation."
+ },
+ {
+ "EventCode": "0xE7",
+ "EventName": "LD_DEP_STALL",
+ "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss"
+ },
+ {
+ "EventCode": "0xE8",
+ "EventName": "ST_DEP_STALL",
+ "BriefDescription": "Cycles there is a stall in the Wr stage because of a store"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 219d6756134e..e61c9ca6cf9e 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -13,3 +13,4 @@
#
#Family-model,Version,Filename,EventType
0x00000000420f5160,v1,cavium,core
+0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 4035d43523c3..e0b1b414d466 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -31,10 +31,12 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
int i;
for (i = 0; i < evlist->nr_mmaps; i++) {
+ struct perf_mmap *map = &evlist->overwrite_mmap[i];
union perf_event *event;
+ u64 start, end;
- perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
- while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
+ perf_mmap__read_init(map, true, &start, &end);
+ while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
const u32 type = event->header.type;
switch (type) {
@@ -49,6 +51,7 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
return TEST_FAIL;
}
}
+ perf_mmap__read_done(map);
}
return TEST_OK;
}
diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
index 8b3da21a08f1..c446c894b297 100755
--- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
@@ -22,10 +22,23 @@ trace_libc_inet_pton_backtrace() {
expected[4]="rtt min.*"
expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
expected[6]=".*inet_pton[[:space:]]\($libc\)$"
- expected[7]="getaddrinfo[[:space:]]\($libc\)$"
- expected[8]=".*\(.*/bin/ping.*\)$"
-
- perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
+ case "$(uname -m)" in
+ s390x)
+ eventattr='call-graph=dwarf'
+ expected[7]="gaih_inet[[:space:]]\(inlined\)$"
+ expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$"
+ expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
+ expected[10]="__libc_start_main[[:space:]]\($libc\)$"
+ expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
+ ;;
+ *)
+ eventattr='max-stack=3'
+ expected[7]="getaddrinfo[[:space:]]\($libc\)$"
+ expected[8]=".*\(.*/bin/ping.*\)$"
+ ;;
+ esac
+
+ perf trace --no-syscalls -e probe_libc:inet_pton/$eventattr/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
echo $line
echo "$line" | egrep -q "${expected[$idx]}"
if [ $? -ne 0 ] ; then
@@ -33,7 +46,7 @@ trace_libc_inet_pton_backtrace() {
exit 1
fi
let idx+=1
- [ $idx -eq 9 ] && break
+ [ -z "${expected[$idx]}" ] && break
done
}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 286427975112..fbf927cf775d 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -327,7 +327,32 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
if (!disasm_line__is_valid_jump(cursor, sym))
return;
+ /*
+ * This was first seen with a gcc function, _cpp_lex_token, that
+ * has the usual jumps:
+ *
+ * │1159e6c: ↓ jne 115aa32 <_cpp_lex_token@@Base+0xf92>
+ *
+ * I.e. jumps to a label inside that function (_cpp_lex_token), and
+ * those work, but also this kind:
+ *
+ * │1159e8b: ↓ jne c469be <cpp_named_operator2name@@Base+0xa72>
+ *
+ * I.e. jumps to another function, outside _cpp_lex_token, which are
+ * not handled correctly, generating as a side effect references to
+ * ab->offsets[] entries that are set to NULL, so to make this code
+ * more robust, check for that here.
+ *
+ * A proper fix will be put in place later, looking at the function
+ * name right after the '<' token and probably treating this like a
+ * 'call' instruction.
+ */
target = ab->offsets[cursor->ops.target.offset];
+ if (target == NULL) {
+ ui_helpline__printf("WARN: jump target inconsistency, press 'o', ab->offsets[%#x] = NULL\n",
+ cursor->ops.target.offset);
+ return;
+ }
bcursor = browser_line(&cursor->al);
btarget = browser_line(target);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 68146f4620a5..6495ee55d9c3 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -608,7 +608,8 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si
return browser->title ? browser->title(browser, bf, size) : 0;
}
-int hist_browser__run(struct hist_browser *browser, const char *help)
+int hist_browser__run(struct hist_browser *browser, const char *help,
+ bool warn_lost_event)
{
int key;
char title[160];
@@ -638,8 +639,9 @@ int hist_browser__run(struct hist_browser *browser, const char *help)
nr_entries = hist_browser__nr_entries(browser);
ui_browser__update_nr_entries(&browser->b, nr_entries);
- if (browser->hists->stats.nr_lost_warned !=
- browser->hists->stats.nr_events[PERF_RECORD_LOST]) {
+ if (warn_lost_event &&
+ (browser->hists->stats.nr_lost_warned !=
+ browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
browser->hists->stats.nr_lost_warned =
browser->hists->stats.nr_events[PERF_RECORD_LOST];
ui_browser__warn_lost_events(&browser->b);
@@ -2763,7 +2765,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
bool left_exits,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_env *env)
+ struct perf_env *env,
+ bool warn_lost_event)
{
struct hists *hists = evsel__hists(evsel);
struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
@@ -2844,7 +2847,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
nr_options = 0;
- key = hist_browser__run(browser, helpline);
+ key = hist_browser__run(browser, helpline,
+ warn_lost_event);
if (browser->he_selection != NULL) {
thread = hist_browser__selected_thread(browser);
@@ -3184,7 +3188,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
int nr_events, const char *help,
- struct hist_browser_timer *hbt)
+ struct hist_browser_timer *hbt,
+ bool warn_lost_event)
{
struct perf_evlist *evlist = menu->b.priv;
struct perf_evsel *pos;
@@ -3203,7 +3208,9 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
case K_TIMER:
hbt->timer(hbt->arg);
- if (!menu->lost_events_warned && menu->lost_events) {
+ if (!menu->lost_events_warned &&
+ menu->lost_events &&
+ warn_lost_event) {
ui_browser__warn_lost_events(&menu->b);
menu->lost_events_warned = true;
}
@@ -3224,7 +3231,8 @@ browse_hists:
key = perf_evsel__hists_browse(pos, nr_events, help,
true, hbt,
menu->min_pcnt,
- menu->env);
+ menu->env,
+ warn_lost_event);
ui_browser__show_title(&menu->b, title);
switch (key) {
case K_TAB:
@@ -3282,7 +3290,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
int nr_entries, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_env *env)
+ struct perf_env *env,
+ bool warn_lost_event)
{
struct perf_evsel *pos;
struct perf_evsel_menu menu = {
@@ -3309,13 +3318,15 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
menu.b.width = line_len;
}
- return perf_evsel_menu__run(&menu, nr_entries, help, hbt);
+ return perf_evsel_menu__run(&menu, nr_entries, help,
+ hbt, warn_lost_event);
}
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_env *env)
+ struct perf_env *env,
+ bool warn_lost_event)
{
int nr_entries = evlist->nr_entries;
@@ -3325,7 +3336,7 @@ single_entry:
return perf_evsel__hists_browse(first, nr_entries, help,
false, hbt, min_pcnt,
- env);
+ env, warn_lost_event);
}
if (symbol_conf.event_group) {
@@ -3342,5 +3353,6 @@ single_entry:
}
return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
- hbt, min_pcnt, env);
+ hbt, min_pcnt, env,
+ warn_lost_event);
}
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index ba431777f559..9428bee076f2 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -28,7 +28,8 @@ struct hist_browser {
struct hist_browser *hist_browser__new(struct hists *hists);
void hist_browser__delete(struct hist_browser *browser);
-int hist_browser__run(struct hist_browser *browser, const char *help);
+int hist_browser__run(struct hist_browser *browser, const char *help,
+ bool warn_lost_event);
void hist_browser__init(struct hist_browser *browser,
struct hists *hists);
#endif /* _PERF_UI_BROWSER_HISTS_H_ */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 9faf3b5367db..6470ea2aa25e 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -60,6 +60,12 @@
#include "sane_ctype.h"
#include "symbol/kallsyms.h"
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+ return !session->itrace_synth_opts ||
+ session->itrace_synth_opts->dont_decode;
+}
+
int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
struct auxtrace_mmap_params *mp,
void *userpg, int fd)
@@ -762,6 +768,9 @@ int auxtrace_queues__process_index(struct auxtrace_queues *queues,
size_t i;
int err;
+ if (auxtrace__dont_decode(session))
+ return 0;
+
list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
for (i = 0; i < auxtrace_index->nr; i++) {
ent = &auxtrace_index->entries[i];
@@ -892,12 +901,6 @@ out_free:
return err;
}
-static bool auxtrace__dont_decode(struct perf_session *session)
-{
- return !session->itrace_synth_opts ||
- session->itrace_synth_opts->dont_decode;
-}
-
int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_session *session)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ac35cd214feb..e5fc14e53c05 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -715,28 +715,11 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int
return perf_mmap__read_forward(md);
}
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
-{
- struct perf_mmap *md = &evlist->mmap[idx];
-
- /*
- * No need to check messup for backward ring buffer:
- * We can always read arbitrary long data from a backward
- * ring buffer unless we forget to pause it before reading.
- */
- return perf_mmap__read_backward(md);
-}
-
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
return perf_evlist__mmap_read_forward(evlist, idx);
}
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
-{
- perf_mmap__read_catchup(&evlist->mmap[idx]);
-}
-
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
perf_mmap__consume(&evlist->mmap[idx], false);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 75f8e0ad5d76..336b838e6957 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -133,10 +133,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
int idx);
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
- int idx);
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
-
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
int perf_evlist__open(struct perf_evlist *evlist);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ff359c9ece2e..ef351688b797 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -41,17 +41,7 @@
#include "sane_ctype.h"
-static struct {
- bool sample_id_all;
- bool exclude_guest;
- bool mmap2;
- bool cloexec;
- bool clockid;
- bool clockid_wrong;
- bool lbr_flags;
- bool write_backward;
- bool group_read;
-} perf_missing_features;
+struct perf_missing_features perf_missing_features;
static clockid_t clockid;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 846e41644525..a7487c6d1866 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -149,6 +149,20 @@ union u64_swap {
u32 val32[2];
};
+struct perf_missing_features {
+ bool sample_id_all;
+ bool exclude_guest;
+ bool mmap2;
+ bool cloexec;
+ bool clockid;
+ bool clockid_wrong;
+ bool lbr_flags;
+ bool write_backward;
+ bool group_read;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
struct cpu_map;
struct target;
struct thread_map;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f6630cb95eff..02721b579746 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -430,7 +430,8 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_env *env);
+ struct perf_env *env,
+ bool warn_lost_event);
int script_browse(const char *script_opt);
#else
static inline
@@ -438,7 +439,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
const char *help __maybe_unused,
struct hist_browser_timer *hbt __maybe_unused,
float min_pcnt __maybe_unused,
- struct perf_env *env __maybe_unused)
+ struct perf_env *env __maybe_unused,
+ bool warn_lost_event __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 05076e683938..91531a7c8fbf 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -22,29 +22,27 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
- u64 start, u64 end, u64 *prev)
+ u64 *startp, u64 end)
{
unsigned char *data = map->base + page_size;
union perf_event *event = NULL;
- int diff = end - start;
+ int diff = end - *startp;
if (diff >= (int)sizeof(event->header)) {
size_t size;
- event = (union perf_event *)&data[start & map->mask];
+ event = (union perf_event *)&data[*startp & map->mask];
size = event->header.size;
- if (size < sizeof(event->header) || diff < (int)size) {
- event = NULL;
- goto broken_event;
- }
+ if (size < sizeof(event->header) || diff < (int)size)
+ return NULL;
/*
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
*/
- if ((start & map->mask) + size != ((start + size) & map->mask)) {
- unsigned int offset = start;
+ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+ unsigned int offset = *startp;
unsigned int len = min(sizeof(*event), size), cpy;
void *dst = map->event_copy;
@@ -59,20 +57,19 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
event = (union perf_event *)map->event_copy;
}
- start += size;
+ *startp += size;
}
-broken_event:
- if (prev)
- *prev = start;
-
return event;
}
+/*
+ * Legacy interface for mmap read.
+ * Don't use it; use perf_mmap__read_event() instead.
+ */
union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
{
u64 head;
- u64 old = map->prev;
/*
* Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -82,13 +79,26 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
head = perf_mmap__read_head(map);
- return perf_mmap__read(map, old, head, &map->prev);
+ return perf_mmap__read(map, &map->prev, head);
}
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
+/*
+ * Read events from the ring buffer one by one.
+ * Return one event per call.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while ((event = perf_mmap__read_event()) != NULL) {
+ * // process the event
+ * perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+ bool overwrite,
+ u64 *startp, u64 end)
{
- u64 head, end;
- u64 start = map->prev;
+ union perf_event *event;
/*
* Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -96,40 +106,19 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
if (!refcount_read(&map->refcnt))
return NULL;
- head = perf_mmap__read_head(map);
- if (!head)
+ if (startp == NULL)
return NULL;
- /*
- * 'head' pointer starts from 0. Kernel minus sizeof(record) form
- * it each time when kernel writes to it, so in fact 'head' is
- * negative. 'end' pointer is made manually by adding the size of
- * the ring buffer to 'head' pointer, means the validate data can
- * read is the whole ring buffer. If 'end' is positive, the ring
- * buffer has not fully filled, so we must adjust 'end' to 0.
- *
- * However, since both 'head' and 'end' is unsigned, we can't
- * simply compare 'end' against 0. Here we compare '-head' and
- * the size of the ring buffer, where -head is the number of bytes
- * kernel write to the ring buffer.
- */
- if (-head < (u64)(map->mask + 1))
- end = 0;
- else
- end = head + map->mask + 1;
-
- return perf_mmap__read(map, start, end, &map->prev);
-}
+ /* non-overwrite mode doesn't pause the ring buffer */
+ if (!overwrite)
+ end = perf_mmap__read_head(map);
-void perf_mmap__read_catchup(struct perf_mmap *map)
-{
- u64 head;
+ event = perf_mmap__read(map, startp, end);
- if (!refcount_read(&map->refcnt))
- return;
+ if (!overwrite)
+ map->prev = *startp;
- head = perf_mmap__read_head(map);
- map->prev = head;
+ return event;
}
static bool perf_mmap__empty(struct perf_mmap *map)
@@ -267,41 +256,60 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6
return -1;
}
-int perf_mmap__push(struct perf_mmap *md, bool overwrite,
- void *to, int push(void *to, void *buf, size_t size))
+/*
+ * Report the start and end of the available data in the ring buffer.
+ */
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+ u64 *startp, u64 *endp)
{
u64 head = perf_mmap__read_head(md);
u64 old = md->prev;
- u64 end = head, start = old;
unsigned char *data = md->base + page_size;
unsigned long size;
- void *buf;
- int rc = 0;
- start = overwrite ? head : old;
- end = overwrite ? old : head;
+ *startp = overwrite ? head : old;
+ *endp = overwrite ? old : head;
- if (start == end)
- return 0;
+ if (*startp == *endp)
+ return -EAGAIN;
- size = end - start;
+ size = *endp - *startp;
if (size > (unsigned long)(md->mask) + 1) {
if (!overwrite) {
WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
md->prev = head;
perf_mmap__consume(md, overwrite);
- return 0;
+ return -EAGAIN;
}
/*
* Backward ring buffer is full. We still have a chance to read
* most of the data from it.
*/
- if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
- return -1;
+ if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
+ return -EINVAL;
}
+ return 0;
+}
+
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
+ void *to, int push(void *to, void *buf, size_t size))
+{
+ u64 head = perf_mmap__read_head(md);
+ u64 end, start;
+ unsigned char *data = md->base + page_size;
+ unsigned long size;
+ void *buf;
+ int rc = 0;
+
+ rc = perf_mmap__read_init(md, overwrite, &start, &end);
+ if (rc < 0)
+ return (rc == -EAGAIN) ? 0 : -1;
+
+ size = end - start;
+
if ((start & md->mask) + size != (end & md->mask)) {
buf = &data[start & md->mask];
size = md->mask + 1 - (start & md->mask);
@@ -327,3 +335,14 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
out:
return rc;
}
+
+/*
+ * Mandatory for overwrite mode.
+ * The direction of overwrite mode is backward;
+ * the last perf_mmap__read() will set the tail to map->prev.
+ * Correct map->prev to head, which is the end of the next read.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+ map->prev = perf_mmap__read_head(map);
+}
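Taken together, perf_mmap__read_init(), perf_mmap__read_event(), perf_mmap__consume() and perf_mmap__read_done() replace the old read_backward/read_catchup pair. A minimal consumer loop following the usage comment above could look like this (a sketch only, assuming 'md' is an already mmap'ed ring buffer; drain_one_map() is an invented name):

static void drain_one_map(struct perf_mmap *md, bool overwrite)
{
	union perf_event *event;
	u64 start, end;

	/* -EAGAIN means the ring buffer is empty, -EINVAL means it is broken */
	if (perf_mmap__read_init(md, overwrite, &start, &end) < 0)
		return;

	while ((event = perf_mmap__read_event(md, overwrite, &start, end)) != NULL) {
		/* ... parse and deliver the event here ... */
		perf_mmap__consume(md, overwrite);
	}

	/* mandatory for overwrite mode: rewinds map->prev to the head */
	perf_mmap__read_done(md);
}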
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e43d7b55a55f..ec7d3a24e276 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -65,8 +65,6 @@ void perf_mmap__put(struct perf_mmap *map);
void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
-void perf_mmap__read_catchup(struct perf_mmap *md);
-
static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
{
struct perf_event_mmap_page *pc = mm->base;
@@ -87,11 +85,17 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
}
union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
+
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+ bool overwrite,
+ u64 *startp, u64 end);
int perf_mmap__push(struct perf_mmap *md, bool backward,
void *to, int push(void *to, void *buf, size_t size));
size_t perf_mmap__mmap_len(struct perf_mmap *map);
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+ u64 *startp, u64 *endp);
+void perf_mmap__read_done(struct perf_mmap *map);
#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 1e97937b03a9..6f09e4962dad 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -137,6 +137,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
struct perf_evsel *evsel;
bool use_sample_identifier = false;
bool use_comm_exec;
+ bool sample_id = opts->sample_id;
/*
* Set the evsel leader links before we configure attributes,
@@ -163,8 +164,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
* match the id.
*/
use_sample_identifier = perf_can_sample_identifier();
- evlist__for_each_entry(evlist, evsel)
- perf_evsel__set_sample_id(evsel, use_sample_identifier);
+ sample_id = true;
} else if (evlist->nr_entries > 1) {
struct perf_evsel *first = perf_evlist__first(evlist);
@@ -174,6 +174,10 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
use_sample_identifier = perf_can_sample_identifier();
break;
}
+ sample_id = true;
+ }
+
+ if (sample_id) {
evlist__for_each_entry(evlist, evsel)
perf_evsel__set_sample_id(evsel, use_sample_identifier);
}
diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
index 370138e7e35c..88223bc7c82b 100644
--- a/tools/perf/util/trigger.h
+++ b/tools/perf/util/trigger.h
@@ -12,7 +12,7 @@
* States and transits:
*
*
- * OFF--(on)--> READY --(hit)--> HIT
+ * OFF--> ON --> READY --(hit)--> HIT
* ^ |
* | (ready)
* | |
@@ -27,8 +27,9 @@ struct trigger {
volatile enum {
TRIGGER_ERROR = -2,
TRIGGER_OFF = -1,
- TRIGGER_READY = 0,
- TRIGGER_HIT = 1,
+ TRIGGER_ON = 0,
+ TRIGGER_READY = 1,
+ TRIGGER_HIT = 2,
} state;
const char *name;
};
@@ -50,7 +51,7 @@ static inline bool trigger_is_error(struct trigger *t)
static inline void trigger_on(struct trigger *t)
{
TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
- t->state = TRIGGER_READY;
+ t->state = TRIGGER_ON;
}
static inline void trigger_ready(struct trigger *t)
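With the new TRIGGER_ON state, arming and readying a trigger become distinct steps. A sketch of the intended sequence follows; DEFINE_TRIGGER() and trigger_hit() are assumed to be the existing helpers elsewhere in this header:

static DEFINE_TRIGGER(example_trigger);	/* starts out in TRIGGER_OFF */

static void example(void)
{
	trigger_on(&example_trigger);	/* OFF -> ON: armed, not yet ready */
	trigger_ready(&example_trigger);	/* ON -> READY: may now be hit */
	trigger_hit(&example_trigger);	/* READY -> HIT */
}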
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 443892dabedb..1019bbc5dbd8 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -340,35 +340,15 @@ size_t hex_width(u64 v)
return n;
}
-static int hex(char ch)
-{
- if ((ch >= '0') && (ch <= '9'))
- return ch - '0';
- if ((ch >= 'a') && (ch <= 'f'))
- return ch - 'a' + 10;
- if ((ch >= 'A') && (ch <= 'F'))
- return ch - 'A' + 10;
- return -1;
-}
-
/*
* While we find valid hex chars, build long_val.
* Return the number of chars processed.
*/
int hex2u64(const char *ptr, u64 *long_val)
{
- const char *p = ptr;
- *long_val = 0;
-
- while (*p) {
- const int hex_val = hex(*p);
+ char *p;
- if (hex_val < 0)
- break;
-
- *long_val = (*long_val << 4) | hex_val;
- p++;
- }
+ *long_val = strtoull(ptr, &p, 16);
return p - ptr;
}
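One behavioral note on the strtoull() rewrite: unlike the old open-coded loop, strtoull() also skips leading whitespace and accepts an optional "0x" prefix. A standalone illustration (not part of the tree):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *s = "deadbeef-rest";
	char *end;
	unsigned long long v = strtoull(s, &end, 16);

	/* prints: val=deadbeef consumed=8 */
	printf("val=%llx consumed=%td\n", v, end - s);
	return 0;
}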
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
index a1883bbb0144..2cccbba64418 100644
--- a/tools/power/acpi/Makefile.config
+++ b/tools/power/acpi/Makefile.config
@@ -56,9 +56,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
# to compile vs uClibc, that can be done here as well.
CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
CROSS_COMPILE ?= $(CROSS)
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)gcc
-STRIP = $(CROSS_COMPILE)strip
HOSTCC = gcc
# check if compiler option is supported
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index fcb3ed0be5f8..dd614463d4d6 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -42,6 +42,24 @@ EXTRA_WARNINGS += -Wformat
CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+$(call allow-override,CXX,$(CROSS_COMPILE)g++)
+$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+
ifeq ($(CC_NO_CLANG), 1)
EXTRA_WARNINGS += -Wstrict-aliasing=3
endif
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index 90615e10c79a..815d15589177 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -11,8 +11,6 @@ endif
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
ALL_TARGETS := spidev_test spidev_fdx
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 44ef9eba5a7a..6c645eb77d42 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -178,6 +178,55 @@ void idr_get_next_test(int base)
idr_destroy(&idr);
}
+int idr_u32_cb(int id, void *ptr, void *data)
+{
+ BUG_ON(id < 0);
+ BUG_ON(ptr != DUMMY_PTR);
+ return 0;
+}
+
+void idr_u32_test1(struct idr *idr, u32 handle)
+{
+ static bool warned = false;
+ u32 id = handle;
+ int sid = 0;
+ void *ptr;
+
+ BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL));
+ BUG_ON(id != handle);
+ BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC);
+ BUG_ON(id != handle);
+ if (!warned && id > INT_MAX)
+ printk("vvv Ignore these warnings\n");
+ ptr = idr_get_next(idr, &sid);
+ if (id > INT_MAX) {
+ BUG_ON(ptr != NULL);
+ BUG_ON(sid != 0);
+ } else {
+ BUG_ON(ptr != DUMMY_PTR);
+ BUG_ON(sid != id);
+ }
+ idr_for_each(idr, idr_u32_cb, NULL);
+ if (!warned && id > INT_MAX) {
+ printk("^^^ Warnings over\n");
+ warned = true;
+ }
+ BUG_ON(idr_remove(idr, id) != DUMMY_PTR);
+ BUG_ON(!idr_is_empty(idr));
+}
+
+void idr_u32_test(int base)
+{
+ DEFINE_IDR(idr);
+ idr_init_base(&idr, base);
+ idr_u32_test1(&idr, 10);
+ idr_u32_test1(&idr, 0x7fffffff);
+ idr_u32_test1(&idr, 0x80000000);
+ idr_u32_test1(&idr, 0x80000001);
+ idr_u32_test1(&idr, 0xffe00000);
+ idr_u32_test1(&idr, 0xffffffff);
+}
+
void idr_checks(void)
{
unsigned long i;
@@ -248,6 +297,9 @@ void idr_checks(void)
idr_get_next_test(0);
idr_get_next_test(1);
idr_get_next_test(4);
+ idr_u32_test(4);
+ idr_u32_test(1);
+ idr_u32_test(0);
}
/*
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 6903ccf35595..44a0d1ad4408 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -29,7 +29,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
{
struct radix_tree_node *node;
- if (flags & __GFP_NOWARN)
+ if (!(flags & __GFP_DIRECT_RECLAIM))
return NULL;
pthread_mutex_lock(&cachep->lock);
@@ -73,10 +73,17 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
void *kmalloc(size_t size, gfp_t gfp)
{
- void *ret = malloc(size);
+ void *ret;
+
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ return NULL;
+
+ ret = malloc(size);
uatomic_inc(&nr_allocated);
if (kmalloc_verbose)
printf("Allocating %p from malloc\n", ret);
+ if (gfp & __GFP_ZERO)
+ memset(ret, 0, size);
return ret;
}
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/radix-tree/linux/compiler_types.h
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
index e9fff59dfd8a..e3201ccf54c3 100644
--- a/tools/testing/radix-tree/linux/gfp.h
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -11,6 +11,7 @@
#define __GFP_IO 0x40u
#define __GFP_FS 0x80u
#define __GFP_NOWARN 0x200u
+#define __GFP_ZERO 0x8000u
#define __GFP_ATOMIC 0x80000u
#define __GFP_ACCOUNT 0x100000u
#define __GFP_DIRECT_RECLAIM 0x400000u
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
index 979baeec7e70..a037def0dec6 100644
--- a/tools/testing/radix-tree/linux/slab.h
+++ b/tools/testing/radix-tree/linux/slab.h
@@ -3,6 +3,7 @@
#define SLAB_H
#include <linux/types.h>
+#include <linux/gfp.h>
#define SLAB_HWCACHE_ALIGN 1
#define SLAB_PANIC 2
@@ -11,6 +12,11 @@
void *kmalloc(size_t size, gfp_t);
void kfree(void *);
+static inline void *kzalloc(size_t size, gfp_t gfp)
+{
+ return kmalloc(size, gfp | __GFP_ZERO);
+}
+
void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
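With __GFP_ZERO now honored by the harness kmalloc(), the kzalloc() wrapper above behaves like the kernel's: it returns zero-filled memory. A small illustration ('struct item' and alloc_item() are hypothetical):

struct item {
	int key;
	void *ptr;
};

static struct item *alloc_item(void)
{
	/* zero-filled on return, so key == 0 and ptr == NULL */
	return kzalloc(sizeof(struct item), GFP_KERNEL);
}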
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index 1a7492268993..f6304d2be90c 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -11,11 +11,11 @@ all:
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
- #SUBDIR test prog name should be in the form: SUBDIR_test.sh
+ #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
TEST=$$DIR"_test.sh"; \
- if [ -e $$DIR/$$TEST ]; then
- rsync -a $$DIR/$$TEST $$BUILD_TARGET/;
- fi
+ if [ -e $$DIR/$$TEST ]; then \
+ rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+ fi \
done
override define RUN_TESTS
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index cc15af2e54fe..9cf83f895d98 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -11,3 +11,4 @@ test_progs
test_tcpbpf_user
test_verifier_log
feature
+test_libbpf_open
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5c43c187f27c..f35fb02bdf56 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -13,6 +13,14 @@ endif
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
LDLIBS += -lcap -lelf -lrt -lpthread
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(TEST_CUSTOM_PROGS): urandom_read
+
+urandom_read: urandom_read.c
+ $(CC) -o $(TEST_CUSTOM_PROGS) -static $<
+
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
@@ -21,7 +29,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
- sample_map_ret0.o test_tcpbpf_kern.o
+ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
+ sockmap_tcp_msg_prog.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -35,12 +44,14 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open
include ../lib.mk
-BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a
$(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+
.PHONY: force
# force a rebuild of BPFOBJ when its dependencies are updated
@@ -72,3 +83,5 @@ $(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index dde2c11d7771..7cae376d8d0c 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -86,6 +86,14 @@ static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
(void *) BPF_FUNC_perf_prog_read_value;
static int (*bpf_override_return)(void *ctx, unsigned long rc) =
(void *) BPF_FUNC_override_return;
+static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
+ (void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
+ (void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+ (void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
+ (void *) BPF_FUNC_msg_pull_data;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -123,6 +131,8 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
(void *) BPF_FUNC_skb_under_cgroup;
static int (*bpf_skb_change_head)(void *, int len, int flags) =
(void *) BPF_FUNC_skb_change_head;
+static int (*bpf_skb_pull_data)(void *, int len) =
+ (void *) BPF_FUNC_skb_pull_data;
/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
#if defined(__TARGET_ARCH_x86)
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
new file mode 100644
index 000000000000..9dac9b30f8ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rlimit.h
@@ -0,0 +1,28 @@
+#include <sys/resource.h>
+#include <stdio.h>
+
+static __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+ struct rlimit rlim_old, rlim_new = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
+ };
+
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+ /* For the sake of running the test cases, we temporarily
+ * set the rlimit to infinity so the kernel can focus on
+ * errors from the actual test cases instead of noise from
+ * hitting memlock limits. The limit is per process rather
+ * than global, hence a destructor is not really needed
+ * here.
+ */
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+ perror("Unable to lift memlock rlimit");
+ /* Try a lower limit, but expect potential test
+ * case failures from this!
+ */
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
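Any test that includes this header gets the constructor for free; the C runtime invokes it before main(), so no call site is needed. A hedged sketch of a consumer (hypothetical file, not in the tree):

#include <stdio.h>
#include "bpf_rlimit.h"	/* bpf_rlimit_ctor() runs before main() */

int main(void)
{
	/* RLIMIT_MEMLOCK has already been lifted at this point */
	printf("memlock rlimit prepared by constructor\n");
	return 0;
}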
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
index a1dec2b6d9c5..0f92858f6226 100644
--- a/tools/testing/selftests/bpf/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -20,14 +20,25 @@ int bpf_prog1(struct __sk_buff *skb)
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
__u8 *d = data;
+ __u32 len = (__u32) data_end - (__u32) data;
+ int err;
- if (data + 10 > data_end)
- return skb->len;
+ if (data + 10 > data_end) {
+ err = bpf_skb_pull_data(skb, 10);
+ if (err)
+ return SK_DROP;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+ if (data + 10 > data_end)
+ return SK_DROP;
+ }
/* This write/read is a bit pointless but tests the verifier and
* strparser handler for read/write pkt data and access into sk
* fields.
*/
+ d = data;
d[7] = 1;
return skb->len;
}
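The bounds-check/pull/re-check sequence above is the standard pattern for bpf_skb_pull_data(): a successful pull invalidates the old data/data_end pointers in the verifier's eyes, so they must be reloaded and the bounds re-checked before any access. Condensed (same calls as above, with explanatory comments):

if (data + 10 > data_end) {
	if (bpf_skb_pull_data(skb, 10))
		return SK_DROP;
	/* pull succeeded: reload the pointers, then re-check the bounds */
	data_end = (void *)(long)skb->data_end;
	data = (void *)(long)skb->data;
	if (data + 10 > data_end)
		return SK_DROP;
}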
diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
new file mode 100644
index 000000000000..12a7b5c82ed6
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
@@ -0,0 +1,33 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sk_msg1")
+int bpf_prog1(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+
+ char *d;
+
+ if (data + 8 > data_end)
+ return SK_DROP;
+
+ bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
+ d = (char *)data;
+ bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
index d7bea972cb21..2ce7634a4012 100644
--- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -26,6 +26,13 @@ struct bpf_map_def SEC("maps") sock_map_tx = {
.max_entries = 20,
};
+struct bpf_map_def SEC("maps") sock_map_msg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
struct bpf_map_def SEC("maps") sock_map_break = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index ff8bd7e3e50c..6b1b302310fe 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -9,8 +9,6 @@
#include <stddef.h>
#include <stdbool.h>
-#include <sys/resource.h>
-
#include <linux/unistd.h>
#include <linux/filter.h>
#include <linux/bpf_perf_event.h>
@@ -19,6 +17,7 @@
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
@@ -702,9 +701,6 @@ static int do_test(unsigned int from, unsigned int to)
int main(int argc, char **argv)
{
unsigned int from = 0, to = ARRAY_SIZE(tests);
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
if (argc == 3) {
unsigned int l = atoi(argv[argc - 2]);
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index 3489cc283433..9c8b50bac7e0 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -11,13 +11,13 @@
#include <errno.h>
#include <assert.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
#define DEV_CGROUP_PROG "./dev_cgroup.o"
@@ -25,15 +25,11 @@
int main(int argc, char **argv)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
struct bpf_object *obj;
int error = EXIT_FAILURE;
int prog_fd, cgroup_fd;
__u32 prog_cnt;
- if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
- perror("Unable to lift memlock rlimit");
-
if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
&obj, &prog_fd)) {
printf("Failed to load DEV_CGROUP program\n");
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 2be87e9ee28d..147e34cfceb7 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -22,10 +22,11 @@
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
struct tlpm_node {
struct tlpm_node *next;
@@ -736,17 +737,11 @@ static void test_lpm_multi_thread(void)
int main(void)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
- int i, ret;
+ int i;
/* we want predictable, pseudo random tests */
srand(0xf00ba1);
- /* allow unlimited locked memory */
- ret = setrlimit(RLIMIT_MEMLOCK, &limit);
- if (ret < 0)
- perror("Unable to lift memlock rlimit");
-
test_lpm_basic();
test_lpm_order();
@@ -755,11 +750,8 @@ int main(void)
test_lpm_map(i);
test_lpm_ipaddr();
-
test_lpm_delete();
-
test_lpm_get_next_key();
-
test_lpm_multi_thread();
printf("test_lpm: OK\n");
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 8c10c9180c1a..781c7de343be 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -16,10 +16,11 @@
#include <time.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
#define LOCAL_FREE_TARGET (128)
#define PERCPU_FREE_TARGET (4)
@@ -613,7 +614,6 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
BPF_MAP_TYPE_LRU_PERCPU_HASH};
int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
@@ -621,8 +621,6 @@ int main(int argc, char **argv)
setbuf(stdout, NULL);
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
nr_cpus = bpf_num_possible_cpus();
assert(nr_cpus != -1);
printf("nr_cpus:%d\n\n", nr_cpus);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 436c4c72414f..6c253343a6f9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -17,13 +17,14 @@
#include <stdlib.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
static int map_flags;
@@ -126,6 +127,8 @@ static void test_hashmap_sizes(int task, void *data)
fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
2, map_flags);
if (fd < 0) {
+ if (errno == ENOMEM)
+ return;
printf("Failed to create hashmap key=%d value=%d '%s'\n",
i, j, strerror(errno));
exit(1);
@@ -461,15 +464,17 @@ static void test_devmap(int task, void *data)
#include <linux/err.h>
#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
static void test_sockmap(int tasks, void *data)
{
- int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc;
- struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
+ struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
+ int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
- int parse_prog, verdict_prog;
+ int parse_prog, verdict_prog, msg_prog;
struct sockaddr_in addr;
+ int one = 1, s, sc, rc;
struct bpf_object *obj;
struct timeval to;
__u32 key, value;
@@ -581,6 +586,12 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ err = bpf_prog_attach(-1, fd, BPF_SK_MSG_VERDICT, 0);
+ if (!err) {
+ printf("Failed invalid msg verdict prog attach\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
if (!err) {
printf("Failed unknown prog attach\n");
@@ -599,6 +610,12 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
+ if (err) {
+ printf("Failed empty msg verdict prog detach\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
if (!err) {
printf("Detach invalid prog successful\n");
@@ -613,6 +630,13 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
+ BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+ if (err) {
+ printf("Failed to load SK_SKB msg prog\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
if (err) {
@@ -628,7 +652,7 @@ static void test_sockmap(int tasks, void *data)
map_fd_rx = bpf_map__fd(bpf_map_rx);
if (map_fd_rx < 0) {
- printf("Failed to get map fd\n");
+ printf("Failed to get map rx fd\n");
goto out_sockmap;
}
@@ -644,6 +668,18 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
+ if (IS_ERR(bpf_map_msg)) {
+ printf("Failed to load map msg from msg_verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_msg = bpf_map__fd(bpf_map_msg);
+ if (map_fd_msg < 0) {
+ printf("Failed to get map msg fd\n");
+ goto out_sockmap;
+ }
+
bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
if (IS_ERR(bpf_map_break)) {
printf("Failed to load map tx from verdict prog\n");
@@ -677,6 +713,12 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ err = bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ printf("Failed msg verdict bpf prog attach\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_attach(verdict_prog, map_fd_rx,
__MAX_BPF_ATTACH_TYPE, 0);
if (!err) {
@@ -716,6 +758,14 @@ static void test_sockmap(int tasks, void *data)
}
}
+ /* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */
+ i = 0;
+ err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_msg update sockmap %i\n", err);
+ goto out_sockmap;
+ }
+
/* Test map send/recv */
for (i = 0; i < 2; i++) {
buf[0] = i;
@@ -1124,10 +1174,6 @@ static void run_all_tests(void)
int main(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
-
map_flags = 0;
run_all_tests();
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b549308abd19..e9df48b306df 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -26,7 +26,6 @@ typedef __u16 __sum16;
#include <sys/ioctl.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <fcntl.h>
@@ -34,9 +33,11 @@ typedef __u16 __sum16;
#include <linux/err.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+
#include "test_iptunnel_common.h"
#include "bpf_util.h"
#include "bpf_endian.h"
+#include "bpf_rlimit.h"
static int error_cnt, pass_cnt;
@@ -840,7 +841,8 @@ static void test_tp_attach_query(void)
static int compare_map_keys(int map1_fd, int map2_fd)
{
__u32 key, next_key;
- char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
+ char val_buf[PERF_MAX_STACK_DEPTH *
+ sizeof(struct bpf_stack_build_id)];
int err;
err = bpf_map_get_next_key(map1_fd, NULL, &key);
@@ -963,12 +965,168 @@ out:
return;
}
-int main(void)
+static int extract_build_id(char *build_id, size_t size)
+{
+ FILE *fp;
+ char *line = NULL;
+ size_t len = 0;
+
+ fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
+ if (fp == NULL)
+ return -1;
+
+ if (getline(&line, &len, fp) == -1)
+ goto err;
+ pclose(fp);
+
+ if (len > size - 1)
+ len = size - 1;
+ memcpy(build_id, line, len);
+ build_id[len] = '\0';
+ return 0;
+err:
+ pclose(fp);
+ return -1;
+}
+
+static void test_stacktrace_build_id(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+ int control_map_fd, stackid_hmap_fd, stackmap_fd;
+ const char *file = "./test_stacktrace_build_id.o";
+ int bytes, efd, err, pmu_fd, prog_fd;
+ struct perf_event_attr attr = {};
+ __u32 key, previous_key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+ int i, j;
+ struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+ int build_id_matches = 0;
- setrlimit(RLIMIT_MEMLOCK, &rinf);
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* Get the ID for the random/urandom_read tracepoint */
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/random/urandom_read/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ /* Open the perf event and attach the bpf program */
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+ == 0);
+ assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vice versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = extract_build_id(buf, 256);
+
+ if (CHECK(err, "get build_id with readelf",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ if (CHECK(err, "get_next_key from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+
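+ /* Walk every stack trace collected in stackmap; for any frame whose
+ * build id was resolved, hex-encode the 20-byte id and look for it
+ * in the readelf output for ./urandom_read.
+ */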
+ do {
+ char build_id[64];
+
+ err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ if (CHECK(err, "lookup_elem from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+ if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+ id_offs[i].offset != 0) {
+ for (j = 0; j < 20; ++j)
+ sprintf(build_id + 2 * j, "%02x",
+ id_offs[i].build_id[j] & 0xff);
+ if (strstr(buf, build_id) != NULL)
+ build_id_matches = 1;
+ }
+ previous_key = key;
+ } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+ CHECK(build_id_matches < 1, "build id match",
+ "Didn't find expected build ID from the map\n");
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+
+out:
+ return;
+}
+
+int main(void)
+{
test_pkt_access();
test_xdp();
test_l4lb_all();
@@ -979,6 +1137,7 @@ int main(void)
test_obj_name();
test_tp_attach_query();
test_stacktrace_map();
+ test_stacktrace_build_id();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
new file mode 100644
index 000000000000..b755bd783ce5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
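+/* Userspace sets control_map[0] to 1 once the workload has run; the
+ * tracepoint program below checks the flag and stops collecting
+ * stacks.
+ */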
+struct bpf_map_def SEC("maps") control_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 10000,
+};
+
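+/* With BPF_F_STACK_BUILD_ID the kernel stores, for each frame, a
+ * struct bpf_stack_build_id (resolution status, 20-byte build id and
+ * file offset) instead of a raw instruction pointer, hence the
+ * value_size below.
+ */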
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+ .map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args {
+ unsigned long long pad;
+ int got_bits;
+ int pool_left;
+ int input_left;
+};
+
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+ __u32 key = 0, val = 0, *value_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* The size of stackmap and stackid_hmap should be the same */
+ key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+ if ((int)key >= 0)
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 8b201895c569..6272c784ca2a 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -12,7 +12,6 @@
#include <assert.h>
#include <sys/socket.h>
-#include <sys/resource.h>
#include <linux/filter.h>
#include <linux/bpf.h>
@@ -21,6 +20,7 @@
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
static struct bpf_insn prog[BPF_MAXINSNS];
@@ -184,11 +184,9 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map,
int main(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
uint32_t tests = 0;
int i, fd_map;
- setrlimit(RLIMIT_MEMLOCK, &rinf);
fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
sizeof(int), 1, BPF_F_NO_PREALLOC);
assert(fd_map > 0);
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
index 57119ad57a3f..3e645ee41ed5 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -5,7 +5,6 @@
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
-#include <linux/in6.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 95a370f3d378..84ab5163c828 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -11,12 +11,14 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_util.h"
+#include "bpf_rlimit.h"
#include <linux/perf_event.h>
#include "test_tcpbpf.h"
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c0f16e93f9bd..3e7718b1a9ae 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -24,7 +24,6 @@
#include <limits.h>
#include <sys/capability.h>
-#include <sys/resource.h>
#include <linux/unistd.h>
#include <linux/filter.h>
@@ -41,7 +40,7 @@
# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
# endif
#endif
-
+#include "bpf_rlimit.h"
#include "../../../include/linux/filter.h"
#ifndef ARRAY_SIZE
@@ -57,6 +56,9 @@
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
@@ -1595,6 +1597,60 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SK_SKB,
},
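+ /* SK_MSG programs get direct access to the message payload via
+ * sk_msg_md data/data_end; as with skb and xdp programs, every
+ * access must be preceded by a bounds check against data_end,
+ * which the three tests below exercise.
+ */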
{
+ "direct packet read for SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "direct packet write for SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "overlapping checks for direct packet access SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
"check skb->mark is not writeable by sockets",
.insns = {
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
@@ -2587,6 +2643,90 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
},
{
+ "runtime/jit: tail_call within bounds, prog once",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "runtime/jit: tail_call within bounds, prog loop",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 41,
+ },
+ {
+ "runtime/jit: tail_call within bounds, no prog",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "runtime/jit: tail_call out of bounds",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 256),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "runtime/jit: pass negative index to tail_call",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, -1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "runtime/jit: pass > 32bit index to tail_call",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 2 },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
"stack pointer arithmetic",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 4),
@@ -11137,6 +11277,152 @@ static struct bpf_test tests[] = {
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
+ {
+ "jit: lsh, rsh, arsh by 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+ BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: various mul tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "xadd/w check unaligned stack",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned stack access off",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "xadd/w check unaligned map",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = REJECT,
+ .errstr = "misaligned value access off",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "xadd/w check unaligned pkt",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 99),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
+ BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1),
+ BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R2 packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
};
static int probe_filter_length(const struct bpf_insn *fp)
@@ -11161,16 +11447,61 @@ static int create_map(uint32_t size_value, uint32_t max_elem)
return fd;
}
+static int create_prog_dummy1(void)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_dummy2(int mfd, int idx)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_3, idx),
+ BPF_LD_MAP_FD(BPF_REG_2, mfd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 41),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
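+
+/* The prog array used by the tail_call tests is filled from the two
+ * dummies above: slot 0 returns 42, slot 1 tail-calls itself until the
+ * kernel's tail call limit (MAX_TAIL_CALL_CNT == 32) is exceeded and
+ * then returns 41, and slot 2 is left empty so the call falls through.
+ */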
+
static int create_prog_array(void)
{
- int fd;
+ int p1key = 0, p2key = 1;
+ int mfd, p1fd, p2fd;
- fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
- sizeof(int), 4, 0);
- if (fd < 0)
+ mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+ sizeof(int), 4, 0);
+ if (mfd < 0) {
printf("Failed to create prog array '%s'!\n", strerror(errno));
+ return -1;
+ }
- return fd;
+ p1fd = create_prog_dummy1();
+ p2fd = create_prog_dummy2(mfd, p2key);
+ if (p1fd < 0 || p2fd < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+ goto out;
+ close(p2fd);
+ close(p1fd);
+
+ return mfd;
+out:
+ close(p2fd);
+ close(p1fd);
+ close(mfd);
+ return -1;
}
static int create_map_in_map(void)
@@ -11291,7 +11622,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
goto fail_log;
}
if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
- printf("FAIL\nUnexpected error message!\n");
+ printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+ expected_err, bpf_vlog);
goto fail_log;
}
}
@@ -11375,9 +11707,20 @@ out:
return ret;
}
+static void get_unpriv_disabled(void)
+{
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+ if (!fd)
+ return;
+ if (fgets(buf, 2, fd) == buf && atoi(buf))
+ unpriv_disabled = true;
+ fclose(fd);
+}
+
static int do_test(bool unpriv, unsigned int from, unsigned int to)
{
- int i, passes = 0, errors = 0;
+ int i, passes = 0, errors = 0, skips = 0;
for (i = from; i < to; i++) {
struct bpf_test *test = &tests[i];
@@ -11385,7 +11728,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
/* Program types that are not supported by non-root are
* skipped right away.
*/
- if (!test->prog_type) {
+ if (!test->prog_type && unpriv_disabled) {
+ printf("#%d/u %s SKIP\n", i, test->descr);
+ skips++;
+ } else if (!test->prog_type) {
if (!unpriv)
set_admin(false);
printf("#%d/u %s ", i, test->descr);
@@ -11394,20 +11740,22 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
set_admin(true);
}
- if (!unpriv) {
+ if (unpriv) {
+ printf("#%d/p %s SKIP\n", i, test->descr);
+ skips++;
+ } else {
printf("#%d/p %s ", i, test->descr);
do_test_single(test, false, &passes, &errors);
}
}
- printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
+ printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+ skips, errors);
return errors ? EXIT_FAILURE : EXIT_SUCCESS;
}
int main(int argc, char **argv)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
- struct rlimit rlim = { 1 << 20, 1 << 20 };
unsigned int from = 0, to = ARRAY_SIZE(tests);
bool unpriv = !is_admin();
@@ -11428,6 +11776,12 @@ int main(int argc, char **argv)
}
}
- setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
+ get_unpriv_disabled();
+ if (unpriv && unpriv_disabled) {
+ printf("Cannot run as unprivileged user with sysctl %s.\n",
+ UNPRIV_SYSCTL);
+ return EXIT_FAILURE;
+ }
+
return do_test(unpriv, from, to);
}
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
index e9626cf5607a..8d6918c3b4a2 100644
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -4,7 +4,6 @@
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <linux/filter.h>
@@ -12,6 +11,8 @@
#include <bpf/bpf.h>
+#include "bpf_rlimit.h"
+
#define LOG_SIZE (1 << 20)
#define err(str...) printf("ERROR: " str)
@@ -133,16 +134,11 @@ static void test_log_bad(char *log, size_t log_len, int log_level)
int main(int argc, char **argv)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
char full_log[LOG_SIZE];
char log[LOG_SIZE];
size_t want_len;
int i;
- /* allow unlimited locked memory to have more consistent error code */
- if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
- perror("Unable to lift memlock rlimit");
-
memset(log, 1, LOG_SIZE);
/* Test incorrect attr */
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
new file mode 100644
index 000000000000..4acfdebf36fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
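+/* Each read of /dev/urandom fires the random:urandom_read tracepoint
+ * that test_stacktrace_build_id attaches to; running this binary,
+ * which has a known build id, generates a few such events.
+ */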
+#define BUF_SIZE 256
+int main(void)
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+ int i;
+ char buf[BUF_SIZE];
+
+ if (fd < 0)
+ return 1;
+ for (i = 0; i < 4; ++i)
+ read(fd, buf, BUF_SIZE);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index cea4adcd42b8..a63e8453984d 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -12,9 +12,9 @@ all:
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
- if [ -e $$DIR/$(TEST_PROGS) ]; then
- rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;
- fi
+ if [ -e $$DIR/$(TEST_PROGS) ]; then \
+ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+ fi \
done
override define RUN_TESTS
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index a5276a91dfbf..0862e6f47a38 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -5,6 +5,7 @@ CFLAGS += -I../../../../include/
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_tests.sh
+TEST_FILES := run_fuse_test.sh
TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config
new file mode 100644
index 000000000000..835c7f4dadcd
--- /dev/null
+++ b/tools/testing/selftests/memfd/config
@@ -0,0 +1 @@
+CONFIG_FUSE_FS=m
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 86636d207adf..686da510f989 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -4,8 +4,9 @@ all:
include ../lib.mk
TEST_PROGS := mem-on-off-test.sh
-override RUN_TESTS := ./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
-override EMIT_TESTS := echo "$(RUN_TESTS)"
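+# The '@' prefix silences the command when the target runs it directly;
+# EMIT_TESTS strips it with $(subst) so the emitted test script carries
+# the plain command line.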
+override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
+
+override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))"
run_full_test:
@/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index d7c30d366935..785fc18a16b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 7177bea1fdfa..6a75a3ea44ad 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -2,3 +2,8 @@ CONFIG_USER_NS=y
CONFIG_BPF_SYSCALL=y
CONFIG_TEST_BPF=m
CONFIG_NUMA=y
+CONFIG_NET_VRF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
new file mode 100755
index 000000000000..3991ad1a368d
--- /dev/null
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -0,0 +1,467 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPv4 and IPv6 onlink tests
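+#
+# The onlink flag tells the kernel to treat the nexthop gateway as
+# directly reachable on the given device even when no connected route
+# covers it, which is why the gateways below sit outside the /24 and
+# /64 prefixes configured on the interfaces.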
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# Network interfaces
+# - odd in current namespace; even in peer ns
+declare -A NETIFS
+# default VRF
+NETIFS[p1]=veth1
+NETIFS[p2]=veth2
+NETIFS[p3]=veth3
+NETIFS[p4]=veth4
+# VRF
+NETIFS[p5]=veth5
+NETIFS[p6]=veth6
+NETIFS[p7]=veth7
+NETIFS[p8]=veth8
+
+# /24 network
+declare -A V4ADDRS
+V4ADDRS[p1]=169.254.1.1
+V4ADDRS[p2]=169.254.1.2
+V4ADDRS[p3]=169.254.3.1
+V4ADDRS[p4]=169.254.3.2
+V4ADDRS[p5]=169.254.5.1
+V4ADDRS[p6]=169.254.5.2
+V4ADDRS[p7]=169.254.7.1
+V4ADDRS[p8]=169.254.7.2
+
+# /64 network
+declare -A V6ADDRS
+V6ADDRS[p1]=2001:db8:101::1
+V6ADDRS[p2]=2001:db8:101::2
+V6ADDRS[p3]=2001:db8:301::1
+V6ADDRS[p4]=2001:db8:301::2
+V6ADDRS[p5]=2001:db8:501::1
+V6ADDRS[p6]=2001:db8:501::2
+V6ADDRS[p7]=2001:db8:701::1
+V6ADDRS[p8]=2001:db8:701::2
+
+# Test networks:
+# [1] = default table
+# [2] = VRF
+#
+# /32 host routes
+declare -A TEST_NET4
+TEST_NET4[1]=169.254.101
+TEST_NET4[2]=169.254.102
+# /128 host routes
+declare -A TEST_NET6
+TEST_NET6[1]=2001:db8:101
+TEST_NET6[2]=2001:db8:102
+
+# connected gateway
+CONGW[1]=169.254.1.254
+CONGW[2]=169.254.3.254
+CONGW[3]=169.254.5.254
+
+# recursive gateway
+RECGW4[1]=169.254.11.254
+RECGW4[2]=169.254.12.254
+RECGW6[1]=2001:db8:11::64
+RECGW6[2]=2001:db8:12::64
+
+# for v4 mapped to v6
+declare -A TEST_NET4IN6IN6
+TEST_NET4IN6[1]=10.1.1.254
+TEST_NET4IN6[2]=10.2.1.254
+
+# mcast address
+MCAST6=ff02::1
+
+
+PEER_NS=bart
+PEER_CMD="ip netns exec ${PEER_NS}"
+VRF=lisa
+VRF_TABLE=1101
+PBR_TABLE=101
+
+################################################################################
+# utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+log_subsection()
+{
+ echo
+ echo "#########################################"
+ echo "TEST SUBSECTION: $*"
+}
+
+run_cmd()
+{
+ echo
+ echo "COMMAND: $*"
+ eval $*
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local addr
+
+ addr=$(ip -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+################################################################################
+#
+
+setup()
+{
+ echo
+ echo "########################################"
+ echo "Configuring interfaces"
+
+ set -e
+
+ # create namespace
+ ip netns add ${PEER_NS}
+ ip -netns ${PEER_NS} li set lo up
+
+ # add vrf table
+ ip li add ${VRF} type vrf table ${VRF_TABLE}
+ ip li set ${VRF} up
+ ip ro add table ${VRF_TABLE} unreachable default
+ ip -6 ro add table ${VRF_TABLE} unreachable default
+
+ # create test interfaces
+ ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+ ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+ ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+ ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+
+ # enslave vrf interfaces
+ for n in 5 7; do
+ ip li set ${NETIFS[p${n}]} vrf ${VRF}
+ done
+
+ # add addresses
+ for n in 1 3 5 7; do
+ ip li set ${NETIFS[p${n}]} up
+ ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ done
+
+ # move peer interfaces to namespace and add addresses
+ for n in 2 4 6 8; do
+ ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
+ ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ done
+
+ set +e
+
+ # let DAD complete - assume default of 1 probe
+ sleep 1
+}
+
+cleanup()
+{
+ # make sure we start from a clean slate
+ ip netns del ${PEER_NS} 2>/dev/null
+ for n in 1 3 5 7; do
+ ip link del ${NETIFS[p${n}]} 2>/dev/null
+ done
+ ip link del ${VRF} 2>/dev/null
+ ip ro flush table ${VRF_TABLE}
+ ip -6 ro flush table ${VRF_TABLE}
+}
+
+################################################################################
+# IPv4 tests
+#
+
+run_ip()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip_mpath()
+{
+ local table="$1"
+ local prefix="$2"
+ local nh1="$3"
+ local nh2="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 \
+ nexthop via ${nh1} nexthop via ${nh2}
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv4()
+{
+ # - unicast connected, unicast recursive
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+ run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ # multipath version
+ #
+ log_subsection "default VRF - main table - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.5 \
+ "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+ "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.6 \
+ "${RECGW4[1]} dev ${NETIFS[p1]} onlink" \
+ "${RECGW4[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast recursive - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.7 \
+ "${CONGW[1]} dev ${NETIFS[p1]}" \
+ "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink first only"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.8 \
+ "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+ "${CONGW[2]} dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv4()
+{
+ run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+
+ run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
+
+ run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
+ "Gateway resolves to wrong nexthop device"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+################################################################################
+# IPv6 tests
+#
+
+run_ip6()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip6_mpath()
+{
+ local table="$1"
+ local prefix="$2"
+ local opts="$3"
+ local nh1="$4"
+ local nh2="$5"
+ local exp_rc="$6"
+ local desc="$7"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
+ nexthop via ${nh1} nexthop via ${nh2}
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv6()
+{
+ # - unicast connected, unicast recursive, v4-mapped
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
+ run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
+ run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ # multipath version
+ #
+ log_subsection "default VRF - main table - multipath"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::4 "onlink" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::5 "onlink" \
+ "${RECGW6[1]} dev ${NETIFS[p1]}" \
+ "${RECGW6[2]} dev ${NETIFS[p3]}" \
+ 0 "unicast recursive - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::6 "onlink" \
+ "::ffff:${TEST_NET4IN6[1]} dev ${NETIFS[p1]}" \
+ "::ffff:${TEST_NET4IN6[2]} dev ${NETIFS[p3]}" \
+ 0 "v4-mapped - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::7 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink both nexthops"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::8 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink first only"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::9 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv6()
+{
+ local lladdr
+
+ lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
+
+ run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
+ "Invalid gw - local linklocal address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
+ "Invalid gw - multicast address"
+
+ lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
+ "Invalid gw - local linklocal address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
+ "Invalid gw - multicast address, VRF"
+
+ run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
+ "No nexthop device given"
+
+ # default VRF validation is done against LOCAL table
+ # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
+ # "Gateway resolves to wrong nexthop device"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+run_onlink_tests()
+{
+ log_section "IPv4 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv4
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv4
+
+ log_section "IPv6 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv6
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv6
+}
+
+################################################################################
+# main
+
+nsuccess=0
+nfail=0
+
+cleanup
+setup
+run_onlink_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index a9154eefb2e2..9164e60d4b66 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -6,154 +6,179 @@
ret=0
-check_err()
-{
- if [ $ret -eq 0 ]; then
- ret=$1
- fi
-}
+VERBOSE=${VERBOSE:=0}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
-check_fail()
+log_test()
{
- if [ $1 -eq 0 ]; then
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ else
ret=1
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
fi
}
-netns_create()
+setup()
{
- local testns=$1
+ set -e
+ ip netns add testns
+ $IP link set dev lo up
+
+ $IP link add dummy0 type dummy
+ $IP link set dev dummy0 up
+ $IP address add 198.51.100.1/24 dev dummy0
+ $IP -6 address add 2001:db8:1::1/64 dev dummy0
+ set +e
- ip netns add $testns
- ip netns exec $testns ip link set dev lo up
}
-fib_unreg_unicast_test()
+cleanup()
{
- ret=0
+ $IP link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
- netns_create "testns"
+get_linklocal()
+{
+ local dev=$1
+ local addr
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ addr=$($IP -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ [ -z "$addr" ] && return 1
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ echo $addr
- ip netns exec testns ip link del dev dummy0
- check_err $?
+ return 0
+}
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_fail $?
+fib_unreg_unicast_test()
+{
+ echo
+ echo "Single path route test"
- ip netns del testns
+ setup
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route test"
- return 1
- fi
- echo "PASS: unicast route test"
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link del dev dummy0
+ set +e
+
+ echo " Nexthop device deleted"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch - no route"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch - no route"
+
+ cleanup
}
fib_unreg_multipath_test()
{
- ret=0
-
- netns_create "testns"
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ echo
+ echo "Multipath route test"
- ip netns exec testns ip link add dummy1 type dummy
- ip netns exec testns ip link set dev dummy1 up
+ setup
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ set -e
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 up
+ $IP address add 192.0.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
-
- ip netns exec testns ip route add 203.0.113.0/24 \
+ $IP route add 203.0.113.0/24 \
nexthop via 198.51.100.2 dev dummy0 \
nexthop via 192.0.2.2 dev dummy1
- ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+ $IP -6 route add 2001:db8:3::/64 \
nexthop via 2001:db8:1::2 dev dummy0 \
nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Start point"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_err $?
+ set -e
+ $IP link del dev dummy0
+ set +e
- ip netns exec testns ip link del dev dummy0
- check_err $?
+ echo " One nexthop device deleted"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 - multipath route removed on delete"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
# In IPv6 we do not flush the entire multipath route.
- check_err $?
+ log_test $? 0 "IPv6 - multipath down to single path"
- ip netns exec testns ip link del dev dummy1
+ set -e
+ $IP link del dev dummy1
+ set +e
- ip netns del testns
+ echo " Second nexthop device deleted"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 - no route"
- if [ $ret -ne 0 ]; then
- echo "FAIL: multipath route test"
- return 1
- fi
- echo "PASS: multipath route test"
+ cleanup
}
fib_unreg_test()
{
- echo "Running netdev unregister tests"
-
fib_unreg_unicast_test
fib_unreg_multipath_test
}
fib_down_unicast_test()
{
- ret=0
-
- netns_create "testns"
-
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ echo
+ echo "Single path, admin down"
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ setup
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
+ set -e
+ $IP link set dev dummy0 down
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_fail $?
+ echo " Route deleted on down"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
-
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route test"
- return 1
- fi
- echo "PASS: unicast route test"
+ cleanup
}
fib_down_multipath_test_do()
@@ -161,251 +186,395 @@ fib_down_multipath_test_do()
local down_dev=$1
local up_dev=$2
- ip netns exec testns ip route get fibmatch 203.0.113.1 \
+ $IP route get fibmatch 203.0.113.1 \
oif $down_dev &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+ log_test $? 2 "IPv4 fibmatch on down device"
+ $IP -6 route get fibmatch 2001:db8:3::1 \
oif $down_dev &> /dev/null
- check_fail $?
+ log_test $? 2 "IPv6 fibmatch on down device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 \
+ $IP route get fibmatch 203.0.113.1 \
oif $up_dev &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+ log_test $? 0 "IPv4 fibmatch on up device"
+ $IP -6 route get fibmatch 2001:db8:3::1 \
oif $up_dev &> /dev/null
- check_err $?
+ log_test $? 0 "IPv6 fibmatch on up device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+ $IP route get fibmatch 203.0.113.1 | \
grep $down_dev | grep -q "dead linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+ log_test $? 0 "IPv4 flags on down device"
+ $IP -6 route get fibmatch 2001:db8:3::1 | \
grep $down_dev | grep -q "dead linkdown"
- check_err $?
+ log_test $? 0 "IPv6 flags on down device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+ $IP route get fibmatch 203.0.113.1 | \
grep $up_dev | grep -q "dead linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+ log_test $? 1 "IPv4 flags on up device"
+ $IP -6 route get fibmatch 2001:db8:3::1 | \
grep $up_dev | grep -q "dead linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 flags on up device"
}
fib_down_multipath_test()
{
- ret=0
-
- netns_create "testns"
+ echo
+ echo "Admin down multipath"
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ setup
- ip netns exec testns ip link add dummy1 type dummy
- ip netns exec testns ip link set dev dummy1 up
+ set -e
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 up
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ $IP address add 192.0.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
-
- ip netns exec testns ip route add 203.0.113.0/24 \
+ $IP route add 203.0.113.0/24 \
nexthop via 198.51.100.2 dev dummy0 \
nexthop via 192.0.2.2 dev dummy1
- ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+ $IP -6 route add 2001:db8:3::/64 \
nexthop via 2001:db8:1::2 dev dummy0 \
nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Verify start point"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_err $?
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
+ set -e
+ $IP link set dev dummy0 down
+ set +e
+ echo " One device down, one up"
fib_down_multipath_test_do "dummy0" "dummy1"
- ip netns exec testns ip link set dev dummy0 up
- check_err $?
- ip netns exec testns ip link set dev dummy1 down
- check_err $?
+ set -e
+ $IP link set dev dummy0 up
+ $IP link set dev dummy1 down
+ set +e
+ echo " Other device down and up"
fib_down_multipath_test_do "dummy1" "dummy0"
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
-
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_fail $?
+ set -e
+ $IP link set dev dummy0 down
+ set +e
- ip netns exec testns ip link del dev dummy1
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
+ echo " Both devices down"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
- if [ $ret -ne 0 ]; then
- echo "FAIL: multipath route test"
- return 1
- fi
- echo "PASS: multipath route test"
+ $IP link del dev dummy1
+ cleanup
}
fib_down_test()
{
- echo "Running netdev down tests"
-
fib_down_unicast_test
fib_down_multipath_test
}
+# Local routes should not be affected when carrier changes.
fib_carrier_local_test()
{
- ret=0
-
- # Local routes should not be affected when carrier changes.
- netns_create "testns"
-
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ echo
+ echo "Local carrier tests - single path"
- ip netns exec testns ip link set dev dummy0 carrier on
+ setup
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ set -e
+ $IP link set dev dummy0 carrier on
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
- check_err $?
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+ $IP route get fibmatch 198.51.100.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+ log_test $? 1 "IPv4 - no linkdown flag"
+ $IP -6 route get fibmatch 2001:db8:1::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 - no linkdown flag"
- ip netns exec testns ip link set dev dummy0 carrier off
+ set -e
+ $IP link set dev dummy0 carrier off
+ sleep 1
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
- check_err $?
+ echo " Carrier off on nexthop"
+ $IP route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+ $IP route get fibmatch 198.51.100.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+ log_test $? 1 "IPv4 - linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:1::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 - linkdown flag set"
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+ set -e
+ $IP address add 192.0.2.1/24 dev dummy0
+ $IP -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
- ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null
- check_err $?
+ echo " Route to local address with carrier down"
+ $IP route get fibmatch 192.0.2.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 192.0.2.1 | \
+ $IP route get fibmatch 192.0.2.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \
+ log_test $? 1 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:2::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 linkdown flag set"
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
-
- if [ $ret -ne 0 ]; then
- echo "FAIL: local route carrier test"
- return 1
- fi
- echo "PASS: local route carrier test"
+ cleanup
}
fib_carrier_unicast_test()
{
ret=0
- netns_create "testns"
+ echo
+ echo "Single path route carrier test"
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ setup
- ip netns exec testns ip link set dev dummy0 carrier on
+ set -e
+ $IP link set dev dummy0 carrier on
+ set +e
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
-
- ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+ $IP route get fibmatch 198.51.100.2 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+ log_test $? 1 "IPv4 no linkdown flag"
+ $IP -6 route get fibmatch 2001:db8:1::2 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 no linkdown flag"
- ip netns exec testns ip link set dev dummy0 carrier off
+ set -e
+ $IP link set dev dummy0 carrier off
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ echo " Carrier down"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+ $IP route get fibmatch 198.51.100.2 | \
grep -q "linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+ log_test $? 0 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:1::2 | \
grep -q "linkdown"
- check_err $?
+ log_test $? 0 "IPv6 linkdown flag set"
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+ set -e
+ $IP address add 192.0.2.1/24 dev dummy0
+ $IP -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
- ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null
- check_err $?
+ echo " Second address added with carrier down"
+ $IP route get fibmatch 192.0.2.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 192.0.2.2 | \
+ $IP route get fibmatch 192.0.2.2 | \
grep -q "linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \
+ log_test $? 0 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:2::2 | \
grep -q "linkdown"
- check_err $?
+ log_test $? 0 "IPv6 linkdown flag set"
- ip netns exec testns ip link del dev dummy0
+ cleanup
+}
- ip netns del testns
+fib_carrier_test()
+{
+ fib_carrier_local_test
+ fib_carrier_unicast_test
+}
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route carrier test"
- return 1
+################################################################################
+# Tests on nexthop spec
+
+# run 'ip route add' with given spec
+add_rt()
+{
+ local desc="$1"
+ local erc=$2
+ local vrf=$3
+ local pfx=$4
+ local gw=$5
+ local dev=$6
+ local cmd out rc
+
+ [ "$vrf" = "-" ] && vrf="default"
+ [ -n "$gw" ] && gw="via $gw"
+ [ -n "$dev" ] && dev="dev $dev"
+
+ cmd="$IP route add vrf $vrf $pfx $gw $dev"
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n COMMAND: $cmd\n"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
fi
- echo "PASS: unicast route carrier test"
+ log_test $rc $erc "$desc"
}
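
The add_rt() helper above packs a whole test case into one line: description,
expected return code, VRF ("-" for the default VRF), prefix, gateway and
device. As a concrete illustration (assuming $IP is the ip wrapper defined
earlier in this script), the first IPv6 case in fib6_nexthop() below expands
to:

    ip route add vrf default 2001:db8:101::/64 via 2001:db8:1::2

with log_test() then comparing the command's exit code against the expected
one.
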
-fib_carrier_test()
+fib4_nexthop()
{
- echo "Running netdev carrier change tests"
+ echo
+ echo "IPv4 nexthop tests"
- fib_carrier_local_test
- fib_carrier_unicast_test
+ echo "<<< write me >>>"
}
+fib6_nexthop()
+{
+ local lldummy=$(get_linklocal dummy0)
+	local llv1=$(get_linklocal veth1)
+
+ if [ -z "$lldummy" ]; then
+ echo "Failed to get linklocal address for dummy0"
+ return 1
+ fi
+ if [ -z "$llv1" ]; then
+ echo "Failed to get linklocal address for veth1"
+ return 1
+ fi
+
+ echo
+ echo "IPv6 nexthop tests"
+
+ add_rt "Directly connected nexthop, unicast address" 0 \
+ - 2001:db8:101::/64 2001:db8:1::2
+ add_rt "Directly connected nexthop, unicast address with device" 0 \
+ - 2001:db8:102::/64 2001:db8:1::2 "dummy0"
+ add_rt "Gateway is linklocal address" 0 \
+ - 2001:db8:103::1/64 $llv1 "veth0"
+
+ # fails because LL address requires a device
+ add_rt "Gateway is linklocal address, no device" 2 \
+ - 2001:db8:104::1/64 $llv1
+
+ # local address can not be a gateway
+ add_rt "Gateway can not be local unicast address" 2 \
+ - 2001:db8:105::/64 2001:db8:1::1
+ add_rt "Gateway can not be local unicast address, with device" 2 \
+ - 2001:db8:106::/64 2001:db8:1::1 "dummy0"
+ add_rt "Gateway can not be a local linklocal address" 2 \
+ - 2001:db8:107::1/64 $lldummy "dummy0"
+
+ # VRF tests
+ add_rt "Gateway can be local address in a VRF" 0 \
+ - 2001:db8:108::/64 2001:db8:51::2
+ add_rt "Gateway can be local address in a VRF, with device" 0 \
+ - 2001:db8:109::/64 2001:db8:51::2 "veth0"
+ add_rt "Gateway can be local linklocal address in a VRF" 0 \
+ - 2001:db8:110::1/64 $llv1 "veth0"
+
+ add_rt "Redirect to VRF lookup" 0 \
+ - 2001:db8:111::/64 "" "red"
+
+ add_rt "VRF route, gateway can be local address in default VRF" 0 \
+ red 2001:db8:112::/64 2001:db8:51::1
+
+ # local address in same VRF fails
+ add_rt "VRF route, gateway can not be a local address" 2 \
+ red 2001:db8:113::1/64 2001:db8:2::1
+ add_rt "VRF route, gateway can not be a local addr with device" 2 \
+ red 2001:db8:114::1/64 2001:db8:2::1 "dummy1"
+}
+
+# Default VRF:
+# dummy0 - 198.51.100.1/24 2001:db8:1::1/64
+# veth0 - 192.0.2.1/24 2001:db8:51::1/64
+#
+# VRF red:
+# dummy1 - 192.168.2.1/24 2001:db8:2::1/64
+# veth1 - 192.0.2.2/24 2001:db8:51::2/64
+#
+# [ dummy0 veth0 ]--[ veth1 dummy1 ]
+
+fib_nexthop_test()
+{
+ setup
+
+ set -e
+
+ $IP -4 rule add pref 32765 table local
+ $IP -4 rule del pref 0
+ $IP -6 rule add pref 32765 table local
+ $IP -6 rule del pref 0
+
+ $IP link add red type vrf table 1
+ $IP link set red up
+ $IP -4 route add vrf red unreachable default metric 4278198272
+ $IP -6 route add vrf red unreachable default metric 4278198272
+
+ $IP link add veth0 type veth peer name veth1
+ $IP link set dev veth0 up
+ $IP address add 192.0.2.1/24 dev veth0
+ $IP -6 address add 2001:db8:51::1/64 dev veth0
+
+ $IP link set dev veth1 vrf red up
+ $IP address add 192.0.2.2/24 dev veth1
+ $IP -6 address add 2001:db8:51::2/64 dev veth1
+
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 vrf red up
+ $IP address add 192.168.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
+ set +e
+
+ sleep 1
+ fib4_nexthop
+ fib6_nexthop
+
+ (
+ $IP link del dev dummy1
+ $IP link del veth0
+ $IP link del red
+ ) 2>/dev/null
+ cleanup
+}
+
+################################################################################
+#
+
fib_test()
{
- fib_unreg_test
- fib_down_test
- fib_carrier_test
+ if [ -n "$TEST" ]; then
+ eval $TEST
+ else
+ fib_unreg_test
+ fib_down_test
+ fib_carrier_test
+ fib_nexthop_test
+ fi
}
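
With the dispatcher above, the TEST environment variable selects a single
test case via eval; left unset, the whole suite runs. A hypothetical
invocation (the script name here is illustrative):

    # Run only the carrier-change tests.
    TEST=fib_carrier_test ./fib_tests.sh

    # Run only the new nexthop-spec tests.
    TEST=fib_nexthop_test ./fib_tests.sh
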
if [ "$(id -u)" -ne 0 ];then
@@ -424,6 +593,9 @@ if [ $? -ne 0 ]; then
exit 0
fi
+# start clean
+cleanup &> /dev/null
+
fib_test
exit $ret
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000000..a793eef5b876
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000000..4a0964c42860
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+ br0
+ +
+ vrf-h1 | vrf-h2
+ + +---+----+ +
+ | | | |
+ 192.0.2.1/24 + + + + 192.0.2.2/24
+ swp1 swp2 swp3 swp4
+ + + + +
+ | | | |
+ +--------+ +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested, without noise from
+other systems.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-port LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loop back more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+ of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+ RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+ multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
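
As a minimal sketch of the idea described above (names made up; a veth pair
stands in for a physical loopback cable between two switch ports), a "host"
attached to the switch reduces to:

    # The cable: one end is the switch port, the other the host port.
    ip link add swp1 type veth peer name h1

    # The "host" itself is just a VRF enslaving the host-side port.
    ip link add vrf-h1 type vrf table 10
    ip link set dev h1 master vrf-h1
    ip link set dev vrf-h1 up
    ip link set dev h1 up
    ip address add 192.0.2.1/24 dev h1

This is exactly what the lib.sh helpers below (vrf_create(),
simple_if_init()) automate.
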
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000000..75d922438bc9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 seconds ageing time (ageing_time is in centiseconds).
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
new file mode 100755
index 000000000000..1cddf06f691d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 seconds ageing time (ageing_time is in centiseconds).
+ ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000000..5cd2aed97958
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000000..e819d049d9ce
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
+# Type of network interface to create
+NETIF_TYPE=veth
+# Whether to create virtual interfaces (veth) or not
+NETIF_CREATE=yes
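
Since lib.sh (below) sources forwarding.config when the file exists, copying
this sample is all that is needed to retarget the suite. A hypothetical
forwarding.config for a box with physical loopbacks (interface names are
site-specific):

    declare -A NETIFS
    NETIFS[p1]=enp3s0f0
    NETIFS[p2]=enp3s0f1
    NETIFS[p3]=enp3s0f2
    NETIFS[p4]=enp3s0f3
    # The ports already exist, so do not create veth pairs.
    NETIF_CREATE=no
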
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000000..1ac6c62271f3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,577 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+NETIF_TYPE=${NETIF_TYPE:=veth}
+NETIF_CREATE=${NETIF_CREATE:=yes}
+
+if [[ -f forwarding.config ]]; then
+ source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+ tc -j &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing JSON support"
+ exit 1
+ fi
+
+ tc filter help 2>&1 | grep block &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing shared block support"
+ exit 1
+ fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+ check_tc_version
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+ echo "SKIP: jq not installed"
+ exit 1
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+ echo "SKIP: $MZ not installed"
+ exit 1
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit 1
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+ if [[ "$count" -eq "0" ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ fi
+ count=$((count + 1))
+ NETIFS[p$count]="$1"
+ shift
+done
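
Because lib.sh is sourced, the loop above consumes the positional parameters
of the test script itself, so interface names given on the command line
override both the defaults and forwarding.config. A hypothetical run of one
of the four-port tests:

    ./bridge_vlan_unaware.sh eth0 eth1 eth2 eth3
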
+
+##############################################################################
+# Network interfaces configuration
+
+create_netif_veth()
+{
+ local i
+
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ local j=$((i+1))
+
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ ip link add ${NETIFS[p$i]} type veth \
+ peer name ${NETIFS[p$j]}
+ if [[ $? -ne 0 ]]; then
+ echo "Failed to create netif"
+ exit 1
+ fi
+ fi
+ i=$j
+ done
+}
+
+create_netif()
+{
+ case "$NETIF_TYPE" in
+ veth) create_netif_veth
+ ;;
+	*) echo "Can not create interfaces of type '$NETIF_TYPE'"
+ exit 1
+ ;;
+ esac
+}
+
+if [[ "$NETIF_CREATE" = "yes" ]]; then
+ create_netif
+fi
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: could not find all required interfaces"
+ exit 1
+ fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -ne 0 ]]; then
+ RET=$err
+ retmsg=$msg
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -eq 0 ]]; then
+ RET=1
+ retmsg=$msg
+ fi
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if [[ $RET -ne 0 ]]; then
+ EXIT_STATUS=1
+ printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
+ if [[ ! -z "$retmsg" ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ return 1
+ fi
+
+ printf "TEST: %-60s [PASS]\n" "$test_name $opt_str"
+ return 0
+}
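
RET, check_err()/check_fail() and log_test() together form the skeleton that
every test in this suite follows; a minimal sketch with placeholder commands:

    some_feature_test()
    {
        RET=0

        # Any non-zero status flips RET and records the message.
        command_that_should_succeed
        check_err $? "command unexpectedly failed"

        command_that_should_fail
        check_fail $? "command unexpectedly succeeded"

        # Prints [PASS] or [FAIL] based on RET and updates EXIT_STATUS.
        log_test "some feature"
    }
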
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
+
+setup_wait()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ while true; do
+ ip link show dev ${NETIFS[p$i]} up \
+ | grep 'state UP' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+ done
+
+ # Make sure links are ready.
+ sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+ if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+ echo "Pausing before cleanup, hit any key to continue"
+ read
+ fi
+}
+
+vrf_prepare()
+{
+ ip -4 rule add pref 32765 table local
+ ip -4 rule del pref 0
+ ip -6 rule add pref 32765 table local
+ ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+ ip -6 rule add pref 0 table local
+ ip -6 rule del pref 32765
+ ip -4 rule add pref 0 table local
+ ip -4 rule del pref 32765
+}
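
A note on the rule juggling above: by default the local table is consulted at
preference 0, before the l3mdev rule that VRFs install (preference 1000 on
kernels of this vintage), so locally generated and received traffic would
bypass the VRF's routing table. Re-adding the local lookup at preference
32765 pushes it behind the VRF rule. The intended order after vrf_prepare(),
roughly:

    # ip -4 rule show   (illustrative)
    # 1000:  from all lookup [l3mdev-table]
    # 32765: from all lookup local
    # 32766: from all lookup main
    # 32767: from all lookup default
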
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+ local vrf_name=$1
+
+ __last_tb_id=$((__last_tb_id + 1))
+ __TB_IDS[$vrf_name]=$__last_tb_id
+ return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+ local vrf_name=$1
+
+ return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_assign $vrf_name
+ tb_id=$?
+
+ ip link add dev $vrf_name type vrf table $tb_id
+ ip -4 route add table $tb_id unreachable default metric 4278198272
+ ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_lookup $vrf_name
+ tb_id=$?
+
+ ip -6 route del table $tb_id unreachable default metric 4278198272
+ ip -4 route del table $tb_id unreachable default metric 4278198272
+ ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+ local if_name=$1
+ local add_del=$2
+ local array
+
+ shift
+ shift
+ array=("${@}")
+
+ for addrstr in "${array[@]}"; do
+ ip address $add_del $addrstr dev $if_name
+ done
+}
+
+simple_if_init()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ __addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ __addr_add_del $if_name del "${array[@]}"
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
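simple_if_init() is the building block most "hosts" in this suite are made
of: it wraps an interface in its own VRF (named v<ifname>) and assigns the
given addresses, with simple_if_fini() undoing it symmetrically. Usage as
seen in the bridge tests above:

    simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
    simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
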
+master_name_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+ local if_name=$1
+
+ ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+ local bridge=$1
+ local ageing_time
+
+ # Need to divide by 100 to convert to seconds.
+ ageing_time=$(ip -j -d link show dev $bridge \
+ | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+ echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+ ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+ ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+ sysctl -q -w net.ipv4.conf.all.forwarding=1
+ sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+ sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+ sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+tc_offload_check()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ ethtool -k ${NETIFS[p$i]} \
+ | grep "hw-tc-offload: on" &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping"
+}
+
+ping6_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping6"
+}
+
+learning_test()
+{
+ local bridge=$1
+ local br_port1=$2 # Connected to `host1_if`.
+ local host1_if=$3
+ local host2_if=$4
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ RET=0
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ # Disable unknown unicast flooding on `br_port1` to make sure
+ # packets are only forwarded through the port after a matching
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_fail $? "Packet reached second host when should not"
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet did not reach second host when should"
+
+ # Wait for 10 seconds after the ageing time to make sure FDB
+ # record was aged-out.
+ ageing_time=$(bridge_ageing_time_get $bridge)
+ sleep $((ageing_time + 10))
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning off
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning on
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
+
+ bridge link set dev $br_port1 flood on
+
+ log_test "FDB learning"
+}
+
+flood_test_do()
+{
+ local should_flood=$1
+ local mac=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local err=0
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -ne 0 && $should_flood == "true" || \
+ $? -eq 0 && $should_flood == "false" ]]; then
+ err=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $err
+}
+
+flood_unicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+
+ bridge link set dev $br_port flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=01:00:5e:00:00:01
+ local ip=239.0.0.1
+
+ RET=0
+
+ bridge link set dev $br_port mcast_flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port mcast_flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+ # `br_port` is connected to `host2_if`
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+
+ flood_unicast_test $br_port $host1_if $host2_if
+ flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000000..cc6a14abfa87
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000000..3bc351008db6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,376 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+
+ ip route add 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ ip route add 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+
+ ip route add 192.0.2.0/24 vrf vrf-r2 \
+ nexthop via 169.254.2.12 dev $rp22 \
+ nexthop via 169.254.3.13 dev $rp23
+ ip route add 2001:db8:1::/64 vrf vrf-r2 \
+ nexthop via fe80:2::12 dev $rp22 \
+ nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+multipath_eval()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local packets_rp12=$4
+ local packets_rp13=$5
+ local weights_ratio packets_ratio diff
+
+ RET=0
+
+	# Compute the expected ratio first so that it can be logged even if
+	# one of the links saw no packets at all.
+	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+				| bc -l)
+	else
+		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+				| bc -l)
+	fi
+
+	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+		check_err 1 "Packet difference is 0"
+		log_test "Multipath"
+		log_info "Expected ratio $weights_ratio"
+		return
+	fi
+
+	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+				| bc -l)
+	else
+		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+				| bc -l)
+	fi
+
+ diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+ diff=${diff#-}
+
+ test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+ check_err $? "Too large discrepancy between expected and measured ratios"
+ log_test "$desc"
+ log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
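
A worked example of the tolerance check above: with weights 2:1 and a
measured split of, say, 13400:6600 packets,

    weights_ratio = 2.00
    packets_ratio = 13400 / 6600 = 2.03
    diff = |2.00 - 2.03| = 0.03
    0.03 / 2.00 = 0.015 <= 0.1  ->  within 10%, the test passes
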
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000000..3a6385ebd5d0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,202 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_test()
+{
+ local action=$1
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched without redirect rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress $action \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match incoming $action packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "mirred egress $action ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not dropped"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action ok
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see passed packet"
+
+ tc_check_packets "dev $swp1 ingress" 102 2
+ check_fail $? "Packet was dropped and it should not reach here"
+
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+ tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_fail $? "Saw packet without trap rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action trap
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not trapped"
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_test "redirect"
+mirred_egress_test "mirror"
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ gact_drop_and_ok_test
+ mirred_egress_test "redirect"
+ mirred_egress_test "mirror"
+ gact_trap_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000000..2fd15226974b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_fail $? "matched on filter in unreachable chain"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $h2mac action goto chain 1
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter with goto chain action"
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_err $? "Did not match on correct filter in chain 1"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ unreachable_chain_test
+ gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000000..9d3b64a2a264
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local ret
+
+	output="$(tc -j -s filter show $id)"
+	# Capture tc's exit code here: jq returns 0 when its input is empty
+	# (""), so piping tc straight into jq would mask a tc failure.
+	ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output | \
+ jq -e ".[] \
+ | select(.options.handle == $handle) \
+ | select(.options.actions[0].stats.packets == $count)" \
+ &> /dev/null
+ return $?
+}
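
tc_check_packets() is the counting primitive the tc tests below are built on:
it succeeds only if the filter with the given handle has seen exactly the
given number of packets. Typical usage, as in the tests that follow:

    tc_check_packets "dev $h2 ingress" 101 1
    check_err $? "Did not match on correct filter"
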
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000000..032b882adfc0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_mac $h1mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_ip 198.51.100.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_ip 192.0.2.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ match_dst_mac_test
+ match_src_mac_test
+ match_dst_ip_test
+ match_src_ip_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000000..077b98048ef4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+ simple_if_init $swp2 192.0.2.2/24
+ tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ simple_if_fini $swp2 192.0.2.2/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+ RET=0
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 1
+ check_err $? "Did not match first incoming packet on a block"
+
+ $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 2
+ check_err $? "Did not match second incoming packet on a block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp2 address $swmac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ shared_block_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh
new file mode 100755
index 000000000000..88795b510b32
--- /dev/null
+++ b/tools/testing/selftests/net/in_netns.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Execute a subprocess in a network namespace
+
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+ip netns exec "${NETNS}" "$@"
+exit "$?"
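
A trivial example of the wrapper (everything after the script name runs
inside the throwaway namespace, which is deleted on exit):

    ./in_netns.sh ip address show
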
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index e11fe84de0fd..406cc70c571d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -14,6 +14,9 @@
* - SOCK_DGRAM
* - SOCK_RAW
*
+ * PF_RDS
+ * - SOCK_SEQPACKET
+ *
* Start this program on two connected hosts, one in send mode and
* the other with option '-r' to put it in receiver mode.
*
@@ -53,6 +56,7 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <linux/rds.h>
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5
@@ -164,17 +168,39 @@ static int do_accept(int fd)
return fd;
}
-static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
+{
+ struct cmsghdr *cm;
+
+ if (!msg->msg_control)
+ error(1, errno, "NULL cookie");
+ cm = (void *)msg->msg_control;
+ cm->cmsg_len = CMSG_LEN(sizeof(cookie));
+ cm->cmsg_level = SOL_RDS;
+ cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
{
int ret, len, i, flags;
+ static uint32_t cookie;
+ char ckbuf[CMSG_SPACE(sizeof(cookie))];
len = 0;
for (i = 0; i < msg->msg_iovlen; i++)
len += msg->msg_iov[i].iov_len;
flags = MSG_DONTWAIT;
- if (do_zerocopy)
+ if (do_zerocopy) {
flags |= MSG_ZEROCOPY;
+ if (domain == PF_RDS) {
+ memset(&msg->msg_control, 0, sizeof(msg->msg_control));
+ msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
+ msg->msg_control = (struct cmsghdr *)ckbuf;
+ add_zcopy_cookie(msg, ++cookie);
+ }
+ }
ret = sendmsg(fd, msg, flags);
if (ret == -1 && errno == EAGAIN)
@@ -190,6 +216,10 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
if (do_zerocopy && ret)
expected_completions++;
}
+ if (do_zerocopy && domain == PF_RDS) {
+ msg->msg_control = NULL;
+ msg->msg_controllen = 0;
+ }
return true;
}
@@ -216,7 +246,9 @@ static void do_sendmsg_corked(int fd, struct msghdr *msg)
msg->msg_iov[0].iov_len = payload_len + extra_len;
extra_len = 0;
- do_sendmsg(fd, msg, do_zerocopy);
+ do_sendmsg(fd, msg, do_zerocopy,
+ (cfg_dst_addr.ss_family == AF_INET ?
+ PF_INET : PF_INET6));
}
do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
@@ -300,14 +332,65 @@ static int do_setup_tx(int domain, int type, int protocol)
if (cfg_zerocopy)
do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
- if (domain != PF_PACKET)
+ if (domain != PF_PACKET && domain != PF_RDS)
if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
error(1, errno, "connect");
+ if (domain == PF_RDS) {
+ if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
+ error(1, errno, "bind");
+ }
+
return fd;
}
-static bool do_recv_completion(int fd)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
+{
+ int i;
+
+ if (ck->num > RDS_MAX_ZCOOKIES)
+ error(1, 0, "Returned %d cookies, max expected %d\n",
+ ck->num, RDS_MAX_ZCOOKIES);
+ for (i = 0; i < ck->num; i++)
+ if (cfg_verbose >= 2)
+ fprintf(stderr, "%d\n", ck->cookies[i]);
+ return ck->num;
+}
+
+static bool do_recvmsg_completion(int fd)
+{
+ char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+ struct rds_zcopy_cookies *ck;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ bool ret = false;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ if (recvmsg(fd, &msg, MSG_DONTWAIT))
+ return ret;
+
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_RDS &&
+ cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+ ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+ completions += do_process_zerocopy_cookies(ck);
+ ret = true;
+ break;
+ }
+ error(0, 0, "ignoring cmsg at level %d type %d\n",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+ return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
{
struct sock_extended_err *serr;
struct msghdr msg = {};
@@ -316,6 +399,9 @@ static bool do_recv_completion(int fd)
int ret, zerocopy;
char control[100];
+ if (domain == PF_RDS)
+ return do_recvmsg_completion(fd);
+
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
@@ -337,6 +423,7 @@ static bool do_recv_completion(int fd)
cm->cmsg_level, cm->cmsg_type);
serr = (void *) CMSG_DATA(cm);
+
if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
if (serr->ee_errno != 0)
@@ -371,20 +458,20 @@ static bool do_recv_completion(int fd)
}
/* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
{
- while (do_recv_completion(fd)) {}
+ while (do_recv_completion(fd, domain)) {}
}
/* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
{
int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
while (completions < expected_completions &&
gettimeofday_ms() < tstop) {
- if (do_poll(fd, POLLERR))
- do_recv_completions(fd);
+ if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+ do_recv_completions(fd, domain);
}
if (completions < expected_completions)
@@ -444,6 +531,13 @@ static void do_tx(int domain, int type, int protocol)
msg.msg_iovlen++;
}
+ if (domain == PF_RDS) {
+ msg.msg_name = &cfg_dst_addr;
+ msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ }
+
iov[2].iov_base = payload;
iov[2].iov_len = cfg_payload_len;
msg.msg_iovlen++;
@@ -454,17 +548,17 @@ static void do_tx(int domain, int type, int protocol)
if (cfg_cork)
do_sendmsg_corked(fd, &msg);
else
- do_sendmsg(fd, &msg, cfg_zerocopy);
+ do_sendmsg(fd, &msg, cfg_zerocopy, domain);
while (!do_poll(fd, POLLOUT)) {
if (cfg_zerocopy)
- do_recv_completions(fd);
+ do_recv_completions(fd, domain);
}
} while (gettimeofday_ms() < tstop);
if (cfg_zerocopy)
- do_recv_remaining_completions(fd);
+ do_recv_remaining_completions(fd, domain);
if (close(fd))
error(1, errno, "close");
@@ -610,6 +704,7 @@ static void parse_opts(int argc, char **argv)
40 /* max tcp options */;
int c;
char *daddr = NULL, *saddr = NULL;
+ char *cfg_test;
cfg_payload_len = max_payload_len;
@@ -667,6 +762,14 @@ static void parse_opts(int argc, char **argv)
break;
}
}
+
+ cfg_test = argv[argc - 1];
+ if (strcmp(cfg_test, "rds") == 0) {
+ if (!daddr)
+ error(1, 0, "-D <server addr> required for PF_RDS\n");
+ if (!cfg_rx && !saddr)
+ error(1, 0, "-S <client addr> required for PF_RDS\n");
+ }
setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
@@ -699,6 +802,8 @@ int main(int argc, char **argv)
do_test(cfg_family, SOCK_STREAM, 0);
else if (!strcmp(cfg_test, "udp"))
do_test(cfg_family, SOCK_DGRAM, 0);
+ else if (!strcmp(cfg_test, "rds"))
+ do_test(PF_RDS, SOCK_SEQPACKET, 0);
else
error(1, 0, "unknown cfg_test %s", cfg_test);
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
new file mode 100755
index 000000000000..1e428781a625
--- /dev/null
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -0,0 +1,471 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that route PMTU values match expectations, and that initial device MTU
+# values are assigned correctly
+#
+# Tests currently implemented:
+#
+# - pmtu_vti4_exception
+# Set up vti tunnel on top of veth, with xfrm states and policies, in two
+# namespaces with matching endpoints. Check that route exception is not
+# created if link layer MTU is not exceeded, then exceed it and check that
+# exception is created with the expected PMTU. The approach described
+# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
+# changes alone won't affect PMTU
+#
+# - pmtu_vti6_exception
+# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
+# namespaces with matching endpoints. Check that route exception is
+# created by exceeding link layer MTU with ping to other endpoint. Then
+# decrease and increase MTU of tunnel, checking that route exception PMTU
+# changes accordingly
+#
+# - pmtu_vti4_default_mtu
+# Set up vti4 tunnel on top of veth, in two namespaces with matching
+# endpoints. Check that MTU assigned to vti interface is the MTU of the
+# lower layer (veth) minus additional lower layer headers (zero, for veth)
+# minus IPv4 header length
+#
+# - pmtu_vti6_default_mtu
+# Same as above, for IPv6
+#
+# - pmtu_vti4_link_add_mtu
+# Set up vti4 interface passing MTU value at link creation, check MTU is
+# configured, and that link is not created with invalid MTU values
+#
+# - pmtu_vti6_link_add_mtu
+# Same as above, for IPv6
+#
+# - pmtu_vti6_link_change_mtu
+# Set up two dummy interfaces with different MTUs, create a vti6 tunnel
+# and check that configured MTU is used on link creation and changes, and
+# that MTU is properly calculated instead when MTU is not configured from
+# userspace
+
+tests="
+ pmtu_vti6_exception vti6: PMTU exceptions
+ pmtu_vti4_exception vti4: PMTU exceptions
+ pmtu_vti4_default_mtu vti4: default MTU assignment
+ pmtu_vti6_default_mtu vti6: default MTU assignment
+ pmtu_vti4_link_add_mtu vti4: MTU setting on link creation
+ pmtu_vti6_link_add_mtu vti6: MTU setting on link creation
+ pmtu_vti6_link_change_mtu vti6: MTU changes on link changes"
+
+NS_A="ns-$(mktemp -u XXXXXX)"
+NS_B="ns-$(mktemp -u XXXXXX)"
+ns_a="ip netns exec ${NS_A}"
+ns_b="ip netns exec ${NS_B}"
+
+veth4_a_addr="192.168.1.1"
+veth4_b_addr="192.168.1.2"
+veth4_mask="24"
+veth6_a_addr="fd00:1::a"
+veth6_b_addr="fd00:1::b"
+veth6_mask="64"
+
+vti4_a_addr="192.168.2.1"
+vti4_b_addr="192.168.2.2"
+vti4_mask="24"
+vti6_a_addr="fd00:2::a"
+vti6_b_addr="fd00:2::b"
+vti6_mask="64"
+
+dummy6_0_addr="fc00:1000::0"
+dummy6_1_addr="fc00:1001::0"
+dummy6_mask="64"
+
+cleanup_done=1
+err_buf=
+
+err() {
+ err_buf="${err_buf}${1}
+"
+}
+
+err_flush() {
+ echo -n "${err_buf}"
+ err_buf=
+}
+
+setup_namespaces() {
+ ip netns add ${NS_A} || return 1
+ ip netns add ${NS_B}
+}
+
+setup_veth() {
+ ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+ ${ns_a} ip link set veth_b netns ${NS_B}
+
+ ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+ ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+
+ ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+ ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+
+ ${ns_a} ip link set veth_a up
+ ${ns_b} ip link set veth_b up
+}
+
+setup_vti() {
+ proto=${1}
+ veth_a_addr="${2}"
+ veth_b_addr="${3}"
+ vti_a_addr="${4}"
+ vti_b_addr="${5}"
+ vti_mask=${6}
+
+ [ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
+
+ ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+ ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+
+ ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+ ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+
+ ${ns_a} ip link set vti${proto}_a up
+ ${ns_b} ip link set vti${proto}_b up
+
+ sleep 1
+}
+
+setup_vti4() {
+ setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
+}
+
+setup_vti6() {
+ setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
+}
+
+setup_xfrm() {
+ proto=${1}
+ veth_a_addr="${2}"
+ veth_b_addr="${3}"
+
+ ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
+ ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+ ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+
+ ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+ ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+}
+
+setup_xfrm4() {
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
+}
+
+setup_xfrm6() {
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
+}
+
+setup() {
+ [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return 1
+
+ cleanup_done=0
+ for arg do
+ eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
+ done
+}
+
+cleanup() {
+ [ ${cleanup_done} -eq 1 ] && return
+ ip netns del ${NS_A} 2> /dev/null
+ ip netns del ${NS_B} 2> /dev/null
+ cleanup_done=1
+}
+
+mtu() {
+ ns_cmd="${1}"
+ dev="${2}"
+ mtu="${3}"
+
+ ${ns_cmd} ip link set dev ${dev} mtu ${mtu}
+}
+
+mtu_parse() {
+ input="${1}"
+
+ next=0
+ for i in ${input}; do
+ [ ${next} -eq 1 ] && echo "${i}" && return
+ [ "${i}" = "mtu" ] && next=1
+ done
+}
+
+link_get() {
+ ns_cmd="${1}"
+ name="${2}"
+
+ ${ns_cmd} ip link show dev "${name}"
+}
+
+link_get_mtu() {
+ ns_cmd="${1}"
+ name="${2}"
+
+ mtu_parse "$(link_get "${ns_cmd}" ${name})"
+}
+
+route_get_dst_exception() {
+ ns_cmd="${1}"
+ dst="${2}"
+
+ ${ns_cmd} ip route get "${dst}"
+}
+
+route_get_dst_pmtu_from_exception() {
+ ns_cmd="${1}"
+ dst="${2}"
+
+ mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+}
+
+test_pmtu_vti4_exception() {
+ setup namespaces veth vti4 xfrm4 || return 2
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # ESP overhead for rfc4106: SPI (4), SN (4), IV (8), ICV (16),
+ # pad length (1), next header (1)
+ esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
+ # ping payload: subtract IPv4 header (20) and ICMP header (8)
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_a ${veth_mtu}
+ mtu "${ns_b}" veth_b ${veth_mtu}
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+ if [ "${pmtu}" != "" ]; then
+ err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
+ return 1
+ fi
+
+ # Now exceed link layer MTU by one byte, check that exception is created
+ ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+ if [ "${pmtu}" = "" ]; then
+ err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
+ return 1
+ fi
+
+ # ...with the right PMTU value
+ if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
+ err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
+ return 1
+ fi
+}
+
+test_pmtu_vti6_exception() {
+ setup namespaces veth vti6 xfrm6 || return 2
+ fail=0
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_a 4000
+ mtu "${ns_b}" veth_b 4000
+ mtu "${ns_a}" vti6_a 5000
+ mtu "${ns_b}" vti6_b 5000
+ ${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+
+ # Check that exception was created
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
+ err " tunnel exceeding link layer MTU didn't create route exception"
+ return 1
+ fi
+
+ # Decrease tunnel MTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" vti6_a 3000
+
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
+ err " decreasing tunnel MTU didn't decrease route exception PMTU"
+ fail=1
+ fi
+
+ # Increase tunnel MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" vti6_a 9000
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
+ err " increasing tunnel MTU didn't increase route exception PMTU"
+ fail=1
+ fi
+
+ return ${fail}
+}
+
+test_pmtu_vti4_default_mtu() {
+ setup namespaces veth vti4 || return 2
+
+ # Check that MTU of vti device is MTU of veth minus IPv4 header length
+ veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+ vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
+ err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
+ return 1
+ fi
+}
+
+test_pmtu_vti6_default_mtu() {
+ setup namespaces veth vti6 || return 2
+
+ # Check that MTU of vti device is MTU of veth minus IPv6 header length
+ veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+ vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
+ err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
+ return 1
+ fi
+}
+
+test_pmtu_vti4_link_add_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ [ $? -ne 0 ] && err " vti not supported" && return 2
+ ${ns_a} ip link del vti4_a
+
+ fail=0
+
+ min=68
+ max=$((65528 - 20))
+ # Check invalid values first
+ for v in $((min - 1)) $((max + 1)); do
+ ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+ # This can fail, or MTU can be adjusted to a proper value
+ [ $? -ne 0 ] && continue
+ mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+ err " vti tunnel created with invalid MTU ${mtu}"
+ fail=1
+ fi
+ ${ns_a} ip link del vti4_a
+ done
+
+ # Now check valid values
+ for v in ${min} 1300 ${max}; do
+ ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ ${ns_a} ip link del vti4_a
+ if [ "${mtu}" != "${v}" ]; then
+ err " vti MTU ${mtu} doesn't match configured value ${v}"
+ fail=1
+ fi
+ done
+
+ return ${fail}
+}
+
+test_pmtu_vti6_link_add_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ [ $? -ne 0 ] && err " vti6 not supported" && return 2
+ ${ns_a} ip link del vti6_a
+
+ fail=0
+
+ min=1280
+ max=$((65535 - 40))
+ # Check invalid values first
+ for v in $((min - 1)) $((max + 1)); do
+ ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+ # This can fail, or MTU can be adjusted to a proper value
+ [ $? -ne 0 ] && continue
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+ err " vti6 tunnel created with invalid MTU ${v}"
+ fail=1
+ fi
+ ${ns_a} ip link del vti6_a
+ done
+
+ # Now check valid values
+ for v in 1280 1300 $((65535 - 40)); do
+ ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ ${ns_a} ip link del vti6_a
+ if [ "${mtu}" != "${v}" ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value ${v}"
+ fail=1
+ fi
+ done
+
+ return ${fail}
+}
+
+test_pmtu_vti6_link_change_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add dummy0 mtu 1500 type dummy
+ [ $? -ne 0 ] && err " dummy not supported" && return 2
+ ${ns_a} ip link add dummy1 mtu 3000 type dummy
+ ${ns_a} ip link set dummy0 up
+ ${ns_a} ip link set dummy1 up
+
+ ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+ ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+
+ fail=0
+
+ # Create vti6 interface bound to device, passing MTU, check it
+ ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne 1300 ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value 1300"
+ fail=1
+ fi
+
+ # Move to another device with different MTU, without passing MTU, check
+ # MTU is adjusted
+ ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne $((3000 - 40)) ]; then
+ err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
+ fail=1
+ fi
+
+ # Move it back, passing MTU, check MTU is not overridden
+ ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne 1280 ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value 1280"
+ fail=1
+ fi
+
+ return ${fail}
+}
+
+trap cleanup EXIT
+
+exitcode=0
+desc=0
+IFS="
+"
+for t in ${tests}; do
+ [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+
+ (
+ unset IFS
+ eval test_${name}
+ ret=$?
+ cleanup
+
+ if [ $ret -eq 0 ]; then
+ printf "TEST: %-60s [ OK ]\n" "${t}"
+ elif [ $ret -eq 1 ]; then
+ printf "TEST: %-60s [FAIL]\n" "${t}"
+ err_flush
+ exit 1
+ elif [ $ret -eq 2 ]; then
+ printf "TEST: %-60s [SKIP]\n" "${t}"
+ err_flush
+ fi
+ )
+ [ $? -ne 0 ] && exitcode=1
+done
+
+exit ${exitcode}
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 989f917068d1..bd9b9632c72b 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -50,6 +50,7 @@
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/if_packet.h>
+#include <net/if.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
@@ -73,14 +74,29 @@
* @return -1 if mode is bad, a valid socket otherwise */
static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
{
+ struct sockaddr_ll addr = {0};
int fd, val;
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
if (fd < 0) {
perror("socket packet");
exit(1);
}
+ pair_udp_setfilter(fd);
+
+ addr.sll_family = AF_PACKET;
+ addr.sll_protocol = htons(ETH_P_IP);
+ addr.sll_ifindex = if_nametoindex("lo");
+ if (addr.sll_ifindex == 0) {
+ perror("if_nametoindex");
+ exit(1);
+ }
+ if (bind(fd, (void *) &addr, sizeof(addr))) {
+ perror("bind packet");
+ exit(1);
+ }
+
val = (((int) typeflags) << 16) | group_id;
if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
if (close(fd)) {
@@ -90,7 +106,6 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
return -1;
}
- pair_udp_setfilter(fd);
return fd;
}
@@ -128,6 +143,8 @@ static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
static void sock_fanout_set_ebpf(int fd)
{
+ static char log_buf[65536];
+
const int len_off = __builtin_offsetof(struct __sk_buff, len);
struct bpf_insn prog[] = {
{ BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },
@@ -140,7 +157,6 @@ static void sock_fanout_set_ebpf(int fd)
{ BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },
{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
};
- char log_buf[512];
union bpf_attr attr;
int pfd;
@@ -228,7 +244,7 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[])
if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
(!(ret[0] == expect[1] && ret[1] == expect[0]))) {
- fprintf(stderr, "ERROR: incorrect queue lengths\n");
+ fprintf(stderr, "warning: incorrect queue lengths\n");
return 1;
}
@@ -347,7 +363,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
uint8_t type = typeflags & 0xFF;
int fds[2], fds_udp[2][2], ret;
- fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
+ fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
+ typeflags, PORT_BASE, PORT_BASE + port_off);
fds[0] = sock_fanout_open(typeflags, 0);
fds[1] = sock_fanout_open(typeflags, 0);
@@ -418,7 +435,7 @@ int main(int argc, char **argv)
const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } };
- int port_off = 2, tries = 5, ret;
+ int port_off = 2, tries = 20, ret;
test_control_single();
test_control_group();
@@ -427,10 +444,14 @@ int main(int argc, char **argv)
/* find a set of ports that do not collide onto the same socket */
ret = test_datapath(PACKET_FANOUT_HASH, port_off,
expect_hash[0], expect_hash[1]);
- while (ret && tries--) {
+ while (ret) {
fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
expect_hash[0], expect_hash[1]);
+ if (!--tries) {
+ fprintf(stderr, "too many collisions\n");
+ return 1;
+ }
}
ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index a622eeecc3a6..e6f485235435 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -517,6 +517,7 @@ kci_test_gretap()
ip link help gretap 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: gretap: iproute2 too old"
+ ip netns del "$testns"
return 1
fi
@@ -543,6 +544,7 @@ kci_test_gretap()
if [ $ret -ne 0 ]; then
echo "FAIL: gretap"
+ ip netns del "$testns"
return 1
fi
echo "PASS: gretap"
@@ -565,6 +567,7 @@ kci_test_ip6gretap()
ip link help ip6gretap 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: ip6gretap: iproute2 too old"
+ ip netns del "$testns"
return 1
fi
@@ -591,6 +594,7 @@ kci_test_ip6gretap()
if [ $ret -ne 0 ]; then
echo "FAIL: ip6gretap"
+ ip netns del "$testns"
return 1
fi
echo "PASS: ip6gretap"
@@ -655,6 +659,7 @@ kci_test_erspan()
if [ $ret -ne 0 ]; then
echo "FAIL: erspan"
+ ip netns del "$testns"
return 1
fi
echo "PASS: erspan"
@@ -720,6 +725,7 @@ kci_test_ip6erspan()
if [ $ret -ne 0 ]; then
echo "FAIL: ip6erspan"
+ ip netns del "$testns"
return 1
fi
echo "PASS: ip6erspan"
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 21fe149e3de1..bea079edc278 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -9,7 +9,7 @@ fi
echo "--------------------"
echo "running psock_fanout test"
echo "--------------------"
-./psock_fanout
+./in_netns.sh ./psock_fanout
if [ $? -ne 0 ]; then
echo "[FAIL]"
else
@@ -19,7 +19,7 @@ fi
echo "--------------------"
echo "running psock_tpacket test"
echo "--------------------"
-./psock_tpacket
+./in_netns.sh ./psock_tpacket
if [ $? -ne 0 ]; then
echo "[FAIL]"
else
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c
index 5df07047ca86..81a98a240456 100644
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.c
+++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c
@@ -68,9 +68,11 @@ static int cfg_num_pkts = 4;
static int do_ipv4 = 1;
static int do_ipv6 = 1;
static int cfg_payload_len = 10;
+static int cfg_poll_timeout = 100;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
static bool cfg_loop_nodata;
+static bool cfg_no_delay;
static uint16_t dest_port = 9000;
static struct sockaddr_in daddr;
@@ -171,7 +173,7 @@ static void __poll(int fd)
memset(&pollfd, 0, sizeof(pollfd));
pollfd.fd = fd;
- ret = poll(&pollfd, 1, 100);
+ ret = poll(&pollfd, 1, cfg_poll_timeout);
if (ret != 1)
error(1, errno, "poll");
}
@@ -371,7 +373,8 @@ static void do_test(int family, unsigned int opt)
error(1, errno, "send");
/* wait for all errors to be queued, else ACKs arrive OOO */
- usleep(50 * 1000);
+ if (!cfg_no_delay)
+ usleep(50 * 1000);
__poll(fd);
@@ -392,6 +395,9 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -4: only IPv4\n"
" -6: only IPv6\n"
" -h: show this message\n"
+ " -c N: number of packets for each test\n"
+ " -D: no delay between packets\n"
+ " -F: poll() waits forever for an event\n"
" -I: request PKTINFO\n"
" -l N: send N bytes at a time\n"
" -n: set no-payload option\n"
@@ -409,7 +415,7 @@ static void parse_opt(int argc, char **argv)
int proto_count = 0;
char c;
- while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
+ while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -417,6 +423,15 @@ static void parse_opt(int argc, char **argv)
case '6':
do_ipv4 = 0;
break;
+ case 'c':
+ cfg_num_pkts = strtoul(optarg, NULL, 10);
+ break;
+ case 'D':
+ cfg_no_delay = true;
+ break;
+ case 'F':
+ cfg_poll_timeout = -1;
+ break;
case 'I':
cfg_do_pktinfo = true;
break;
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 39fd362415cf..0f2698f9fd6d 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -57,7 +57,7 @@ volatile int gotsig;
void sighandler(int sig, siginfo_t *info, void *ctx)
{
- struct ucontext *ucp = ctx;
+ ucontext_t *ucp = ctx;
if (!testing) {
signal(sig, SIG_DFL);
diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c
index 35ade7406dcd..3ae77ba93208 100644
--- a/tools/testing/selftests/powerpc/mm/subpage_prot.c
+++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c
@@ -135,6 +135,16 @@ static int run_test(void *addr, unsigned long size)
return 0;
}
+static int syscall_available(void)
+{
+ int rc;
+
+ errno = 0;
+ rc = syscall(__NR_subpage_prot, 0, 0, 0);
+
+ return rc == 0 || (errno != ENOENT && errno != ENOSYS);
+}
+
int test_anon(void)
{
unsigned long align;
@@ -145,6 +155,8 @@ int test_anon(void)
void *mallocblock;
unsigned long mallocsize;
+ SKIP_IF(!syscall_available());
+
if (getpagesize() != 0x10000) {
fprintf(stderr, "Kernel page size must be 64K!\n");
return 1;
@@ -180,6 +192,8 @@ int test_file(void)
off_t filesize;
int fd;
+ SKIP_IF(!syscall_available());
+
fd = open(file_name, O_RDWR);
if (fd == -1) {
perror("failed to open file");
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index a23453943ad2..5c72ff978f27 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -16,7 +16,7 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S
$(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
-$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
+$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c
$(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 5d92c23ee6cb..179d592f0073 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -255,6 +255,8 @@ int tm_trap_test(void)
struct sigaction trap_sa;
+ SKIP_IF(!have_htm());
+
trap_sa.sa_flags = SA_SIGINFO;
trap_sa.sa_sigaction = trap_signal_handler;
sigaction(SIGTRAP, &trap_sa, NULL);
diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config
index 6a8e5a9bfc10..d148f9f89fb6 100644
--- a/tools/testing/selftests/pstore/config
+++ b/tools/testing/selftests/pstore/config
@@ -2,3 +2,4 @@ CONFIG_MISC_FILESYSTEMS=y
CONFIG_PSTORE=y
CONFIG_PSTORE_PMSG=y
CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=m
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 0b457e8e0f0c..5df609950a66 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -141,6 +141,15 @@ struct seccomp_data {
#define SECCOMP_FILTER_FLAG_LOG 2
#endif
+#ifndef PTRACE_SECCOMP_GET_METADATA
+#define PTRACE_SECCOMP_GET_METADATA 0x420d
+
+struct seccomp_metadata {
+ __u64 filter_off; /* Input: which filter */
+ __u64 flags; /* Output: filter's flags */
+};
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -2845,6 +2854,58 @@ TEST(get_action_avail)
EXPECT_EQ(errno, EOPNOTSUPP);
}
+TEST(get_metadata)
+{
+ pid_t pid;
+ int pipefd[2];
+ char buf;
+ struct seccomp_metadata md;
+
+ ASSERT_EQ(0, pipe(pipefd));
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ /* one with log, one without */
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_LOG, &prog));
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+
+ ASSERT_EQ(0, close(pipefd[0]));
+ ASSERT_EQ(1, write(pipefd[1], "1", 1));
+ ASSERT_EQ(0, close(pipefd[1]));
+
+ while (1)
+ sleep(100);
+ }
+
+ ASSERT_EQ(0, close(pipefd[1]));
+ ASSERT_EQ(1, read(pipefd[0], &buf, 1));
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
+ ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+
+ md.filter_off = 0;
+ ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+ EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
+ EXPECT_EQ(md.filter_off, 0);
+
+ md.filter_off = 1;
+ ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+ EXPECT_EQ(md.flags, 0);
+ EXPECT_EQ(md.filter_off, 1);
+
+ ASSERT_EQ(0, kill(pid, SIGKILL));
+}
+
/*
* TODO:
* - add microbenchmarks
diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index b3c8ba3cb668..d0121a8a3523 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -30,7 +30,7 @@ $(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
$(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
$(OBJS): $(OUTPUT)/%.o: %.c
- $(CC) -c $^ -o $@
+ $(CC) -c $^ -o $@ $(CFLAGS)
$(TESTS): $(OUTPUT)/%.o: %.c
$(CC) -c $^ -o $@
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 970ff294fec8..3a0336782d2d 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -14,11 +14,11 @@ REQUIREMENTS
* The kernel must have network namespace support
-* The kernel must have veth support available, as a veth pair is created
+* The kernel must have veth support available, as a veth pair is created
prior to running the tests.
-* All tc-related features must be built in or available as modules.
- To check what is required in current setup run:
+* All tc-related features being tested must be built in or available as
+ modules. To check what is required in the current setup, run:
./tdc.py -c
Note:
@@ -44,10 +44,13 @@ using the -p option when running tdc:
RUNNING TDC
-----------
-To use tdc, root privileges are required. tdc will not run otherwise.
+To use tdc, root privileges are required. This is because the
+commands being tested must be run as root. The code that enforces
+execution by root uid has been moved into a plugin (see PLUGIN
+ARCHITECTURE, below).
-All tests are executed inside a network namespace to prevent conflicts
-within the host.
+If nsPlugin is linked, all tests are executed inside a network
+namespace to prevent conflicts within the host.
Running tdc without any arguments will run all tests. Refer to the section
on command line arguments for more information, or run:
@@ -59,6 +62,33 @@ output captured from the failing test will be printed immediately following
the failed test in the TAP output.
+OVERVIEW OF TDC EXECUTION
+-------------------------
+
+One run of tests is considered a "test suite" (this will be refined in the
+future). A test suite has one or more test cases in it.
+
+A test case has four stages:
+
+ - setup
+ - execute
+ - verify
+ - teardown
+
+The setup and teardown stages can run zero or more commands. The setup
+stage does some setup if the test needs it. The teardown stage undoes
+the setup and returns the system to a "neutral" state so any other test
+can be run next. These two stages require any commands run to return
+success, but do not otherwise verify the results.
+
+The execute and verify stages each run one command. The execute stage
+tests the return code against one or more acceptable values. The
+verify stage checks the return code for success, and also compares
+the stdout with a regular expression.
+
+Each of the commands in any stage will run in a shell instance.
+
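+As an illustration only (this is a sketch, not the code in tdc.py; the
+field names follow the JSON test case format described in
+creating-testcases/), a runner for these stages could look like:
+
+    import re
+    import subprocess
+
+    def run_case(case):
+        # setup: every command must succeed; check=True raises if not
+        for cmd in case['setup']:
+            subprocess.run(cmd, shell=True, check=True)
+        # execute: test the return code against the acceptable value
+        proc = subprocess.run(case['cmdUnderTest'], shell=True)
+        ok = proc.returncode == int(case['expExitCode'])
+        # verify: the command must succeed and stdout must match a regex
+        out = subprocess.run(case['verifyCmd'], shell=True,
+                             capture_output=True, text=True)
+        ok = ok and out.returncode == 0 and \
+             re.search(case['matchPattern'], out.stdout) is not None
+        # teardown: undo the setup; these must also succeed
+        for cmd in case['teardown']:
+            subprocess.run(cmd, shell=True, check=True)
+        return ok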
+
USER-DEFINED CONSTANTS
----------------------
@@ -70,23 +100,132 @@ executed as part of the test. More will be added as test cases require.
Example:
$TC qdisc add dev $DEV1 ingress
+The NAMES values are used to substitute into the commands in the test cases.
+
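+For instance (a sketch; this hypothetical NAMES subset mirrors the
+dictionary defined in tdc_config.py), the substitution works like
+string.Template, as nsPlugin's _replace_keywords below also does:
+
+    from string import Template
+
+    NAMES = {'TC': '/sbin/tc', 'DEV1': 'v0p1'}   # hypothetical subset
+    cmd = Template('$TC qdisc add dev $DEV1 ingress').safe_substitute(NAMES)
+    # cmd is now '/sbin/tc qdisc add dev v0p1 ingress'
+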
COMMAND LINE ARGUMENTS
----------------------
Run tdc.py -h to see the full list of available arguments.
--p PATH Specify the tc executable located at PATH to be used on this
- test run
--c Show the available test case categories in this test file
--c CATEGORY Run only tests that belong to CATEGORY
--f FILE Read test cases from the JSON file named FILE
--l [CATEGORY] List all test cases in the JSON file. If CATEGORY is
- specified, list test cases matching that category.
--s ID Show the test case matching ID
--e ID Execute the test case identified by ID
--i Generate unique ID numbers for test cases with no existing
- ID number
+usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
+ [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v]
+ [-d DEVICE] [-n NS] [-V]
+
+Linux TC unit tests
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PATH, --path PATH The full path to the tc executable to use
+ -v, --verbose Show the commands that are being run
+ -d DEVICE, --device DEVICE
+ Execute the test case in flower category
+
+selection:
+ select which test cases: files plus directories; filtered by categories
+ plus testids
+
+ -D DIR [DIR ...], --directory DIR [DIR ...]
+ Collect tests from the specified directory(ies)
+ (default [tc-tests])
+ -f FILE [FILE ...], --file FILE [FILE ...]
+ Run tests from the specified file(s)
+ -c [CATG [CATG ...]], --category [CATG [CATG ...]]
+ Run tests only from the specified category/ies, or if
+ no category/ies is/are specified, list known
+ categories.
+ -e ID [ID ...], --execute ID [ID ...]
+ Execute the specified test cases with specified IDs
+
+action:
+ select action to perform on selected test cases
+
+ -l, --list List all test cases, or those only within the
+ specified category
+ -s, --show Display the selected test cases
+ -i, --id Generate ID numbers for new test cases
+
+netns:
+ options for nsPlugin (run commands in net namespace)
+
+ -n NS, --namespace NS
+ Run commands in namespace NS
+
+valgrind:
+ options for valgrindPlugin (run command under test under Valgrind)
+
+ -V, --valgrind Run commands under valgrind
+
+
+PLUGIN ARCHITECTURE
+-------------------
+
+There is now a plugin architecture, and some of the functionality that
+was in the tdc.py script has been moved into the plugins.
+
+The plugins are in the directory plugin-lib. They are executed from
+the directory plugins. Put symbolic links from plugins to plugin-lib,
+and name them according to the order you want them to run.
+
+Example:
+
+bjb@bee:~/work/tc-testing$ ls -l plugins
+total 4
+lrwxrwxrwx 1 bjb bjb 27 Oct 4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+lrwxrwxrwx 1 bjb bjb 25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+-rwxr-xr-x 1 bjb bjb 0 Sep 29 15:56 __init__.py
+
+The plugins are a subclass of TdcPlugin, defined in TdcPlugin.py and
+must be called "SubPlugin" so tdc can find them. They are
+distinguished from each other in the python program by their module
+name.
+
+This base class supplies "hooks" to run extra functions. These hooks are as follows:
+
+pre- and post-suite
+pre- and post-case
+pre- and post-execute stage
+adjust-command (runs in all stages and receives the stage name)
+
+The pre-suite hook receives the number of tests and an array of test ids.
+This allows you to dump out the list of skipped tests in the event of a
+failure during setup or teardown stage.
+
+The pre-case hook receives the ordinal number and test id of the current test.
+
+The adjust-command hook receives the stage id (see list below) and the
+full command to be executed. This allows for last-minute adjustment
+of the command.
+
+The stages are identified by the following strings:
+
+ - pre (pre-suite)
+ - setup
+ - command
+ - verify
+ - teardown
+ - post (post-suite)
+
+
+To write a plugin, you need to inherit from TdcPlugin in
+TdcPlugin.py. To use the plugin, you have to put the
+implementation file in plugin-lib, and add a symbolic link to it from
+plugins. It will be detected at run time and invoked at the
+appropriate times. There are a few examples in the plugin-lib
+directory:
+
+ - rootPlugin.py:
+ implements the enforcement of running as root
+ - nsPlugin.py:
+ sets up a network namespace and runs all commands in that namespace
+ - valgrindPlugin.py
+ runs each command in the execute stage under valgrind,
+ and checks for leaks.
+ This plugin outputs an extra test result for each test in the
+ test file: in addition to the existing pass/fail result, it
+ reports whether the command under test leaked memory.
+ (This one is a preliminary version, it may not work quite right yet,
+ but the overall template is there and it should only need tweaks.)
ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt
index 6a266d811a78..c40698557e2f 100644
--- a/tools/testing/selftests/tc-testing/TODO.txt
+++ b/tools/testing/selftests/tc-testing/TODO.txt
@@ -5,6 +5,27 @@ tc Testing Suite To-Do list:
- Add support for multiple versions of tc to run successively
-- Improve error messages when tdc aborts its run
+- Improve error messages when tdc aborts its run. Partially done - still
+ need to better handle problems in pre- and post-suite.
-- Allow tdc to write its results to file
+- Use python logger module for debug/verbose output
+
+- Allow tdc to write its results to file.
+ Maybe use python logger module for this too.
+
+- A better implementation of the "hooks". Currently, every plugin
+ will attempt to run a function at every hook point. Could be
+ changed so that plugin __init__ methods will register functions to
+ be run in the various predefined times. Then if a plugin does not
+ require action at a specific point, no penalty will be paid for
+ trying to run a function that will do nothing.
+
+- Proper exception handling - make an exception class and use it
+
+- a TestCase class, for easier testcase handling, searching, comparison
+
+- a TestSuite class
+ and a way to configure a test suite,
+ to automate running multiple "test suites" with different requirements
+
+- super simple test case example using ls, touch, etc
diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
new file mode 100644
index 000000000000..3ee9a6dacb52
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+class TdcPlugin:
+ def __init__(self):
+ super().__init__()
+ print(' -- {}.__init__'.format(self.sub_class))
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ self.testcount = testcount
+ self.testidlist = testidlist
+ if self.args.verbose > 1:
+ print(' -- {}.pre_suite'.format(self.sub_class))
+
+ def post_suite(self, index):
+ '''run commands after test_runner completes the test loop
+ index is the last ordinal number of test that was attempted'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_suite'.format(self.sub_class))
+
+ def pre_case(self, test_ordinal, testid):
+ '''run commands before test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_case'.format(self.sub_class))
+ self.args.testid = testid
+ self.args.test_ordinal = test_ordinal
+
+ def post_case(self):
+ '''run commands after test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_case'.format(self.sub_class))
+
+ def pre_execute(self):
+ '''run command before test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_execute'.format(self.sub_class))
+
+ def post_execute(self):
+ '''run command after test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_execute'.format(self.sub_class))
+
+ def adjust_command(self, stage, command):
+ '''adjust the command'''
+ if self.args.verbose > 1:
+ print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
+
+ # if stage == 'pre':
+ # pass
+ # elif stage == 'setup':
+ # pass
+ # elif stage == 'execute':
+ # pass
+ # elif stage == 'verify':
+ # pass
+ # elif stage == 'teardown':
+ # pass
+ # elif stage == 'post':
+ # pass
+ # else:
+ # pass
+
+ return command
+
+ def add_args(self, parser):
+ '''Get the plugin args from the command line'''
+ self.argparser = parser
+ return self.argparser
+
+ def check_args(self, args, remaining):
+ '''Check that the args are set correctly'''
+ self.args = args
+ if self.args.verbose > 1:
+ print(' -- {}.check_args'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
new file mode 100644
index 000000000000..c18f88d09360
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
@@ -0,0 +1,104 @@
+tdc - Adding plugins for tdc
+
+Author: Brenda J. Butler - [email protected]
+
+ADDING PLUGINS
+--------------
+
+A new plugin should be written in python as a class that inherits from TdcPlugin.
+There are some examples in plugin-lib.
+
+The plugin can be used to add functionality to the test framework,
+such as:
+
+- adding commands to be run before and/or after the test suite
+- adding commands to be run before and/or after the test cases
+- adding commands to be run before and/or after the execute phase of the test cases
+- ability to alter the command to be run in any phase:
+ pre (the pre-suite stage)
+ setup
+ execute
+ verify
+ teardown
+ post (the post-suite stage)
+- ability to add to the command line args, and use them at run time
+
+
+The functions in the class should follow the following interfaces:
+
+ def __init__(self)
+ def pre_suite(self, testcount, testidlist) # see "PRE_SUITE" below
+ def post_suite(self, ordinal) # see "SKIPPING" below
+ def pre_case(self, test_ordinal, testid) # see "PRE_CASE" below
+ def post_case(self)
+ def pre_execute(self)
+ def post_execute(self)
+ def adjust_command(self, stage, command) # see "ADJUST" below
+ def add_args(self, parser) # see "ADD_ARGS" below
+ def check_args(self, args, remaining) # see "CHECK_ARGS" below
+
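+A minimal skeleton that satisfies this interface by inheriting the
+defaults (illustrative only; the 'example' name and the 'time' prefix
+are hypothetical -- see plugin-lib for real plugins):
+
+    from TdcPlugin import TdcPlugin
+
+    class SubPlugin(TdcPlugin):
+        def __init__(self):
+            self.sub_class = 'example/SubPlugin'
+            super().__init__()
+
+        def adjust_command(self, stage, command):
+            super().adjust_command(stage, command)
+            # hypothetical tweak: time the command under test
+            if stage == 'execute' and not isinstance(command, list):
+                command = 'time ' + command
+            return command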
+
+PRE_SUITE
+
+This method takes a testcount (number of tests to be run) and
+testidlist (array of test ids for tests that will be run). This is
+useful for various things, including when an exception occurs and the
+rest of the tests must be skipped. The info is stored in the object,
+and the post_suite method can refer to it when dumping the "skipped"
+TAP output. The tdc.py script will do that for the test suite as
+defined in the test case, but if the plugin is being used to run extra
+tests on each test (eg, check for memory leaks on associated
+co-processes) then that other tap output can be generated in the
+post-suite method using this info passed in to the pre_suite method.
+
+
+SKIPPING
+
+The post_suite method will receive the ordinal number of the last
+test that was attempted. It can use this info when emitting the
+TAP output for the extra test cases, as sketched below.
+
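+A hypothetical sketch of doing so (the exact TAP text is up to the
+plugin; this assumes 1-based test ordinals):
+
+    def post_suite(self, index):
+        super().post_suite(index)
+        # tests beyond the last attempted ordinal never ran
+        for n, tid in enumerate(self.testidlist[index:], start=index + 1):
+            print('ok {} - {} # skipped'.format(n, tid))
+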
+
+PRE_CASE
+
+The pre_case method will receive the ordinal number of the test
+and the test id. This is useful when outputting the extra test results.
+
+
+ADJUST
+
+The adjust_command method receives a string representing
+the execution stage and a string which is the actual command to be
+executed. The plugin can adjust the command, based on the stage of
+execution.
+
+The stages are represented by the following strings:
+
+ 'pre'
+ 'setup'
+ 'command'
+ 'verify'
+ 'teardown'
+ 'post'
+
+The adjust_command method must return the adjusted command so tdc
+can use it.
+
+
+ADD_ARGS
+
+The add_args method receives the argparser object and can add
+arguments to it. Care should be taken that the new arguments do not
+conflict with any from tdc.py or from other plugins that will be used
+concurrently.
+
+The add_args method should return the argparser object.
+
+
+CHECK_ARGS
+
+The check_args method is there so that the plugin can do validation on
+the args, if needed. If there is a problem, an Exception should
+be raised, with a string that explains the problem.
+
+eg: raise Exception('plugin xxx, arg -y is wrong, fix it')
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
index 00438331ba47..17b267dedbd9 100644
--- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
+++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
@@ -12,14 +12,18 @@ template.json for the required JSON format for test cases.
Include the 'id' field, but do not assign a value. Running tdc with the -i
option will generate a unique ID for that test case.
-tdc will recursively search the 'tc' subdirectory for .json files. Any
-test case files you create in these directories will automatically be included.
-If you wish to store your custom test cases elsewhere, be sure to run tdc
-with the -f argument and the path to your file.
+tdc will recursively search the 'tc-tests' subdirectory (or the
+directories named with the -D option) for .json files. Any test case
+files you create in these directories will automatically be included.
+If you wish to store your custom test cases elsewhere, be sure to run
+tdc with the -f argument and the path to your file, or the -D argument
+and the path to your directory(ies).
-Be aware of required escape characters in the JSON data - particularly when
-defining the match pattern. Refer to the tctests.json file for examples when
-in doubt.
+Be aware of required escape characters in the JSON data - particularly
+when defining the match pattern. Refer to the supplied json test files
+for examples when in doubt. The match pattern is written in JSON but
+interpreted by Python as a regular expression, so it must be a valid
+Python regex expressed with JSON escaping.
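+
+For example (illustrative): to use the Python regular expression \d+
+in a match pattern, it must appear as "\\d+" in the JSON file, since
+JSON requires the backslash itself to be escaped.
+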
TEST CASE STRUCTURE
@@ -69,7 +73,8 @@ SETUP/TEARDOWN ERRORS
If an error is detected during the setup/teardown process, execution of the
tests will immediately stop with an error message and the namespace in which
the tests are run will be destroyed. This is to prevent inaccurate results
-in the test cases.
+in the test cases. tdc will output a series of TAP results for the skipped
+tests.
Repeated failures of the setup/teardown may indicate a problem with the test
case, or possibly even a bug in one of the commands that are not being tested.
@@ -79,3 +84,17 @@ so that it doesn't halt the script for an error that doesn't matter. Turn the
individual command into a list, with the command being first, followed by all
acceptable exit codes for the command.
+Example:
+
+A pair of setup commands. The first can have exit code 0, 1 or 255, the
+second must have exit code 0.
+
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 65536"
+ ],
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
new file mode 100644
index 000000000000..aa8a2669702b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
@@ -0,0 +1,27 @@
+tdc.py will look for plugins in a directory named plugins under the
+current working directory.
+Make a set of numbered symbolic links from there to the actual plugins.
+Eg:
+
+tdc.py
+plugin-lib/
+plugins/
+ __init__.py
+ 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+ 20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
+ 30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+
+
+tdc.py will find them and use them.
+
+
+rootPlugin
+ Check if the uid is root. If not, bail out.
+
+valgrindPlugin
+ Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
+ This plugin will write files to the cwd, called vgnd-xxx.log. These will contain
+ the valgrind output for test xxx. Any file matching the glob 'vgnd-*.log' will be
+ deleted at the end of the run.
+
+nsPlugin
+ Run all the commands in a network namespace.
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
new file mode 100644
index 000000000000..a194b1af2b30
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -0,0 +1,141 @@
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'ns/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+
+ if self.args.namespace:
+ self._ns_create()
+
+ def post_suite(self, index):
+ '''run commands after test_runner completes the test loop'''
+ super().post_suite(index)
+ if self.args.verbose:
+ print('{}.post_suite'.format(self.sub_class))
+
+ if self.args.namespace:
+ self._ns_destroy()
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'netns',
+ 'options for nsPlugin (run commands in net namespace)')
+ self.argparser_group.add_argument(
+ '-n', '--namespace', action='store_true',
+ help='Run commands in namespace')
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.namespace:
+ return command
+
+ if self.args.verbose:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+ if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
+ if self.args.verbose:
+ print('adjust_command: stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
+ cmdlist.insert(0, self.args.NAMES['NS'])
+ cmdlist.insert(0, 'exec')
+ cmdlist.insert(0, 'netns')
+ cmdlist.insert(0, 'ip')
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def _ns_create(self):
+ '''
+ Create the network namespace in which the tests will be run and set up
+ the required network devices for it.
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV0 up'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ if self.args.device:
+ cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+
+ def _ns_destroy(self):
+ '''
+ Destroy the network namespace for testing (and any associated network
+ devices as well)
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('post', cmd)
+
+ def _exec_cmd(self, stage, command):
+ '''
+ Perform any required modifications on an executable command, then run
+ it in a subprocess and return the results.
+ '''
+ if '$' in command:
+ command = self._replace_keywords(command)
+
+ self.adjust_command(stage, command)
+ if self.args.verbose:
+ print('_exec_cmd: command "{}"'.format(command))
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8")
+ else:
+ foutput = rawout.decode("utf-8")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
+
+ def _replace_keywords(self, cmd):
+ """
+ For a given executable command, substitute any known
+ variables contained within NAMES with the correct values
+ """
+ tcmd = Template(cmd)
+ subcmd = tcmd.safe_substitute(self.args.NAMES)
+ return subcmd
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
new file mode 100644
index 000000000000..e36775bd4d12
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
@@ -0,0 +1,19 @@
+import os
+import sys
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'root/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ # run commands before test_runner goes into a test loop
+ super().pre_suite(testcount, testidlist)
+
+ if os.geteuid():
+ print('This script must be run with root privileges', file=sys.stderr)
+ exit(1)
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
new file mode 100644
index 000000000000..477a7bd7d7fb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
@@ -0,0 +1,142 @@
+'''
+run the command under test, under valgrind and collect memory leak info
+as a separate test.
+'''
+
+
+import os
+import re
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+def vp_extract_num_from_string(num_as_string_maybe_with_commas):
+ return int(num_as_string_maybe_with_commas.replace(',',''))
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'valgrind/SubPlugin'
+ self.tap = ''
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+ if self.args.verbose > 1:
+ print('{}.pre_suite'.format(self.sub_class))
+ if self.args.valgrind:
+ self._add_to_tap('1..{}\n'.format(self.testcount))
+
+ def post_suite(self, index):
+ '''run commands after test_runner goes into a test loop'''
+ super().post_suite(index)
+ self._add_to_tap('\n|---\n')
+ if self.args.verbose > 1:
+ print('{}.post_suite'.format(self.sub_class))
+ print('{}'.format(self.tap))
+ if self.args.verbose < 4:
+ subprocess.check_output('rm -f vgnd-*.log', shell=True)
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'valgrind',
+ 'options for valgrindPlugin (run command under test under Valgrind)')
+
+ self.argparser_group.add_argument(
+ '-V', '--valgrind', action='store_true',
+ help='Run commands under valgrind')
+
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.valgrind:
+ return command
+
+ if self.args.verbose > 1:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+
+ if stage == 'execute':
+ if self.args.verbose > 1:
+ print('adjust_command: stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
+ format(stage, command, cmdlist))
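+ # Each insert(0, ...) prepends, so the final command line reads:
+ #   <valgrind> -v --log-file=vgnd-<testid>.log --leak-check=full
+ #   --show-leak-kinds=definite,indirect --track-origins=yes <cmd under test>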
+ cmdlist.insert(0, '--track-origins=yes')
+ cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
+ cmdlist.insert(0, '--leak-check=full')
+ cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
+ cmdlist.insert(0, '-v') # ask for summary of non-leak errors
+ cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose > 1:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def post_execute(self):
+ if not self.args.valgrind:
+ return
+
+ self.definitely_lost_re = re.compile(
+ r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL)
+ self.indirectly_lost_re = re.compile(
+ r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.possibly_lost_re = re.compile(
+ r'possibly lost:\s+([,0-9]+)bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.non_leak_error_re = re.compile(
+ r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
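+ # The patterns above target valgrind's summary lines, e.g.:
+ #   definitely lost: 1,024 bytes in 2 blocks
+ #   ERROR SUMMARY: 0 errors from 0 contexts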
+
+ def_num = 0
+ ind_num = 0
+ pos_num = 0
+ nle_num = 0
+
+ # what about concurrent test runs? Maybe force them to be in different directories?
+ with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
+ content = vfd.read()
+ def_mo = self.definitely_lost_re.search(content)
+ ind_mo = self.indirectly_lost_re.search(content)
+ pos_mo = self.possibly_lost_re.search(content)
+ nle_mo = self.non_leak_error_re.search(content)
+
+ if def_mo:
+ def_num = int(def_mo.group(2))
+ if ind_mo:
+ ind_num = int(ind_mo.group(2))
+ if pos_mo:
+ pos_num = int(pos_mo.group(2))
+ if nle_mo:
+ nle_num = int(nle_mo.group(1))
+
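+ # Report the leak check as its own TAP result, e.g.
+ #   not ok 4 - a5e7-mem # memory leak check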
+ mem_results = ''
+ if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
+ mem_results += 'not '
+
+ mem_results += 'ok {} - {}-mem # {}\n'.format(
+ self.args.test_ordinal, self.args.testid, 'memory leak check')
+ self._add_to_tap(mem_results)
+ if mem_results.startswith('not '):
+ print('{}'.format(content))
+ self._add_to_tap(content)
+
+ def _add_to_tap(self, more_tap_output):
+ self.tap += more_tap_output
diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugins/__init__.py
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
new file mode 100644
index 000000000000..5b012f4981d4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -0,0 +1,289 @@
+[
+ {
+ "id": "d959",
+ "name": "Add cBPF action with valid bytecode",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 100",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "f84a",
+ "name": "Add cBPF action with invalid bytecode",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 100",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "e939",
+ "name": "Add eBPF action with valid object-file",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o",
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 667",
+ "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf",
+ "rm -f _b.o"
+ ]
+ },
+ {
+ "id": "282d",
+ "name": "Add eBPF action with invalid object-file",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o",
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 667",
+ "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action bpf",
+ "rm -f _c.o"
+ ]
+ },
+ {
+ "id": "d819",
+ "name": "Replace cBPF bytecode and action control",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 555",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action replace action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 555",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 555",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' default-action drop.*index 555 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "6ae3",
+ "name": "Delete cBPF action ",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 444",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action delete action bpf index 444",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 444",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 444 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "3e0d",
+ "name": "List cBPF actions",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+ ],
+ "cmdUnderTest": "$TC action list action bpf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode",
+ "matchCount": "3",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "55ce",
+ "name": "Flush BPF actions",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+ ],
+ "cmdUnderTest": "$TC action flush action bpf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "ccc3",
+ "name": "Add cBPF action with duplicate index",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 4294967295"
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967295",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 4294967295",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "89c7",
+ "name": "Add cBPF action with invalid index",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action ls action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "7ab9",
+ "name": "Add cBPF action with cookie",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' cookie d0d0d0d0d0d0d0d0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf.*cookie d0d0d0d0d0d0d0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
new file mode 100644
index 000000000000..93cf8fea8ae7
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -0,0 +1,410 @@
+[
+ {
+ "id": "6d84",
+ "name": "Add csum iph action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum iph index 800",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 800",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 800 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "1862",
+ "name": "Add csum ip4h action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ip4h index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 7 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "15c6",
+ "name": "Add csum ipv4h action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ipv4h index 1122",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 1122",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 1122 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bf47",
+ "name": "Add csum icmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 1",
+ "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "cc1d",
+ "name": "Add csum igmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum igmp index 999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 999",
+ "matchPattern": "action order [0-9]*: csum \\(igmp\\) action pass.*index 999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bccc",
+ "name": "Add csum foobar action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum foobar index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "action order [0-9]*: csum \\(foobar\\) action pass.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "3bb4",
+ "name": "Add csum tcp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum tcp index 9999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 9999",
+ "matchPattern": "action order [0-9]*: csum \\(tcp\\) action pass.*index 9999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "759c",
+ "name": "Add csum udp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp index 334455",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 334455",
+ "matchPattern": "action order [0-9]*: csum \\(udp\\) action pass.*index 334455 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bdb6",
+ "name": "Add csum udp xor iph action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp xor iph index 3",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "action order [0-9]*: csum \\(udp xor iph\\) action pass.*index 3 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "c220",
+ "name": "Add csum udplite action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udplite continue index 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 3",
+ "matchPattern": "action order [0-9]*: csum \\(udplite\\) action continue.*index 3 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "8993",
+ "name": "Add csum sctp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum sctp index 777",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 777",
+ "matchPattern": "action order [0-9]*: csum \\(sctp\\) action pass.*index 777 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "b138",
+ "name": "Add csum ip & icmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ip and icmp pipe index 123",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 123",
+ "matchPattern": "action order [0-9]*: csum \\(iph, icmp\\) action pipe.*index 123 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "eeda",
+ "name": "Add csum ip & sctp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ipv4h sctp continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 2",
+ "matchPattern": "action order [0-9]*: csum \\(iph, sctp\\) action continue.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "0017",
+ "name": "Add csum udp or tcp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp or tcp continue index 27",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 27",
+ "matchPattern": "action order [0-9]*: csum \\(tcp, udp\\) action continue.*index 27 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "ce92",
+ "name": "Add csum udp action with cookie",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp pipe index 7 cookie 12345678",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(udp\\) action pipe.*index 7.*cookie 12345678",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "912f",
+ "name": "Add csum icmp action with large cookie",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp pipe index 17 cookie aabbccddeeff1122",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 17",
+ "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pipe.*index 17.*cookie aabbccddeeff1122",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "879b",
+ "name": "Add batch of 32 csum tcp actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "^[ \t]+index [0-9]* ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index e2187b6e0b7a..68c91023cdb9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -465,5 +465,76 @@
"teardown": [
"$TC actions flush action gact"
]
+ },
+ {
+ "id": "1021",
+ "name": "Add batch of 32 gact pass actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action pass index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "da7a",
+ "name": "Add batch of 32 gact continue actions with cookie",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "8aa3",
+ "name": "Delete batch of 32 gact continue actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action gact index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 0fcccf18399b..443c9b3c8664 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -171,6 +171,198 @@
]
},
{
+ "id": "8917",
+ "name": "Add mirred mirror action with control pass",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 1",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "1054",
+ "name": "Add mirred mirror action with control pipe",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 15",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 15",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "9887",
+ "name": "Add mirred mirror action with control continue",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo continue index 15",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 15",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) continue.*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e4aa",
+ "name": "Add mirred mirror action with control reclassify",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify index 150",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 150",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*index 150 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "ece9",
+ "name": "Add mirred mirror action with control drop",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo drop index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 99",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 99 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "0031",
+ "name": "Add mirred mirror action with control jump",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo jump 10 index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 99",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) jump 10.*index 99 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "407c",
+ "name": "Add mirred mirror action with cookie",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify cookie aa11bb22cc33dd44ee55",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*cookie aa11bb22cc33dd44ee55",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "8b69",
+ "name": "Add mirred mirror action with maximum index",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 4294967295",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
"id": "a70e",
"name": "Delete mirred mirror action",
"category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index 0e602a3f9393..38d85a1d7492 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -265,6 +265,150 @@
]
},
{
+ "id": "ddd6",
+ "name": "Add police action with invalid rate value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3tb burst 250k conform-exceed pass/pipe index 5",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Tb burst 250Kb mtu 2Kb action pass/pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f61c",
+ "name": "Add police action with invalid burst value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3kbit burst 250P conform-exceed pass/pipe index 5",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Kbit burst 250Pb mtu 2Kb action pass/pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "c26f",
+ "name": "Add police action with invalid peakrate value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100T index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Tbit",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "db04",
+ "name": "Add police action with invalid mtu value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10kbit burst 10k mtu 2Pbit index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 10Kbit burst 1Kb mtu 2Pb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f3c9",
+ "name": "Add police action with cookie",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 1 cookie a1b1c1d1e1f12233bb",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 10Mbit burst 10Kb mtu 2Kb.*cookie a1b1c1d1e1f12233bb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "d190",
+ "name": "Add police action with maximum index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 4294967295",
+ "matchPattern": "action order [0-9]*: police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
"id": "336e",
"name": "Delete police action",
"category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
index 99635ea4722e..37ecc2716fee 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -216,6 +216,174 @@
]
},
{
+ "id": "464a",
+ "name": "Add skbedit action with control pipe",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype host pipe index 11",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 11",
+ "matchPattern": "action order [0-9]*: skbedit ptype host pipe.*index 11 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "212f",
+ "name": "Add skbedit action with control reclassify",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 56789 reclassify index 90",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 90",
+ "matchPattern": "action order [0-9]*: skbedit mark 56789 reclassify.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "0651",
+ "name": "Add skbedit action with control pass",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 pass index 271",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 271",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 pass.*index 271 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "cc53",
+ "name": "Add skbedit action with control drop",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 drop index 271",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 271",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 drop.*index 271 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "ec16",
+ "name": "Add skbedit action with control jump",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 8 jump 9 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 2",
+ "matchPattern": "action order [0-9]*: skbedit priority :8 jump 9.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "db54",
+ "name": "Add skbedit action with control continue",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 32",
+ "matchPattern": "action order [0-9]*: skbedit priority :16 continue.*index 32 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "1055",
+ "name": "Add skbedit action with cookie",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32 cookie deadbeef",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 32",
+ "matchPattern": "action order [0-9]*: skbedit priority :16 continue.*index 32 ref.*cookie deadbeef",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
"id": "5172",
"name": "List skbedit actions",
"category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
index e34075059c26..fe3326e939c1 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -264,6 +264,30 @@
]
},
{
+ "id": "6046",
+ "name": "Add skbmod action with control reclassify and cookie",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:01:02:03:04:01 reclassify index 1 cookie ddeeffaabb11cc22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod reclassify set smac 00:01:02:03:04:01.*index 1 ref.*cookie ddeeffaabb11cc22",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
"id": "58cb",
"name": "List skbmod actions",
"category": [
@@ -315,7 +339,7 @@
"cmdUnderTest": "$TC actions ls action skbmod",
"expExitCode": "0",
"verifyCmd": "$TC actions get action skbmod index 4",
- "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x0031",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x31",
"matchCount": "1",
"teardown": [
"$TC actions flush action skbmod"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
new file mode 100644
index 000000000000..4510ddfa6e54
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -0,0 +1,410 @@
+[
+ {
+ "id": "6f5a",
+ "name": "Add vlan pop action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "ee6f",
+ "name": "Add vlan pop action with large index",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "b6b9",
+ "name": "Add vlan pop action with jump opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop jump 10 index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*jump 10.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "87c3",
+ "name": "Add vlan pop action with trap opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop trap index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop trap.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "2b91",
+ "name": "Add vlan invalid action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan bad_mode",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*bad_mode",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "57fc",
+ "name": "Add vlan action with invalid protocol type",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push protocol ABCD",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "3989",
+ "name": "Add vlan push action with default protocol and priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 123 index 18",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 18",
+ "matchPattern": "action order [0-9]+: vlan.*push id 123 protocol 802.1Q priority 0 pipe.*index 18 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "79dc",
+ "name": "Add vlan push action with protocol 802.1Q and priority 3",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 77 protocol 802.1Q priority 3 continue index 734",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 734",
+ "matchPattern": "action order [0-9]+: vlan.*push id 77 protocol 802.1Q priority 3 continue.*index 734 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "4d73",
+ "name": "Add vlan push action with protocol 802.1AD",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 1024 protocol 802.1AD pass index 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 10000",
+ "matchPattern": "action order [0-9]+: vlan.*push id 1024 protocol 802.1ad priority 0 pass.*index 10000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "1f7b",
+ "name": "Add vlan push action with invalid vlan ID",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 5678 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 5678.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "5d02",
+ "name": "Add vlan push action with invalid IEEE 802.1p priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 5 priority 10 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "6812",
+ "name": "Add vlan modify action for protocol 802.1Q",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 100",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "5a31",
+ "name": "Add vlan modify action for protocol 802.1AD",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 12",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "83a4",
+ "name": "Delete vlan pop action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan pop index 44"
+ ],
+ "cmdUnderTest": "$TC actions del action vlan index 44",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 44 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ed1e",
+ "name": "Delete vlan push action for protocol 802.1Q",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 4094 protocol 802.1Q index 999"
+ ],
+ "cmdUnderTest": "$TC actions del action vlan index 999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4094 protocol 802.1Q priority 0 pipe.*index 999 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a2a3",
+ "name": "Flush vlan actions",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 10",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 11",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 12",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 13"
+ ],
+ "cmdUnderTest": "$TC actions flush action vlan",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4 protocol 802.1ad",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "1d78",
+ "name": "Add vlan action with cookie",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 4 cookie a0a0a0a0a0a0a0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4.*cookie a0a0a0a0a0a0a0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index fc373fdf2bdc..44de4a272a11 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -11,16 +11,96 @@ import re
import os
import sys
import argparse
+import importlib
import json
import subprocess
+import time
+import traceback
from collections import OrderedDict
from string import Template
from tdc_config import *
from tdc_helper import *
-
-USE_NS = True
+import TdcPlugin
+
+
+class PluginMgrTestFail(Exception):
+ def __init__(self, stage, output, message):
+ self.stage = stage
+ self.output = output
+ self.message = message
+
+class PluginMgr:
+ def __init__(self, argparser):
+ super().__init__()
+ self.plugins = {}
+ self.plugin_instances = []
+ self.args = []
+ self.argparser = argparser
+
+ # TODO, put plugins in order
+ plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
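+ # Each importable .py here (skipping __init__.py and editor leftovers
+ # like '#foo.py' or '.#foo.py') must define a SubPlugin class; one
+ # instance per module is collected into plugin_instances.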
+ for dirpath, dirnames, filenames in os.walk(plugindir):
+ for fn in filenames:
+ if (fn.endswith('.py') and
+ not fn == '__init__.py' and
+ not fn.startswith('#') and
+ not fn.startswith('.#')):
+ mn = fn[0:-3]
+ foo = importlib.import_module('plugins.' + mn)
+ self.plugins[mn] = foo
+ self.plugin_instances.append(foo.SubPlugin())
+
+ def call_pre_suite(self, testcount, testidlist):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_suite(testcount, testidlist)
+
+ def call_post_suite(self, index):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_suite(index)
+
+ def call_pre_case(self, test_ordinal, testid):
+ for pgn_inst in self.plugin_instances:
+ try:
+ pgn_inst.pre_case(test_ordinal, testid)
+ except Exception as ee:
+ print('exception {} in call to pre_case for {} plugin'.
+ format(ee, pgn_inst.__class__))
+ print('test_ordinal is {}'.format(test_ordinal))
+ print('testid is {}'.format(testid))
+ raise
+
+ def call_post_case(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_case()
+
+ def call_pre_execute(self):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_execute()
+
+ def call_post_execute(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_execute()
+
+ def call_add_args(self, parser):
+ for pgn_inst in self.plugin_instances:
+ parser = pgn_inst.add_args(parser)
+ return parser
+
+ def call_check_args(self, args, remaining):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.check_args(args, remaining)
+
+ def call_adjust_command(self, stage, command):
+ for pgn_inst in self.plugin_instances:
+ command = pgn_inst.adjust_command(stage, command)
+ return command
+
+ @staticmethod
+ def _make_argparser(args):
+ return argparse.ArgumentParser(
+ description='Linux TC unit tests')
def replace_keywords(cmd):
@@ -33,21 +113,24 @@ def replace_keywords(cmd):
return subcmd
-def exec_cmd(command, nsonly=True):
+def exec_cmd(args, pm, stage, command):
"""
Perform any required modifications on an executable command, then run
it in a subprocess and return the results.
"""
- if (USE_NS and nsonly):
- command = 'ip netns exec $NS ' + command
-
+ if len(command.strip()) == 0:
+ return None, None
if '$' in command:
command = replace_keywords(command)
+ command = pm.call_adjust_command(stage, command)
+ if args.verbose > 0:
+ print('command "{}"'.format(command))
proc = subprocess.Popen(command,
shell=True,
stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ stderr=subprocess.PIPE,
+ env=ENVIR)
(rawout, serr) = proc.communicate()
if proc.returncode != 0 and len(serr) > 0:
@@ -60,36 +143,99 @@ def exec_cmd(command, nsonly=True):
return proc, foutput
-def prepare_env(cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
"""
- Execute the setup/teardown commands for a test case. Optionally
- terminate test execution if the command fails.
+ Execute the setup/teardown commands for a test case.
+ Optionally terminate test execution if the command fails.
"""
+ if args.verbose > 0:
+ print('{}'.format(prefix))
for cmdinfo in cmdlist:
- if (type(cmdinfo) == list):
+ if isinstance(cmdinfo, list):
exit_codes = cmdinfo[1:]
cmd = cmdinfo[0]
else:
exit_codes = [0]
cmd = cmdinfo
- if (len(cmd) == 0):
+ if not cmd:
continue
- (proc, foutput) = exec_cmd(cmd)
+ (proc, foutput) = exec_cmd(args, pm, stage, cmd)
+
+ if proc and (proc.returncode not in exit_codes):
+ print('', file=sys.stderr)
+ print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
+ file=sys.stderr)
+ print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
+ file=sys.stderr)
+ print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
+ print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
+ print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
+ raise PluginMgrTestFail(
+ stage, output,
+ '"{}" did not complete successfully'.format(prefix))
+
+def run_one_test(pm, args, index, tidx):
+ global NAMES
+ result = True
+ tresult = ""
+ tap = ""
+ if args.verbose > 0:
+ print("\t====================\n=====> ", end="")
+ print("Test " + tidx["id"] + ": " + tidx["name"])
+
+ # populate NAMES with TESTID for this test
+ NAMES['TESTID'] = tidx['id']
+
+ pm.call_pre_case(index, tidx['id'])
+ prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+
+ if args.verbose > 0:
+ print('-----> execute stage')
+ pm.call_pre_execute()
+ (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
+ exit_code = p.returncode
+ pm.call_post_execute()
+
+ if (exit_code != int(tidx["expExitCode"])):
+ result = False
+ print("exit:", exit_code, int(tidx["expExitCode"]))
+ print(procout)
+ else:
+ if args.verbose > 0:
+ print('-----> verify stage')
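+ # A test passes only if matchPattern occurs exactly matchCount times
+ # in the verify command's output; matchCount 0 asserts absence.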
+ match_pattern = re.compile(
+ str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+ (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
+ if procout:
+ match_index = re.findall(match_pattern, procout)
+ if len(match_index) != int(tidx["matchCount"]):
+ result = False
+ elif int(tidx["matchCount"]) != 0:
+ result = False
+
+ if not result:
+ tresult += 'not '
+ tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
+ tap += tresult
- if proc.returncode not in exit_codes:
- print
- print("Could not execute:")
- print(cmd)
- print("\nError message:")
- print(foutput)
- print("\nAborting test run.")
- ns_destroy()
- exit(1)
+ if not result:
+ if procout:
+ tap += procout
+ else:
+ tap += 'No output!\n'
+
+ prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
+ pm.call_post_case()
+ index += 1
+
+ # remove TESTID from NAMES
+ del NAMES['TESTID']
+ return tap
-def test_runner(filtered_tests, args):
+def test_runner(pm, args, filtered_tests):
"""
Driver function for the unit tests.
@@ -101,75 +247,92 @@ def test_runner(filtered_tests, args):
testlist = filtered_tests
tcount = len(testlist)
index = 1
- tap = str(index) + ".." + str(tcount) + "\n"
-
+ tap = ''
+ badtest = None
+ stage = None
+ emergency_exit = False
+ emergency_exit_message = ''
+
+ if args.notap:
+ if args.verbose:
+ tap = 'notap requested: omitting test plan\n'
+ else:
+ tap = str(index) + ".." + str(tcount) + "\n"
+ try:
+ pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+ except Exception as ee:
+ ex_type, ex, ex_tb = sys.exc_info()
+ print('Exception {} {} (caught in pre_suite).'.
+ format(ex_type, ex))
+ # when the extra print statements are uncommented,
+ # the traceback does not appear between them
+ # (it appears way earlier in the tdc.py output)
+ # so don't bother ...
+ # print('--------------------(')
+ # print('traceback')
+ traceback.print_tb(ex_tb)
+ # print('--------------------)')
+ emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+ emergency_exit = True
+ stage = 'pre-SUITE'
+
+ if emergency_exit:
+ pm.call_post_suite(index)
+ return emergency_exit_message
+ if args.verbose > 1:
+ print('give test rig 2 seconds to stabilize')
+ time.sleep(2)
for tidx in testlist:
- result = True
- tresult = ""
if "flower" in tidx["category"] and args.device == None:
+ if args.verbose > 1:
+ print('Not executing test {} {} because DEV2 not defined'.
+ format(tidx['id'], tidx['name']))
continue
- print("Test " + tidx["id"] + ": " + tidx["name"])
- prepare_env(tidx["setup"])
- (p, procout) = exec_cmd(tidx["cmdUnderTest"])
- exit_code = p.returncode
-
- if (exit_code != int(tidx["expExitCode"])):
- result = False
- print("exit:", exit_code, int(tidx["expExitCode"]))
- print(procout)
- else:
- match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL)
- (p, procout) = exec_cmd(tidx["verifyCmd"])
- match_index = re.findall(match_pattern, procout)
- if len(match_index) != int(tidx["matchCount"]):
- result = False
-
- if result == True:
- tresult += "ok "
- else:
- tresult += "not ok "
- tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n"
-
- if result == False:
- tap += procout
-
- prepare_env(tidx["teardown"])
+ try:
+ badtest = tidx # in case it goes bad
+ tap += run_one_test(pm, args, index, tidx)
+ except PluginMgrTestFail as pmtf:
+ ex_type, ex, ex_tb = sys.exc_info()
+ stage = pmtf.stage
+ message = pmtf.message
+ output = pmtf.output
+ print(message)
+ print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+ format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+ print('---------------')
+ print('traceback')
+ traceback.print_tb(ex_tb)
+ print('---------------')
+ if stage == 'teardown':
+ print('accumulated output for this test:')
+ if pmtf.output:
+ print(pmtf.output)
+ print('---------------')
+ break
index += 1
- return tap
-
+ # if we failed in setup or teardown,
+ # fill in the remaining tests with ok-skipped
+ count = index
+ if not args.notap:
+ tap += 'about to flush the tap output if tests need to be skipped\n'
+ if tcount + 1 != index:
+ for tidx in testlist[index - 1:]:
+ msg = 'skipped - previous {} failed'.format(stage)
+ tap += 'ok {} - {} # {} {} {}\n'.format(
+ count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+ count += 1
-def ns_create():
- """
- Create the network namespace in which the tests will be run and set up
- the required network devices for it.
- """
- if (USE_NS):
- cmd = 'ip netns add $NS'
- exec_cmd(cmd, False)
- cmd = 'ip link add $DEV0 type veth peer name $DEV1'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV1 netns $NS'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV0 up'
- exec_cmd(cmd, False)
- cmd = 'ip -n $NS link set $DEV1 up'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV2 netns $NS'
- exec_cmd(cmd, False)
- cmd = 'ip -n $NS link set $DEV2 up'
- exec_cmd(cmd, False)
+ tap += 'done flushing skipped test tap output\n'
+ if args.pause:
+ print('Want to pause\nPress enter to continue ...')
+ if input():
+ print('got something on stdin')
-def ns_destroy():
- """
- Destroy the network namespace for testing (and any associated network
- devices as well)
- """
- if (USE_NS):
- cmd = 'ip netns delete $NS'
- exec_cmd(cmd, False)
+ pm.call_post_suite(index)
+ return tap
def has_blank_ids(idlist):
"""
@@ -209,41 +372,70 @@ def set_args(parser):
"""
Set the command line arguments for tdc.
"""
- parser.add_argument('-p', '--path', type=str,
- help='The full path to the tc executable to use')
- parser.add_argument('-c', '--category', type=str, nargs='?', const='+c',
- help='Run tests only from the specified category, or if no category is specified, list known categories.')
- parser.add_argument('-f', '--file', type=str,
- help='Run tests from the specified file')
- parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY',
- help='List all test cases, or those only within the specified category')
- parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID',
- help='Display the test case with specified id')
- parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID',
- help='Execute the single test case with specified ID')
- parser.add_argument('-i', '--id', action='store_true', dest='gen_id',
- help='Generate ID numbers for new test cases')
+ parser.add_argument(
+ '-p', '--path', type=str,
+ help='The full path to the tc executable to use')
+ sg = parser.add_argument_group(
+ 'selection', 'select which test cases to run: ' +
+ 'files and directories, filtered by categories and test ids')
+ ag = parser.add_argument_group(
+ 'action', 'select the action to perform on the selected test cases')
+
+ sg.add_argument(
+ '-D', '--directory', nargs='+', metavar='DIR',
+ help='Collect tests from the specified directories ' +
+ '(default [tc-tests])')
+ sg.add_argument(
+ '-f', '--file', nargs='+', metavar='FILE',
+ help='Run tests from the specified file(s)')
+ sg.add_argument(
+ '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
+ help='Run tests only from the specified categories, ' +
+ 'or if no category is specified, list known categories.')
+ sg.add_argument(
+ '-e', '--execute', nargs='+', metavar='ID',
+ help='Execute only the test cases with the specified IDs')
+ ag.add_argument(
+ '-l', '--list', action='store_true',
+ help='List all test cases, or those only within the specified category')
+ ag.add_argument(
+ '-s', '--show', action='store_true', dest='showID',
+ help='Display the selected test cases')
+ ag.add_argument(
+ '-i', '--id', action='store_true', dest='gen_id',
+ help='Generate ID numbers for new test cases')
+ parser.add_argument(
+ '-v', '--verbose', action='count', default=0,
+ help='Show the commands that are being run')
+ parser.add_argument(
+ '-N', '--notap', action='store_true',
+ help='Suppress TAP results for the command under test')
parser.add_argument('-d', '--device',
help='Execute the test case in flower category')
+ parser.add_argument(
+ '-P', '--pause', action='store_true',
+ help='Pause execution just before post-suite stage')
return parser
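
As a quick check of the regrouped options, a minimal Python sketch (it assumes the set_args() above is importable from tdc; the category names are made-up examples):

    import argparse

    parser = set_args(argparse.ArgumentParser())
    args, rest = parser.parse_known_args(['-c', 'gact', 'mirred', '-v', '-v', '-l'])
    print(args.category)  # ['gact', 'mirred']
    print(args.verbose)   # 2
    print(args.list)      # True
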
-def check_default_settings(args):
+def check_default_settings(args, remaining, pm):
"""
- Process any arguments overriding the default settings, and ensure the
- settings are correct.
+ Process any arguments overriding the default settings,
+ and ensure the settings are correct.
"""
# Allow for overriding specific settings
global NAMES
if args.path != None:
- NAMES['TC'] = args.path
+ NAMES['TC'] = args.path
if args.device != None:
- NAMES['DEV2'] = args.device
+ NAMES['DEV2'] = args.device
if not os.path.isfile(NAMES['TC']):
print("The specified tc path " + NAMES['TC'] + " does not exist.")
exit(1)
+ pm.call_check_args(args, remaining)
+
def get_id_list(alltests):
"""
@@ -277,7 +469,7 @@ def generate_case_ids(alltests):
for c in alltests:
if (c["id"] == ""):
while True:
- newid = str('%04x' % random.randrange(16**4))
+ newid = str('{:04x}'.format(random.randrange(16**4)))
if (does_id_exist(alltests, newid)):
continue
else:
@@ -300,40 +492,107 @@ def generate_case_ids(alltests):
json.dump(testlist, outfile, indent=4)
outfile.close()
+def filter_tests_by_id(args, testlist):
+ '''
+ Remove tests from testlist that are not in the named id list.
+ If id list is empty, return empty list.
+ '''
+ newlist = list()
+ if testlist and args.execute:
+ target_ids = args.execute
+
+ if isinstance(target_ids, list) and (len(target_ids) > 0):
+ newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
+ return newlist
+
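A small usage sketch for filter_tests_by_id(), with hypothetical ids (argparse.Namespace stands in for the parsed arguments):

    from argparse import Namespace

    tests = [{'id': 'd052'}, {'id': 'e3a9'}, {'id': 'f441'}]
    args = Namespace(execute=['d052', 'f441'])
    print([t['id'] for t in filter_tests_by_id(args, tests)])
    # ['d052', 'f441']
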
+def filter_tests_by_category(args, testlist):
+ '''
+ Remove tests from testlist that are not in a named category.
+ '''
+ answer = list()
+ if args.category and testlist:
+ test_ids = list()
+ for catg in set(args.category):
+ if catg == '+c':
+ continue
+ print('considering category {}'.format(catg))
+ for tc in testlist:
+ if catg in tc['category'] and tc['id'] not in test_ids:
+ answer.append(tc)
+ test_ids.append(tc['id'])
+
+ return answer
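
Likewise for filter_tests_by_category(): the '+c' placeholder (the -c default) is skipped, and test_ids keeps a test from being added twice when it appears in several selected categories (the data below is hypothetical):

    from argparse import Namespace

    tests = [{'id': '1', 'category': ['gact']},
             {'id': '2', 'category': ['mirred', 'gact']}]
    args = Namespace(category=['+c', 'gact'])
    print([t['id'] for t in filter_tests_by_category(args, tests)])
    # prints 'considering category gact', then ['1', '2']
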
def get_test_cases(args):
"""
If a test case file is specified, retrieve tests from that file.
Otherwise, glob for all json files in subdirectories and load from
each one.
+ Also, if requested, filter by category, and add tests matching
+ certain ids.
"""
import fnmatch
- if args.file != None:
- if not os.path.isfile(args.file):
- print("The specified test case file " + args.file + " does not exist.")
- exit(1)
- flist = [args.file]
- else:
- flist = []
- for root, dirnames, filenames in os.walk('tc-tests'):
+
+ flist = []
+ testdirs = ['tc-tests']
+
+ if args.file:
+ # at least one file was specified - remove the default directory
+ testdirs = []
+
+ for ff in args.file:
+ if not os.path.isfile(ff):
+ print("IGNORING file " + ff + "\n\tBECAUSE does not exist.")
+ else:
+ flist.append(os.path.abspath(ff))
+
+ if args.directory:
+ testdirs = args.directory
+
+ for testdir in testdirs:
+ for root, dirnames, filenames in os.walk(testdir):
for filename in fnmatch.filter(filenames, '*.json'):
- flist.append(os.path.join(root, filename))
- alltests = list()
+ candidate = os.path.abspath(os.path.join(root, filename))
+ # skip any file already collected (e.g. via --file)
+ if candidate not in flist:
+ flist.append(candidate)
+
+ alltestcases = list()
for casefile in flist:
- alltests = alltests + (load_from_file(casefile))
- return alltests
+ alltestcases = alltestcases + (load_from_file(casefile))
+
+ allcatlist = get_test_categories(alltestcases)
+ allidlist = get_id_list(alltestcases)
+ testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
+ idtestcases = filter_tests_by_id(args, alltestcases)
+ cattestcases = filter_tests_by_category(args, alltestcases)
-def set_operation_mode(args):
+ cat_ids = [x['id'] for x in cattestcases]
+ if args.execute:
+ if args.category:
+ alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
+ else:
+ alltestcases = idtestcases
+ else:
+ if cat_ids:
+ alltestcases = cattestcases
+ else:
+ # just accept the existing value of alltestcases,
+ # which has been filtered by file/directory
+ pass
+
+ return allcatlist, allidlist, testcases_by_cats, alltestcases
+
+
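The tail of get_test_cases() merges the two selections without duplicating ids; a hypothetical illustration of that union:

    cattestcases = [{'id': 'a1'}, {'id': 'b2'}]   # matched by -c
    idtestcases  = [{'id': 'b2'}, {'id': 'c3'}]   # matched by -e
    cat_ids = [x['id'] for x in cattestcases]
    merged = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
    print([x['id'] for x in merged])  # ['a1', 'b2', 'c3']
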
+def set_operation_mode(pm, args):
"""
Load the test case data and process remaining arguments to determine
what the script should do for this run, and call the appropriate
function.
"""
- alltests = get_test_cases(args)
+ ucat, idlist, testcases, alltests = get_test_cases(args)
if args.gen_id:
- idlist = get_id_list(alltests)
if (has_blank_ids(idlist)):
alltests = generate_case_ids(alltests)
else:
@@ -347,70 +606,29 @@ def set_operation_mode(args):
print("Please correct them before continuing.")
exit(1)
- ucat = get_test_categories(alltests)
-
if args.showID:
- show_test_case_by_id(alltests, args.showID[0])
+ for atest in alltests:
+ print_test_case(atest)
exit(0)
- if args.execute:
- target_id = args.execute[0]
- else:
- target_id = ""
-
- if args.category:
- if (args.category == '+c'):
- print("Available categories:")
- print_sll(ucat)
- exit(0)
- else:
- target_category = args.category
- else:
- target_category = ""
-
-
- testcases = get_categorized_testlist(alltests, ucat)
+ if isinstance(args.category, list) and (len(args.category) == 0):
+ print("Available categories:")
+ print_sll(ucat)
+ exit(0)
     if args.list:
-        if (args.list == "++"):
-            list_test_cases(alltests)
-            exit(0)
+        list_test_cases(alltests)
+        exit(0)
- elif(len(args.list) > 0):
- if (args.list not in ucat):
- print("Unknown category " + args.list)
- print("Available categories:")
- print_sll(ucat)
- exit(1)
- list_test_cases(testcases[args.list])
- exit(0)
-
- if (os.geteuid() != 0):
- print("This script must be run with root privileges.\n")
- exit(1)
-
- ns_create()
-
- if (len(target_category) == 0):
- if (len(target_id) > 0):
- alltests = list(filter(lambda x: target_id in x['id'], alltests))
- if (len(alltests) == 0):
- print("Cannot find a test case with ID matching " + target_id)
- exit(1)
- catresults = test_runner(alltests, args)
- print("All test results: " + "\n\n" + catresults)
- elif (len(target_category) > 0):
- if (target_category == "flower") and args.device == None:
- print("Please specify a NIC device (-d) to run category flower")
- exit(1)
- if (target_category not in ucat):
- print("Specified category is not present in this file.")
- exit(1)
- else:
- catresults = test_runner(testcases[target_category], args)
- print("Category " + target_category + "\n\n" + catresults)
-
- ns_destroy()
+ if len(alltests):
+ catresults = test_runner(pm, args, alltests)
+ else:
+ catresults = 'No tests found\n'
+ if args.notap:
+ print('Tap output suppression requested\n')
+ else:
+ print('All test results: \n\n{}'.format(catresults))
def main():
"""
@@ -419,10 +637,15 @@ def main():
"""
parser = args_parse()
parser = set_args(parser)
+ pm = PluginMgr(parser)
+ parser = pm.call_add_args(parser)
(args, remaining) = parser.parse_known_args()
- check_default_settings(args)
+ args.NAMES = NAMES
+ check_default_settings(args, remaining, pm)
+ if args.verbose > 2:
+ print('args is {}'.format(args))
- set_operation_mode(args)
+ set_operation_mode(pm, args)
exit(0)
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 707c6bfef689..52fa539dc662 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -49,13 +49,13 @@ index = 0
for i in range(0x100):
for j in range(0x100):
for k in range(0x100):
- mac = ("%02x:%02x:%02x" % (i, j, k))
+ mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
src_mac = "e4:11:00:" + mac
dst_mac = "e4:12:00:" + mac
- cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s "
- "src_mac %s dst_mac %s action drop %s" %
+ cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+ "src_mac {} dst_mac {} action drop {}".format
(device, prio, skip, src_mac, dst_mac, share_action))
- file.write("%s\n" % cmd)
+ file.write("{}\n".format(cmd))
index += 1
if index >= number:
file.close()
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
index db381120a566..9f35c96c88a0 100644
--- a/tools/testing/selftests/tc-testing/tdc_helper.py
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -57,20 +57,11 @@ def print_sll(items):
def print_test_case(tcase):
""" Pretty-printing of a given test case. """
+ print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
for k in tcase.keys():
if (isinstance(tcase[k], list)):
print(k + ":")
print_list(tcase[k])
else:
- print(k + ": " + tcase[k])
-
-
-def show_test_case_by_id(testlist, caseID):
- """ Find the specified test case to pretty-print. """
- if not any(d.get('id', None) == caseID for d in testlist):
- print("That ID does not exist.")
- exit(1)
- else:
- print_test_case(next((d for d in testlist if d['id'] == caseID)))
-
-
+ if k not in ('id', 'name'):
+ print(k + ": " + str(tcase[k]))
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 3d5a62ff7d31..f5d7a7851e21 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+include ../lib.mk
+
ifndef CROSS_COMPILE
CFLAGS := -std=gnu99
CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
@@ -6,16 +8,14 @@ ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
endif
-TEST_PROGS := vdso_test vdso_standalone_test_x86
+TEST_PROGS := $(OUTPUT)/vdso_test $(OUTPUT)/vdso_standalone_test_x86
all: $(TEST_PROGS)
-vdso_test: parse_vdso.c vdso_test.c
-vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
+$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
vdso_standalone_test_x86.c parse_vdso.c \
- -o vdso_standalone_test_x86
+ -o $@
-include ../lib.mk
-clean:
- rm -fr $(TEST_PROGS)
+EXTRA_CLEAN := $(TEST_PROGS)
endif
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 63c94d776e89..342c7bc9dc8c 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -11,3 +11,4 @@ mlock-intersect-test
mlock-random-test
virtual_address_range
gup_benchmark
+va_128TBswitch
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index d2561895a021..22d564673830 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -2,25 +2,33 @@
# SPDX-License-Identifier: GPL-2.0
#please run as root
-#we need 256M, below is the size in kB
-needmem=262144
mnt=./huge
exitcode=0
-#get pagesize and freepages from /proc/meminfo
+#get huge pagesize and freepages from /proc/meminfo
while read name size unit; do
if [ "$name" = "HugePages_Free:" ]; then
freepgs=$size
fi
if [ "$name" = "Hugepagesize:" ]; then
- pgsize=$size
+ hpgsize_KB=$size
fi
done < /proc/meminfo
+# Simple hugetlbfs tests have a hardcoded minimum requirement of
+# huge pages totaling 256MB (262144KB) in size. The userfaultfd
+# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take
+# both of these requirements into account and attempt to increase
+# the number of huge pages available.
+nr_cpus=$(nproc)
+hpgsize_MB=$((hpgsize_KB / 1024))
+half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
+needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+
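As a worked example (a hypothetical 8-CPU box with 2048 kB huge pages): hpgsize_MB = 2048 / 1024 = 2, half_ufd_size_MB = ((8 * 2 + 127) / 128) * 128 = 128 (the integer division rounds the 16 MB requirement up to the next 128 MB multiple), and needmem_KB = 128 * 2 * 1024 = 262144 kB, which matches the old hardcoded 256 MB floor.
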
#set proper nr_hugepages
-if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then
+if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
- needpgs=`expr $needmem / $pgsize`
+ needpgs=$((needmem_KB / hpgsize_KB))
tries=2
while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
lackpgs=$(( $needpgs - $freepgs ))
@@ -107,8 +115,9 @@ fi
echo "---------------------------"
echo "running userfaultfd_hugetlb"
echo "---------------------------"
-# 256MB total huge pages == 128MB src and 128MB dst
-./userfaultfd hugetlb 128 32 $mnt/ufd_test_file
+# Test requires source and destination huge pages. Size of source
+# (half_ufd_size_MB) is passed as argument to test.
+./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
if [ $? -ne 0 ]; then
echo "[FAIL]"
exitcode=1
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 10ca46df1449..d744991c0f4f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,16 +5,26 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
- check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
protection_keys test_vdso test_vsyscall
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32-bit support enabled even on 64-bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
-TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
@@ -23,10 +33,6 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
-UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-
define gen-target-rule-32
$(1) $(1)_32: $(OUTPUT)/$(1)_32
.PHONY: $(1) $(1)_32
@@ -40,12 +46,14 @@ endef
ifeq ($(CAN_BUILD_I386),1)
all: all_32
TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t))))
endif
ifeq ($(CAN_BUILD_X86_64),1)
all: all_64
TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t))))
endif
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index 361466a2eaef..ade443a88421 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -95,6 +95,10 @@ asm (
"int3\n\t"
"vmcode_int80:\n\t"
"int $0x80\n\t"
+ "vmcode_popf_hlt:\n\t"
+ "push %ax\n\t"
+ "popf\n\t"
+ "hlt\n\t"
"vmcode_umip:\n\t"
/* addressing via displacements */
"smsw (2052)\n\t"
@@ -124,8 +128,8 @@ asm (
extern unsigned char vmcode[], end_vmcode[];
extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
- vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_umip[],
- vmcode_umip_str[], vmcode_umip_sldt[];
+ vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_popf_hlt[],
+ vmcode_umip[], vmcode_umip_str[], vmcode_umip_sldt[];
/* Returns false if the test was skipped. */
static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
@@ -175,7 +179,7 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
(VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
printf("[OK]\tReturned correctly\n");
} else {
- printf("[FAIL]\tIncorrect return reason\n");
+ printf("[FAIL]\tIncorrect return reason (started at eip = 0x%lx, ended at eip = 0x%lx)\n", eip, v86->regs.eip);
nerrs++;
}
@@ -264,6 +268,9 @@ int main(void)
v86.regs.ds = load_addr / 16;
v86.regs.es = load_addr / 16;
+ /* Use the end of the page as our stack. */
+ v86.regs.esp = 4096;
+
assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
/* #BR -- should deliver SIG??? */
@@ -295,6 +302,23 @@ int main(void)
v86.regs.eflags &= ~X86_EFLAGS_IF;
do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+ /* POPF with VIP set but IF clear: should not trap */
+ v86.regs.eflags = X86_EFLAGS_VIP;
+ v86.regs.eax = 0;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP set and IF clear");
+
+ /* POPF with VIP set and IF set: should trap */
+ v86.regs.eflags = X86_EFLAGS_VIP;
+ v86.regs.eax = X86_EFLAGS_IF;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_STI, 0, "POPF with VIP and IF set");
+
+ /* POPF with VIP clear and IF set: should not trap */
+ v86.regs.eflags = 0;
+ v86.regs.eax = X86_EFLAGS_IF;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP clear and IF set");
+
+ v86.regs.eflags = 0;
+
/* INT3 -- should cause #BP */
do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
@@ -318,7 +342,7 @@ int main(void)
clearhandler(SIGSEGV);
/* Make sure nothing explodes if we fork. */
- if (fork() > 0)
+ if (fork() == 0)
return 0;
return (nerrs == 0 ? 0 : 1);
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
index ec0f6b45ce8b..9c0325e1ea68 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si)
return si->si_upper;
}
#else
+
+/*
+ * This deals with old versions of _sigfault in some distros:
+ *
+ * old _sigfault:
+ *	struct {
+ *		void *si_addr;
+ *	} _sigfault;
+ *
+ * new _sigfault:
+ *	struct {
+ *		void __user *_addr;
+ *		int _trapno;
+ *		short _addr_lsb;
+ *		union {
+ *			struct {
+ *				void __user *_lower;
+ *				void __user *_upper;
+ *			} _addr_bnd;
+ *			__u32 _pkey;
+ *		};
+ *	} _sigfault;
+ */
+
static inline void **__si_bounds_hack(siginfo_t *si)
{
void *sigfault = &si->_sifields._sigfault;
void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
- void **__si_lower = end_sigfault;
+ int *trapno = (int *)end_sigfault;
+ /* skip _trapno and _addr_lsb */
+ void **__si_lower = (void **)(trapno + 2);
return __si_lower;
}
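
A quick offset check of the hack above, assuming LP64 and the old single-pointer layout from the comment: end_sigfault points just past si_addr, so trapno lines up with _trapno in the new layout; trapno + 2 then advances 2 * sizeof(int) = 8 bytes, covering int _trapno (4 bytes) plus short _addr_lsb (2 bytes, padded to 4 so the pointer union stays 8-byte aligned), landing exactly on _lower. _upper follows one pointer later, which is what the reworked __si_bounds_upper() below reads.
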
@@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si)
static inline void *__si_bounds_upper(siginfo_t *si)
{
- return (*__si_bounds_hack(si)) + sizeof(void *);
+ return *(__si_bounds_hack(si) + 1);
}
#endif
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index bc1b0735bb50..f15aa5a76fe3 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -393,34 +393,6 @@ pid_t fork_lazy_child(void)
return forkret;
}
-void davecmp(void *_a, void *_b, int len)
-{
- int i;
- unsigned long *a = _a;
- unsigned long *b = _b;
-
- for (i = 0; i < len / sizeof(*a); i++) {
- if (a[i] == b[i])
- continue;
-
- dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]);
- }
-}
-
-void dumpit(char *f)
-{
- int fd = open(f, O_RDONLY);
- char buf[100];
- int nr_read;
-
- dprintf2("maps fd: %d\n", fd);
- do {
- nr_read = read(fd, &buf[0], sizeof(buf));
- write(1, buf, nr_read);
- } while (nr_read > 0);
- close(fd);
-}
-
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index a48da95c18fd..ddfdd635de16 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -119,7 +119,9 @@ static void check_result(void)
int main()
{
+#ifdef CAN_BUILD_32
int tmp;
+#endif
sethandler(SIGTRAP, sigtrap, 0);
@@ -139,12 +141,13 @@ int main()
: : "c" (post_nop) : "r11");
check_result();
#endif
-
+#ifdef CAN_BUILD_32
printf("[RUN]\tSet TF and check int80\n");
set_eflags(get_eflags() | X86_EFLAGS_TF);
asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
: INT80_CLOBBERS);
check_result();
+#endif
/*
* This test is particularly interesting if fast syscalls use
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index bf0d687c7db7..64f11c8d9b76 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp)
vdso_size += PAGE_SIZE;
}
+#ifdef __i386__
/* Glibc is likely to explode now - exit with raw syscall */
asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else /* __x86_64__ */
+ syscall(SYS_exit, ret);
+#endif
} else {
int status;
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 29973cde06d3..235259011704 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -26,20 +26,59 @@
# endif
#endif
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
int nerrs = 0;
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
#ifdef __x86_64__
-# define VSYS(x) (x)
+ FILE *maps;
+ char line[MAPS_LINE_LEN];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+ return NULL;
+
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
+ char r, x;
+ void *start, *end;
+ char name[MAPS_LINE_LEN];
+
+ /* sscanf() is safe here: name is as large as line, so %s cannot overflow */
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ /* assume entries are OK, as we test vDSO here not vsyscall */
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("Warning: failed to find vsyscall getcpu\n");
+ return NULL;
+ }
+ return (void *) (0xffffffffff600800);
#else
-# define VSYS(x) 0
+ return NULL;
#endif
+}
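
For illustration only, a rough Python analogue of the vsyscall_getcpu() scan above (Linux-only; not part of the patch):

    # Report whether the legacy [vsyscall] page is mapped in this process.
    with open('/proc/self/maps') as maps:
        found = any(line.split()[-1] == '[vsyscall]' for line in maps)
    print('vsyscall page mapped' if found else 'no vsyscall page')
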
-typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-
-const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
-getcpu_t vdso_getcpu;
-void fill_function_pointers()
+static void fill_function_pointers()
{
void *vdso = dlopen("linux-vdso.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
@@ -54,6 +93,8 @@ void fill_function_pointers()
vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
if (!vdso_getcpu)
printf("Warning: failed to find getcpu in vDSO\n");
+
+ vgetcpu = (getcpu_t) vsyscall_getcpu();
}
static long sys_getcpu(unsigned * cpu, unsigned * node,
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 7a744fa7b786..0b4f1cc2291c 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -33,6 +33,9 @@
# endif
#endif
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -98,7 +101,7 @@ static int init_vsys(void)
#ifdef __x86_64__
int nerrs = 0;
FILE *maps;
- char line[128];
+ char line[MAPS_LINE_LEN];
bool found = false;
maps = fopen("/proc/self/maps", "r");
@@ -108,10 +111,12 @@ static int init_vsys(void)
return 0;
}
- while (fgets(line, sizeof(line), maps)) {
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
char r, x;
void *start, *end;
- char name[128];
+ char name[MAPS_LINE_LEN];
+
+ /* sscanf() is safe here: name is as large as line, so %s cannot overflow */
if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
&start, &end, &r, &x, name) != 5)
continue;
@@ -445,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
num_vsyscall_traps++;
}
-static int test_native_vsyscall(void)
+static int test_emulation(void)
{
time_t tmp;
bool is_native;
@@ -453,7 +458,7 @@ static int test_native_vsyscall(void)
if (!vtime)
return 0;
- printf("[RUN]\tchecking for native vsyscall\n");
+ printf("[RUN]\tchecking that vsyscalls are emulated\n");
sethandler(SIGTRAP, sigtrap, 0);
set_eflags(get_eflags() | X86_EFLAGS_TF);
vtime(&tmp);
@@ -469,11 +474,12 @@ static int test_native_vsyscall(void)
*/
is_native = (num_vsyscall_traps > 1);
- printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+ printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "FAIL" : "OK"),
(is_native ? "native" : "emulated"),
(int)num_vsyscall_traps);
- return 0;
+ return is_native;
}
#endif
@@ -493,7 +499,7 @@ int main(int argc, char **argv)
nerrs += test_vsys_r();
#ifdef __x86_64__
- nerrs += test_native_vsyscall();
+ nerrs += test_emulation();
#endif
return nerrs ? 1 : 0;
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
index 4e6506078494..01d758d73b6d 100644
--- a/tools/usb/Makefile
+++ b/tools/usb/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for USB tools
-CC = $(CROSS_COMPILE)gcc
PTHREAD_LIBS = -lpthread
WARNINGS = -Wall -Wextra
CFLAGS = $(WARNINGS) -g -I../include
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
index be320b905ea7..20f6cf04377f 100644
--- a/tools/vm/Makefile
+++ b/tools/vm/Makefile
@@ -6,7 +6,6 @@ TARGETS=page-types slabinfo page_owner_sort
LIB_DIR = ../lib/api
LIBS = $(LIB_DIR)/libapi.a
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra -I../lib/
LDFLAGS = $(LIBS)
diff --git a/tools/wmi/Makefile b/tools/wmi/Makefile
index e664f1167388..e0e87239126b 100644
--- a/tools/wmi/Makefile
+++ b/tools/wmi/Makefile
@@ -2,7 +2,6 @@ PREFIX ?= /usr
SBINDIR ?= sbin
INSTALL ?= install
CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
-CC = $(CROSS_COMPILE)gcc
TARGET = dell-smbios-example
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 70268c0bec79..282389eb204f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -36,6 +36,8 @@ static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;
+static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+
static const struct kvm_irq_level default_ptimer_irq = {
.irq = 30,
.level = 1,
@@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
}
+static inline bool userspace_irqchip(struct kvm *kvm)
+{
+ return static_branch_unlikely(&userspace_irqchip_in_use) &&
+ unlikely(!irqchip_in_kernel(kvm));
+}
+
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
@@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
cancel_work_sync(work);
}
-static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
- /*
- * When using a userspace irqchip with the architected timers, we must
- * prevent continuously exiting from the guest, and therefore mask the
- * physical interrupt by disabling it on the host interrupt controller
- * when the virtual level is high, such that the guest can make
- * forward progress. Once we detect the output level being
- * de-asserted, we unmask the interrupt again so that we exit from the
- * guest when the timer fires.
- */
- if (vtimer->irq.level)
- disable_percpu_irq(host_vtimer_irq);
- else
- enable_percpu_irq(host_vtimer_irq, 0);
-}
-
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
if (kvm_timer_should_fire(vtimer))
kvm_timer_update_irq(vcpu, true, vtimer);
- if (static_branch_unlikely(&userspace_irqchip_in_use) &&
- unlikely(!irqchip_in_kernel(vcpu->kvm)))
- kvm_vtimer_update_mask_user(vcpu);
+ if (userspace_irqchip(vcpu->kvm) &&
+ !static_branch_unlikely(&has_gic_active_state))
+ disable_percpu_irq(host_vtimer_irq);
return IRQ_HANDLED;
}
@@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
timer_ctx->irq.level);
- if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
- likely(irqchip_in_kernel(vcpu->kvm))) {
+ if (!userspace_irqchip(vcpu->kvm)) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
timer_ctx->irq.level,
@@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
phys_timer_emulate(vcpu);
}
-static void __timer_snapshot_state(struct arch_timer_context *timer)
-{
- timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
- timer->cnt_cval = read_sysreg_el0(cntv_cval);
-}
-
static void vtimer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
if (!vtimer->loaded)
goto out;
- if (timer->enabled)
- __timer_snapshot_state(vtimer);
+ if (timer->enabled) {
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+ }
/* Disable the virtual timer */
write_sysreg_el0(0, cntv_ctl);
@@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff)
kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
}
-static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
+static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
+{
+ int r;
+ r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
+ WARN_ON(r);
+}
+
+static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
- int ret;
-
- phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
- ret = irq_set_irqchip_state(host_vtimer_irq,
- IRQCHIP_STATE_ACTIVE,
- phys_active);
- WARN_ON(ret);
+ if (irqchip_in_kernel(vcpu->kvm))
+ phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+ else
+ phys_active = vtimer->irq.level;
+ set_vtimer_irq_phys_active(vcpu, phys_active);
}
-static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
- kvm_vtimer_update_mask_user(vcpu);
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+ /*
+ * When using a userspace irqchip with the architected timers and a
+ * host interrupt controller that doesn't support an active state, we
+ * must still prevent continuously exiting from the guest, and
+ * therefore mask the physical interrupt by disabling it on the host
+ * interrupt controller when the virtual level is high, such that the
+ * guest can make forward progress. Once we detect the output level
+ * being de-asserted, we unmask the interrupt again so that we exit
+ * from the guest when the timer fires.
+ */
+ if (vtimer->irq.level)
+ disable_percpu_irq(host_vtimer_irq);
+ else
+ enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
if (unlikely(!timer->enabled))
return;
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
- kvm_timer_vcpu_load_user(vcpu);
+ if (static_branch_likely(&has_gic_active_state))
+ kvm_timer_vcpu_load_gic(vcpu);
else
- kvm_timer_vcpu_load_vgic(vcpu);
+ kvm_timer_vcpu_load_nogic(vcpu);
set_cntvoff(vtimer->cntvoff);
@@ -555,22 +559,29 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- __timer_snapshot_state(vtimer);
- if (!kvm_timer_should_fire(vtimer)) {
- kvm_timer_update_irq(vcpu, false, vtimer);
- kvm_vtimer_update_mask_user(vcpu);
- }
+ if (!kvm_timer_should_fire(vtimer)) {
+ kvm_timer_update_irq(vcpu, false, vtimer);
+ if (static_branch_likely(&has_gic_active_state))
+ set_vtimer_irq_phys_active(vcpu, false);
+ else
+ enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
}
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
- unmask_vtimer_irq_user(vcpu);
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ if (unlikely(!timer->enabled))
+ return;
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ unmask_vtimer_irq_user(vcpu);
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
@@ -584,6 +595,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
ptimer->cnt_ctl = 0;
kvm_timer_update_state(vcpu);
+ if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
+ kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);
+
return 0;
}
@@ -753,9 +767,11 @@ int kvm_timer_hyp_init(bool has_gic)
kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
goto out_free_irq;
}
+
+ static_branch_enable(&has_gic_active_state);
}
- kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
+ kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
"kvm/arm/timer:starting", kvm_timer_starting_cpu,
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 86941f6181bb..53572304843b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -384,14 +384,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- vcpu_load(vcpu);
-
if (vcpu->arch.power_off)
mp_state->mp_state = KVM_MP_STATE_STOPPED;
else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
- vcpu_put(vcpu);
return 0;
}
@@ -400,8 +397,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
- vcpu_load(vcpu);
-
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.power_off = false;
@@ -413,7 +408,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
ret = -EINVAL;
}
- vcpu_put(vcpu);
return ret;
}
@@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_device_attr attr;
long r;
- vcpu_load(vcpu);
-
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
struct kvm_vcpu_init init;
@@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
}
- vcpu_put(vcpu);
return r;
}
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index f5c3d6d7019e..b89ce5432214 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -215,7 +215,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
* are now visible to the system register interface.
*/
if (!cpu_if->vgic_sre) {
- dsb(st);
+ dsb(sy);
+ isb();
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ec62d1cccab7..b960acdd0c05 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1810,9 +1810,9 @@ int kvm_mmu_init(void)
*/
BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
- kvm_info("IDMAP page: %lx\n", hyp_idmap_start);
- kvm_info("HYP VA range: %lx:%lx\n",
- kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
+ kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
+ kvm_debug("HYP VA range: %lx:%lx\n",
+ kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
hyp_idmap_start < kern_hyp_va(~0UL) &&
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 83d82bd7dc4e..dbe99d635c80 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -113,9 +113,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
/* Loop over all IRQs affected by this read */
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ unsigned long flags;
+ spin_lock_irqsave(&irq->irq_lock, flags);
if (irq_is_pending(irq))
value |= (1U << i);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index c32d7b93ffd1..29556f71b691 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void)
vgic_v2_write_lr(i, 0);
}
+void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+
+ cpuif->vgic_hcr |= GICH_HCR_NPIE;
+}
+
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
int lr;
unsigned long flags;
- cpuif->vgic_hcr &= ~GICH_HCR_UIE;
+ cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE);
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr];
@@ -410,7 +417,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
kvm_vgic_global_state.type = VGIC_V2;
kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
- kvm_info("vgic-v2@%llx\n", info->vctrl.start);
+ kvm_debug("vgic-v2@%llx\n", info->vctrl.start);
return 0;
out:
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 6b329414e57a..0ff2006f3781 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -26,6 +26,13 @@ static bool group1_trap;
static bool common_trap;
static bool gicv4_enable;
+void vgic_v3_set_npie(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ cpuif->vgic_hcr |= ICH_HCR_NPIE;
+}
+
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -47,7 +54,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
int lr;
unsigned long flags;
- cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+ cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE);
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index c7c5ef190afa..8201899126f6 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -495,6 +495,32 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
return ret;
}
+/**
+ * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
+ * @vcpu: The VCPU pointer
+ * @vintid: The INTID of the interrupt
+ *
+ * Reset the active and pending states of a mapped interrupt. Kernel
+ * subsystems injecting mapped interrupts should reset their interrupt lines
+ * when we are doing a reset of the VM.
+ */
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+ unsigned long flags;
+
+ if (!irq->hw)
+ goto out;
+
+ spin_lock_irqsave(&irq->irq_lock, flags);
+ irq->active = false;
+ irq->pending_latch = false;
+ irq->line_level = false;
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
+out:
+ vgic_put_irq(vcpu->kvm, irq);
+}
+
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
struct vgic_irq *irq;
@@ -684,22 +710,37 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
vgic_v3_set_underflow(vcpu);
}
+static inline void vgic_set_npie(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_set_npie(vcpu);
+ else
+ vgic_v3_set_npie(vcpu);
+}
+
/* Requires the ap_list_lock to be held. */
-static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
+static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
+ bool *multi_sgi)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
int count = 0;
+ *multi_sgi = false;
+
DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
spin_lock(&irq->irq_lock);
/* GICv2 SGIs can count for more than one... */
- if (vgic_irq_is_sgi(irq->intid) && irq->source)
- count += hweight8(irq->source);
- else
+ if (vgic_irq_is_sgi(irq->intid) && irq->source) {
+ int w = hweight8(irq->source);
+
+ count += w;
+ *multi_sgi |= (w > 1);
+ } else {
count++;
+ }
spin_unlock(&irq->irq_lock);
}
return count;
@@ -710,28 +751,43 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
- int count = 0;
+ int count;
+ bool npie = false;
+ bool multi_sgi;
+ u8 prio = 0xff;
DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
- if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr)
+ count = compute_ap_list_depth(vcpu, &multi_sgi);
+ if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
vgic_sort_ap_list(vcpu);
+ count = 0;
+
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
spin_lock(&irq->irq_lock);
- if (unlikely(vgic_target_oracle(irq) != vcpu))
- goto next;
-
/*
- * If we get an SGI with multiple sources, try to get
- * them in all at once.
+ * If we have multi-SGIs in the pipeline, we need to
+ * guarantee that they are all seen before any IRQ of
+ * lower priority. In that case, we need to filter out
+ * these interrupts by exiting early. This is easy as
+ * the AP list has been sorted already.
*/
- do {
+ if (multi_sgi && irq->priority > prio) {
+ spin_unlock(&irq->irq_lock);
+ break;
+ }
+
+ if (likely(vgic_target_oracle(irq) == vcpu)) {
vgic_populate_lr(vcpu, irq, count++);
- } while (irq->source && count < kvm_vgic_global_state.nr_lr);
-next:
+ if (irq->source) {
+ npie = true;
+ prio = irq->priority;
+ }
+ }
+
spin_unlock(&irq->irq_lock);
if (count == kvm_vgic_global_state.nr_lr) {
@@ -742,6 +798,9 @@ next:
}
}
+ if (npie)
+ vgic_set_npie(vcpu);
+
vcpu->arch.vgic_cpu.used_lrs = count;
/* Nuke remaining LRs */
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 12c37b89f7a3..f5b8519e5546 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -96,6 +96,7 @@
/* we only support 64 kB translation table page size */
#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16)
+/* Requires the irq_lock to be held by the caller. */
static inline bool irq_is_pending(struct vgic_irq *irq)
{
if (irq->config == VGIC_CONFIG_EDGE)
@@ -159,6 +160,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
@@ -188,6 +190,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_enable(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4501e658e8d6..65dea3ffef68 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -969,8 +969,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* Check for overlaps */
r = -EEXIST;
kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
- if ((slot->id >= KVM_USER_MEM_SLOTS) ||
- (slot->id == id))
+ if (slot->id == id)
continue;
if (!((base_gfn + npages <= slot->base_gfn) ||
(base_gfn >= slot->base_gfn + slot->npages)))