diff options
581 files changed, 10175 insertions, 4775 deletions
diff --git a/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml b/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml index 3248595dc93c..f04870d84542 100644 --- a/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml +++ b/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml @@ -85,4 +85,5 @@ examples: <&pd IMX_SC_R_DSP_RAM>; mbox-names = "txdb0", "txdb1", "rxdb0", "rxdb1"; mboxes = <&lsio_mu13 2 0>, <&lsio_mu13 2 1>, <&lsio_mu13 3 0>, <&lsio_mu13 3 1>; + memory-region = <&dsp_reserved>; }; diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml index 676ec42e1438..567a33a83dce 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml @@ -43,13 +43,9 @@ properties: dvdd-supply: description: DVdd voltage supply - items: - - const: dvdd avdd-supply: description: AVdd voltage supply - items: - - const: avdd adi,rejection-60-Hz-enable: description: | @@ -99,6 +95,9 @@ required: examples: - | spi0 { + #address-cells = <1>; + #size-cells = <0>; + adc@0 { compatible = "adi,ad7192"; reg = <0>; diff --git a/Documentation/devicetree/bindings/media/rc.yaml b/Documentation/devicetree/bindings/media/rc.yaml index 3d5c154fd230..9054555e6608 100644 --- a/Documentation/devicetree/bindings/media/rc.yaml +++ b/Documentation/devicetree/bindings/media/rc.yaml @@ -73,7 +73,6 @@ properties: - rc-genius-tvgo-a11mce - rc-gotview7135 - rc-hauppauge - - rc-hauppauge - rc-hisi-poplar - rc-hisi-tv-demo - rc-imon-mce diff --git a/Documentation/devicetree/bindings/net/ftgmac100.txt b/Documentation/devicetree/bindings/net/ftgmac100.txt index 72e7aaf7242e..f878c1103463 100644 --- a/Documentation/devicetree/bindings/net/ftgmac100.txt +++ b/Documentation/devicetree/bindings/net/ftgmac100.txt @@ -9,6 +9,7 @@ Required properties: - "aspeed,ast2400-mac" - "aspeed,ast2500-mac" + - "aspeed,ast2600-mac" - reg: Address and length of the register set for the device - interrupts: Should contain ethernet controller interrupt @@ -23,6 +24,13 @@ Optional properties: - no-hw-checksum: Used to disable HW checksum support. Here for backward compatibility as the driver now should have correct defaults based on the SoC. +- clocks: In accordance with the generic clock bindings. Must describe the MAC + IP clock, and optionally an RMII RCLK gate for the AST2500/AST2600. The + required MAC clock must be the first cell. +- clock-names: + + - "MACCLK": The MAC IP clock + - "RCLK": Clock gate for the RMII RCLK Example: diff --git a/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml index 8a56a8526cef..a97482179cf5 100644 --- a/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml @@ -37,7 +37,7 @@ properties: - description: exclusive PHY reset line - description: shared reset line between the PCIe PHY and PCIe controller - resets-names: + reset-names: items: - const: phy - const: pcie diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 6ba9d5365ff3..b89c88168d6a 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -954,11 +954,6 @@ When kbuild executes, the following steps are followed (roughly): From commandline LDFLAGS_MODULE shall be used (see kbuild.txt). - KBUILD_ARFLAGS Options for $(AR) when creating archives - - $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic - mode) if this option is supported by $(AR). - KBUILD_LDS The linker script with full path. Assigned by the top-level Makefile. diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index d2ae799237fd..774a998dcf37 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -498,10 +498,11 @@ build. will be written containing all exported symbols that were not defined in the kernel. ---- 6.3 Symbols From Another External Module +6.3 Symbols From Another External Module +---------------------------------------- Sometimes, an external module uses exported symbols from - another external module. kbuild needs to have full knowledge of + another external module. Kbuild needs to have full knowledge of all symbols to avoid spitting out warnings about undefined symbols. Three solutions exist for this situation. @@ -521,7 +522,7 @@ build. The top-level kbuild file would then look like:: #./Kbuild (or ./Makefile): - obj-y := foo/ bar/ + obj-m := foo/ bar/ And executing:: diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst index ab92e98c89c8..503393854e2e 100644 --- a/Documentation/kbuild/reproducible-builds.rst +++ b/Documentation/kbuild/reproducible-builds.rst @@ -16,16 +16,21 @@ the kernel may be unreproducible, and how to avoid them. Timestamps ---------- -The kernel embeds a timestamp in two places: +The kernel embeds timestamps in three places: * The version string exposed by ``uname()`` and included in ``/proc/version`` * File timestamps in the embedded initramfs -By default the timestamp is the current time. This must be overridden -using the `KBUILD_BUILD_TIMESTAMP`_ variable. If you are building -from a git commit, you could use its commit date. +* If enabled via ``CONFIG_IKHEADERS``, file timestamps of kernel + headers embedded in the kernel or respective module, + exposed via ``/sys/kernel/kheaders.tar.xz`` + +By default the timestamp is the current time and in the case of +``kheaders`` the various files' modification times. This must +be overridden using the `KBUILD_BUILD_TIMESTAMP`_ variable. +If you are building from a git commit, you could use its commit date. The kernel does *not* use the ``__DATE__`` and ``__TIME__`` macros, and enables warnings if they are used. If you incorporate external diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst index f51f92571e39..c1f7f75e5fd9 100644 --- a/Documentation/networking/device_drivers/index.rst +++ b/Documentation/networking/device_drivers/index.rst @@ -23,6 +23,7 @@ Contents: intel/ice google/gve mellanox/mlx5 + netronome/nfp pensando/ionic .. only:: subproject and html diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst index ce7e7a044e08..dc60b13fcd09 100644 --- a/Documentation/networking/j1939.rst +++ b/Documentation/networking/j1939.rst @@ -272,7 +272,7 @@ supported flags are: * MSG_DONTWAIT, i.e. non-blocking operation. recvmsg(2) -^^^^^^^^^ +^^^^^^^^^^ In most cases recvmsg(2) is needed if you want to extract more information than recvfrom(2) can provide. For example package priority and timestamp. The diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index 5bcbf75e2025..8cb2cd4e2a80 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -213,3 +213,29 @@ A patchset to OpenSSL to use ktls as the record layer is of calling send directly after a handshake using gnutls. Since it doesn't implement a full record layer, control messages are not supported. + +Statistics +========== + +TLS implementation exposes the following per-namespace statistics +(``/proc/net/tls_stat``): + +- ``TlsCurrTxSw``, ``TlsCurrRxSw`` - + number of TX and RX sessions currently installed where host handles + cryptography + +- ``TlsCurrTxDevice``, ``TlsCurrRxDevice`` - + number of TX and RX sessions currently installed where NIC handles + cryptography + +- ``TlsTxSw``, ``TlsRxSw`` - + number of TX and RX sessions opened with host cryptography + +- ``TlsTxDevice``, ``TlsRxDevice`` - + number of TX and RX sessions opened with NIC cryptography + +- ``TlsDecryptError`` - + record decryption failed (e.g. due to incorrect authentication tag) + +- ``TlsDeviceRxResync`` - + number of RX resyncs sent to NICs handling cryptography diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 402636356fbe..a3c3349046c4 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -143,6 +143,20 @@ via their employer, they cannot enter individual non-disclosure agreements in their role as Linux kernel developers. They will, however, agree to adhere to this documented process and the Memorandum of Understanding. +The disclosing party should provide a list of contacts for all other +entities who have already been, or should be, informed about the issue. +This serves several purposes: + + - The list of disclosed entities allows communication accross the + industry, e.g. other OS vendors, HW vendors, etc. + + - The disclosed entities can be contacted to name experts who should + participate in the mitigation development. + + - If an expert which is required to handle an issue is employed by an + listed entity or member of an listed entity, then the response teams can + request the disclosure of that expert from that entity. This ensures + that the expert is also part of the entity's response team. Disclosure """""""""" @@ -158,10 +172,7 @@ Mitigation development """""""""""""""""""""" The initial response team sets up an encrypted mailing-list or repurposes -an existing one if appropriate. The disclosing party should provide a list -of contacts for all other parties who have already been, or should be, -informed about the issue. The response team contacts these parties so they -can name experts who should be subscribed to the mailing-list. +an existing one if appropriate. Using a mailing-list is close to the normal Linux development process and has been successfully used in developing mitigations for various hardware @@ -175,9 +186,24 @@ development branch against the mainline kernel and backport branches for stable kernel versions as necessary. The initial response team will identify further experts from the Linux -kernel developer community as needed and inform the disclosing party about -their participation. Bringing in experts can happen at any time of the -development process and often needs to be handled in a timely manner. +kernel developer community as needed. Bringing in experts can happen at any +time of the development process and needs to be handled in a timely manner. + +If an expert is employed by or member of an entity on the disclosure list +provided by the disclosing party, then participation will be requested from +the relevant entity. + +If not, then the disclosing party will be informed about the experts +participation. The experts are covered by the Memorandum of Understanding +and the disclosing party is requested to acknowledge the participation. In +case that the disclosing party has a compelling reason to object, then this +objection has to be raised within five work days and resolved with the +incident team immediately. If the disclosing party does not react within +five work days this is taken as silent acknowledgement. + +After acknowledgement or resolution of an objection the expert is disclosed +by the incident team and brought into the development process. + Coordinated release """"""""""""""""""" @@ -216,7 +242,7 @@ an involved disclosed party. The current ambassadors list: ARM AMD IBM - Intel + Intel Tony Luck <[email protected]> Qualcomm Trilok Soni <[email protected]> Microsoft Sasha Levin <[email protected]> diff --git a/MAINTAINERS b/MAINTAINERS index 496e8f156925..8824f61cd2c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6112,7 +6112,10 @@ M: Gao Xiang <[email protected]> M: Chao Yu <[email protected]> S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git +F: Documentation/filesystems/erofs.txt F: fs/erofs/ +F: include/trace/events/erofs.h ERRSEQ ERROR TRACKING INFRASTRUCTURE M: Jeff Layton <[email protected]> @@ -9075,6 +9078,7 @@ F: security/keys/ KGDB / KDB /debug_core M: Jason Wessel <[email protected]> M: Daniel Thompson <[email protected]> +R: Douglas Anderson <[email protected]> W: http://kgdb.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/kgdb.git @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 -PATCHLEVEL = 3 +PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Bobtail Squid # *DOCUMENTATION* @@ -206,24 +206,8 @@ ifndef KBUILD_CHECKSRC KBUILD_CHECKSRC = 0 endif -# Use make M=dir to specify directory of external module to build -# Old syntax make ... SUBDIRS=$PWD is still supported -# Setting the environment variable KBUILD_EXTMOD take precedence -ifdef SUBDIRS - $(warning ================= WARNING ================) - $(warning 'SUBDIRS' will be removed after Linux 5.3) - $(warning ) - $(warning If you are building an individual subdirectory) - $(warning in the kernel tree, you can do like this:) - $(warning $$ make path/to/dir/you/want/to/build/) - $(warning (Do not forget the trailing slash)) - $(warning ) - $(warning If you are building an external module,) - $(warning Please use 'M=' or 'KBUILD_EXTMOD' instead) - $(warning ==========================================) - KBUILD_EXTMOD ?= $(SUBDIRS) -endif - +# Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the +# directory of external module to build. Setting M= takes precedence. ifeq ("$(origin M)", "command line") KBUILD_EXTMOD := $(M) endif @@ -498,7 +482,6 @@ export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL -export KBUILD_ARFLAGS # Files to ignore in find ... statements @@ -914,9 +897,6 @@ ifdef CONFIG_RETPOLINE KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif -# use the deterministic mode of AR if available -KBUILD_ARFLAGS := $(call ar-option,D) - include scripts/Makefile.kasan include scripts/Makefile.extrawarn include scripts/Makefile.ubsan diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index ebfe28c2f544..a1fd3e63e86e 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -124,10 +124,11 @@ }; lcd0: display@0 { - compatible = "panel-dpi"; + /* This isn't the exact LCD, but the timings meet spec */ + /* To make it work, set CONFIG_OMAP2_DSS_MIN_FCK_PER_PCK=4 */ + compatible = "newhaven,nhd-4.3-480272ef-atxl"; label = "15"; - status = "okay"; - pinctrl-names = "default"; + backlight = <&bl>; enable-gpios = <&gpio6 16 GPIO_ACTIVE_HIGH>; /* gpio176, lcd INI */ vcc-supply = <&vdd_io_reg>; @@ -136,22 +137,6 @@ remote-endpoint = <&dpi_out>; }; }; - - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <3>; - hback-porch = <2>; - hsync-len = <42>; - vback-porch = <3>; - vfront-porch = <4>; - vsync-len = <11>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; }; bl: backlight { diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index 5563ee54c960..b56524cc7fe2 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -228,6 +228,20 @@ >; }; + i2c2_pins: pinmux_i2c2_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT | MUX_MODE0) /* i2c2_scl */ + OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT | MUX_MODE0) /* i2c2_sda */ + >; + }; + + i2c3_pins: pinmux_i2c3_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT | MUX_MODE0) /* i2c3_scl */ + OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT | MUX_MODE0) /* i2c3_sda */ + >; + }; + tsc2004_pins: pinmux_tsc2004_pins { pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE4) /* mcbsp4_dr.gpio_153 */ @@ -249,18 +263,6 @@ OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */ >; }; - i2c2_pins: pinmux_i2c2_pins { - pinctrl-single,pins = < - OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT | MUX_MODE0) /* i2c2_scl */ - OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT | MUX_MODE0) /* i2c2_sda */ - >; - }; - i2c3_pins: pinmux_i2c3_pins { - pinctrl-single,pins = < - OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT | MUX_MODE0) /* i2c3_scl */ - OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT | MUX_MODE0) /* i2c3_sda */ - >; - }; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi index 642e809e757a..449cc7616da6 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi @@ -108,7 +108,6 @@ &dss { status = "ok"; vdds_dsi-supply = <&vpll2>; - vdda_video-supply = <&video_reg>; pinctrl-names = "default"; pinctrl-0 = <&dss_dpi_pins1>; port { @@ -124,44 +123,20 @@ display0 = &lcd0; }; - video_reg: video_reg { - pinctrl-names = "default"; - pinctrl-0 = <&panel_pwr_pins>; - compatible = "regulator-fixed"; - regulator-name = "fixed-supply"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - gpio = <&gpio5 27 GPIO_ACTIVE_HIGH>; /* gpio155, lcd INI */ - }; - lcd0: display { - compatible = "panel-dpi"; + /* This isn't the exact LCD, but the timings meet spec */ + /* To make it work, set CONFIG_OMAP2_DSS_MIN_FCK_PER_PCK=4 */ + compatible = "newhaven,nhd-4.3-480272ef-atxl"; label = "15"; - status = "okay"; - /* default-on; */ pinctrl-names = "default"; - + pinctrl-0 = <&panel_pwr_pins>; + backlight = <&bl>; + enable-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; }; }; - - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <3>; - hback-porch = <2>; - hsync-len = <42>; - vback-porch = <3>; - vfront-porch = <4>; - vsync-len = <11>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; }; bl: backlight { diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index c7bf9c493646..64eb896907bf 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -363,6 +363,7 @@ CONFIG_DRM_OMAP_PANEL_TPO_TD028TTEC1=m CONFIG_DRM_OMAP_PANEL_TPO_TD043MTEA1=m CONFIG_DRM_OMAP_PANEL_NEC_NL8048HL11=m CONFIG_DRM_TILCDC=m +CONFIG_DRM_PANEL_SIMPLE=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h deleted file mode 100644 index ec154e719b11..000000000000 --- a/arch/arm/include/asm/xen/xen-ops.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_XEN_OPS_H -#define _ASM_XEN_OPS_H - -void xen_efi_runtime_setup(void); - -#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm/mach-aspeed/Kconfig b/arch/arm/mach-aspeed/Kconfig index 56007b0b6120..e8d6e9957d65 100644 --- a/arch/arm/mach-aspeed/Kconfig +++ b/arch/arm/mach-aspeed/Kconfig @@ -26,7 +26,6 @@ config MACH_ASPEED_G4 config MACH_ASPEED_G5 bool "Aspeed SoC 5th Generation" depends on ARCH_MULTI_V6 - select CPU_V6 select PINCTRL_ASPEED_G5 select FTTMR010_TIMER help diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 6c6f8fce854e..d942a3357090 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -491,11 +491,11 @@ static int ti_sysc_clkdm_init(struct device *dev, struct clk *fck, struct clk *ick, struct ti_sysc_cookie *cookie) { - if (fck) + if (!IS_ERR(fck)) cookie->clkdm = ti_sysc_find_one_clockdomain(fck); if (cookie->clkdm) return 0; - if (ick) + if (!IS_ERR(ick)) cookie->clkdm = ti_sysc_find_one_clockdomain(ick); if (cookie->clkdm) return 0; diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 7ed28982c4c3..c32d04713ba0 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o -obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c deleted file mode 100644 index d687a73044bf..000000000000 --- a/arch/arm/xen/efi.c +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2015, Linaro Limited, Shannon Zhao - */ - -#include <linux/efi.h> -#include <xen/xen-ops.h> -#include <asm/xen/xen-ops.h> - -/* Set XEN EFI runtime services function pointers. Other fields of struct efi, - * e.g. efi.systab, will be set like normal EFI. - */ -void __init xen_efi_runtime_setup(void) -{ - efi.get_time = xen_efi_get_time; - efi.set_time = xen_efi_set_time; - efi.get_wakeup_time = xen_efi_get_wakeup_time; - efi.set_wakeup_time = xen_efi_set_wakeup_time; - efi.get_variable = xen_efi_get_variable; - efi.get_next_variable = xen_efi_get_next_variable; - efi.set_variable = xen_efi_set_variable; - efi.query_variable_info = xen_efi_query_variable_info; - efi.update_capsule = xen_efi_update_capsule; - efi.query_capsule_caps = xen_efi_query_capsule_caps; - efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; - efi.reset_system = xen_efi_reset_system; -} -EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 1e57692552d9..dd6804a64f1a 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -15,7 +15,6 @@ #include <xen/xen-ops.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> -#include <asm/xen/xen-ops.h> #include <asm/system_misc.h> #include <asm/efi.h> #include <linux/interrupt.h> @@ -437,7 +436,7 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); -EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); +EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op); diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 2b2c208408bb..38fa917c8585 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -28,7 +28,10 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order) for_each_memblock(memory, reg) { if (reg->base < (phys_addr_t)0xffffffff) { - flags |= __GFP_DMA; + if (IS_ENABLED(CONFIG_ZONE_DMA32)) + flags |= __GFP_DMA32; + else + flags |= __GFP_DMA; break; } } diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 86825aa20852..97f21cc66657 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -47,30 +47,6 @@ #define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) #define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) -/** - * hyp_alternate_select - Generates patchable code sequences that are - * used to switch between two implementations of a function, depending - * on the availability of a feature. - * - * @fname: a symbol name that will be defined as a function returning a - * function pointer whose type will match @orig and @alt - * @orig: A pointer to the default function, as returned by @fname when - * @cond doesn't hold - * @alt: A pointer to the alternate function, as returned by @fname - * when @cond holds - * @cond: a CPU feature (as described in asm/cpufeature.h) - */ -#define hyp_alternate_select(fname, orig, alt, cond) \ -typeof(orig) * __hyp_text fname(void) \ -{ \ - typeof(alt) *val = orig; \ - asm volatile(ALTERNATIVE("nop \n", \ - "mov %0, %1 \n", \ - cond) \ - : "+r" (val) : "r" (alt)); \ - return val; \ -} - int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h deleted file mode 100644 index e6e784051932..000000000000 --- a/arch/arm64/include/asm/xen/xen-ops.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_XEN_OPS_H -#define _ASM_XEN_OPS_H - -void xen_efi_runtime_setup(void); - -#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index bd978ad71936..3d3815020e36 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -229,20 +229,6 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) } } -static bool __hyp_text __true_value(void) -{ - return true; -} - -static bool __hyp_text __false_value(void) -{ - return false; -} - -static hyp_alternate_select(__check_arm_834220, - __false_value, __true_value, - ARM64_WORKAROUND_834220); - static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) { u64 par, tmp; @@ -298,7 +284,8 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) * resolve the IPA using the AT instruction. */ if (!(esr & ESR_ELx_S1PTW) && - (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + (cpus_have_const_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; } else { diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index c466060b76d6..eb0efc5557f3 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -67,10 +67,14 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, isb(); } -static hyp_alternate_select(__tlb_switch_to_guest, - __tlb_switch_to_guest_nvhe, - __tlb_switch_to_guest_vhe, - ARM64_HAS_VIRT_HOST_EXTN); +static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (has_vhe()) + __tlb_switch_to_guest_vhe(kvm, cxt); + else + __tlb_switch_to_guest_nvhe(kvm, cxt); +} static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, struct tlb_inv_context *cxt) @@ -98,10 +102,14 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, write_sysreg(0, vttbr_el2); } -static hyp_alternate_select(__tlb_switch_to_host, - __tlb_switch_to_host_nvhe, - __tlb_switch_to_host_vhe, - ARM64_HAS_VIRT_HOST_EXTN); +static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (has_vhe()) + __tlb_switch_to_host_vhe(kvm, cxt); + else + __tlb_switch_to_host_nvhe(kvm, cxt); +} void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { @@ -111,7 +119,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); /* * We could do so much better if we had the VA as well. @@ -154,7 +162,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) if (!has_vhe() && icache_is_vpipt()) __flush_icache_all(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -165,13 +173,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -180,13 +188,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalle1); dsb(nsh); isb(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_flush_vm_context(void) diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index a4fc65f3928d..b66215e8658e 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o -obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o) diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c index 27ef5b2c43ab..cb2a0d94a144 100644 --- a/arch/csky/abiv1/alignment.c +++ b/arch/csky/abiv1/alignment.c @@ -5,8 +5,10 @@ #include <linux/uaccess.h> #include <linux/ptrace.h> -static int align_enable = 1; -static int align_count; +static int align_kern_enable = 1; +static int align_usr_enable = 1; +static int align_kern_count = 0; +static int align_usr_count = 0; static inline uint32_t get_ptreg(struct pt_regs *regs, uint32_t rx) { @@ -32,9 +34,6 @@ static int ldb_asm(uint32_t addr, uint32_t *valp) uint32_t val; int err; - if (!access_ok((void *)addr, 1)) - return 1; - asm volatile ( "movi %0, 0\n" "1:\n" @@ -67,9 +66,6 @@ static int stb_asm(uint32_t addr, uint32_t val) { int err; - if (!access_ok((void *)addr, 1)) - return 1; - asm volatile ( "movi %0, 0\n" "1:\n" @@ -203,8 +199,6 @@ static int stw_c(struct pt_regs *regs, uint32_t rz, uint32_t addr) if (stb_asm(addr, byte3)) return 1; - align_count++; - return 0; } @@ -226,7 +220,14 @@ void csky_alignment(struct pt_regs *regs) uint32_t addr = 0; if (!user_mode(regs)) + goto kernel_area; + + if (!align_usr_enable) { + pr_err("%s user disabled.\n", __func__); goto bad_area; + } + + align_usr_count++; ret = get_user(tmp, (uint16_t *)instruction_pointer(regs)); if (ret) { @@ -234,6 +235,19 @@ void csky_alignment(struct pt_regs *regs) goto bad_area; } + goto good_area; + +kernel_area: + if (!align_kern_enable) { + pr_err("%s kernel disabled.\n", __func__); + goto bad_area; + } + + align_kern_count++; + + tmp = *(uint16_t *)instruction_pointer(regs); + +good_area: opcode = (uint32_t)tmp; rx = opcode & 0xf; @@ -286,18 +300,32 @@ bad_area: force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); } -static struct ctl_table alignment_tbl[4] = { +static struct ctl_table alignment_tbl[5] = { + { + .procname = "kernel_enable", + .data = &align_kern_enable, + .maxlen = sizeof(align_kern_enable), + .mode = 0666, + .proc_handler = &proc_dointvec + }, + { + .procname = "user_enable", + .data = &align_usr_enable, + .maxlen = sizeof(align_usr_enable), + .mode = 0666, + .proc_handler = &proc_dointvec + }, { - .procname = "enable", - .data = &align_enable, - .maxlen = sizeof(align_enable), + .procname = "kernel_count", + .data = &align_kern_count, + .maxlen = sizeof(align_kern_count), .mode = 0666, .proc_handler = &proc_dointvec }, { - .procname = "count", - .data = &align_count, - .maxlen = sizeof(align_count), + .procname = "user_count", + .data = &align_usr_count, + .maxlen = sizeof(align_usr_count), .mode = 0666, .proc_handler = &proc_dointvec }, diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c index 10af8b6fe322..9f1fe80cc847 100644 --- a/arch/csky/abiv1/cacheflush.c +++ b/arch/csky/abiv1/cacheflush.c @@ -11,42 +11,66 @@ #include <asm/cacheflush.h> #include <asm/cachectl.h> +#define PG_dcache_clean PG_arch_1 + void flush_dcache_page(struct page *page) { - struct address_space *mapping = page_mapping(page); - unsigned long addr; + struct address_space *mapping; - if (mapping && !mapping_mapped(mapping)) { - set_bit(PG_arch_1, &(page)->flags); + if (page == ZERO_PAGE(0)) return; - } - /* - * We could delay the flush for the !page_mapping case too. But that - * case is for exec env/arg pages and those are %99 certainly going to - * get faulted into the tlb (and thus flushed) anyways. - */ - addr = (unsigned long) page_address(page); - dcache_wb_range(addr, addr + PAGE_SIZE); + mapping = page_mapping_file(page); + + if (mapping && !page_mapcount(page)) + clear_bit(PG_dcache_clean, &page->flags); + else { + dcache_wbinv_all(); + if (mapping) + icache_inv_all(); + set_bit(PG_dcache_clean, &page->flags); + } } +EXPORT_SYMBOL(flush_dcache_page); -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, - pte_t *pte) +void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) { - unsigned long addr; + unsigned long pfn = pte_pfn(*ptep); struct page *page; - unsigned long pfn; - pfn = pte_pfn(*pte); - if (unlikely(!pfn_valid(pfn))) + if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); - addr = (unsigned long) page_address(page); + if (page == ZERO_PAGE(0)) + return; + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + dcache_wbinv_all(); - if (vma->vm_flags & VM_EXEC || - pages_do_alias(addr, address & PAGE_MASK)) - cache_wbinv_all(); + if (page_mapping_file(page)) { + if (vma->vm_flags & VM_EXEC) + icache_inv_all(); + } +} + +void flush_kernel_dcache_page(struct page *page) +{ + struct address_space *mapping; + + mapping = page_mapping_file(page); + + if (!mapping || mapping_mapped(mapping)) + dcache_wbinv_all(); +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + dcache_wbinv_all(); - clear_bit(PG_arch_1, &(page)->flags); + if (vma->vm_flags & VM_EXEC) + icache_inv_all(); } diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 5f663aef9b1b..79ef9e8c1afd 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -4,46 +4,63 @@ #ifndef __ABI_CSKY_CACHEFLUSH_H #define __ABI_CSKY_CACHEFLUSH_H -#include <linux/compiler.h> +#include <linux/mm.h> #include <asm/string.h> #include <asm/cache.h> #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -#define flush_cache_mm(mm) cache_wbinv_all() +#define flush_cache_mm(mm) dcache_wbinv_all() #define flush_cache_page(vma, page, pfn) cache_wbinv_all() #define flush_cache_dup_mm(mm) cache_wbinv_all() +#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +extern void flush_kernel_dcache_page(struct page *); + +#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) +#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) + +static inline void flush_kernel_vmap_range(void *addr, int size) +{ + dcache_wbinv_all(); +} +static inline void invalidate_kernel_vmap_range(void *addr, int size) +{ + dcache_wbinv_all(); +} + +#define ARCH_HAS_FLUSH_ANON_PAGE +static inline void flush_anon_page(struct vm_area_struct *vma, + struct page *page, unsigned long vmaddr) +{ + if (PageAnon(page)) + cache_wbinv_all(); +} + /* * if (current_mm != vma->mm) cache_wbinv_range(start, end) will be broken. * Use cache_wbinv_all() here and need to be improved in future. */ -#define flush_cache_range(vma, start, end) cache_wbinv_all() -#define flush_cache_vmap(start, end) cache_wbinv_range(start, end) -#define flush_cache_vunmap(start, end) cache_wbinv_range(start, end) +extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +#define flush_cache_vmap(start, end) cache_wbinv_all() +#define flush_cache_vunmap(start, end) cache_wbinv_all() -#define flush_icache_page(vma, page) cache_wbinv_all() +#define flush_icache_page(vma, page) do {} while (0); #define flush_icache_range(start, end) cache_wbinv_range(start, end) -#define flush_icache_user_range(vma, pg, adr, len) \ - cache_wbinv_range(adr, adr + len) +#define flush_icache_user_range(vma,page,addr,len) \ + flush_dcache_page(page) #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ do { \ - cache_wbinv_all(); \ memcpy(dst, src, len); \ - cache_wbinv_all(); \ } while (0) #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ - cache_wbinv_all(); \ memcpy(dst, src, len); \ cache_wbinv_all(); \ } while (0) -#define flush_dcache_mmap_lock(mapping) do {} while (0) -#define flush_dcache_mmap_unlock(mapping) do {} while (0) - #endif /* __ABI_CSKY_CACHEFLUSH_H */ diff --git a/arch/csky/abiv1/inc/abi/page.h b/arch/csky/abiv1/inc/abi/page.h index 6336e92a103a..c864519117c7 100644 --- a/arch/csky/abiv1/inc/abi/page.h +++ b/arch/csky/abiv1/inc/abi/page.h @@ -1,13 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. -extern unsigned long shm_align_mask; +#include <asm/shmparam.h> + extern void flush_dcache_page(struct page *page); static inline unsigned long pages_do_alias(unsigned long addr1, unsigned long addr2) { - return (addr1 ^ addr2) & shm_align_mask; + return (addr1 ^ addr2) & (SHMLBA-1); } static inline void clear_user_page(void *addr, unsigned long vaddr, diff --git a/arch/csky/abiv1/mmap.c b/arch/csky/abiv1/mmap.c index b462fd50b23a..6792aca49999 100644 --- a/arch/csky/abiv1/mmap.c +++ b/arch/csky/abiv1/mmap.c @@ -9,58 +9,63 @@ #include <linux/random.h> #include <linux/io.h> -unsigned long shm_align_mask = (0x4000 >> 1) - 1; /* Sane caches */ +#define COLOUR_ALIGN(addr,pgoff) \ + ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ + (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + shm_align_mask) & ~shm_align_mask) + \ - (((pgoff) << PAGE_SHIFT) & shm_align_mask)) - -unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, +/* + * We need to ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. + * + * We unconditionally provide this function for all cases. + */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - struct vm_area_struct *vmm; - int do_color_align; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int do_align = 0; + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + do_align = filp || (flags & MAP_SHARED); + /* + * We enforce the MAP_FIXED case. + */ if (flags & MAP_FIXED) { - /* - * We do not accept a shared mapping if it would violate - * cache aliasing constraints. - */ - if ((flags & MAP_SHARED) && - ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + if (flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; } if (len > TASK_SIZE) return -ENOMEM; - do_color_align = 0; - if (filp || (flags & MAP_SHARED)) - do_color_align = 1; + if (addr) { - if (do_color_align) + if (do_align) addr = COLOUR_ALIGN(addr, pgoff); else addr = PAGE_ALIGN(addr); - vmm = find_vma(current->mm, addr); + + vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vmm || addr + len <= vmm->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } - addr = TASK_UNMAPPED_BASE; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { - /* At this point: (!vmm || addr < vmm->vm_end). */ - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) - return addr; - addr = vmm->vm_end; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - } + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); } diff --git a/arch/csky/include/asm/barrier.h b/arch/csky/include/asm/barrier.h index 476eb786f22d..a430e7fddf35 100644 --- a/arch/csky/include/asm/barrier.h +++ b/arch/csky/include/asm/barrier.h @@ -9,11 +9,12 @@ #define nop() asm volatile ("nop\n":::"memory") /* - * sync: completion barrier - * sync.s: completion barrier and shareable to other cores - * sync.i: completion barrier with flush cpu pipeline - * sync.is: completion barrier with flush cpu pipeline and shareable to - * other cores + * sync: completion barrier, all sync.xx instructions + * guarantee the last response recieved by bus transaction + * made by ld/st instructions before sync.s + * sync.s: inherit from sync, but also shareable to other cores + * sync.i: inherit from sync, but also flush cpu pipeline + * sync.is: the same with sync.i + sync.s * * bar.brwarw: ordering barrier for all load/store instructions before it * bar.brwarws: ordering barrier for all load/store instructions before it @@ -27,9 +28,7 @@ */ #ifdef CONFIG_CPU_HAS_CACHEV2 -#define mb() asm volatile ("bar.brwarw\n":::"memory") -#define rmb() asm volatile ("bar.brar\n":::"memory") -#define wmb() asm volatile ("bar.bwaw\n":::"memory") +#define mb() asm volatile ("sync.s\n":::"memory") #ifdef CONFIG_SMP #define __smp_mb() asm volatile ("bar.brwarws\n":::"memory") diff --git a/arch/csky/include/asm/cache.h b/arch/csky/include/asm/cache.h index d68373463676..1d5fc2f78fd7 100644 --- a/arch/csky/include/asm/cache.h +++ b/arch/csky/include/asm/cache.h @@ -24,6 +24,7 @@ void cache_wbinv_range(unsigned long start, unsigned long end); void cache_wbinv_all(void); void dma_wbinv_range(unsigned long start, unsigned long end); +void dma_inv_range(unsigned long start, unsigned long end); void dma_wb_range(unsigned long start, unsigned long end); #endif diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h index c1dfa9c10e36..80d071e2567f 100644 --- a/arch/csky/include/asm/io.h +++ b/arch/csky/include/asm/io.h @@ -4,17 +4,10 @@ #ifndef __ASM_CSKY_IO_H #define __ASM_CSKY_IO_H -#include <abi/pgtable-bits.h> +#include <asm/pgtable.h> #include <linux/types.h> #include <linux/version.h> -extern void __iomem *ioremap(phys_addr_t offset, size_t size); - -extern void iounmap(void *addr); - -extern int remap_area_pages(unsigned long address, phys_addr_t phys_addr, - size_t size, unsigned long flags); - /* * I/O memory access primitives. Reads are ordered relative to any * following Normal memory access. Writes are ordered relative to any prior @@ -40,9 +33,17 @@ extern int remap_area_pages(unsigned long address, phys_addr_t phys_addr, #define writel(v,c) ({ wmb(); writel_relaxed((v),(c)); mb(); }) #endif -#define ioremap_nocache(phy, sz) ioremap(phy, sz) -#define ioremap_wc ioremap_nocache -#define ioremap_wt ioremap_nocache +/* + * I/O memory mapping functions. + */ +extern void __iomem *ioremap_cache(phys_addr_t addr, size_t size); +extern void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot); +extern void iounmap(void *addr); + +#define ioremap(addr, size) __ioremap((addr), (size), pgprot_noncached(PAGE_KERNEL)) +#define ioremap_wc(addr, size) __ioremap((addr), (size), pgprot_writecombine(PAGE_KERNEL)) +#define ioremap_nocache(addr, size) ioremap((addr), (size)) +#define ioremap_cache ioremap_cache #include <asm-generic/io.h> diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 0040b3a05b61..7c21985c60dc 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -258,6 +258,16 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot) { unsigned long prot = pgprot_val(_prot); + prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED | _PAGE_SO; + + return __pgprot(prot); +} + +#define pgprot_writecombine pgprot_writecombine +static inline pgprot_t pgprot_writecombine(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED; return __pgprot(prot); diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a7e84ccccbd8..a7a5b67df898 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -17,6 +17,12 @@ #define PTE_INDX_SHIFT 10 #define _PGDIR_SHIFT 22 +.macro zero_fp +#ifdef CONFIG_STACKTRACE + movi r8, 0 +#endif +.endm + .macro tlbop_begin name, val0, val1, val2 ENTRY(csky_\name) mtcr a3, ss2 @@ -96,6 +102,7 @@ ENTRY(csky_\name) SAVE_ALL 0 .endm .macro tlbop_end is_write + zero_fp RD_MEH a2 psrset ee, ie mov a0, sp @@ -120,6 +127,7 @@ tlbop_end 1 ENTRY(csky_systemcall) SAVE_ALL TRAP0_SIZE + zero_fp psrset ee, ie @@ -136,9 +144,9 @@ ENTRY(csky_systemcall) mov r9, sp bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - cmpnei r8, 0 + ldw r12, (r9, TINFO_FLAGS) + ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + cmpnei r12, 0 bt csky_syscall_trace #if defined(__CSKYABIV2__) subi sp, 8 @@ -180,7 +188,7 @@ csky_syscall_trace: ENTRY(ret_from_kernel_thread) jbsr schedule_tail - mov a0, r8 + mov a0, r10 jsr r9 jbsr ret_from_exception @@ -189,9 +197,9 @@ ENTRY(ret_from_fork) mov r9, sp bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - cmpnei r8, 0 + ldw r12, (r9, TINFO_FLAGS) + ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + cmpnei r12, 0 bf ret_from_exception mov a0, sp /* sp = pt_regs pointer */ jbsr syscall_trace_exit @@ -209,9 +217,9 @@ ret_from_exception: bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - andi r8, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) - cmpnei r8, 0 + ldw r12, (r9, TINFO_FLAGS) + andi r12, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) + cmpnei r12, 0 bt exit_work 1: RESTORE_ALL @@ -220,11 +228,11 @@ exit_work: lrw syscallid, ret_from_exception mov lr, syscallid - btsti r8, TIF_NEED_RESCHED + btsti r12, TIF_NEED_RESCHED bt work_resched mov a0, sp - mov a1, r8 + mov a1, r12 jmpi do_notify_resume work_resched: @@ -232,6 +240,7 @@ work_resched: ENTRY(csky_trap) SAVE_ALL 0 + zero_fp psrset ee mov a0, sp /* Push Stack pointer arg */ jbsr trap_c /* Call C-level trap handler */ @@ -265,6 +274,7 @@ ENTRY(csky_get_tls) ENTRY(csky_irq) SAVE_ALL 0 + zero_fp psrset ee #ifdef CONFIG_PREEMPT @@ -276,27 +286,23 @@ ENTRY(csky_irq) * Get task_struct->stack.preempt_count for current, * and increase 1. */ - ldw r8, (r9, TINFO_PREEMPT) - addi r8, 1 - stw r8, (r9, TINFO_PREEMPT) + ldw r12, (r9, TINFO_PREEMPT) + addi r12, 1 + stw r12, (r9, TINFO_PREEMPT) #endif mov a0, sp jbsr csky_do_IRQ #ifdef CONFIG_PREEMPT - subi r8, 1 - stw r8, (r9, TINFO_PREEMPT) - cmpnei r8, 0 + subi r12, 1 + stw r12, (r9, TINFO_PREEMPT) + cmpnei r12, 0 bt 2f - ldw r8, (r9, TINFO_FLAGS) - btsti r8, TIF_NEED_RESCHED + ldw r12, (r9, TINFO_FLAGS) + btsti r12, TIF_NEED_RESCHED bf 2f -1: jbsr preempt_schedule_irq /* irq en/disable is done inside */ - ldw r7, (r9, TINFO_FLAGS) /* get new tasks TI_FLAGS */ - btsti r7, TIF_NEED_RESCHED - bt 1b /* go again */ #endif 2: jmpi ret_from_exception diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c index 4c1a1934d76a..1a29f1157449 100644 --- a/arch/csky/kernel/perf_event.c +++ b/arch/csky/kernel/perf_event.c @@ -1306,7 +1306,7 @@ int csky_pmu_device_probe(struct platform_device *pdev, &csky_pmu.count_width)) { csky_pmu.count_width = DEFAULT_COUNT_WIDTH; } - csky_pmu.max_period = BIT(csky_pmu.count_width) - 1; + csky_pmu.max_period = BIT_ULL(csky_pmu.count_width) - 1; csky_pmu.plat_device = pdev; @@ -1337,7 +1337,7 @@ int csky_pmu_device_probe(struct platform_device *pdev, return ret; } -const static struct of_device_id csky_pmu_of_device_ids[] = { +static const struct of_device_id csky_pmu_of_device_ids[] = { {.compatible = "csky,csky-pmu"}, {}, }; diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index e555740c0be5..f320d9248a22 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -55,7 +55,7 @@ int copy_thread(unsigned long clone_flags, if (unlikely(p->flags & PF_KTHREAD)) { memset(childregs, 0, sizeof(struct pt_regs)); childstack->r15 = (unsigned long) ret_from_kernel_thread; - childstack->r8 = kthread_arg; + childstack->r10 = kthread_arg; childstack->r9 = usp; childregs->sr = mfcr("psr"); } else { diff --git a/arch/csky/mm/cachev1.c b/arch/csky/mm/cachev1.c index b8a75cce0b8c..494ec912abff 100644 --- a/arch/csky/mm/cachev1.c +++ b/arch/csky/mm/cachev1.c @@ -120,7 +120,12 @@ void dma_wbinv_range(unsigned long start, unsigned long end) cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); } +void dma_inv_range(unsigned long start, unsigned long end) +{ + cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); +} + void dma_wb_range(unsigned long start, unsigned long end) { - cache_op_range(start, end, DATA_CACHE|CACHE_INV, 1); + cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); } diff --git a/arch/csky/mm/cachev2.c b/arch/csky/mm/cachev2.c index baaf05d69f44..b61be6518e21 100644 --- a/arch/csky/mm/cachev2.c +++ b/arch/csky/mm/cachev2.c @@ -69,11 +69,20 @@ void dma_wbinv_range(unsigned long start, unsigned long end) sync_is(); } +void dma_inv_range(unsigned long start, unsigned long end) +{ + unsigned long i = start & ~(L1_CACHE_BYTES - 1); + + for (; i < end; i += L1_CACHE_BYTES) + asm volatile("dcache.iva %0\n"::"r"(i):"memory"); + sync_is(); +} + void dma_wb_range(unsigned long start, unsigned long end) { unsigned long i = start & ~(L1_CACHE_BYTES - 1); for (; i < end; i += L1_CACHE_BYTES) - asm volatile("dcache.civa %0\n"::"r"(i):"memory"); + asm volatile("dcache.cva %0\n"::"r"(i):"memory"); sync_is(); } diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 602a60d47a94..06e85b565454 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -14,69 +14,50 @@ #include <linux/version.h> #include <asm/cache.h> -void arch_dma_prep_coherent(struct page *page, size_t size) -{ - if (PageHighMem(page)) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - do { - void *ptr = kmap_atomic(page); - size_t _size = (size < PAGE_SIZE) ? size : PAGE_SIZE; - - memset(ptr, 0, _size); - dma_wbinv_range((unsigned long)ptr, - (unsigned long)ptr + _size); - - kunmap_atomic(ptr); - - page++; - size -= PAGE_SIZE; - count--; - } while (count); - } else { - void *ptr = page_address(page); - - memset(ptr, 0, size); - dma_wbinv_range((unsigned long)ptr, (unsigned long)ptr + size); - } -} - static inline void cache_op(phys_addr_t paddr, size_t size, void (*fn)(unsigned long start, unsigned long end)) { - struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); - unsigned int offset = paddr & ~PAGE_MASK; - size_t left = size; - unsigned long start; + struct page *page = phys_to_page(paddr); + void *start = __va(page_to_phys(page)); + unsigned long offset = offset_in_page(paddr); + size_t left = size; do { size_t len = left; + if (offset + len > PAGE_SIZE) + len = PAGE_SIZE - offset; + if (PageHighMem(page)) { - void *addr; + start = kmap_atomic(page); - if (offset + len > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - } - len = PAGE_SIZE - offset; - } + fn((unsigned long)start + offset, + (unsigned long)start + offset + len); - addr = kmap_atomic(page); - start = (unsigned long)(addr + offset); - fn(start, start + len); - kunmap_atomic(addr); + kunmap_atomic(start); } else { - start = (unsigned long)phys_to_virt(paddr); - fn(start, start + size); + fn((unsigned long)start + offset, + (unsigned long)start + offset + len); } offset = 0; + page++; + start += PAGE_SIZE; left -= len; } while (left); } +static void dma_wbinv_set_zero_range(unsigned long start, unsigned long end) +{ + memset((void *)start, 0, end - start); + dma_wbinv_range(start, end); +} + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + cache_op(page_to_phys(page), size, dma_wbinv_set_zero_range); +} + void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { @@ -98,11 +79,10 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, { switch (dir) { case DMA_TO_DEVICE: - cache_op(paddr, size, dma_wb_range); - break; + return; case DMA_FROM_DEVICE: case DMA_BIDIRECTIONAL: - cache_op(paddr, size, dma_wbinv_range); + cache_op(paddr, size, dma_inv_range); break; default: BUG(); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index eb0dc9e5065f..d4c2292ea46b 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -60,22 +60,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - pr_info("Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages_inc(); - } -} -#endif - extern char __init_begin[], __init_end[]; void free_initmem(void) diff --git a/arch/csky/mm/ioremap.c b/arch/csky/mm/ioremap.c index 8473b6bdf512..e13cd3497628 100644 --- a/arch/csky/mm/ioremap.c +++ b/arch/csky/mm/ioremap.c @@ -8,12 +8,12 @@ #include <asm/pgtable.h> -void __iomem *ioremap(phys_addr_t addr, size_t size) +static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size, + pgprot_t prot, void *caller) { phys_addr_t last_addr; unsigned long offset, vaddr; struct vm_struct *area; - pgprot_t prot; last_addr = addr + size - 1; if (!size || last_addr < addr) @@ -23,15 +23,12 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) addr &= PAGE_MASK; size = PAGE_ALIGN(size + offset); - area = get_vm_area_caller(size, VM_ALLOC, __builtin_return_address(0)); + area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) return NULL; vaddr = (unsigned long)area->addr; - prot = __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | - _PAGE_GLOBAL | _CACHE_UNCACHED | _PAGE_SO); - if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) { free_vm_area(area); return NULL; @@ -39,7 +36,20 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) return (void __iomem *)(vaddr + offset); } -EXPORT_SYMBOL(ioremap); + +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) +{ + return __ioremap_caller(phys_addr, size, prot, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__ioremap); + +void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) +{ + return __ioremap_caller(phys_addr, size, PAGE_KERNEL, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); void iounmap(void __iomem *addr) { @@ -51,10 +61,9 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { if (!pfn_valid(pfn)) { - vma_prot.pgprot |= _PAGE_SO; return pgprot_noncached(vma_prot); } else if (file->f_flags & O_SYNC) { - return pgprot_noncached(vma_prot); + return pgprot_writecombine(vma_prot); } return vma_prot; diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 63a9f33aa43e..5cfc9d347826 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -99,7 +99,7 @@ miscintc: interrupt-controller@18060010 { compatible = "qca,ar7240-misc-intc"; - reg = <0x18060010 0x4>; + reg = <0x18060010 0x8>; interrupt-parent = <&cpuintc>; interrupts = <6>; diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index af44b35d79a1..b4328b3b5288 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -160,7 +160,6 @@ void __init prom_meminit(void) void __init prom_free_prom_memory(void) { - unsigned long addr; int i; if (prom_flags & PROM_FLAG_DONT_FREE_TEMP) diff --git a/arch/mips/include/asm/octeon/cvmx-ipd.h b/arch/mips/include/asm/octeon/cvmx-ipd.h index cbdc14b77435..adab7b54c3b4 100644 --- a/arch/mips/include/asm/octeon/cvmx-ipd.h +++ b/arch/mips/include/asm/octeon/cvmx-ipd.h @@ -36,6 +36,7 @@ #include <asm/octeon/octeon-feature.h> #include <asm/octeon/cvmx-ipd-defs.h> +#include <asm/octeon/cvmx-pip-defs.h> enum cvmx_ipd_mode { CVMX_IPD_OPC_MODE_STT = 0LL, /* All blocks DRAM, not cached in L2 */ diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 071053ece677..5d70babfc9ee 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -52,6 +52,7 @@ # endif #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 /* whitelists for checksyscalls */ #define __IGNORE_fadvise64_64 diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c index fa62cd1dff93..6a7afe7ef4d3 100644 --- a/arch/mips/kernel/cpu-bugs64.c +++ b/arch/mips/kernel/cpu-bugs64.c @@ -24,7 +24,8 @@ static char r4kwar[] __initdata = static char daddiwar[] __initdata = "Enable CPU_DADDI_WORKAROUNDS to rectify."; -static inline void align_mod(const int align, const int mod) +static __always_inline __init +void align_mod(const int align, const int mod) { asm volatile( ".set push\n\t" @@ -38,8 +39,9 @@ static inline void align_mod(const int align, const int mod) : "n"(align), "n"(mod)); } -static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, - const int align, const int mod) +static __always_inline __init +void mult_sh_align_mod(long *v1, long *v2, long *w, + const int align, const int mod) { unsigned long flags; int m1, m2; @@ -113,7 +115,7 @@ static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, *w = lw; } -static inline void check_mult_sh(void) +static __always_inline __init void check_mult_sh(void) { long v1[8], v2[8], w[8]; int bug, fix, i; @@ -176,7 +178,7 @@ asmlinkage void __init do_daddi_ov(struct pt_regs *regs) exception_exit(prev_state); } -static inline void check_daddi(void) +static __init void check_daddi(void) { extern asmlinkage void handle_daddi_ov(void); unsigned long flags; @@ -242,7 +244,7 @@ static inline void check_daddi(void) int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1; -static inline void check_daddiu(void) +static __init void check_daddiu(void) { long v, w, tmp; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index b8249c233754..5eec13b8d222 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -108,6 +108,9 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type) return; } + if (start < PHYS_OFFSET) + return; + memblock_add(start, size); /* Reserve any memory except the ordinary RAM ranges. */ switch (type) { @@ -321,7 +324,7 @@ static void __init bootmem_init(void) * Reserve any memory between the start of RAM and PHYS_OFFSET */ if (ramstart > PHYS_OFFSET) - memblock_reserve(PHYS_OFFSET, PFN_UP(ramstart) - PHYS_OFFSET); + memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET); if (PFN_UP(ramstart) > ARCH_PFN_OFFSET) { pr_info("Wasting %lu bytes for tracking %lu unused pages\n", diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index b0e25e913bdb..3f16f3823031 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -80,6 +80,7 @@ SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len, save_static_function(sys_fork); save_static_function(sys_clone); +save_static_function(sys_clone3); SYSCALL_DEFINE1(set_thread_area, unsigned long, addr) { diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index c9c879ec9b6d..e7c5ab38e403 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -373,4 +373,4 @@ 432 n32 fsmount sys_fsmount 433 n32 fspick sys_fspick 434 n32 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 n32 clone3 __sys_clone3 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index bbce9159caa1..13cd66581f3b 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -349,4 +349,4 @@ 432 n64 fsmount sys_fsmount 433 n64 fspick sys_fspick 434 n64 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 n64 clone3 __sys_clone3 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 9653591428ec..353539ea4140 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -422,4 +422,4 @@ 432 o32 fsmount sys_fsmount 433 o32 fspick sys_fspick 434 o32 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 o32 clone3 __sys_clone3 diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c index 4abb92e0fc39..4254ac4ec616 100644 --- a/arch/mips/loongson64/common/mem.c +++ b/arch/mips/loongson64/common/mem.c @@ -3,6 +3,7 @@ */ #include <linux/fs.h> #include <linux/fcntl.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <asm/bootinfo.h> @@ -64,24 +65,22 @@ void __init prom_init_memory(void) node_id = loongson_memmap->map[i].node_id; mem_type = loongson_memmap->map[i].mem_type; - if (node_id == 0) { - switch (mem_type) { - case SYSTEM_RAM_LOW: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RAM); - break; - case SYSTEM_RAM_HIGH: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RAM); - break; - case SYSTEM_RAM_RESERVED: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RESERVED); - break; - } + if (node_id != 0) + continue; + + switch (mem_type) { + case SYSTEM_RAM_LOW: + memblock_add(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + case SYSTEM_RAM_HIGH: + memblock_add(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + case SYSTEM_RAM_RESERVED: + memblock_reserve(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; } } } diff --git a/arch/mips/loongson64/common/serial.c b/arch/mips/loongson64/common/serial.c index ffefc1cb2612..98c3a7feb10f 100644 --- a/arch/mips/loongson64/common/serial.c +++ b/arch/mips/loongson64/common/serial.c @@ -110,7 +110,7 @@ static int __init serial_init(void) } module_init(serial_init); -static void __init serial_exit(void) +static void __exit serial_exit(void) { platform_device_unregister(&uart8250_device); } diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 414e97de5dc0..8f20d2cb3767 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -142,8 +142,6 @@ static void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RAM); memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; @@ -156,16 +154,12 @@ static void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RAM); memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; case SYSTEM_RAM_RESERVED: pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", (u32)node_id, mem_type, mem_start, mem_size); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RESERVED); memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); break; @@ -191,8 +185,6 @@ static void __init node_mem_init(unsigned int node) NODE_DATA(node)->node_start_pfn = start_pfn; NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; - free_bootmem_with_active_regions(node, end_pfn); - if (node == 0) { /* kernel end address */ unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end)); @@ -209,8 +201,6 @@ static void __init node_mem_init(unsigned int node) memblock_reserve((node_addrspace_offset | 0xfe000000), 32 << 20); } - - sparse_memory_present_with_active_regions(node); } static __init void prom_meminit(void) @@ -227,6 +217,7 @@ static __init void prom_meminit(void) cpumask_clear(&__node_data[(node)]->cpumask); } } + memblocks_present(); max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) { diff --git a/arch/mips/pmcs-msp71xx/msp_prom.c b/arch/mips/pmcs-msp71xx/msp_prom.c index dfb527961a27..800a21b8b8b0 100644 --- a/arch/mips/pmcs-msp71xx/msp_prom.c +++ b/arch/mips/pmcs-msp71xx/msp_prom.c @@ -61,6 +61,7 @@ int init_debug = 1; /* memory blocks */ struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS]; +#define MAX_PROM_MEM 5 static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata; static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata; static unsigned int nr_prom_mem __initdata; @@ -358,7 +359,7 @@ void __init prom_meminit(void) p++; if (type == BOOT_MEM_ROM_DATA) { - if (nr_prom_mem >= 5) { + if (nr_prom_mem >= MAX_PROM_MEM) { pr_err("Too many ROM DATA regions"); continue; } @@ -377,7 +378,6 @@ void __init prom_free_prom_memory(void) char *ptr; int len = 0; int i; - unsigned long addr; /* * preserve environment variables and command line from pmon/bbload diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 69cfa0a5339e..807f0f782f75 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -59,7 +59,7 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg ifndef CONFIG_CPU_MIPSR6 ifeq ($(call ld-ifversion, -lt, 225000000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) - obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y)) + obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) ccflags-vdso += -DDISABLE_MIPS_VDSO endif endif diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c deleted file mode 100644 index e8243c7fd5b5..000000000000 --- a/arch/mips/vdso/gettimeofday.c +++ /dev/null @@ -1,269 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 Imagination Technologies - * Author: Alex Smith <[email protected]> - */ - -#include "vdso.h" - -#include <linux/compiler.h> -#include <linux/time.h> - -#include <asm/clocksource.h> -#include <asm/io.h> -#include <asm/unistd.h> -#include <asm/vdso.h> - -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL - -static __always_inline long gettimeofday_fallback(struct timeval *_tv, - struct timezone *_tz) -{ - register struct timezone *tz asm("a1") = _tz; - register struct timeval *tv asm("a0") = _tv; - register long ret asm("v0"); - register long nr asm("v0") = __NR_gettimeofday; - register long error asm("a3"); - - asm volatile( - " syscall\n" - : "=r" (ret), "=r" (error) - : "r" (tv), "r" (tz), "r" (nr) - : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); - - return error ? -ret : ret; -} - -#endif - -static __always_inline long clock_gettime_fallback(clockid_t _clkid, - struct timespec *_ts) -{ - register struct timespec *ts asm("a1") = _ts; - register clockid_t clkid asm("a0") = _clkid; - register long ret asm("v0"); - register long nr asm("v0") = __NR_clock_gettime; - register long error asm("a3"); - - asm volatile( - " syscall\n" - : "=r" (ret), "=r" (error) - : "r" (clkid), "r" (ts), "r" (nr) - : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); - - return error ? -ret : ret; -} - -static __always_inline int do_realtime_coarse(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - - do { - start_seq = vdso_data_read_begin(data); - - ts->tv_sec = data->xtime_sec; - ts->tv_nsec = data->xtime_nsec >> data->cs_shift; - } while (vdso_data_read_retry(data, start_seq)); - - return 0; -} - -static __always_inline int do_monotonic_coarse(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 to_mono_sec; - u64 to_mono_nsec; - - do { - start_seq = vdso_data_read_begin(data); - - ts->tv_sec = data->xtime_sec; - ts->tv_nsec = data->xtime_nsec >> data->cs_shift; - - to_mono_sec = data->wall_to_mono_sec; - to_mono_nsec = data->wall_to_mono_nsec; - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_sec += to_mono_sec; - timespec_add_ns(ts, to_mono_nsec); - - return 0; -} - -#ifdef CONFIG_CSRC_R4K - -static __always_inline u64 read_r4k_count(void) -{ - unsigned int count; - - __asm__ __volatile__( - " .set push\n" - " .set mips32r2\n" - " rdhwr %0, $2\n" - " .set pop\n" - : "=r" (count)); - - return count; -} - -#endif - -#ifdef CONFIG_CLKSRC_MIPS_GIC - -static __always_inline u64 read_gic_count(const union mips_vdso_data *data) -{ - void __iomem *gic = get_gic(data); - u32 hi, hi2, lo; - - do { - hi = __raw_readl(gic + sizeof(lo)); - lo = __raw_readl(gic); - hi2 = __raw_readl(gic + sizeof(lo)); - } while (hi2 != hi); - - return (((u64)hi) << 32) + lo; -} - -#endif - -static __always_inline u64 get_ns(const union mips_vdso_data *data) -{ - u64 cycle_now, delta, nsec; - - switch (data->clock_mode) { -#ifdef CONFIG_CSRC_R4K - case VDSO_CLOCK_R4K: - cycle_now = read_r4k_count(); - break; -#endif -#ifdef CONFIG_CLKSRC_MIPS_GIC - case VDSO_CLOCK_GIC: - cycle_now = read_gic_count(data); - break; -#endif - default: - return 0; - } - - delta = (cycle_now - data->cs_cycle_last) & data->cs_mask; - - nsec = (delta * data->cs_mult) + data->xtime_nsec; - nsec >>= data->cs_shift; - - return nsec; -} - -static __always_inline int do_realtime(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 ns; - - do { - start_seq = vdso_data_read_begin(data); - - if (data->clock_mode == VDSO_CLOCK_NONE) - return -ENOSYS; - - ts->tv_sec = data->xtime_sec; - ns = get_ns(data); - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_nsec = 0; - timespec_add_ns(ts, ns); - - return 0; -} - -static __always_inline int do_monotonic(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 ns; - u64 to_mono_sec; - u64 to_mono_nsec; - - do { - start_seq = vdso_data_read_begin(data); - - if (data->clock_mode == VDSO_CLOCK_NONE) - return -ENOSYS; - - ts->tv_sec = data->xtime_sec; - ns = get_ns(data); - - to_mono_sec = data->wall_to_mono_sec; - to_mono_nsec = data->wall_to_mono_nsec; - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_sec += to_mono_sec; - ts->tv_nsec = 0; - timespec_add_ns(ts, ns + to_mono_nsec); - - return 0; -} - -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL - -/* - * This is behind the ifdef so that we don't provide the symbol when there's no - * possibility of there being a usable clocksource, because there's nothing we - * can do without it. When libc fails the symbol lookup it should fall back on - * the standard syscall path. - */ -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - const union mips_vdso_data *data = get_vdso_data(); - struct timespec ts; - int ret; - - ret = do_realtime(&ts, data); - if (ret) - return gettimeofday_fallback(tv, tz); - - if (tv) { - tv->tv_sec = ts.tv_sec; - tv->tv_usec = ts.tv_nsec / 1000; - } - - if (tz) { - tz->tz_minuteswest = data->tz_minuteswest; - tz->tz_dsttime = data->tz_dsttime; - } - - return 0; -} - -#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ - -int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts) -{ - const union mips_vdso_data *data = get_vdso_data(); - int ret = -1; - - switch (clkid) { - case CLOCK_REALTIME_COARSE: - ret = do_realtime_coarse(ts, data); - break; - case CLOCK_MONOTONIC_COARSE: - ret = do_monotonic_coarse(ts, data); - break; - case CLOCK_REALTIME: - ret = do_realtime(ts, data); - break; - case CLOCK_MONOTONIC: - ret = do_monotonic(ts, data); - break; - default: - break; - } - - if (ret) - ret = clock_gettime_fallback(clkid, ts); - - return ret; -} diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6841bd52738b..dfbd7f22eef5 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -50,7 +50,7 @@ endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc -BOOTARFLAGS := -cr$(KBUILD_ARFLAGS) +BOOTARFLAGS := -crD ifdef CONFIG_CC_IS_CLANG BOOTCFLAGS += $(CLANG_FLAGS) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 574eca33f893..d97db3ad9aae 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -254,7 +254,13 @@ extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); -extern int radix__has_transparent_hugepage(void); +static inline int radix__has_transparent_hugepage(void) +{ + /* For radix 2M at PMD level means thp */ + if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT) + return 1; + return 0; +} #endif extern int __meminit radix__vmemmap_create_mapping(unsigned long start, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d7fcdfa7fee4..ec2547cc5ecb 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -36,8 +36,8 @@ #include "book3s.h" #include "trace.h" -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ /* #define EXIT_DEBUG */ @@ -69,8 +69,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "pthru_all", VCPU_STAT(pthru_all) }, { "pthru_host", VCPU_STAT(pthru_host) }, { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, - { "largepages_2M", VM_STAT(num_2M_pages) }, - { "largepages_1G", VM_STAT(num_1G_pages) }, + { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) }, + { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) }, { NULL } }; diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index d1f390ac9cdb..64733b9cb20a 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -406,6 +406,8 @@ int hash__has_transparent_hugepage(void) return 1; } +EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 3a1fbf9cb8f8..6ee17d09649c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1027,13 +1027,6 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, return old_pmd; } -int radix__has_transparent_hugepage(void) -{ - /* For radix 2M at PMD level means thp */ - if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT) - return 1; - return 0; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a44f6281ca3a..4e08246acd79 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -172,6 +172,21 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } +static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, + unsigned long page_size) +{ + unsigned long nr_pfn = page_size / sizeof(struct page); + unsigned long start_pfn = page_to_pfn((struct page *)start); + + if ((start_pfn + nr_pfn) > altmap->end_pfn) + return true; + + if (start_pfn < altmap->base_pfn) + return true; + + return false; +} + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { @@ -194,7 +209,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, * fail due to alignment issues when using 16MB hugepages, so * fall back to system memory if the altmap allocation fail. */ - if (altmap) { + if (altmap && !altmap_cross_boundary(altmap, start, page_size)) { p = altmap_alloc_block_buf(page_size, altmap); if (!p) pr_debug("altmap block allocation failed, falling back to system memory"); diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 5a02b7d50940..9c992a88d858 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -22,6 +22,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) +#define REG_SC __REG_SEL(sc.d, sc.w) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index da7aa88113c2..2d592da1e776 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -98,7 +98,26 @@ _save_context: */ .macro RESTORE_ALL REG_L a0, PT_SSTATUS(sp) - REG_L a2, PT_SEPC(sp) + /* + * The current load reservation is effectively part of the processor's + * state, in the sense that load reservations cannot be shared between + * different hart contexts. We can't actually save and restore a load + * reservation, so instead here we clear any existing reservation -- + * it's always legal for implementations to clear load reservations at + * any point (as long as the forward progress guarantee is kept, but + * we'll ignore that here). + * + * Dangling load reservations can be the result of taking a trap in the + * middle of an LR/SC sequence, but can also be the result of a taken + * forward branch around an SC -- which is how we implement CAS. As a + * result we need to clear reservations between the last CAS and the + * jump back to the new context. While it is unlikely the store + * completes, implementations are allowed to expand reservations to be + * arbitrarily large. + */ + REG_L a2, PT_SEPC(sp) + REG_SC x0, a2, PT_SEPC(sp) + csrw CSR_SSTATUS, a0 csrw CSR_SEPC, a2 diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f0ba71304b6e..83f7d12042fb 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -11,6 +11,7 @@ #include <linux/swap.h> #include <linux/sizes.h> #include <linux/of_fdt.h> +#include <linux/libfdt.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> @@ -82,6 +83,8 @@ disable: } #endif /* CONFIG_BLK_DEV_INITRD */ +static phys_addr_t dtb_early_pa __initdata; + void __init setup_bootmem(void) { struct memblock_region *reg; @@ -117,7 +120,12 @@ void __init setup_bootmem(void) setup_initrd(); #endif /* CONFIG_BLK_DEV_INITRD */ - early_init_fdt_reserve_self(); + /* + * Avoid using early_init_fdt_reserve_self() since __pa() does + * not work for DTB pointers that are fixmap addresses + */ + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + early_init_fdt_scan_reserved_mem(); memblock_allow_resize(); memblock_dump_all(); @@ -393,6 +401,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) /* Save pointer to DTB for early FDT parsing */ dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK); + /* Save physical address for memblock reservation */ + dtb_early_pa = dtb_pa; } static void __init setup_vm_final(void) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 347f48702edb..38d64030aacf 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -44,6 +44,7 @@ CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y @@ -69,12 +70,13 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y CONFIG_MODULE_SIG_SHA256=y +CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -370,6 +372,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m +# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y @@ -424,6 +427,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -435,6 +439,7 @@ CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m CONFIG_NETDEVICES=y CONFIG_BONDING=m @@ -489,6 +494,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set @@ -538,15 +544,16 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m -CONFIG_DRM=y -CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m @@ -580,6 +587,8 @@ CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y @@ -589,6 +598,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -648,12 +658,15 @@ CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m @@ -664,10 +677,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -739,7 +748,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 8514b8b9500f..25f799849582 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -44,6 +44,7 @@ CONFIG_NUMA=y # CONFIG_NUMA_EMU is not set CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y @@ -66,11 +67,12 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y CONFIG_MODULE_SIG_SHA256=y +CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -363,6 +365,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m +# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -418,6 +421,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -429,6 +433,7 @@ CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m CONFIG_DM_INTEGRITY=m CONFIG_NETDEVICES=y @@ -484,6 +489,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set @@ -533,16 +539,16 @@ CONFIG_WATCHDOG_CORE=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m -CONFIG_DRM=y -CONFIG_DRM_VIRTIO_GPU=y -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m @@ -573,6 +579,8 @@ CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y @@ -581,6 +589,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -639,12 +648,15 @@ CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -656,10 +668,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m @@ -727,7 +735,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index be09a208b608..20c51e5d9353 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -61,7 +61,7 @@ CONFIG_RAW_DRIVER=y CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_DIMLIB is not set +CONFIG_LSM="yama,loadpin,safesetid,integrity" CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index d3f09526ee19..61467b9eecc7 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -41,7 +41,7 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg") #undef __ATOMIC_OP #define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier) \ -static inline void op_name(op_type val, op_type *ptr) \ +static __always_inline void op_name(op_type val, op_type *ptr) \ { \ asm volatile( \ op_string " %[ptr],%[val]\n" \ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index b8833ac983fa..eb7eed43e780 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -56,7 +56,7 @@ __bitops_byte(unsigned long nr, volatile unsigned long *ptr) return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); } -static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) +static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -77,7 +77,7 @@ static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_or(mask, (long *)addr); } -static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) +static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -98,8 +98,8 @@ static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_and(mask, (long *)addr); } -static inline void arch_change_bit(unsigned long nr, - volatile unsigned long *ptr) +static __always_inline void arch_change_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index a092f63aac6a..c0f3bfeddcbe 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -171,7 +171,7 @@ typedef struct { unsigned char bytes[16]; } cpacf_mask_t; * * Returns 1 if @func is available for @opcode, 0 otherwise */ -static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) +static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) { register unsigned long r0 asm("0") = 0; /* query function */ register unsigned long r1 asm("1") = (unsigned long) mask; diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index ceeb552d3472..819803a97c2b 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -28,6 +28,8 @@ asm(".include \"asm/cpu_mf-insn.h\"\n"); CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ CPU_MF_INT_SF_LSDA) +#define CPU_MF_SF_RIBM_NOTAV 0x1 /* Sampling unavailable */ + /* CPU measurement facility support */ static inline int cpum_cf_avail(void) { @@ -69,7 +71,8 @@ struct hws_qsi_info_block { /* Bit(s) */ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ unsigned long tear; /* 24-31: TEAR contents */ unsigned long dear; /* 32-39: DEAR contents */ - unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int rsvrd0:24; /* 40-42: reserved */ + unsigned int ribm:8; /* 43: Reserved by IBM */ unsigned int cpu_speed; /* 44-47: CPU speed */ unsigned long long rsvrd1; /* 48-55: reserved */ unsigned long long rsvrd2; /* 56-63: reserved */ @@ -220,7 +223,8 @@ enum stcctm_ctr_set { MT_DIAG = 5, MT_DIAG_CLEARING = 9, /* clears loss-of-MT-ctr-data alert */ }; -static inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest) + +static __always_inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest) { int cc; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bb59dd964590..de8f0bf5f238 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -12,8 +12,6 @@ #include <asm/page.h> #include <asm/pgtable.h> - -#define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range free_pgd_range #define hugepages_supported() (MACHINE_HAS_EDAT1) @@ -23,6 +21,13 @@ pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +static inline bool is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + return false; +} + /* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index e548ec1ec12c..39f747d63758 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -20,7 +20,7 @@ * We use a brcl 0,2 instruction for jump labels at compile time so it * can be easily distinguished from a hotpatch generated instruction. */ -static inline bool arch_static_branch(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" ".pushsection __jump_table,\"aw\"\n" @@ -34,7 +34,7 @@ label: return true; } -static inline bool arch_static_branch_jump(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 36c578c0ff96..5ff98d76a66c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -997,9 +997,9 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_NODAT 0x400 #define IPTE_GUEST_ASCE 0x800 -static inline void __ptep_ipte(unsigned long address, pte_t *ptep, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, + unsigned long opt, unsigned long asce, + int local) { unsigned long pto = (unsigned long) ptep; @@ -1020,8 +1020,8 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep, : [r1] "a" (pto), [m4] "i" (local) : "memory"); } -static inline void __ptep_ipte_range(unsigned long address, int nr, - pte_t *ptep, int local) +static __always_inline void __ptep_ipte_range(unsigned long address, int nr, + pte_t *ptep, int local) { unsigned long pto = (unsigned long) ptep; @@ -1269,7 +1269,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) #define pte_offset_kernel(pmd, address) pte_offset(pmd, address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) -#define pte_unmap(pte) do { } while (0) + +static inline void pte_unmap(pte_t *pte) { } static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { @@ -1435,9 +1436,9 @@ static inline void __pmdp_csp(pmd_t *pmdp) #define IDTE_NODAT 0x1000 #define IDTE_GUEST_ASCE 0x2000 -static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, + unsigned long opt, unsigned long asce, + int local) { unsigned long sto; @@ -1461,9 +1462,9 @@ static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, } } -static inline void __pudp_idte(unsigned long addr, pud_t *pudp, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, + unsigned long opt, unsigned long asce, + int local) { unsigned long r3o; diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 78e8a888306d..e3f238e8c611 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -111,7 +111,7 @@ struct qib { /* private: */ u8 res[88]; /* public: */ - u8 parm[QDIO_MAX_BUFFERS_PER_Q]; + u8 parm[128]; } __attribute__ ((packed, aligned(256))); /** diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 5f1fd1581330..2654e348801a 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -390,7 +390,7 @@ static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, debug_sprintf_event(cf_diag_dbg, 6, "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" - " need %zd rc:%d\n", + " need %zd rc %d\n", __func__, ctrset, ctrset_size, cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); return need; @@ -567,7 +567,7 @@ static int cf_diag_add(struct perf_event *event, int flags) int err = 0; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d flags %#x cpuhw:%p\n", + "%s event %p cpu %d flags %#x cpuhw %p\n", __func__, event, event->cpu, flags, cpuhw); if (cpuhw->flags & PMU_F_IN_USE) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 544a02e944c6..3d8b12a9a6ff 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -803,6 +803,12 @@ static int __hw_perf_event_init(struct perf_event *event) goto out; } + if (si.ribm & CPU_MF_SF_RIBM_NOTAV) { + pr_warn("CPU Measurement Facility sampling is temporarily not available\n"); + err = -EBUSY; + goto out; + } + /* Always enable basic sampling */ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; @@ -895,7 +901,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) /* Check online status of the CPU to which the event is pinned */ if (event->cpu >= 0 && !cpu_online(event->cpu)) - return -ENODEV; + return -ENODEV; /* Force reset of idle/hv excludes regardless of what the * user requested. diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f6db0f1bc867..d047e846e1b9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -332,7 +332,7 @@ static inline int plo_test_bit(unsigned char nr) return cc == 0; } -static inline void __insn32_query(unsigned int opcode, u8 query[32]) +static __always_inline void __insn32_query(unsigned int opcode, u8 *query) { register unsigned long r0 asm("0") = 0; /* query function */ register unsigned long r1 asm("1") = (unsigned long) query; @@ -340,9 +340,9 @@ static inline void __insn32_query(unsigned int opcode, u8 query[32]) asm volatile( /* Parameter regs are ignored */ " .insn rrf,%[opc] << 16,2,4,6,0\n" - : "=m" (*query) + : : "d" (r0), "a" (r1), [opc] "i" (opcode) - : "cc"); + : "cc", "memory"); } #define INSN_SORTL 0xb938 diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 9bdff4defef1..e585a62d6530 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -66,7 +66,7 @@ static inline int clp_get_ilp(unsigned long *ilp) /* * Call Logical Processor with c=0, the give constant lps and an lpcb request. */ -static inline int clp_req(void *data, unsigned int lps) +static __always_inline int clp_req(void *data, unsigned int lps) { struct { u8 _[CLP_BLK_SIZE]; } *req = data; u64 ignored; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 23edf56cf577..50eb430b0ad8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -219,13 +219,6 @@ enum { PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) -/* - * The mask used to denote special SPTEs, which can be either MMIO SPTEs or - * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting - * with the SVE bit in EPT PTEs. - */ -#define SPTE_SPECIAL_MASK (1ULL << 62) - /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 63316036f85a..9c5029cf6f3f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -485,6 +485,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); @@ -618,16 +619,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, */ case 0x1f: case 0xb: { - int i, level_type; + int i; - /* read more entries until level_type is zero */ - for (i = 1; ; ++i) { + /* + * We filled in entry[0] for CPUID(EAX=<function>, + * ECX=00H) above. If its level type (ECX[15:8]) is + * zero, then the leaf is unimplemented, and we're + * done. Otherwise, continue to populate entries + * until the level type (ECX[15:8]) of the previously + * added entry is zero. + */ + for (i = 1; entry[i - 1].ecx & 0xff00; ++i) { if (*nent >= maxnent) goto out; - level_type = entry[i - 1].ecx & 0xff00; - if (!level_type) - break; do_host_cpuid(&entry[i], function, i); ++*nent; } @@ -969,53 +974,66 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); /* - * If no match is found, check whether we exceed the vCPU's limit - * and return the content of the highest valid _standard_ leaf instead. - * This is to satisfy the CPUID specification. + * If the basic or extended CPUID leaf requested is higher than the + * maximum supported basic or extended leaf, respectively, then it is + * out of range. */ -static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, - u32 function, u32 index) +static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function) { - struct kvm_cpuid_entry2 *maxlevel; - - maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); - if (!maxlevel || maxlevel->eax >= function) - return NULL; - if (function & 0x80000000) { - maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); - if (!maxlevel) - return NULL; - } - return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); + struct kvm_cpuid_entry2 *max; + + max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); + return max && function <= max->eax; } bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) { u32 function = *eax, index = *ecx; - struct kvm_cpuid_entry2 *best; - bool entry_found = true; - - best = kvm_find_cpuid_entry(vcpu, function, index); - - if (!best) { - entry_found = false; - if (!check_limit) - goto out; + struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid_entry2 *max; + bool found; - best = check_cpuid_limit(vcpu, function, index); + entry = kvm_find_cpuid_entry(vcpu, function, index); + found = entry; + /* + * Intel CPUID semantics treats any query for an out-of-range + * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were + * requested. AMD CPUID semantics returns all zeroes for any + * undefined leaf, whether or not the leaf is in range. + */ + if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) && + !cpuid_function_in_range(vcpu, function)) { + max = kvm_find_cpuid_entry(vcpu, 0, 0); + if (max) { + function = max->eax; + entry = kvm_find_cpuid_entry(vcpu, function, index); + } } - -out: - if (best) { - *eax = best->eax; - *ebx = best->ebx; - *ecx = best->ecx; - *edx = best->edx; - } else + if (entry) { + *eax = entry->eax; + *ebx = entry->ebx; + *ecx = entry->ecx; + *edx = entry->edx; + } else { *eax = *ebx = *ecx = *edx = 0; - trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); - return entry_found; + /* + * When leaf 0BH or 1FH is defined, CL is pass-through + * and EDX is always the x2APIC ID, even for undefined + * subleaves. Index 1 will exist iff the leaf is + * implemented, so we pass through CL iff leaf 1 + * exists. EDX can be copied from any existing index. + */ + if (function == 0xb || function == 0x1f) { + entry = kvm_find_cpuid_entry(vcpu, function, 1); + if (entry) { + *ecx = index & 0xff; + *edx = entry->edx; + } + } + } + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found); + return found; } EXPORT_SYMBOL_GPL(kvm_cpuid); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3a3a6854dcca..87b0fcc23ef8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -66,9 +66,10 @@ #define X2APIC_BROADCAST 0xFFFFFFFFul static bool lapic_timer_advance_dynamic __read_mostly; -#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 -#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000 -#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 +#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_NS_INIT 1000 +#define LAPIC_TIMER_ADVANCE_NS_MAX 5000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -1504,8 +1505,8 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } - if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX)) - timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) + timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; apic->lapic_timer.timer_advance_ns = timer_advance_ns; } @@ -2302,7 +2303,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) HRTIMER_MODE_ABS_HARD); apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { - apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; lapic_timer_advance_dynamic = true; } else { apic->lapic_timer.timer_advance_ns = timer_advance_ns; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5269aa057dfa..24c23c66b226 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -83,7 +83,17 @@ module_param(dbg, bool, 0644); #define PTE_PREFETCH_NUM 8 #define PT_FIRST_AVAIL_BITS_SHIFT 10 -#define PT64_SECOND_AVAIL_BITS_SHIFT 52 +#define PT64_SECOND_AVAIL_BITS_SHIFT 54 + +/* + * The mask used to denote special SPTEs, which can be either MMIO SPTEs or + * Access Tracking SPTEs. + */ +#define SPTE_SPECIAL_MASK (3ULL << 52) +#define SPTE_AD_ENABLED_MASK (0ULL << 52) +#define SPTE_AD_DISABLED_MASK (1ULL << 52) +#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52) +#define SPTE_MMIO_MASK (3ULL << 52) #define PT64_LEVEL_BITS 9 @@ -219,12 +229,11 @@ static u64 __read_mostly shadow_present_mask; static u64 __read_mostly shadow_me_mask; /* - * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. - * Non-present SPTEs with shadow_acc_track_value set are in place for access - * tracking. + * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK; + * shadow_acc_track_mask is the set of bits to be cleared in non-accessed + * pages. */ static u64 __read_mostly shadow_acc_track_mask; -static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; /* * The mask/shift to use for saving the original R/X bits when marking the PTE @@ -304,7 +313,7 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); BUG_ON((mmio_mask & mmio_value) != mmio_value); - shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; + shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; shadow_mmio_access_mask = access_mask; } @@ -320,10 +329,27 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) return sp->role.ad_disabled; } +static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) +{ + /* + * When using the EPT page-modification log, the GPAs in the log + * would come from L2 rather than L1. Therefore, we need to rely + * on write protection to record dirty pages. This also bypasses + * PML, since writes now result in a vmexit. + */ + return vcpu->arch.mmu == &vcpu->arch.guest_mmu; +} + static inline bool spte_ad_enabled(u64 spte) { MMU_WARN_ON(is_mmio_spte(spte)); - return !(spte & shadow_acc_track_value); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK; +} + +static inline bool spte_ad_need_write_protect(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; } static inline u64 spte_shadow_accessed_mask(u64 spte) @@ -461,7 +487,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, { BUG_ON(!dirty_mask != !accessed_mask); BUG_ON(!accessed_mask && !acc_track_mask); - BUG_ON(acc_track_mask & shadow_acc_track_value); + BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK); shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; @@ -1589,16 +1615,16 @@ static bool spte_clear_dirty(u64 *sptep) rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep); + MMU_WARN_ON(!spte_ad_enabled(spte)); spte &= ~shadow_dirty_mask; - return mmu_spte_update(sptep, spte); } -static bool wrprot_ad_disabled_spte(u64 *sptep) +static bool spte_wrprot_for_clear_dirty(u64 *sptep) { bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT, (unsigned long *)sptep); - if (was_writable) + if (was_writable && !spte_ad_enabled(*sptep)) kvm_set_pfn_dirty(spte_to_pfn(*sptep)); return was_writable; @@ -1617,10 +1643,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) - if (spte_ad_enabled(*sptep)) - flush |= spte_clear_dirty(sptep); + if (spte_ad_need_write_protect(*sptep)) + flush |= spte_wrprot_for_clear_dirty(sptep); else - flush |= wrprot_ad_disabled_spte(sptep); + flush |= spte_clear_dirty(sptep); return flush; } @@ -1631,6 +1657,11 @@ static bool spte_set_dirty(u64 *sptep) rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep); + /* + * Similar to the !kvm_x86_ops->slot_disable_log_dirty case, + * do not bother adding back write access to pages marked + * SPTE_AD_WRPROT_ONLY_MASK. + */ spte |= shadow_dirty_mask; return mmu_spte_update(sptep, spte); @@ -2622,7 +2653,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, shadow_user_mask | shadow_x_mask | shadow_me_mask; if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; else spte |= shadow_accessed_mask; @@ -2968,7 +2999,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, sp = page_header(__pa(sptep)); if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; + else if (kvm_vcpu_ad_need_write_protect(vcpu)) + spte |= SPTE_AD_WRPROT_ONLY_MASK; /* * For the EPT case, shadow_present_mask is 0 if hardware diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 41abc62c9a8a..e76eb4f07f6c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2610,7 +2610,7 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, /* VM-entry exception error code */ if (CC(has_error_code && - vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))) + vmcs12->vm_entry_exception_error_code & GENMASK(31, 16))) return -EINVAL; /* VM-entry interruption-info field: reserved bits */ diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 4dea0e0e7e39..3e9c059099e9 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -262,6 +262,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void intel_pmu_refresh(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct x86_pmu_capability x86_pmu; struct kvm_cpuid_entry2 *entry; union cpuid10_eax eax; union cpuid10_edx edx; @@ -283,8 +284,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) if (!pmu->version) return; + perf_get_x86_pmu_capability(&x86_pmu); + pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, - INTEL_PMC_MAX_GENERIC); + x86_pmu.num_counters_gp); pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; pmu->available_event_types = ~entry->ebx & ((1ull << eax.split.mask_length) - 1); @@ -294,7 +297,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) } else { pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, - INTEL_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed); pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d4575ffb3cec..e7970a2e8eae 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -209,6 +209,11 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) struct page *page; unsigned int i; + if (!boot_cpu_has_bug(X86_BUG_L1TF)) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + if (!enable_ept) { l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; return 0; @@ -7995,12 +8000,10 @@ static int __init vmx_init(void) * contain 'auto' which will be turned into the default 'cond' * mitigation mode. */ - if (boot_cpu_has(X86_BUG_L1TF)) { - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } + r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); + if (r) { + vmx_exit(); + return r; } #ifdef CONFIG_KEXEC_CORE diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ed07d8d2caa..661e2bf38526 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -212,7 +212,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } @@ -885,34 +885,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; - if (cr4 & CR4_RESERVED_BITS) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + return -EINVAL; + + return 0; +} + +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + unsigned long old_cr4 = kvm_read_cr4(vcpu); + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + + if (kvm_valid_cr4(vcpu, cr4)) return 1; if (is_long_mode(vcpu)) { @@ -1161,13 +1169,6 @@ static u32 msrs_to_save[] = { MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, - MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19, - MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21, - MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23, - MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25, - MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27, - MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29, - MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31, MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, @@ -1177,13 +1178,6 @@ static u32 msrs_to_save[] = { MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, - MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19, - MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21, - MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23, - MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25, - MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27, - MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29, - MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31, }; static unsigned num_msrs_to_save; @@ -5097,13 +5091,14 @@ out: static void kvm_init_msr_list(void) { + struct x86_pmu_capability x86_pmu; u32 dummy[2]; unsigned i, j; BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, "Please update the fixed PMCs in msrs_to_save[]"); - BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32, - "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]"); + + perf_get_x86_pmu_capability(&x86_pmu); for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) @@ -5145,6 +5140,15 @@ static void kvm_init_msr_list(void) intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) continue; break; + case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: + if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; + break; + case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17: + if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; } default: break; @@ -8714,10 +8718,6 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - (sregs->cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; - if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in @@ -8736,7 +8736,7 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return -EINVAL; } - return 0; + return kvm_valid_cr4(vcpu, sregs->cr4); } static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 0d3365cb64de..a04551ee5568 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -57,19 +57,7 @@ static efi_system_table_t __init *xen_efi_probe(void) return NULL; /* Here we know that Xen runs on EFI platform. */ - - efi.get_time = xen_efi_get_time; - efi.set_time = xen_efi_set_time; - efi.get_wakeup_time = xen_efi_get_wakeup_time; - efi.set_wakeup_time = xen_efi_set_wakeup_time; - efi.get_variable = xen_efi_get_variable; - efi.get_next_variable = xen_efi_get_next_variable; - efi.set_variable = xen_efi_set_variable; - efi.query_variable_info = xen_efi_query_variable_info; - efi.update_capsule = xen_efi_update_capsule; - efi.query_capsule_caps = xen_efi_query_capsule_caps; - efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; - efi.reset_system = xen_efi_reset_system; + xen_efi_runtime_setup(); efi_systab_xen.tables = info->cfg.addr; efi_systab_xen.nr_tables = info->cfg.nent; diff --git a/block/blk-mq.c b/block/blk-mq.c index 6e3b15f70cd7..ec791156e9cc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1992,10 +1992,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) /* bypass scheduler for flush rq */ blk_insert_flush(rq); blk_mq_run_hw_queue(data.hctx, true); - } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) { + } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs || + !blk_queue_nonrot(q))) { /* * Use plugging if we have a ->commit_rqs() hook as well, as * we know the driver uses bd->last in a smart fashion. + * + * Use normal plugging if this disk is slow HDD, as sequential + * IO may benefit a lot from plug merging. */ unsigned int request_count = plug->rq_count; struct request *last = NULL; @@ -2012,6 +2016,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } blk_add_rq_to_plug(plug, rq); + } else if (q->elevator) { + blk_mq_sched_insert_request(rq, false, true, true); } else if (plug && !blk_queue_nomerges(q)) { /* * We do limited plugging. If the bio can be merged, do that. @@ -2035,8 +2041,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie); } - } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && - !data.hctx->dispatch_busy)) { + } else if ((q->nr_hw_queues > 1 && is_sync) || + !data.hctx->dispatch_busy) { blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_sched_insert_request(rq, false, true, true); diff --git a/block/sed-opal.c b/block/sed-opal.c index 4e95a9792162..b4c761973ac1 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -129,7 +129,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = { { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 }, /* tables */ - [OPAL_TABLE_TABLE] + [OPAL_TABLE_TABLE] = { 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 }, [OPAL_LOCKINGRANGE_GLOBAL] = { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 }, @@ -372,8 +372,8 @@ static void check_geometry(struct opal_dev *dev, const void *data) { const struct d0_geometry_features *geo = data; - dev->align = geo->alignment_granularity; - dev->lowest_lba = geo->lowest_aligned_lba; + dev->align = be64_to_cpu(geo->alignment_granularity); + dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba); } static int execute_step(struct opal_dev *dev, diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1410fa893653..f6f77eaa7217 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -994,6 +994,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) blk_queue_write_cache(lo->lo_queue, true, false); + if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) { + /* In case of direct I/O, match underlying block size */ + unsigned short bsize = bdev_logical_block_size( + inode->i_sb->s_bdev); + + blk_queue_logical_block_size(lo->lo_queue, bsize); + blk_queue_physical_block_size(lo->lo_queue, bsize); + blk_queue_io_min(lo->lo_queue, bsize); + } + loop_update_rotational(lo); loop_update_dio(lo); set_capacity(lo->lo_disk, size); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 9207ac291341..ad50efb470aa 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -280,9 +280,6 @@ static int sysc_get_one_clock(struct sysc *ddata, const char *name) ddata->clocks[index] = devm_clk_get(ddata->dev, name); if (IS_ERR(ddata->clocks[index])) { - if (PTR_ERR(ddata->clocks[index]) == -ENOENT) - return 0; - dev_err(ddata->dev, "clock get error for %s: %li\n", name, PTR_ERR(ddata->clocks[index])); @@ -357,7 +354,7 @@ static int sysc_get_clocks(struct sysc *ddata) continue; error = sysc_get_one_clock(ddata, name); - if (error && error != -ENOENT) + if (error) return error; } @@ -1632,17 +1629,19 @@ static int sysc_init_module(struct sysc *ddata) if (error) return error; - if (manage_clocks) { - sysc_clkdm_deny_idle(ddata); + sysc_clkdm_deny_idle(ddata); - error = sysc_enable_opt_clocks(ddata); - if (error) - return error; + /* + * Always enable clocks. The bootloader may or may not have enabled + * the related clocks. + */ + error = sysc_enable_opt_clocks(ddata); + if (error) + return error; - error = sysc_enable_main_clocks(ddata); - if (error) - goto err_opt_clocks; - } + error = sysc_enable_main_clocks(ddata); + if (error) + goto err_opt_clocks; if (!(ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT)) { error = sysc_rstctrl_reset_deassert(ddata, true); @@ -1660,7 +1659,7 @@ static int sysc_init_module(struct sysc *ddata) goto err_main_clocks; } - if (!ddata->legacy_mode && manage_clocks) { + if (!ddata->legacy_mode) { error = sysc_enable_module(ddata->dev); if (error) goto err_main_clocks; @@ -1677,6 +1676,7 @@ err_main_clocks: if (manage_clocks) sysc_disable_main_clocks(ddata); err_opt_clocks: + /* No re-enable of clockdomain autoidle to prevent module autoidle */ if (manage_clocks) { sysc_disable_opt_clocks(ddata); sysc_clkdm_allow_idle(ddata); @@ -2357,6 +2357,27 @@ static void ti_sysc_idle(struct work_struct *work) ddata = container_of(work, struct sysc, idle_work.work); + /* + * One time decrement of clock usage counts if left on from init. + * Note that we disable opt clocks unconditionally in this case + * as they are enabled unconditionally during init without + * considering sysc_opt_clks_needed() at that point. + */ + if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE | + SYSC_QUIRK_NO_IDLE_ON_INIT)) { + sysc_disable_main_clocks(ddata); + sysc_disable_opt_clocks(ddata); + sysc_clkdm_allow_idle(ddata); + } + + /* Keep permanent PM runtime usage count for SYSC_QUIRK_NO_IDLE */ + if (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE) + return; + + /* + * Decrement PM runtime usage count for SYSC_QUIRK_NO_IDLE_ON_INIT + * and SYSC_QUIRK_NO_RESET_ON_INIT + */ if (pm_runtime_active(ddata->dev)) pm_runtime_put_sync(ddata->dev); } @@ -2445,7 +2466,8 @@ static int sysc_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&ddata->idle_work, ti_sysc_idle); /* At least earlycon won't survive without deferred idle */ - if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE_ON_INIT | + if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE | + SYSC_QUIRK_NO_IDLE_ON_INIT | SYSC_QUIRK_NO_RESET_ON_INIT)) { schedule_delayed_work(&ddata->idle_work, 3000); } else { diff --git a/drivers/char/random.c b/drivers/char/random.c index d3beed084c0a..de434feb873a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1732,6 +1732,56 @@ void get_random_bytes(void *buf, int nbytes) } EXPORT_SYMBOL(get_random_bytes); + +/* + * Each time the timer fires, we expect that we got an unpredictable + * jump in the cycle counter. Even if the timer is running on another + * CPU, the timer activity will be touching the stack of the CPU that is + * generating entropy.. + * + * Note that we don't re-arm the timer in the timer itself - we are + * happy to be scheduled away, since that just makes the load more + * complex, but we do not want the timer to keep ticking unless the + * entropy loop is running. + * + * So the re-arming always happens in the entropy loop itself. + */ +static void entropy_timer(struct timer_list *t) +{ + credit_entropy_bits(&input_pool, 1); +} + +/* + * If we have an actual cycle counter, see if we can + * generate enough entropy with timing noise + */ +static void try_to_generate_entropy(void) +{ + struct { + unsigned long now; + struct timer_list timer; + } stack; + + stack.now = random_get_entropy(); + + /* Slow counter - or none. Don't even bother */ + if (stack.now == random_get_entropy()) + return; + + timer_setup_on_stack(&stack.timer, entropy_timer, 0); + while (!crng_ready()) { + if (!timer_pending(&stack.timer)) + mod_timer(&stack.timer, jiffies+1); + mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + schedule(); + stack.now = random_get_entropy(); + } + + del_timer_sync(&stack.timer); + destroy_timer_on_stack(&stack.timer); + mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); +} + /* * Wait for the urandom pool to be seeded and thus guaranteed to supply * cryptographically secure random numbers. This applies to: the /dev/urandom @@ -1746,7 +1796,17 @@ int wait_for_random_bytes(void) { if (likely(crng_ready())) return 0; - return wait_event_interruptible(crng_init_wait, crng_ready()); + + do { + int ret; + ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); + if (ret) + return ret > 0 ? 0 : ret; + + try_to_generate_entropy(); + } while (!crng_ready()); + + return 0; } EXPORT_SYMBOL(wait_for_random_bytes); @@ -2460,4 +2520,4 @@ void add_bootloader_randomness(const void *buf, unsigned int size) else add_device_randomness(buf, size); } -EXPORT_SYMBOL_GPL(add_bootloader_randomness);
\ No newline at end of file +EXPORT_SYMBOL_GPL(add_bootloader_randomness); diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index d8c2bd4391d0..11ff701ff4bb 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -25,7 +25,9 @@ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) struct clock_event_device *clkevt = &to->clkevt; - of_irq->percpu ? free_percpu_irq(of_irq->irq, clkevt) : + if (of_irq->percpu) + free_percpu_irq(of_irq->irq, clkevt); + else free_irq(of_irq->irq, clkevt); } diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index 250150560e68..91e424378217 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -35,7 +35,7 @@ config CHELSIO_IPSEC_INLINE config CRYPTO_DEV_CHELSIO_TLS tristate "Chelsio Crypto Inline TLS Driver" depends on CHELSIO_T4 - depends on TLS + depends on TLS_TOE select CRYPTO_DEV_CHELSIO ---help--- Support Chelsio Inline TLS with Chelsio crypto accelerator. diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h index 025c831d0899..d2bc655ab931 100644 --- a/drivers/crypto/chelsio/chtls/chtls.h +++ b/drivers/crypto/chelsio/chtls/chtls.h @@ -21,6 +21,7 @@ #include <crypto/internal/hash.h> #include <linux/tls.h> #include <net/tls.h> +#include <net/tls_toe.h> #include "t4fw_api.h" #include "t4_msg.h" @@ -118,7 +119,7 @@ struct tls_scmd { }; struct chtls_dev { - struct tls_device tlsdev; + struct tls_toe_device tlsdev; struct list_head list; struct cxgb4_lld_info *lldi; struct pci_dev *pdev; @@ -362,7 +363,7 @@ enum { #define TCP_PAGE(sk) (sk->sk_frag.page) #define TCP_OFF(sk) (sk->sk_frag.offset) -static inline struct chtls_dev *to_chtls_dev(struct tls_device *tlsdev) +static inline struct chtls_dev *to_chtls_dev(struct tls_toe_device *tlsdev) { return container_of(tlsdev, struct chtls_dev, tlsdev); } diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index e6df5b95ed47..18996935d8ba 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -124,7 +124,7 @@ static void chtls_stop_listen(struct chtls_dev *cdev, struct sock *sk) mutex_unlock(¬ify_mutex); } -static int chtls_inline_feature(struct tls_device *dev) +static int chtls_inline_feature(struct tls_toe_device *dev) { struct net_device *netdev; struct chtls_dev *cdev; @@ -140,7 +140,7 @@ static int chtls_inline_feature(struct tls_device *dev) return 0; } -static int chtls_create_hash(struct tls_device *dev, struct sock *sk) +static int chtls_create_hash(struct tls_toe_device *dev, struct sock *sk) { struct chtls_dev *cdev = to_chtls_dev(dev); @@ -149,7 +149,7 @@ static int chtls_create_hash(struct tls_device *dev, struct sock *sk) return 0; } -static void chtls_destroy_hash(struct tls_device *dev, struct sock *sk) +static void chtls_destroy_hash(struct tls_toe_device *dev, struct sock *sk) { struct chtls_dev *cdev = to_chtls_dev(dev); @@ -161,7 +161,7 @@ static void chtls_free_uld(struct chtls_dev *cdev) { int i; - tls_unregister_device(&cdev->tlsdev); + tls_toe_unregister_device(&cdev->tlsdev); kvfree(cdev->kmap.addr); idr_destroy(&cdev->hwtid_idr); for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) @@ -173,27 +173,27 @@ static void chtls_free_uld(struct chtls_dev *cdev) static inline void chtls_dev_release(struct kref *kref) { + struct tls_toe_device *dev; struct chtls_dev *cdev; - struct tls_device *dev; - dev = container_of(kref, struct tls_device, kref); + dev = container_of(kref, struct tls_toe_device, kref); cdev = to_chtls_dev(dev); chtls_free_uld(cdev); } static void chtls_register_dev(struct chtls_dev *cdev) { - struct tls_device *tlsdev = &cdev->tlsdev; + struct tls_toe_device *tlsdev = &cdev->tlsdev; - strlcpy(tlsdev->name, "chtls", TLS_DEVICE_NAME_MAX); + strlcpy(tlsdev->name, "chtls", TLS_TOE_DEVICE_NAME_MAX); strlcat(tlsdev->name, cdev->lldi->ports[0]->name, - TLS_DEVICE_NAME_MAX); + TLS_TOE_DEVICE_NAME_MAX); tlsdev->feature = chtls_inline_feature; tlsdev->hash = chtls_create_hash; tlsdev->unhash = chtls_destroy_hash; tlsdev->release = chtls_dev_release; kref_init(&tlsdev->kref); - tls_register_device(tlsdev); + tls_toe_register_device(tlsdev); cdev->cdev_state = CHTLS_CDEV_STATE_UP; } diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c index 64cc81915581..ab42c21c5517 100644 --- a/drivers/firmware/arm_scmi/reset.c +++ b/drivers/firmware/arm_scmi/reset.c @@ -150,7 +150,7 @@ static int scmi_domain_reset(const struct scmi_handle *handle, u32 domain, dom = t->tx.buf; dom->domain_id = cpu_to_le32(domain); dom->flags = cpu_to_le32(flags); - dom->domain_id = cpu_to_le32(state); + dom->reset_state = cpu_to_le32(state); if (rdom->async_reset) ret = scmi_do_xfer_with_response(handle, t); diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 42e2c1f57152..00962a659009 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -54,7 +54,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ - amdgpu_vm_sdma.o amdgpu_pmu.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o + amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index eba42c752bca..82155ac3288a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -189,7 +189,7 @@ static int acp_hw_init(void *handle) u32 val = 0; u32 count = 0; struct device *dev; - struct i2s_platform_data *i2s_pdata; + struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -231,20 +231,21 @@ static int acp_hw_init(void *handle) adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); - if (adev->acp.acp_cell == NULL) - return -ENOMEM; + if (adev->acp.acp_cell == NULL) { + r = -ENOMEM; + goto failure; + } adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); if (adev->acp.acp_res == NULL) { - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); if (i2s_pdata == NULL) { - kfree(adev->acp.acp_res); - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } switch (adev->asic_type) { @@ -341,14 +342,14 @@ static int acp_hw_init(void *handle) r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); if (r) - return r; + goto failure; for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); if (r) { dev_err(dev, "Failed to add dev to genpd\n"); - return r; + goto failure; } } @@ -367,7 +368,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -384,7 +386,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -393,6 +396,13 @@ static int acp_hw_init(void *handle) val &= ~ACP_SOFT_RESET__SoftResetAud_MASK; cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); return 0; + +failure: + kfree(i2s_pdata); + kfree(adev->acp.acp_res); + kfree(adev->acp.acp_cell); + kfree(adev->acp.acp_genpd); + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 264677ab248a..6f8aaf655a9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -81,9 +81,10 @@ * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches + * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 34 +#define KMS_DRIVER_MINOR 35 #define KMS_DRIVER_PATCHLEVEL 0 #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 554a59b3c4a6..6ee4021910e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -165,6 +165,7 @@ struct amdgpu_gfx_config { uint32_t num_sc_per_sh; uint32_t num_packer_per_sc; uint32_t pa_sc_tile_steering_override; + uint64_t tcc_disabled_mask; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f6147528be64..f2c097983f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -787,6 +787,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.pa_sc_tile_steering_override = adev->gfx.config.pa_sc_tile_steering_override; + dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e2fb141ff2e5..5251352f5922 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -603,14 +603,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct ttm_bo_global *glob = adev->mman.bdev.glob; struct amdgpu_vm_bo_base *bo_base; -#if 0 if (vm->bulk_moveable) { spin_lock(&glob->lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); spin_unlock(&glob->lru_lock); return; } -#endif memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 638c821611ab..957811b73672 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1691,6 +1691,17 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) } } +static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) +{ + /* TCCs are global (not instanced). */ + uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); + + adev->gfx.config.tcc_disabled_mask = + REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | + (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); +} + static void gfx_v10_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -1702,6 +1713,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); + gfx_v10_0_get_tcc_info(adev); adev->gfx.config.pa_sc_tile_steering_override = gfx_v10_0_init_pa_sc_tile_steering_override(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 85393a99a848..de9b995b65b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -317,10 +317,12 @@ static int nv_asic_reset(struct amdgpu_device *adev) struct smu_context *smu = &adev->smu; if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); ret = smu_baco_reset(smu); } else { - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); ret = nv_asic_mode1_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f70658a536a9..f8ab80c8801b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -558,12 +558,14 @@ static int soc15_asic_reset(struct amdgpu_device *adev) { switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: return soc15_mode2_reset(adev); default: - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); return soc15_asic_mode1_reset(adev); } } @@ -771,8 +773,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); -#else -# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); break; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8cab6da512a0..a52f0b13a2c8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2385,8 +2385,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) dm->dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true; - if (adev->asic_type == CHIP_RENOIR) - dm->dc->debug.disable_stutter = true; return 0; fail: @@ -6019,7 +6017,9 @@ static void amdgpu_dm_enable_crtc_interrupts(struct drm_device *dev, struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; int i; +#ifdef CONFIG_DEBUG_FS enum amdgpu_dm_pipe_crc_source source; +#endif for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 1787b9bf800a..76d54885374a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -668,6 +668,7 @@ struct clock_source *dce100_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 318e9c2e2ca8..89620adc81d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -714,6 +714,7 @@ struct clock_source *dce110_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 83e1878161c9..21a657e79306 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -687,6 +687,7 @@ struct clock_source *dce112_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 8b85e5274bba..7c52f7f9196c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -500,6 +500,7 @@ static struct clock_source *dce120_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 4625df9f9fd2..643ccb0ade00 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -701,6 +701,7 @@ struct clock_source *dce80_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 59305e411a66..1599bb971111 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -786,6 +786,7 @@ struct clock_source *dcn10_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index b4e3ce22ed52..5a2763daff4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1077,6 +1077,7 @@ struct clock_source *dcn20_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 8cd9de8b1a7a..ef673bffc241 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -3,7 +3,17 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o -CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -mpreferred-stack-boundary=4 +ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) + cc_stack_align := -mpreferred-stack-boundary=4 +else ifneq ($(call cc-option, -mstack-alignment=16),) + cc_stack_align := -mstack-alignment=16 +endif + +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align) + +ifdef CONFIG_CC_IS_CLANG +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 +endif AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index 456cd0e3289c..3b6ed60dcd35 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -39,9 +39,6 @@ * ways. Unless there is something clearly wrong with it the code should * remain as-is as it provides us with a guarantee from HW that it is correct. */ - -typedef unsigned int uint; - typedef struct { double DPPCLK; double DISPCLK; @@ -4774,7 +4771,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - uint m; + unsigned int m; locals->cursor_bw[k] = 0; locals->cursor_bw_pre[k] = 0; @@ -5285,7 +5282,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; double FullDETBufferingTimeYStutterCriticalPlane = 0; double TimeToFinishSwathTransferStutterCriticalPlane = 0; - uint k, j; + unsigned int k, j; mode_lib->vba.TotalActiveDPP = 0; mode_lib->vba.TotalDCCActiveDPP = 0; @@ -5507,7 +5504,7 @@ static void CalculateDCFCLKDeepSleep( double DPPCLK[], double *DCFCLKDeepSleep) { - uint k; + unsigned int k; double DisplayPipeLineDeliveryTimeLuma; double DisplayPipeLineDeliveryTimeChroma; //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX]; @@ -5727,7 +5724,7 @@ static void CalculatePixelDeliveryTimes( double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) { double req_per_swath_ub; - uint k; + unsigned int k; for (k = 0; k < NumberOfActivePlanes; ++k) { if (VRatio[k] <= 1) { @@ -5869,7 +5866,7 @@ static void CalculateMetaAndPTETimes( unsigned int dpte_groups_per_row_chroma_ub; unsigned int num_group_per_lower_vm_stage; unsigned int num_req_per_lower_vm_stage; - uint k; + unsigned int k; for (k = 0; k < NumberOfActivePlanes; ++k) { if (GPUVMEnable == true) { diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 33960fb38a5d..4acf139ea014 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -843,6 +843,8 @@ static int smu_sw_init(void *handle) smu->smu_baco.state = SMU_BACO_STATE_EXIT; smu->smu_baco.platform_support = false; + mutex_init(&smu->sensor_lock); + smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index f1f072012fac..d493a3f8c07a 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1018,6 +1018,7 @@ static int arcturus_read_sensor(struct smu_context *smu, if (!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -1044,6 +1045,7 @@ static int arcturus_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 6109815a0401..23171a4d9a31 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -344,6 +344,7 @@ struct smu_context const struct smu_funcs *funcs; const struct pptable_funcs *ppt_funcs; struct mutex mutex; + struct mutex sensor_lock; uint64_t pool_size; struct smu_table_context smu_table; diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 12c0e469bf35..0b461404af6b 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -547,7 +547,7 @@ static int navi10_get_metrics_table(struct smu_context *smu, struct smu_table_context *smu_table= &smu->smu_table; int ret = 0; - if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) { + if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) { ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)smu_table->metrics_table, false); if (ret) { @@ -1386,6 +1386,7 @@ static int navi10_read_sensor(struct smu_context *smu, if(!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -1409,6 +1410,7 @@ static int navi10_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 64386ee3f878..bbd8ebd58434 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -3023,6 +3023,7 @@ static int vega20_read_sensor(struct smu_context *smu, if(!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -3048,6 +3049,7 @@ static int vega20_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index 2851cac94d86..b72840c06ab7 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -43,9 +43,8 @@ komeda_wb_encoder_atomic_check(struct drm_encoder *encoder, struct komeda_data_flow_cfg dflow; int err; - if (!writeback_job || !writeback_job->fb) { + if (!writeback_job) return 0; - } if (!crtc_st->active) { DRM_DEBUG_ATOMIC("Cannot write the composition result out on a inactive CRTC.\n"); @@ -166,8 +165,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, &komeda_wb_encoder_helper_funcs, formats, n_formats); komeda_put_fourcc_list(formats); - if (err) + if (err) { + kfree(kwb_conn); return err; + } drm_connector_helper_add(&wb_conn->base, &komeda_wb_conn_helper_funcs); diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c index 22c0847986df..875a3a9eabfa 100644 --- a/drivers/gpu/drm/arm/malidp_mw.c +++ b/drivers/gpu/drm/arm/malidp_mw.c @@ -131,7 +131,7 @@ malidp_mw_encoder_atomic_check(struct drm_encoder *encoder, struct drm_framebuffer *fb; int i, n_planes; - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; fb = conn_state->writeback_job->fb; @@ -248,7 +248,7 @@ void malidp_mw_atomic_commit(struct drm_device *drm, mw_state = to_mw_state(conn_state); - if (conn_state->writeback_job && conn_state->writeback_job->fb) { + if (conn_state->writeback_job) { struct drm_framebuffer *fb = conn_state->writeback_job->fb; DRM_DEV_DEBUG_DRIVER(drm->dev, diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 419381abbdd1..14aeaf736321 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -430,10 +430,15 @@ static int drm_atomic_connector_check(struct drm_connector *connector, return -EINVAL; } - if (writeback_job->out_fence && !writeback_job->fb) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", - connector->base.id, connector->name); - return -EINVAL; + if (!writeback_job->fb) { + if (writeback_job->out_fence) { + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", + connector->base.id, connector->name); + return -EINVAL; + } + + drm_writeback_cleanup_job(writeback_job); + state->writeback_job = NULL; } return 0; diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index ff138b6ec48b..43d9e3bb3a94 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job) if (job->fb) drm_framebuffer_put(job->fb); + if (job->out_fence) + dma_fence_put(job->out_fence); + kfree(job); } EXPORT_SYMBOL(drm_writeback_cleanup_job); @@ -366,25 +369,29 @@ drm_writeback_signal_completion(struct drm_writeback_connector *wb_connector, { unsigned long flags; struct drm_writeback_job *job; + struct dma_fence *out_fence; spin_lock_irqsave(&wb_connector->job_lock, flags); job = list_first_entry_or_null(&wb_connector->job_queue, struct drm_writeback_job, list_entry); - if (job) { + if (job) list_del(&job->list_entry); - if (job->out_fence) { - if (status) - dma_fence_set_error(job->out_fence, status); - dma_fence_signal(job->out_fence); - dma_fence_put(job->out_fence); - } - } + spin_unlock_irqrestore(&wb_connector->job_lock, flags); if (WARN_ON(!job)) return; + out_fence = job->out_fence; + if (out_fence) { + if (status) + dma_fence_set_error(out_fence, status); + dma_fence_signal(out_fence); + dma_fence_put(out_fence); + job->out_fence = NULL; + } + INIT_WORK(&job->cleanup_work, cleanup_work); queue_work(system_long_wq, &job->cleanup_work); } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b51d1ceb8739..ce05e805b08f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7261,7 +7261,7 @@ retry: pipe_config->fdi_lanes = lane; intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, - link_bw, &pipe_config->fdi_m_n, false); + link_bw, &pipe_config->fdi_m_n, false, false); ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); if (ret == -EDEADLK) @@ -7508,11 +7508,15 @@ void intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n) + bool constant_n, bool fec_enable) { - m_n->tu = 64; + u32 data_clock = bits_per_pixel * pixel_clock; + + if (fec_enable) + data_clock = intel_dp_mode_to_fec_clock(data_clock); - compute_m_n(bits_per_pixel * pixel_clock, + m_n->tu = 64; + compute_m_n(data_clock, link_clock * nlanes * 8, &m_n->gmch_m, &m_n->gmch_n, constant_n); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index e57e6969051d..01fa87ad3270 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -414,7 +414,7 @@ enum phy_fia { void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n); + bool constant_n, bool fec_enable); bool is_ccs_modifier(u64 modifier); void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv); u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 921ad0a2f7ba..57e9f0ba331b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -78,8 +78,8 @@ #define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 #define DP_DSC_MAX_ENC_THROUGHPUT_1 400000 -/* DP DSC FEC Overhead factor = (100 - 2.4)/100 */ -#define DP_DSC_FEC_OVERHEAD_FACTOR 976 +/* DP DSC FEC Overhead factor = 1/(0.972261) */ +#define DP_DSC_FEC_OVERHEAD_FACTOR 972261 /* Compliance test status bits */ #define INTEL_DP_RESOLUTION_SHIFT_MASK 0 @@ -494,6 +494,97 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock) +{ + return div_u64(mul_u32_u32(mode_clock, 1000000U), + DP_DSC_FEC_OVERHEAD_FACTOR); +} + +static u16 intel_dp_dsc_get_output_bpp(u32 link_clock, u32 lane_count, + u32 mode_clock, u32 mode_hdisplay) +{ + u32 bits_per_pixel, max_bpp_small_joiner_ram; + int i; + + /* + * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* + * (LinkSymbolClock)* 8 * (TimeSlotsPerMTP) + * for SST -> TimeSlotsPerMTP is 1, + * for MST -> TimeSlotsPerMTP has to be calculated + */ + bits_per_pixel = (link_clock * lane_count * 8) / + intel_dp_mode_to_fec_clock(mode_clock); + DRM_DEBUG_KMS("Max link bpp: %u\n", bits_per_pixel); + + /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ + max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / mode_hdisplay; + DRM_DEBUG_KMS("Max small joiner bpp: %u\n", max_bpp_small_joiner_ram); + + /* + * Greatest allowed DSC BPP = MIN (output BPP from available Link BW + * check, output bpp from small joiner RAM check) + */ + bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); + + /* Error out if the max bpp is less than smallest allowed valid bpp */ + if (bits_per_pixel < valid_dsc_bpp[0]) { + DRM_DEBUG_KMS("Unsupported BPP %u, min %u\n", + bits_per_pixel, valid_dsc_bpp[0]); + return 0; + } + + /* Find the nearest match in the array of known BPPs from VESA */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { + if (bits_per_pixel < valid_dsc_bpp[i + 1]) + break; + } + bits_per_pixel = valid_dsc_bpp[i]; + + /* + * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, + * fractional part is 0 + */ + return bits_per_pixel << 4; +} + +static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, + int mode_clock, int mode_hdisplay) +{ + u8 min_slice_count, i; + int max_slice_width; + + if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_0); + else + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_1); + + max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); + if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { + DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", + max_slice_width); + return 0; + } + /* Also take into account max slice width */ + min_slice_count = min_t(u8, min_slice_count, + DIV_ROUND_UP(mode_hdisplay, + max_slice_width)); + + /* Find the closest match to the valid slice count values */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { + if (valid_dsc_slicecount[i] > + drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, + false)) + break; + if (min_slice_count <= valid_dsc_slicecount[i]) + return valid_dsc_slicecount[i]; + } + + DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); + return 0; +} + static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) @@ -2226,7 +2317,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, - constant_n); + constant_n, pipe_config->fec_enable); if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { @@ -2236,7 +2327,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, &pipe_config->dp_m2_n2, - constant_n); + constant_n, pipe_config->fec_enable); } if (!HAS_DDI(dev_priv)) @@ -4323,91 +4414,6 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) DP_DPRX_ESI_LEN; } -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay) -{ - u16 bits_per_pixel, max_bpp_small_joiner_ram; - int i; - - /* - * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* - * (LinkSymbolClock)* 8 * ((100-FECOverhead)/100)*(TimeSlotsPerMTP) - * FECOverhead = 2.4%, for SST -> TimeSlotsPerMTP is 1, - * for MST -> TimeSlotsPerMTP has to be calculated - */ - bits_per_pixel = (link_clock * lane_count * 8 * - DP_DSC_FEC_OVERHEAD_FACTOR) / - mode_clock; - - /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ - max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / - mode_hdisplay; - - /* - * Greatest allowed DSC BPP = MIN (output BPP from avaialble Link BW - * check, output bpp from small joiner RAM check) - */ - bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); - - /* Error out if the max bpp is less than smallest allowed valid bpp */ - if (bits_per_pixel < valid_dsc_bpp[0]) { - DRM_DEBUG_KMS("Unsupported BPP %d\n", bits_per_pixel); - return 0; - } - - /* Find the nearest match in the array of known BPPs from VESA */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { - if (bits_per_pixel < valid_dsc_bpp[i + 1]) - break; - } - bits_per_pixel = valid_dsc_bpp[i]; - - /* - * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, - * fractional part is 0 - */ - return bits_per_pixel << 4; -} - -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, - int mode_clock, - int mode_hdisplay) -{ - u8 min_slice_count, i; - int max_slice_width; - - if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_0); - else - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_1); - - max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); - if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { - DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", - max_slice_width); - return 0; - } - /* Also take into account max slice width */ - min_slice_count = min_t(u8, min_slice_count, - DIV_ROUND_UP(mode_hdisplay, - max_slice_width)); - - /* Find the closest match to the valid slice count values */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { - if (valid_dsc_slicecount[i] > - drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, - false)) - break; - if (min_slice_count <= valid_dsc_slicecount[i]) - return valid_dsc_slicecount[i]; - } - - DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); - return 0; -} - static void intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 657bbb1f5ed0..00981fb9414b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -102,10 +102,6 @@ bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool intel_dp_get_link_status(struct intel_dp *intel_dp, u8 *link_status); -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay); -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, - int mode_hdisplay); bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); @@ -118,4 +114,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) return ~((1 << lane_count) - 1) & 0xf; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 6df240a01b8c..600873c796d0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -81,7 +81,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, crtc_state->port_clock, &crtc_state->dp_m_n, - constant_n); + constant_n, crtc_state->fec_enable); crtc_state->dp_m_n.tu = slots; return 0; @@ -615,7 +615,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->base.port; - intel_encoder->crtc_mask = BIT(pipe); + intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; intel_encoder->compute_config = intel_dp_mst_compute_config; diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index dea63be1964f..cae25e493128 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1528,6 +1528,7 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, int src_x, src_w, src_h, crtc_w, crtc_h; const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + unsigned int stride = plane_state->color_plane[0].stride; unsigned int cpp = fb->format->cpp[0]; unsigned int width_bytes; int min_width, min_height; @@ -1569,9 +1570,9 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, return -EINVAL; } - if (width_bytes > 4096 || fb->pitches[0] > 4096) { + if (stride > 4096) { DRM_DEBUG_KMS("Stride (%u) exceeds hardware max with scaling (%u)\n", - fb->pitches[0], 4096); + stride, 4096); return -EINVAL; } diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index e226324adb69..4bdd63b57100 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1083,7 +1083,7 @@ static const struct dss_features omap34xx_dss_feats = { static const struct dss_features omap3630_dss_feats = { .model = DSS_MODEL_OMAP3, - .fck_div_max = 32, + .fck_div_max = 31, .fck_freq_max = 173000000, .dss_fck_multiplier = 1, .parent_clk_name = "dpll4_ck", diff --git a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c index ae07290bba6a..04efa78d70b6 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c @@ -147,7 +147,7 @@ static int rcar_du_wb_enc_atomic_check(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct drm_framebuffer *fb; - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; fb = conn_state->writeback_job->fb; @@ -221,7 +221,7 @@ void rcar_du_writeback_setup(struct rcar_du_crtc *rcrtc, unsigned int i; state = rcrtc->writeback.base.state; - if (!state || !state->writeback_job || !state->writeback_job->fb) + if (!state || !state->writeback_job) return; fb = state->writeback_job->fb; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c index 525dc1c0f1c1..530edb3b51cc 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c @@ -7,6 +7,7 @@ #include <linux/gpio.h> #include <linux/mod_devicetable.h> #include <linux/of_gpio.h> +#include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> #include <drm/drm_atomic_helper.h> diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c index 1ce4d7142b6e..bf720206727f 100644 --- a/drivers/gpu/drm/vc4/vc4_txp.c +++ b/drivers/gpu/drm/vc4/vc4_txp.c @@ -231,7 +231,7 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn, int i; conn_state = drm_atomic_get_new_connector_state(state, conn); - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); @@ -271,8 +271,7 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, u32 ctrl; int i; - if (WARN_ON(!conn_state->writeback_job || - !conn_state->writeback_job->fb)) + if (WARN_ON(!conn_state->writeback_job)) return; mode = &conn_state->crtc->state->adjusted_mode; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index c09791fb4929..f1c714acc280 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1736,6 +1736,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS: case PCI_DEVICE_ID_INTEL_DNV_SMBUS: case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS: + priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; priv->features |= FEATURE_SMBUS_PEC; diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index a89bfce5388e..17abf60c94ae 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -355,11 +355,13 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, { dma_addr_t rx_dma; unsigned long time_left; - void *dma_buf; + void *dma_buf = NULL; struct geni_se *se = &gi2c->se; size_t len = msg->len; - dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (!of_machine_is_compatible("lenovo,yoga-c630")) + dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (dma_buf) geni_se_select_mode(se, GENI_SE_DMA); else @@ -394,11 +396,13 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, { dma_addr_t tx_dma; unsigned long time_left; - void *dma_buf; + void *dma_buf = NULL; struct geni_se *se = &gi2c->se; size_t len = msg->len; - dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (!of_machine_is_compatible("lenovo,yoga-c630")) + dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (dma_buf) geni_se_select_mode(se, GENI_SE_DMA); else diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index f31413fd9521..800414886f6b 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -202,6 +202,7 @@ static irqreturn_t riic_tend_isr(int irq, void *data) if (readb(riic->base + RIIC_ICSR2) & ICSR2_NACKF) { /* We got a NACKIE */ readb(riic->base + RIIC_ICDRR); /* dummy read */ + riic_clear_set_bit(riic, ICSR2_NACKF, 0, RIIC_ICSR2); riic->err = -ENXIO; } else if (riic->bytes_left) { return IRQ_NONE; diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c index 92ff9991bae8..db9763cb4dae 100644 --- a/drivers/i2c/i2c-slave-eeprom.c +++ b/drivers/i2c/i2c-slave-eeprom.c @@ -33,11 +33,13 @@ struct eeprom_data { u16 address_mask; u8 num_address_bytes; u8 idx_write_cnt; + bool read_only; u8 buffer[]; }; #define I2C_SLAVE_BYTELEN GENMASK(15, 0) #define I2C_SLAVE_FLAG_ADDR16 BIT(16) +#define I2C_SLAVE_FLAG_RO BIT(17) #define I2C_SLAVE_DEVICE_MAGIC(_len, _flags) ((_flags) | (_len)) static int i2c_slave_eeprom_slave_cb(struct i2c_client *client, @@ -53,9 +55,11 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client, eeprom->buffer_idx = *val | (eeprom->buffer_idx << 8); eeprom->idx_write_cnt++; } else { - spin_lock(&eeprom->buffer_lock); - eeprom->buffer[eeprom->buffer_idx++ & eeprom->address_mask] = *val; - spin_unlock(&eeprom->buffer_lock); + if (!eeprom->read_only) { + spin_lock(&eeprom->buffer_lock); + eeprom->buffer[eeprom->buffer_idx++ & eeprom->address_mask] = *val; + spin_unlock(&eeprom->buffer_lock); + } } break; @@ -130,6 +134,7 @@ static int i2c_slave_eeprom_probe(struct i2c_client *client, const struct i2c_de eeprom->idx_write_cnt = 0; eeprom->num_address_bytes = flag_addr16 ? 2 : 1; eeprom->address_mask = size - 1; + eeprom->read_only = FIELD_GET(I2C_SLAVE_FLAG_RO, id->driver_data); spin_lock_init(&eeprom->buffer_lock); i2c_set_clientdata(client, eeprom); @@ -165,8 +170,11 @@ static int i2c_slave_eeprom_remove(struct i2c_client *client) static const struct i2c_device_id i2c_slave_eeprom_id[] = { { "slave-24c02", I2C_SLAVE_DEVICE_MAGIC(2048 / 8, 0) }, + { "slave-24c02ro", I2C_SLAVE_DEVICE_MAGIC(2048 / 8, I2C_SLAVE_FLAG_RO) }, { "slave-24c32", I2C_SLAVE_DEVICE_MAGIC(32768 / 8, I2C_SLAVE_FLAG_ADDR16) }, + { "slave-24c32ro", I2C_SLAVE_DEVICE_MAGIC(32768 / 8, I2C_SLAVE_FLAG_ADDR16 | I2C_SLAVE_FLAG_RO) }, { "slave-24c64", I2C_SLAVE_DEVICE_MAGIC(65536 / 8, I2C_SLAVE_FLAG_ADDR16) }, + { "slave-24c64ro", I2C_SLAVE_DEVICE_MAGIC(65536 / 8, I2C_SLAVE_FLAG_ADDR16 | I2C_SLAVE_FLAG_RO) }, { } }; MODULE_DEVICE_TABLE(i2c, i2c_slave_eeprom_id); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 97975bb7f347..2369b8af81f3 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -70,7 +70,6 @@ */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) -static DEFINE_SPINLOCK(amd_iommu_devtable_lock); static DEFINE_SPINLOCK(pd_bitmap_lock); /* List of all available dev_data structures */ @@ -202,6 +201,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; + spin_lock_init(&dev_data->lock); dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); @@ -501,6 +501,29 @@ static void iommu_uninit_device(struct device *dev) */ } +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, + unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + /**************************************************************************** * * Interrupt handling functions @@ -1311,8 +1334,12 @@ static void domain_flush_np_cache(struct protection_domain *domain, dma_addr_t iova, size_t size) { if (unlikely(amd_iommu_np_cache)) { + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); domain_flush_complete(domain); + spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1425,7 +1452,7 @@ static void free_pagetable(struct protection_domain *domain) BUG_ON(domain->mode < PAGE_MODE_NONE || domain->mode > PAGE_MODE_6_LEVEL); - free_sub_pt(root, domain->mode, freelist); + freelist = free_sub_pt(root, domain->mode, freelist); free_page_list(freelist); } @@ -1435,10 +1462,11 @@ static void free_pagetable(struct protection_domain *domain) * another level increases the size of the address space by 9 bits to a size up * to 64 bits. */ -static void increase_address_space(struct protection_domain *domain, +static bool increase_address_space(struct protection_domain *domain, gfp_t gfp) { unsigned long flags; + bool ret = false; u64 *pte; spin_lock_irqsave(&domain->lock, flags); @@ -1455,19 +1483,21 @@ static void increase_address_space(struct protection_domain *domain, iommu_virt_to_phys(domain->pt_root)); domain->pt_root = pte; domain->mode += 1; - domain->updated = true; + + ret = true; out: spin_unlock_irqrestore(&domain->lock, flags); - return; + return ret; } static u64 *alloc_pte(struct protection_domain *domain, unsigned long address, unsigned long page_size, u64 **pte_page, - gfp_t gfp) + gfp_t gfp, + bool *updated) { int level, end_lvl; u64 *pte, *page; @@ -1475,7 +1505,7 @@ static u64 *alloc_pte(struct protection_domain *domain, BUG_ON(!is_power_of_2(page_size)); while (address > PM_LEVEL_SIZE(domain->mode)) - increase_address_space(domain, gfp); + *updated = increase_address_space(domain, gfp) || *updated; level = domain->mode - 1; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; @@ -1489,9 +1519,32 @@ static u64 *alloc_pte(struct protection_domain *domain, __pte = *pte; pte_level = PM_PTE_LEVEL(__pte); - if (!IOMMU_PTE_PRESENT(__pte) || + /* + * If we replace a series of large PTEs, we need + * to tear down all of them. + */ + if (IOMMU_PTE_PRESENT(__pte) && pte_level == PAGE_MODE_7_LEVEL) { + unsigned long count, i; + u64 *lpte; + + lpte = first_pte_l7(pte, NULL, &count); + + /* + * Unmap the replicated PTEs that still match the + * original large mapping + */ + for (i = 0; i < count; ++i) + cmpxchg64(&lpte[i], __pte, 0ULL); + + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte) || + pte_level == PAGE_MODE_NONE) { page = (u64 *)get_zeroed_page(gfp); + if (!page) return NULL; @@ -1500,8 +1553,8 @@ static u64 *alloc_pte(struct protection_domain *domain, /* pte could have been changed somewhere. */ if (cmpxchg64(pte, __pte, __npte) != __pte) free_page((unsigned long)page); - else if (pte_level == PAGE_MODE_7_LEVEL) - domain->updated = true; + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; continue; } @@ -1566,17 +1619,12 @@ static u64 *fetch_pte(struct protection_domain *domain, *page_size = PTE_LEVEL_PAGE_SIZE(level); } - if (PM_PTE_LEVEL(*pte) == 0x07) { - unsigned long pte_mask; - - /* - * If we have a series of large PTEs, make - * sure to return a pointer to the first one. - */ - *page_size = pte_mask = PTE_PAGE_SIZE(*pte); - pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); - pte = (u64 *)(((unsigned long)pte) & pte_mask); - } + /* + * If we have a series of large PTEs, make + * sure to return a pointer to the first one. + */ + if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) + pte = first_pte_l7(pte, page_size, NULL); return pte; } @@ -1615,26 +1663,29 @@ static int iommu_map_page(struct protection_domain *dom, gfp_t gfp) { struct page *freelist = NULL; + bool updated = false; u64 __pte, *pte; - int i, count; + int ret, i, count; BUG_ON(!IS_ALIGNED(bus_addr, page_size)); BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) - return -EINVAL; + goto out; count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp); + pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); + ret = -ENOMEM; if (!pte) - return -ENOMEM; + goto out; for (i = 0; i < count; ++i) freelist = free_clear_pte(&pte[i], pte[i], freelist); if (freelist != NULL) - dom->updated = true; + updated = true; if (count > 1) { __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); @@ -1650,12 +1701,21 @@ static int iommu_map_page(struct protection_domain *dom, for (i = 0; i < count; ++i) pte[i] = __pte; - update_domain(dom); + ret = 0; + +out: + if (updated) { + unsigned long flags; + + spin_lock_irqsave(&dom->lock, flags); + update_domain(dom); + spin_unlock_irqrestore(&dom->lock, flags); + } /* Everything flushed out, free pages now */ free_page_list(freelist); - return 0; + return ret; } static unsigned long iommu_unmap_page(struct protection_domain *dom, @@ -1806,8 +1866,12 @@ static void free_gcr3_table(struct protection_domain *domain) static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) { + unsigned long flags; + + spin_lock_irqsave(&dom->domain.lock, flags); domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); + spin_unlock_irqrestore(&dom->domain.lock, flags); } static void iova_domain_flush_tlb(struct iova_domain *iovad) @@ -2022,36 +2086,6 @@ static void do_detach(struct iommu_dev_data *dev_data) domain->dev_cnt -= 1; } -/* - * If a device is not yet associated with a domain, this function makes the - * device visible in the domain - */ -static int __attach_device(struct iommu_dev_data *dev_data, - struct protection_domain *domain) -{ - int ret; - - /* lock domain */ - spin_lock(&domain->lock); - - ret = -EBUSY; - if (dev_data->domain != NULL) - goto out_unlock; - - /* Attach alias group root */ - do_attach(dev_data, domain); - - ret = 0; - -out_unlock: - - /* ready */ - spin_unlock(&domain->lock); - - return ret; -} - - static void pdev_iommuv2_disable(struct pci_dev *pdev) { pci_disable_ats(pdev); @@ -2133,19 +2167,28 @@ static int attach_device(struct device *dev, unsigned long flags; int ret; + spin_lock_irqsave(&domain->lock, flags); + dev_data = get_dev_data(dev); + spin_lock(&dev_data->lock); + + ret = -EBUSY; + if (dev_data->domain != NULL) + goto out; + if (!dev_is_pci(dev)) goto skip_ats_check; pdev = to_pci_dev(dev); if (domain->flags & PD_IOMMUV2_MASK) { + ret = -EINVAL; if (!dev_data->passthrough) - return -EINVAL; + goto out; if (dev_data->iommu_v2) { if (pdev_iommuv2_enable(pdev) != 0) - return -EINVAL; + goto out; dev_data->ats.enabled = true; dev_data->ats.qdep = pci_ats_queue_depth(pdev); @@ -2158,9 +2201,9 @@ static int attach_device(struct device *dev, } skip_ats_check: - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); - ret = __attach_device(dev_data, domain); - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + ret = 0; + + do_attach(dev_data, domain); /* * We might boot into a crash-kernel here. The crashed kernel @@ -2169,23 +2212,14 @@ skip_ats_check: */ domain_flush_tlb_pde(domain); - return ret; -} - -/* - * Removes a device from a protection domain (unlocked) - */ -static void __detach_device(struct iommu_dev_data *dev_data) -{ - struct protection_domain *domain; - - domain = dev_data->domain; + domain_flush_complete(domain); - spin_lock(&domain->lock); +out: + spin_unlock(&dev_data->lock); - do_detach(dev_data); + spin_unlock_irqrestore(&domain->lock, flags); - spin_unlock(&domain->lock); + return ret; } /* @@ -2200,6 +2234,10 @@ static void detach_device(struct device *dev) dev_data = get_dev_data(dev); domain = dev_data->domain; + spin_lock_irqsave(&domain->lock, flags); + + spin_lock(&dev_data->lock); + /* * First check if the device is still attached. It might already * be detached from its domain because the generic @@ -2207,15 +2245,12 @@ static void detach_device(struct device *dev) * our alias handling. */ if (WARN_ON(!dev_data->domain)) - return; + goto out; - /* lock device table */ - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); - __detach_device(dev_data); - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + do_detach(dev_data); if (!dev_is_pci(dev)) - return; + goto out; if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) pdev_iommuv2_disable(to_pci_dev(dev)); @@ -2223,6 +2258,11 @@ static void detach_device(struct device *dev) pci_disable_ats(to_pci_dev(dev)); dev_data->ats.enabled = false; + +out: + spin_unlock(&dev_data->lock); + + spin_unlock_irqrestore(&domain->lock, flags); } static int amd_iommu_add_device(struct device *dev) @@ -2354,15 +2394,10 @@ static void update_device_table(struct protection_domain *domain) static void update_domain(struct protection_domain *domain) { - if (!domain->updated) - return; - update_device_table(domain); domain_flush_devices(domain); domain_flush_tlb_pde(domain); - - domain->updated = false; } static int dir2prot(enum dma_data_direction direction) @@ -2392,6 +2427,7 @@ static dma_addr_t __map_single(struct device *dev, { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start, ret; + unsigned long flags; unsigned int pages; int prot = 0; int i; @@ -2429,8 +2465,10 @@ out_unmap: iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); } + spin_lock_irqsave(&dma_dom->domain.lock, flags); domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); + spin_unlock_irqrestore(&dma_dom->domain.lock, flags); dma_ops_free_iova(dma_dom, address, pages); @@ -2459,8 +2497,12 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, } if (amd_iommu_unmap_flush) { + unsigned long flags; + + spin_lock_irqsave(&dma_dom->domain.lock, flags); domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); + spin_unlock_irqrestore(&dma_dom->domain.lock, flags); dma_ops_free_iova(dma_dom, dma_addr, pages); } else { pages = __roundup_pow_of_two(pages); @@ -2866,16 +2908,16 @@ static void cleanup_domain(struct protection_domain *domain) struct iommu_dev_data *entry; unsigned long flags; - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); + spin_lock_irqsave(&domain->lock, flags); while (!list_empty(&domain->dev_list)) { entry = list_first_entry(&domain->dev_list, struct iommu_dev_data, list); BUG_ON(!entry->domain); - __detach_device(entry); + do_detach(entry); } - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + spin_unlock_irqrestore(&domain->lock, flags); } static void protection_domain_free(struct protection_domain *domain) @@ -3226,9 +3268,12 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct protection_domain *dom = to_pdomain(domain); + unsigned long flags; + spin_lock_irqsave(&dom->lock, flags); domain_flush_tlb_pde(dom); domain_flush_complete(dom); + spin_unlock_irqrestore(&dom->lock, flags); } static void amd_iommu_iotlb_sync(struct iommu_domain *domain, @@ -3290,7 +3335,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) /* Update data structure */ domain->mode = PAGE_MODE_NONE; - domain->updated = true; /* Make changes visible to IOMMUs */ update_domain(domain); @@ -3336,7 +3380,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) domain->glx = levels; domain->flags |= PD_IOMMUV2_MASK; - domain->updated = true; update_domain(domain); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 9ac229e92b07..c9c1612d52e0 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -475,7 +475,6 @@ struct protection_domain { int glx; /* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ - bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; @@ -634,6 +633,9 @@ struct devid_map { * This struct contains device specific data for the IOMMU */ struct iommu_dev_data { + /*Protect against attach/detach races */ + spinlock_t lock; + struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 0e019cc5da42..dfac6afa82ca 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -36,7 +36,6 @@ #include <linux/regulator/db8500-prcmu.h> #include <linux/regulator/machine.h> #include <linux/platform_data/ux500_wdt.h> -#include <linux/platform_data/db8500_thermal.h> #include "dbx500-prcmu-regs.h" /* Index of different voltages to be used when accessing AVSData */ @@ -3014,53 +3013,6 @@ static struct ux500_wdt_data db8500_wdt_pdata = { .timeout = 600, /* 10 minutes */ .has_28_bits_resolution = true, }; -/* - * Thermal Sensor - */ - -static struct resource db8500_thsens_resources[] = { - { - .name = "IRQ_HOTMON_LOW", - .start = IRQ_PRCMU_HOTMON_LOW, - .end = IRQ_PRCMU_HOTMON_LOW, - .flags = IORESOURCE_IRQ, - }, - { - .name = "IRQ_HOTMON_HIGH", - .start = IRQ_PRCMU_HOTMON_HIGH, - .end = IRQ_PRCMU_HOTMON_HIGH, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct db8500_thsens_platform_data db8500_thsens_data = { - .trip_points[0] = { - .temp = 70000, - .type = THERMAL_TRIP_ACTIVE, - .cdev_name = { - [0] = "thermal-cpufreq-0", - }, - }, - .trip_points[1] = { - .temp = 75000, - .type = THERMAL_TRIP_ACTIVE, - .cdev_name = { - [0] = "thermal-cpufreq-0", - }, - }, - .trip_points[2] = { - .temp = 80000, - .type = THERMAL_TRIP_ACTIVE, - .cdev_name = { - [0] = "thermal-cpufreq-0", - }, - }, - .trip_points[3] = { - .temp = 85000, - .type = THERMAL_TRIP_CRITICAL, - }, - .num_trips = 4, -}; static const struct mfd_cell common_prcmu_devs[] = { { @@ -3084,10 +3036,7 @@ static const struct mfd_cell db8500_prcmu_devs[] = { }, { .name = "db8500-thermal", - .num_resources = ARRAY_SIZE(db8500_thsens_resources), - .resources = db8500_thsens_resources, - .platform_data = &db8500_thsens_data, - .pdata_size = sizeof(db8500_thsens_data), + .of_compatible = "stericsson,db8500-thermal", }, }; diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 3a52f5703286..49ea02c467bf 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -94,6 +94,7 @@ config MMC_SDHCI_PCI depends on MMC_SDHCI && PCI select MMC_CQHCI select IOSF_MBI if X86 + select MMC_SDHCI_IO_ACCESSORS help This selects the PCI Secure Digital Host Controller Interface. Most controllers found today are PCI devices. diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 390ee162fe71..11c4598e91d9 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MMC_MXS) += mxs-mmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o sdhci-pci-y += sdhci-pci-core.o sdhci-pci-o2micro.o sdhci-pci-arasan.o \ - sdhci-pci-dwc-mshc.o + sdhci-pci-dwc-mshc.o sdhci-pci-gli.o obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += sdhci-pci-data.o obj-$(CONFIG_MMC_SDHCI_ACPI) += sdhci-acpi.o obj-$(CONFIG_MMC_SDHCI_PXAV3) += sdhci-pxav3.o diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 3271c2d76629..1d1953dfc54b 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -495,7 +495,12 @@ static int esdhc_of_enable_dma(struct sdhci_host *host) dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); value = sdhci_readl(host, ESDHC_DMA_SYSCTL); - value |= ESDHC_DMA_SNOOP; + + if (of_dma_is_coherent(dev->of_node)) + value |= ESDHC_DMA_SNOOP; + else + value &= ~ESDHC_DMA_SNOOP; + sdhci_writel(host, value, ESDHC_DMA_SYSCTL); return 0; } diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index e1ca185d7328..eaffa85bc728 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1685,6 +1685,8 @@ static const struct pci_device_id pci_ids[] = { SDHCI_PCI_DEVICE(O2, SEABIRD1, o2), SDHCI_PCI_DEVICE(ARASAN, PHY_EMMC, arasan), SDHCI_PCI_DEVICE(SYNOPSYS, DWC_MSHC, snps), + SDHCI_PCI_DEVICE(GLI, 9750, gl9750), + SDHCI_PCI_DEVICE(GLI, 9755, gl9755), SDHCI_PCI_DEVICE_CLASS(AMD, SYSTEM_SDHCI, PCI_CLASS_MASK, amd), /* Generic SD host controller */ {PCI_DEVICE_CLASS(SYSTEM_SDHCI, PCI_CLASS_MASK)}, diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c new file mode 100644 index 000000000000..5eea8d70a85d --- /dev/null +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 Genesys Logic, Inc. + * + * Authors: Ben Chuang <[email protected]> + * + * Version: v0.9.0 (2019-08-08) + */ + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/pci.h> +#include <linux/mmc/mmc.h> +#include <linux/delay.h> +#include "sdhci.h" +#include "sdhci-pci.h" + +/* Genesys Logic extra registers */ +#define SDHCI_GLI_9750_WT 0x800 +#define SDHCI_GLI_9750_WT_EN BIT(0) +#define GLI_9750_WT_EN_ON 0x1 +#define GLI_9750_WT_EN_OFF 0x0 + +#define SDHCI_GLI_9750_DRIVING 0x860 +#define SDHCI_GLI_9750_DRIVING_1 GENMASK(11, 0) +#define SDHCI_GLI_9750_DRIVING_2 GENMASK(27, 26) +#define GLI_9750_DRIVING_1_VALUE 0xFFF +#define GLI_9750_DRIVING_2_VALUE 0x3 + +#define SDHCI_GLI_9750_PLL 0x864 +#define SDHCI_GLI_9750_PLL_TX2_INV BIT(23) +#define SDHCI_GLI_9750_PLL_TX2_DLY GENMASK(22, 20) +#define GLI_9750_PLL_TX2_INV_VALUE 0x1 +#define GLI_9750_PLL_TX2_DLY_VALUE 0x0 + +#define SDHCI_GLI_9750_SW_CTRL 0x874 +#define SDHCI_GLI_9750_SW_CTRL_4 GENMASK(7, 6) +#define GLI_9750_SW_CTRL_4_VALUE 0x3 + +#define SDHCI_GLI_9750_MISC 0x878 +#define SDHCI_GLI_9750_MISC_TX1_INV BIT(2) +#define SDHCI_GLI_9750_MISC_RX_INV BIT(3) +#define SDHCI_GLI_9750_MISC_TX1_DLY GENMASK(6, 4) +#define GLI_9750_MISC_TX1_INV_VALUE 0x0 +#define GLI_9750_MISC_RX_INV_ON 0x1 +#define GLI_9750_MISC_RX_INV_OFF 0x0 +#define GLI_9750_MISC_RX_INV_VALUE GLI_9750_MISC_RX_INV_OFF +#define GLI_9750_MISC_TX1_DLY_VALUE 0x5 + +#define SDHCI_GLI_9750_TUNING_CONTROL 0x540 +#define SDHCI_GLI_9750_TUNING_CONTROL_EN BIT(4) +#define GLI_9750_TUNING_CONTROL_EN_ON 0x1 +#define GLI_9750_TUNING_CONTROL_EN_OFF 0x0 +#define SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1 BIT(16) +#define SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2 GENMASK(20, 19) +#define GLI_9750_TUNING_CONTROL_GLITCH_1_VALUE 0x1 +#define GLI_9750_TUNING_CONTROL_GLITCH_2_VALUE 0x2 + +#define SDHCI_GLI_9750_TUNING_PARAMETERS 0x544 +#define SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY GENMASK(2, 0) +#define GLI_9750_TUNING_PARAMETERS_RX_DLY_VALUE 0x1 + +#define GLI_MAX_TUNING_LOOP 40 + +/* Genesys Logic chipset */ +static inline void gl9750_wt_on(struct sdhci_host *host) +{ + u32 wt_value; + u32 wt_enable; + + wt_value = sdhci_readl(host, SDHCI_GLI_9750_WT); + wt_enable = FIELD_GET(SDHCI_GLI_9750_WT_EN, wt_value); + + if (wt_enable == GLI_9750_WT_EN_ON) + return; + + wt_value &= ~SDHCI_GLI_9750_WT_EN; + wt_value |= FIELD_PREP(SDHCI_GLI_9750_WT_EN, GLI_9750_WT_EN_ON); + + sdhci_writel(host, wt_value, SDHCI_GLI_9750_WT); +} + +static inline void gl9750_wt_off(struct sdhci_host *host) +{ + u32 wt_value; + u32 wt_enable; + + wt_value = sdhci_readl(host, SDHCI_GLI_9750_WT); + wt_enable = FIELD_GET(SDHCI_GLI_9750_WT_EN, wt_value); + + if (wt_enable == GLI_9750_WT_EN_OFF) + return; + + wt_value &= ~SDHCI_GLI_9750_WT_EN; + wt_value |= FIELD_PREP(SDHCI_GLI_9750_WT_EN, GLI_9750_WT_EN_OFF); + + sdhci_writel(host, wt_value, SDHCI_GLI_9750_WT); +} + +static void gli_set_9750(struct sdhci_host *host) +{ + u32 driving_value; + u32 pll_value; + u32 sw_ctrl_value; + u32 misc_value; + u32 parameter_value; + u32 control_value; + u16 ctrl2; + + gl9750_wt_on(host); + + driving_value = sdhci_readl(host, SDHCI_GLI_9750_DRIVING); + pll_value = sdhci_readl(host, SDHCI_GLI_9750_PLL); + sw_ctrl_value = sdhci_readl(host, SDHCI_GLI_9750_SW_CTRL); + misc_value = sdhci_readl(host, SDHCI_GLI_9750_MISC); + parameter_value = sdhci_readl(host, SDHCI_GLI_9750_TUNING_PARAMETERS); + control_value = sdhci_readl(host, SDHCI_GLI_9750_TUNING_CONTROL); + + driving_value &= ~(SDHCI_GLI_9750_DRIVING_1); + driving_value &= ~(SDHCI_GLI_9750_DRIVING_2); + driving_value |= FIELD_PREP(SDHCI_GLI_9750_DRIVING_1, + GLI_9750_DRIVING_1_VALUE); + driving_value |= FIELD_PREP(SDHCI_GLI_9750_DRIVING_2, + GLI_9750_DRIVING_2_VALUE); + sdhci_writel(host, driving_value, SDHCI_GLI_9750_DRIVING); + + sw_ctrl_value &= ~SDHCI_GLI_9750_SW_CTRL_4; + sw_ctrl_value |= FIELD_PREP(SDHCI_GLI_9750_SW_CTRL_4, + GLI_9750_SW_CTRL_4_VALUE); + sdhci_writel(host, sw_ctrl_value, SDHCI_GLI_9750_SW_CTRL); + + /* reset the tuning flow after reinit and before starting tuning */ + pll_value &= ~SDHCI_GLI_9750_PLL_TX2_INV; + pll_value &= ~SDHCI_GLI_9750_PLL_TX2_DLY; + pll_value |= FIELD_PREP(SDHCI_GLI_9750_PLL_TX2_INV, + GLI_9750_PLL_TX2_INV_VALUE); + pll_value |= FIELD_PREP(SDHCI_GLI_9750_PLL_TX2_DLY, + GLI_9750_PLL_TX2_DLY_VALUE); + + misc_value &= ~SDHCI_GLI_9750_MISC_TX1_INV; + misc_value &= ~SDHCI_GLI_9750_MISC_RX_INV; + misc_value &= ~SDHCI_GLI_9750_MISC_TX1_DLY; + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_TX1_INV, + GLI_9750_MISC_TX1_INV_VALUE); + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV, + GLI_9750_MISC_RX_INV_VALUE); + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_TX1_DLY, + GLI_9750_MISC_TX1_DLY_VALUE); + + parameter_value &= ~SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY; + parameter_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY, + GLI_9750_TUNING_PARAMETERS_RX_DLY_VALUE); + + control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1; + control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2; + control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1, + GLI_9750_TUNING_CONTROL_GLITCH_1_VALUE); + control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2, + GLI_9750_TUNING_CONTROL_GLITCH_2_VALUE); + + sdhci_writel(host, pll_value, SDHCI_GLI_9750_PLL); + sdhci_writel(host, misc_value, SDHCI_GLI_9750_MISC); + + /* disable tuned clk */ + ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + ctrl2 &= ~SDHCI_CTRL_TUNED_CLK; + sdhci_writew(host, ctrl2, SDHCI_HOST_CONTROL2); + + /* enable tuning parameters control */ + control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_EN; + control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_EN, + GLI_9750_TUNING_CONTROL_EN_ON); + sdhci_writel(host, control_value, SDHCI_GLI_9750_TUNING_CONTROL); + + /* write tuning parameters */ + sdhci_writel(host, parameter_value, SDHCI_GLI_9750_TUNING_PARAMETERS); + + /* disable tuning parameters control */ + control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_EN; + control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_EN, + GLI_9750_TUNING_CONTROL_EN_OFF); + sdhci_writel(host, control_value, SDHCI_GLI_9750_TUNING_CONTROL); + + /* clear tuned clk */ + ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + ctrl2 &= ~SDHCI_CTRL_TUNED_CLK; + sdhci_writew(host, ctrl2, SDHCI_HOST_CONTROL2); + + gl9750_wt_off(host); +} + +static void gli_set_9750_rx_inv(struct sdhci_host *host, bool b) +{ + u32 misc_value; + + gl9750_wt_on(host); + + misc_value = sdhci_readl(host, SDHCI_GLI_9750_MISC); + misc_value &= ~SDHCI_GLI_9750_MISC_RX_INV; + if (b) { + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV, + GLI_9750_MISC_RX_INV_ON); + } else { + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV, + GLI_9750_MISC_RX_INV_OFF); + } + sdhci_writel(host, misc_value, SDHCI_GLI_9750_MISC); + + gl9750_wt_off(host); +} + +static int __sdhci_execute_tuning_9750(struct sdhci_host *host, u32 opcode) +{ + int i; + int rx_inv; + + for (rx_inv = 0; rx_inv < 2; rx_inv++) { + gli_set_9750_rx_inv(host, !!rx_inv); + sdhci_start_tuning(host); + + for (i = 0; i < GLI_MAX_TUNING_LOOP; i++) { + u16 ctrl; + + sdhci_send_tuning(host, opcode); + + if (!host->tuning_done) { + sdhci_abort_tuning(host, opcode); + break; + } + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (!(ctrl & SDHCI_CTRL_EXEC_TUNING)) { + if (ctrl & SDHCI_CTRL_TUNED_CLK) + return 0; /* Success! */ + break; + } + } + } + if (!host->tuning_done) { + pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n", + mmc_hostname(host->mmc)); + return -ETIMEDOUT; + } + + pr_info("%s: Tuning failed, falling back to fixed sampling clock\n", + mmc_hostname(host->mmc)); + sdhci_reset_tuning(host); + + return -EAGAIN; +} + +static int gl9750_execute_tuning(struct sdhci_host *host, u32 opcode) +{ + host->mmc->retune_period = 0; + if (host->tuning_mode == SDHCI_TUNING_MODE_1) + host->mmc->retune_period = host->tuning_count; + + gli_set_9750(host); + host->tuning_err = __sdhci_execute_tuning_9750(host, opcode); + sdhci_end_tuning(host); + + return 0; +} + +static int gli_probe_slot_gl9750(struct sdhci_pci_slot *slot) +{ + struct sdhci_host *host = slot->host; + + slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO; + sdhci_enable_v4_mode(host); + + return 0; +} + +static int gli_probe_slot_gl9755(struct sdhci_pci_slot *slot) +{ + struct sdhci_host *host = slot->host; + + slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO; + sdhci_enable_v4_mode(host); + + return 0; +} + +static void sdhci_gli_voltage_switch(struct sdhci_host *host) +{ + /* + * According to Section 3.6.1 signal voltage switch procedure in + * SD Host Controller Simplified Spec. 4.20, steps 6~8 are as + * follows: + * (6) Set 1.8V Signal Enable in the Host Control 2 register. + * (7) Wait 5ms. 1.8V voltage regulator shall be stable within this + * period. + * (8) If 1.8V Signal Enable is cleared by Host Controller, go to + * step (12). + * + * Wait 5ms after set 1.8V signal enable in Host Control 2 register + * to ensure 1.8V signal enable bit is set by GL9750/GL9755. + */ + usleep_range(5000, 5500); +} + +static void sdhci_gl9750_reset(struct sdhci_host *host, u8 mask) +{ + sdhci_reset(host, mask); + gli_set_9750(host); +} + +static u32 sdhci_gl9750_readl(struct sdhci_host *host, int reg) +{ + u32 value; + + value = readl(host->ioaddr + reg); + if (unlikely(reg == SDHCI_MAX_CURRENT && !(value & 0xff))) + value |= 0xc8; + + return value; +} + +static const struct sdhci_ops sdhci_gl9755_ops = { + .set_clock = sdhci_set_clock, + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, + .voltage_switch = sdhci_gli_voltage_switch, +}; + +const struct sdhci_pci_fixes sdhci_gl9755 = { + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks2 = SDHCI_QUIRK2_BROKEN_DDR50, + .probe_slot = gli_probe_slot_gl9755, + .ops = &sdhci_gl9755_ops, +}; + +static const struct sdhci_ops sdhci_gl9750_ops = { + .read_l = sdhci_gl9750_readl, + .set_clock = sdhci_set_clock, + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_gl9750_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, + .voltage_switch = sdhci_gli_voltage_switch, + .platform_execute_tuning = gl9750_execute_tuning, +}; + +const struct sdhci_pci_fixes sdhci_gl9750 = { + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks2 = SDHCI_QUIRK2_BROKEN_DDR50, + .probe_slot = gli_probe_slot_gl9750, + .ops = &sdhci_gl9750_ops, +}; diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index 1abc9d47a4c0..558202fe64c6 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -68,6 +68,9 @@ #define PCI_DEVICE_ID_SYNOPSYS_DWC_MSHC 0xc202 +#define PCI_DEVICE_ID_GLI_9755 0x9755 +#define PCI_DEVICE_ID_GLI_9750 0x9750 + /* * PCI device class and mask */ @@ -188,5 +191,7 @@ int sdhci_pci_enable_dma(struct sdhci_host *host); extern const struct sdhci_pci_fixes sdhci_arasan; extern const struct sdhci_pci_fixes sdhci_snps; extern const struct sdhci_pci_fixes sdhci_o2; +extern const struct sdhci_pci_fixes sdhci_gl9750; +extern const struct sdhci_pci_fixes sdhci_gl9755; #endif /* __SDHCI_PCI_H */ diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 02d8f524bb9e..7bc950520fd9 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -4,6 +4,7 @@ */ #include <linux/delay.h> +#include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/module.h> #include <linux/init.h> @@ -104,6 +105,7 @@ struct sdhci_tegra_soc_data { const struct sdhci_pltfm_data *pdata; + u64 dma_mask; u32 nvquirks; u8 min_tap_delay; u8 max_tap_delay; @@ -1233,11 +1235,25 @@ static const struct cqhci_host_ops sdhci_tegra_cqhci_ops = { .update_dcmd_desc = sdhci_tegra_update_dcmd_desc, }; +static int tegra_sdhci_set_dma_mask(struct sdhci_host *host) +{ + struct sdhci_pltfm_host *platform = sdhci_priv(host); + struct sdhci_tegra *tegra = sdhci_pltfm_priv(platform); + const struct sdhci_tegra_soc_data *soc = tegra->soc_data; + struct device *dev = mmc_dev(host->mmc); + + if (soc->dma_mask) + return dma_set_mask_and_coherent(dev, soc->dma_mask); + + return 0; +} + static const struct sdhci_ops tegra_sdhci_ops = { .get_ro = tegra_sdhci_get_ro, .read_w = tegra_sdhci_readw, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, + .set_dma_mask = tegra_sdhci_set_dma_mask, .set_bus_width = sdhci_set_bus_width, .reset = tegra_sdhci_reset, .platform_execute_tuning = tegra_sdhci_execute_tuning, @@ -1257,6 +1273,7 @@ static const struct sdhci_pltfm_data sdhci_tegra20_pdata = { static const struct sdhci_tegra_soc_data soc_data_tegra20 = { .pdata = &sdhci_tegra20_pdata, + .dma_mask = DMA_BIT_MASK(32), .nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 | NVQUIRK_ENABLE_BLOCK_GAP_DET, }; @@ -1283,6 +1300,7 @@ static const struct sdhci_pltfm_data sdhci_tegra30_pdata = { static const struct sdhci_tegra_soc_data soc_data_tegra30 = { .pdata = &sdhci_tegra30_pdata, + .dma_mask = DMA_BIT_MASK(32), .nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 | NVQUIRK_ENABLE_SDR50 | NVQUIRK_ENABLE_SDR104 | @@ -1295,6 +1313,7 @@ static const struct sdhci_ops tegra114_sdhci_ops = { .write_w = tegra_sdhci_writew, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, + .set_dma_mask = tegra_sdhci_set_dma_mask, .set_bus_width = sdhci_set_bus_width, .reset = tegra_sdhci_reset, .platform_execute_tuning = tegra_sdhci_execute_tuning, @@ -1316,6 +1335,7 @@ static const struct sdhci_pltfm_data sdhci_tegra114_pdata = { static const struct sdhci_tegra_soc_data soc_data_tegra114 = { .pdata = &sdhci_tegra114_pdata, + .dma_mask = DMA_BIT_MASK(32), }; static const struct sdhci_pltfm_data sdhci_tegra124_pdata = { @@ -1325,22 +1345,13 @@ static const struct sdhci_pltfm_data sdhci_tegra124_pdata = { SDHCI_QUIRK_NO_HISPD_BIT | SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC | SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | - /* - * The TRM states that the SD/MMC controller found on - * Tegra124 can address 34 bits (the maximum supported by - * the Tegra memory controller), but tests show that DMA - * to or from above 4 GiB doesn't work. This is possibly - * caused by missing programming, though it's not obvious - * what sequence is required. Mark 64-bit DMA broken for - * now to fix this for existing users (e.g. Nyan boards). - */ - SDHCI_QUIRK2_BROKEN_64_BIT_DMA, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .ops = &tegra114_sdhci_ops, }; static const struct sdhci_tegra_soc_data soc_data_tegra124 = { .pdata = &sdhci_tegra124_pdata, + .dma_mask = DMA_BIT_MASK(34), }; static const struct sdhci_ops tegra210_sdhci_ops = { @@ -1349,6 +1360,7 @@ static const struct sdhci_ops tegra210_sdhci_ops = { .write_w = tegra210_sdhci_writew, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, + .set_dma_mask = tegra_sdhci_set_dma_mask, .set_bus_width = sdhci_set_bus_width, .reset = tegra_sdhci_reset, .set_uhs_signaling = tegra_sdhci_set_uhs_signaling, @@ -1369,6 +1381,7 @@ static const struct sdhci_pltfm_data sdhci_tegra210_pdata = { static const struct sdhci_tegra_soc_data soc_data_tegra210 = { .pdata = &sdhci_tegra210_pdata, + .dma_mask = DMA_BIT_MASK(34), .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL | NVQUIRK_HAS_PADCALIB | NVQUIRK_DIS_CARD_CLK_CONFIG_TAP | @@ -1383,6 +1396,7 @@ static const struct sdhci_ops tegra186_sdhci_ops = { .read_w = tegra_sdhci_readw, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, + .set_dma_mask = tegra_sdhci_set_dma_mask, .set_bus_width = sdhci_set_bus_width, .reset = tegra_sdhci_reset, .set_uhs_signaling = tegra_sdhci_set_uhs_signaling, @@ -1398,20 +1412,13 @@ static const struct sdhci_pltfm_data sdhci_tegra186_pdata = { SDHCI_QUIRK_NO_HISPD_BIT | SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC | SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | - /* SDHCI controllers on Tegra186 support 40-bit addressing. - * IOVA addresses are 48-bit wide on Tegra186. - * With 64-bit dma mask used for SDHCI, accesses can - * be broken. Disable 64-bit dma, which would fall back - * to 32-bit dma mask. Ideally 40-bit dma mask would work, - * But it is not supported as of now. - */ - SDHCI_QUIRK2_BROKEN_64_BIT_DMA, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .ops = &tegra186_sdhci_ops, }; static const struct sdhci_tegra_soc_data soc_data_tegra186 = { .pdata = &sdhci_tegra186_pdata, + .dma_mask = DMA_BIT_MASK(40), .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL | NVQUIRK_HAS_PADCALIB | NVQUIRK_DIS_CARD_CLK_CONFIG_TAP | @@ -1424,6 +1431,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra186 = { static const struct sdhci_tegra_soc_data soc_data_tegra194 = { .pdata = &sdhci_tegra186_pdata, + .dma_mask = DMA_BIT_MASK(39), .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL | NVQUIRK_HAS_PADCALIB | NVQUIRK_DIS_CARD_CLK_CONFIG_TAP | diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 4b297f397326..b056400e34b1 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2874,6 +2874,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p) static void sdhci_adma_show_error(struct sdhci_host *host) { void *desc = host->adma_table; + dma_addr_t dma = host->adma_addr; sdhci_dumpregs(host); @@ -2881,18 +2882,21 @@ static void sdhci_adma_show_error(struct sdhci_host *host) struct sdhci_adma2_64_desc *dma_desc = desc; if (host->flags & SDHCI_USE_64_BIT_DMA) - DBG("%p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n", - desc, le32_to_cpu(dma_desc->addr_hi), + SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n", + (unsigned long long)dma, + le32_to_cpu(dma_desc->addr_hi), le32_to_cpu(dma_desc->addr_lo), le16_to_cpu(dma_desc->len), le16_to_cpu(dma_desc->cmd)); else - DBG("%p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n", - desc, le32_to_cpu(dma_desc->addr_lo), + SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n", + (unsigned long long)dma, + le32_to_cpu(dma_desc->addr_lo), le16_to_cpu(dma_desc->len), le16_to_cpu(dma_desc->cmd)); desc += host->desc_sz; + dma += host->desc_sz; if (dma_desc->cmd & cpu_to_le16(ADMA2_END)) break; @@ -2968,7 +2972,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) != MMC_BUS_TEST_R) host->data->error = -EILSEQ; else if (intmask & SDHCI_INT_ADMA_ERROR) { - pr_err("%s: ADMA error\n", mmc_hostname(host->mmc)); + pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc), + intmask); sdhci_adma_show_error(host); host->data->error = -EIO; if (host->ops->adma_workaround) @@ -3776,18 +3781,14 @@ int sdhci_setup_host(struct sdhci_host *host) host->flags &= ~SDHCI_USE_ADMA; } - /* - * It is assumed that a 64-bit capable device has set a 64-bit DMA mask - * and *must* do 64-bit DMA. A driver has the opportunity to change - * that during the first call to ->enable_dma(). Similarly - * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to - * implement. - */ if (sdhci_can_64bit_dma(host)) host->flags |= SDHCI_USE_64_BIT_DMA; if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { - ret = sdhci_set_dma_mask(host); + if (host->ops->set_dma_mask) + ret = host->ops->set_dma_mask(host); + else + ret = sdhci_set_dma_mask(host); if (!ret && host->ops->enable_dma) ret = host->ops->enable_dma(host); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index a29c4cd2d92e..0ed3e0eaef5f 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -622,6 +622,7 @@ struct sdhci_ops { u32 (*irq)(struct sdhci_host *host, u32 intmask); + int (*set_dma_mask)(struct sdhci_host *host); int (*enable_dma)(struct sdhci_host *host); unsigned int (*get_max_clock)(struct sdhci_host *host); unsigned int (*get_min_clock)(struct sdhci_host *host); diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 2b9a2f117113..96d7cef3289f 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -3,7 +3,13 @@ # CAIF physical drivers # -comment "CAIF transport drivers" +menuconfig CAIF_DRIVERS + bool "CAIF transport drivers" + depends on CAIF + help + Enable this to see CAIF physical drivers. + +if CAIF_DRIVERS config CAIF_TTY tristate "CAIF TTY transport driver" @@ -22,7 +28,7 @@ config CAIF_SPI_SLAVE The CAIF Link layer SPI Protocol driver for Slave SPI interface. This driver implements a platform driver to accommodate for a platform specific SPI device. A sample CAIF SPI Platform device is - provided in Documentation/networking/caif/spi_porting.txt + provided in <file:Documentation/networking/caif/spi_porting.txt>. config CAIF_SPI_SYNC bool "Next command and length in start of frame" @@ -38,7 +44,7 @@ config CAIF_HSI depends on CAIF default n ---help--- - The caif low level driver for CAIF over HSI. + The CAIF low level driver for CAIF over HSI. Be aware that if you enable this then you also need to enable a low-level HSI driver. @@ -50,8 +56,10 @@ config CAIF_VIRTIO select GENERIC_ALLOCATOR default n ---help--- - The caif driver for CAIF over Virtio. + The CAIF driver for CAIF over Virtio. if CAIF_VIRTIO source "drivers/vhost/Kconfig.vringh" endif + +endif # CAIF_DRIVERS diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index 0b1e01f0873d..b0a1595d780d 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -85,7 +85,6 @@ MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); static struct i2c_driver ksz9477_i2c_driver = { .driver = { .name = "ksz9477-switch", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz9477_dt_ids), }, .probe = ksz9477_i2c_probe, diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 684aa51684db..b00274caae4f 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -705,7 +705,7 @@ qca8k_setup(struct dsa_switch *ds) BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); /* Setup connection between CPU port & user ports */ - for (i = 0; i < DSA_MAX_PORTS; i++) { + for (i = 0; i < QCA8K_NUM_PORTS; i++) { /* CPU port gets connected to all user ports of the switch */ if (dsa_is_cpu_port(ds, i)) { qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), @@ -1077,7 +1077,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (id != QCA8K_ID_QCA8337) return -ENODEV; - priv->ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS); + priv->ds = dsa_switch_alloc(&mdiodev->dev, QCA8K_NUM_PORTS); if (!priv->ds) return -ENOMEM; diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index ca3d17e43ed8..ac88caca5ad4 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -339,10 +339,12 @@ int rtl8366_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct realtek_smi *smi = ds->priv; + u16 vid; int ret; - if (!smi->ops->is_vlan_valid(smi, port)) - return -EINVAL; + for (vid = vlan->vid_begin; vid < vlan->vid_end; vid++) + if (!smi->ops->is_vlan_valid(smi, vid)) + return -EINVAL; dev_info(smi->dev, "prepare VLANs %04x..%04x\n", vlan->vid_begin, vlan->vid_end); @@ -370,8 +372,9 @@ void rtl8366_vlan_add(struct dsa_switch *ds, int port, u16 vid; int ret; - if (!smi->ops->is_vlan_valid(smi, port)) - return; + for (vid = vlan->vid_begin; vid < vlan->vid_end; vid++) + if (!smi->ops->is_vlan_valid(smi, vid)) + return; dev_info(smi->dev, "add VLAN on port %d, %s, %s\n", port, diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c index a268085ffad2..f5cc8b0a7c74 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/rtl8366rb.c @@ -507,7 +507,8 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) irq = of_irq_get(intc, 0); if (irq <= 0) { dev_err(smi->dev, "failed to get parent IRQ\n"); - return irq ? irq : -EINVAL; + ret = irq ? irq : -EINVAL; + goto out_put_node; } /* This clears the IRQ status register */ @@ -515,7 +516,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) &val); if (ret) { dev_err(smi->dev, "can't read interrupt status\n"); - return ret; + goto out_put_node; } /* Fetch IRQ edge information from the descriptor */ @@ -537,7 +538,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) val); if (ret) { dev_err(smi->dev, "could not configure IRQ polarity\n"); - return ret; + goto out_put_node; } ret = devm_request_threaded_irq(smi->dev, irq, NULL, @@ -545,7 +546,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) "RTL8366RB", smi); if (ret) { dev_err(smi->dev, "unable to request irq: %d\n", ret); - return ret; + goto out_put_node; } smi->irqdomain = irq_domain_add_linear(intc, RTL8366RB_NUM_INTERRUPT, @@ -553,12 +554,15 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) smi); if (!smi->irqdomain) { dev_err(smi->dev, "failed to create IRQ domain\n"); - return -EINVAL; + ret = -EINVAL; + goto out_put_node; } for (i = 0; i < smi->num_ports; i++) irq_set_parent(irq_create_mapping(smi->irqdomain, i), irq); - return 0; +out_put_node: + of_node_put(intc); + return ret; } static int rtl8366rb_set_addr(struct realtek_smi *smi) diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index e53e494c22e0..8681ff9d1a76 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -127,15 +127,13 @@ typedef enum { int sja1105_static_config_reload(struct sja1105_private *priv); /* From sja1105_spi.c */ -int sja1105_spi_send_packed_buf(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 reg_addr, - void *packed_buf, size_t size_bytes); -int sja1105_spi_send_int(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 reg_addr, - u64 *value, u64 size_bytes); -int sja1105_spi_send_long_packed_buf(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 base_addr, - void *packed_buf, u64 buf_len); +int sja1105_xfer_buf(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, + void *packed_buf, size_t size_bytes); +int sja1105_xfer_u32(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, u32 *value); +int sja1105_xfer_u64(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, u64 *value); int sja1105_static_config_upload(struct sja1105_private *priv); int sja1105_inhibit_tx(const struct sja1105_private *priv, unsigned long port_bitmap, bool tx_inhibited); diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index 608126a15d72..9082e52b55e9 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -118,9 +118,8 @@ static int sja1105_cgu_idiv_config(struct sja1105_private *priv, int port, idiv.pd = enabled ? 0 : 1; /* Power down? */ sja1105_cgu_idiv_packing(packed_buf, &idiv, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->cgu_idiv[port], packed_buf, - SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->cgu_idiv[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static void @@ -167,9 +166,8 @@ static int sja1105_cgu_mii_tx_clk_config(struct sja1105_private *priv, mii_tx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &mii_tx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->mii_tx_clk[port], packed_buf, - SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_tx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int @@ -192,9 +190,8 @@ sja1105_cgu_mii_rx_clk_config(struct sja1105_private *priv, int port) mii_rx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &mii_rx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->mii_rx_clk[port], packed_buf, - SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_rx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int @@ -217,9 +214,8 @@ sja1105_cgu_mii_ext_tx_clk_config(struct sja1105_private *priv, int port) mii_ext_tx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &mii_ext_tx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->mii_ext_tx_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_ext_tx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int @@ -242,9 +238,8 @@ sja1105_cgu_mii_ext_rx_clk_config(struct sja1105_private *priv, int port) mii_ext_rx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &mii_ext_rx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->mii_ext_rx_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_ext_rx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int sja1105_mii_clocking_setup(struct sja1105_private *priv, int port, @@ -337,9 +332,8 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv, txc.pd = 0; sja1105_cgu_mii_control_packing(packed_buf, &txc, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->rgmii_tx_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgmii_tx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } /* AGU */ @@ -383,9 +377,8 @@ static int sja1105_rgmii_cfg_pad_tx_config(struct sja1105_private *priv, pad_mii_tx.clk_ipud = 2; /* TX_CLK input stage (default) */ sja1105_cfg_pad_mii_tx_packing(packed_buf, &pad_mii_tx, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->pad_mii_tx[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_tx[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static void @@ -405,7 +398,7 @@ sja1105_cfg_pad_mii_id_packing(void *buf, struct sja1105_cfg_pad_mii_id *cmd, } /* Valid range in degrees is an integer between 73.8 and 101.7 */ -static inline u64 sja1105_rgmii_delay(u64 phase) +static u64 sja1105_rgmii_delay(u64 phase) { /* UM11040.pdf: The delay in degree phase is 73.8 + delay_tune * 0.9. * To avoid floating point operations we'll multiply by 10 @@ -442,9 +435,8 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port) pad_mii_id.txc_pd = 1; sja1105_cfg_pad_mii_id_packing(packed_buf, &pad_mii_id, PACK); - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->pad_mii_id[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_id[port], + packed_buf, SJA1105_SIZE_CGU_CMD); if (rc < 0) return rc; @@ -459,9 +451,8 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port) } sja1105_cfg_pad_mii_id_packing(packed_buf, &pad_mii_id, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->pad_mii_id[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_id[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int sja1105_rgmii_clocking_setup(struct sja1105_private *priv, int port, @@ -547,9 +538,8 @@ static int sja1105_cgu_rmii_ref_clk_config(struct sja1105_private *priv, ref_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &ref_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->rmii_ref_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_ref_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int @@ -565,9 +555,8 @@ sja1105_cgu_rmii_ext_tx_clk_config(struct sja1105_private *priv, int port) ext_tx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &ext_tx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->rmii_ext_tx_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_ext_tx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv) @@ -595,8 +584,8 @@ static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv) pll.pd = 0x1; sja1105_cgu_pll_control_packing(packed_buf, &pll, PACK); - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rmii_pll1, - packed_buf, SJA1105_SIZE_CGU_CMD); + rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_pll1, packed_buf, + SJA1105_SIZE_CGU_CMD); if (rc < 0) { dev_err(dev, "failed to configure PLL1 for 50MHz\n"); return rc; @@ -606,8 +595,8 @@ static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv) pll.pd = 0x0; sja1105_cgu_pll_control_packing(packed_buf, &pll, PACK); - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rmii_pll1, - packed_buf, SJA1105_SIZE_CGU_CMD); + rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_pll1, packed_buf, + SJA1105_SIZE_CGU_CMD); if (rc < 0) { dev_err(dev, "failed to enable PLL1\n"); return rc; diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index 91da430045ff..25381bd65ed7 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -686,8 +686,8 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv, ops->entry_packing(packed_buf, entry, PACK); /* Send SPI write operation: read config table entry */ - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, ops->addr, - packed_buf, ops->packed_size); + rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf, + ops->packed_size); if (rc < 0) return rc; @@ -698,8 +698,8 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv, memset(packed_buf, 0, ops->packed_size); /* Retrieve the read operation's result */ - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, ops->addr, - packed_buf, ops->packed_size); + rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf, + ops->packed_size); if (rc < 0) return rc; @@ -771,8 +771,8 @@ int sja1105_dynamic_config_write(struct sja1105_private *priv, ops->entry_packing(packed_buf, entry, PACK); /* Send SPI write operation: read config table entry */ - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, ops->addr, - packed_buf, ops->packed_size); + rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf, + ops->packed_size); if (rc < 0) return rc; diff --git a/drivers/net/dsa/sja1105/sja1105_ethtool.c b/drivers/net/dsa/sja1105/sja1105_ethtool.c index ab581a28cd41..064301cc7d5b 100644 --- a/drivers/net/dsa/sja1105/sja1105_ethtool.c +++ b/drivers/net/dsa/sja1105/sja1105_ethtool.c @@ -167,8 +167,8 @@ static int sja1105_port_status_get_mac(struct sja1105_private *priv, int rc; /* MAC area */ - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac[port], - packed_buf, SJA1105_SIZE_MAC_AREA); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac[port], packed_buf, + SJA1105_SIZE_MAC_AREA); if (rc < 0) return rc; @@ -185,8 +185,8 @@ static int sja1105_port_status_get_hl1(struct sja1105_private *priv, u8 packed_buf[SJA1105_SIZE_HL1_AREA] = {0}; int rc; - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac_hl1[port], - packed_buf, SJA1105_SIZE_HL1_AREA); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl1[port], packed_buf, + SJA1105_SIZE_HL1_AREA); if (rc < 0) return rc; @@ -203,8 +203,8 @@ static int sja1105_port_status_get_hl2(struct sja1105_private *priv, u8 packed_buf[SJA1105_SIZE_QLEVEL_AREA] = {0}; int rc; - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac_hl2[port], - packed_buf, SJA1105_SIZE_HL2_AREA); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl2[port], packed_buf, + SJA1105_SIZE_HL2_AREA); if (rc < 0) return rc; @@ -215,8 +215,8 @@ static int sja1105_port_status_get_hl2(struct sja1105_private *priv, priv->info->device_id == SJA1105T_DEVICE_ID) return 0; - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->qlevel[port], - packed_buf, SJA1105_SIZE_QLEVEL_AREA); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->qlevel[port], packed_buf, + SJA1105_SIZE_QLEVEL_AREA); if (rc < 0) return rc; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index b9def744bcb3..6ce46d7e971a 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -382,8 +382,8 @@ static int sja1105_init_l2_forwarding_params(struct sja1105_private *priv) static int sja1105_init_general_params(struct sja1105_private *priv) { struct sja1105_general_params_entry default_general_params = { - /* Disallow dynamic changing of the mirror port */ - .mirr_ptacu = 0, + /* Allow dynamic changing of the mirror port */ + .mirr_ptacu = true, .switchid = priv->ds->index, /* Priority queue for link-local management frames * (both ingress to and egress from CPU - PTP, STP etc) @@ -403,8 +403,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv) * by installing a temporary 'management route' */ .host_port = dsa_upstream_port(priv->ds, 0), - /* Same as host port */ - .mirr_port = dsa_upstream_port(priv->ds, 0), + /* Default to an invalid value */ + .mirr_port = SJA1105_NUM_PORTS, /* Link-local traffic received on casc_port will be forwarded * to host_port without embedding the source port and device ID * info in the destination MAC address (presumably because it @@ -458,9 +458,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv) #define SJA1105_RATE_MBPS(speed) (((speed) * 64000) / 1000) -static inline void -sja1105_setup_policer(struct sja1105_l2_policing_entry *policing, - int index) +static void sja1105_setup_policer(struct sja1105_l2_policing_entry *policing, + int index) { policing[index].sharindx = index; policing[index].smax = 65535; /* Burst size in bytes */ @@ -951,7 +950,7 @@ sja1105_static_fdb_change(struct sja1105_private *priv, int port, * For the placement of a newly learnt FDB entry, the switch selects the bin * based on a hash function, and the way within that bin incrementally. */ -static inline int sja1105et_fdb_index(int bin, int way) +static int sja1105et_fdb_index(int bin, int way) { return bin * SJA1105ET_FDB_BIN_SIZE + way; } @@ -1897,7 +1896,9 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds, return sja1105_static_config_reload(priv); } -/* Caller must hold priv->tagger_data.meta_lock */ +/* Must be called only with priv->tagger_data.state bit + * SJA1105_HWTS_RX_EN cleared + */ static int sja1105_change_rxtstamping(struct sja1105_private *priv, bool on) { @@ -1954,16 +1955,17 @@ static int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, break; } - if (rx_on != priv->tagger_data.hwts_rx_en) { - spin_lock(&priv->tagger_data.meta_lock); + if (rx_on != test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) { + clear_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); + rc = sja1105_change_rxtstamping(priv, rx_on); - spin_unlock(&priv->tagger_data.meta_lock); if (rc < 0) { dev_err(ds->dev, "Failed to change RX timestamping: %d\n", rc); - return -EFAULT; + return rc; } - priv->tagger_data.hwts_rx_en = rx_on; + if (rx_on) + set_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); } if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) @@ -1982,7 +1984,7 @@ static int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, config.tx_type = HWTSTAMP_TX_ON; else config.tx_type = HWTSTAMP_TX_OFF; - if (priv->tagger_data.hwts_rx_en) + if (test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; else config.rx_filter = HWTSTAMP_FILTER_NONE; @@ -2005,12 +2007,12 @@ static void sja1105_rxtstamp_work(struct work_struct *work) mutex_lock(&priv->ptp_lock); - now = priv->tstamp_cc.read(&priv->tstamp_cc); - while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) { struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb); u64 ts; + now = priv->tstamp_cc.read(&priv->tstamp_cc); + *shwt = (struct skb_shared_hwtstamps) {0}; ts = SJA1105_SKB_CB(skb)->meta_tstamp; @@ -2031,7 +2033,7 @@ static bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, struct sja1105_private *priv = ds->priv; struct sja1105_tagger_data *data = &priv->tagger_data; - if (!data->hwts_rx_en) + if (!test_bit(SJA1105_HWTS_RX_EN, &data->state)) return false; /* We need to read the full PTP clock to reconstruct the Rx @@ -2070,6 +2072,84 @@ static int sja1105_port_setup_tc(struct dsa_switch *ds, int port, } } +/* We have a single mirror (@to) port, but can configure ingress and egress + * mirroring on all other (@from) ports. + * We need to allow mirroring rules only as long as the @to port is always the + * same, and we need to unset the @to port from mirr_port only when there is no + * mirroring rule that references it. + */ +static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to, + bool ingress, bool enabled) +{ + struct sja1105_general_params_entry *general_params; + struct sja1105_mac_config_entry *mac; + struct sja1105_table *table; + bool already_enabled; + u64 new_mirr_port; + int rc; + + table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; + general_params = table->entries; + + mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; + + already_enabled = (general_params->mirr_port != SJA1105_NUM_PORTS); + if (already_enabled && enabled && general_params->mirr_port != to) { + dev_err(priv->ds->dev, + "Delete mirroring rules towards port %llu first\n", + general_params->mirr_port); + return -EBUSY; + } + + new_mirr_port = to; + if (!enabled) { + bool keep = false; + int port; + + /* Anybody still referencing mirr_port? */ + for (port = 0; port < SJA1105_NUM_PORTS; port++) { + if (mac[port].ing_mirr || mac[port].egr_mirr) { + keep = true; + break; + } + } + /* Unset already_enabled for next time */ + if (!keep) + new_mirr_port = SJA1105_NUM_PORTS; + } + if (new_mirr_port != general_params->mirr_port) { + general_params->mirr_port = new_mirr_port; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_GENERAL_PARAMS, + 0, general_params, true); + if (rc < 0) + return rc; + } + + if (ingress) + mac[from].ing_mirr = enabled; + else + mac[from].egr_mirr = enabled; + + return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, from, + &mac[from], true); +} + +static int sja1105_mirror_add(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress) +{ + return sja1105_mirror_apply(ds->priv, port, mirror->to_local_port, + ingress, true); +} + +static void sja1105_mirror_del(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror) +{ + sja1105_mirror_apply(ds->priv, port, mirror->to_local_port, + mirror->ingress, false); +} + static const struct dsa_switch_ops sja1105_switch_ops = { .get_tag_protocol = sja1105_get_tag_protocol, .setup = sja1105_setup, @@ -2103,6 +2183,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_rxtstamp = sja1105_port_rxtstamp, .port_txtstamp = sja1105_port_txtstamp, .port_setup_tc = sja1105_port_setup_tc, + .port_mirror_add = sja1105_mirror_add, + .port_mirror_del = sja1105_mirror_del, }; static int sja1105_check_device_id(struct sja1105_private *priv) @@ -2110,23 +2192,22 @@ static int sja1105_check_device_id(struct sja1105_private *priv) const struct sja1105_regs *regs = priv->info->regs; u8 prod_id[SJA1105_SIZE_DEVICE_ID] = {0}; struct device *dev = &priv->spidev->dev; - u64 device_id; + u32 device_id; u64 part_no; int rc; - rc = sja1105_spi_send_int(priv, SPI_READ, regs->device_id, - &device_id, SJA1105_SIZE_DEVICE_ID); + rc = sja1105_xfer_u32(priv, SPI_READ, regs->device_id, &device_id); if (rc < 0) return rc; if (device_id != priv->info->device_id) { - dev_err(dev, "Expected device ID 0x%llx but read 0x%llx\n", + dev_err(dev, "Expected device ID 0x%llx but read 0x%x\n", priv->info->device_id, device_id); return -ENODEV; } - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->prod_id, - prod_id, SJA1105_SIZE_DEVICE_ID); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->prod_id, prod_id, + SJA1105_SIZE_DEVICE_ID); if (rc < 0) return rc; @@ -2201,6 +2282,7 @@ static int sja1105_probe(struct spi_device *spi) tagger_data = &priv->tagger_data; skb_queue_head_init(&tagger_data->skb_rxtstamp_queue); INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work); + spin_lock_init(&tagger_data->meta_lock); /* Connections between dsa_port and sja1105_port */ for (i = 0; i < SJA1105_NUM_PORTS; i++) { diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index d8e8dd59f3d1..0df1bbec475a 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -91,8 +91,8 @@ int sja1105et_ptp_cmd(const void *ctx, const void *data) sja1105_pack(buf, &valid, 31, 31, size); sja1105_pack(buf, &cmd->resptp, 2, 2, size); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control, - buf, SJA1105_SIZE_PTP_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf, + SJA1105_SIZE_PTP_CMD); } int sja1105pqrs_ptp_cmd(const void *ctx, const void *data) @@ -108,8 +108,8 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data) sja1105_pack(buf, &valid, 31, 31, size); sja1105_pack(buf, &cmd->resptp, 3, 3, size); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control, - buf, SJA1105_SIZE_PTP_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf, + SJA1105_SIZE_PTP_CMD); } /* The switch returns partial timestamps (24 bits for SJA1105 E/T, which wrap @@ -180,10 +180,8 @@ int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts) int rc; do { - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, - regs->ptpegr_ts[port], - packed_buf, - priv->info->ptpegr_ts_bytes); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->ptpegr_ts[port], + packed_buf, priv->info->ptpegr_ts_bytes); if (rc < 0) return rc; @@ -327,8 +325,7 @@ static u64 sja1105_ptptsclk_read(const struct cyclecounter *cc) u64 ptptsclk = 0; int rc; - rc = sja1105_spi_send_int(priv, SPI_READ, regs->ptptsclk, - &ptptsclk, 8); + rc = sja1105_xfer_u64(priv, SPI_READ, regs->ptptsclk, &ptptsclk); if (rc < 0) dev_err_ratelimited(priv->ds->dev, "failed to read ptp cycle counter: %d\n", diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index 84dc603138cf..b224b1a55695 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -7,7 +7,6 @@ #include <linux/packing.h> #include "sja1105.h" -#define SJA1105_SIZE_PORT_CTRL 4 #define SJA1105_SIZE_RESET_CMD 4 #define SJA1105_SIZE_SPI_MSG_HEADER 4 #define SJA1105_SIZE_SPI_MSG_MAXLEN (64 * 4) @@ -64,11 +63,11 @@ sja1105_spi_message_pack(void *buf, const struct sja1105_spi_message *msg) * * This function should only be called if it is priorly known that * @size_bytes is smaller than SIZE_SPI_MSG_MAXLEN. Larger packed buffers - * are chunked in smaller pieces by sja1105_spi_send_long_packed_buf below. + * are chunked in smaller pieces by sja1105_xfer_long_buf below. */ -int sja1105_spi_send_packed_buf(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 reg_addr, - void *packed_buf, size_t size_bytes) +int sja1105_xfer_buf(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, + void *packed_buf, size_t size_bytes) { u8 tx_buf[SJA1105_SIZE_SPI_TRANSFER_MAX] = {0}; u8 rx_buf[SJA1105_SIZE_SPI_TRANSFER_MAX] = {0}; @@ -103,35 +102,52 @@ int sja1105_spi_send_packed_buf(const struct sja1105_private *priv, /* If @rw is: * - SPI_WRITE: creates and sends an SPI write message at absolute - * address reg_addr, taking size_bytes from *packed_buf + * address reg_addr * - SPI_READ: creates and sends an SPI read message from absolute - * address reg_addr, writing size_bytes into *packed_buf + * address reg_addr * * The u64 *value is unpacked, meaning that it's stored in the native * CPU endianness and directly usable by software running on the core. - * - * This is a wrapper around sja1105_spi_send_packed_buf(). */ -int sja1105_spi_send_int(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 reg_addr, - u64 *value, u64 size_bytes) +int sja1105_xfer_u64(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, u64 *value) { - u8 packed_buf[SJA1105_SIZE_SPI_MSG_MAXLEN]; + u8 packed_buf[8]; int rc; - if (size_bytes > SJA1105_SIZE_SPI_MSG_MAXLEN) - return -ERANGE; - if (rw == SPI_WRITE) - sja1105_pack(packed_buf, value, 8 * size_bytes - 1, 0, - size_bytes); + sja1105_pack(packed_buf, value, 63, 0, 8); - rc = sja1105_spi_send_packed_buf(priv, rw, reg_addr, packed_buf, - size_bytes); + rc = sja1105_xfer_buf(priv, rw, reg_addr, packed_buf, 8); if (rw == SPI_READ) - sja1105_unpack(packed_buf, value, 8 * size_bytes - 1, 0, - size_bytes); + sja1105_unpack(packed_buf, value, 63, 0, 8); + + return rc; +} + +/* Same as above, but transfers only a 4 byte word */ +int sja1105_xfer_u32(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, u32 *value) +{ + u8 packed_buf[4]; + u64 tmp; + int rc; + + if (rw == SPI_WRITE) { + /* The packing API only supports u64 as CPU word size, + * so we need to convert. + */ + tmp = *value; + sja1105_pack(packed_buf, &tmp, 31, 0, 4); + } + + rc = sja1105_xfer_buf(priv, rw, reg_addr, packed_buf, 4); + + if (rw == SPI_READ) { + sja1105_unpack(packed_buf, &tmp, 31, 0, 4); + *value = tmp; + } return rc; } @@ -140,9 +156,9 @@ int sja1105_spi_send_int(const struct sja1105_private *priv, * must be sent/received. Splitting the buffer into chunks and assembling * those into SPI messages is done automatically by this function. */ -int sja1105_spi_send_long_packed_buf(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 base_addr, - void *packed_buf, u64 buf_len) +static int sja1105_xfer_long_buf(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 base_addr, + void *packed_buf, u64 buf_len) { struct chunk { void *buf_ptr; @@ -158,8 +174,8 @@ int sja1105_spi_send_long_packed_buf(const struct sja1105_private *priv, chunk.len = min_t(int, buf_len, SJA1105_SIZE_SPI_MSG_MAXLEN); while (chunk.len) { - rc = sja1105_spi_send_packed_buf(priv, rw, chunk.spi_address, - chunk.buf_ptr, chunk.len); + rc = sja1105_xfer_buf(priv, rw, chunk.spi_address, + chunk.buf_ptr, chunk.len); if (rc < 0) return rc; @@ -241,8 +257,8 @@ static int sja1105et_reset_cmd(const void *ctx, const void *data) sja1105et_reset_cmd_pack(packed_buf, reset); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rgu, - packed_buf, SJA1105_SIZE_RESET_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgu, packed_buf, + SJA1105_SIZE_RESET_CMD); } static int sja1105pqrs_reset_cmd(const void *ctx, const void *data) @@ -271,8 +287,8 @@ static int sja1105pqrs_reset_cmd(const void *ctx, const void *data) sja1105pqrs_reset_cmd_pack(packed_buf, reset); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rgu, - packed_buf, SJA1105_SIZE_RESET_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgu, packed_buf, + SJA1105_SIZE_RESET_CMD); } static int sja1105_cold_reset(const struct sja1105_private *priv) @@ -287,11 +303,11 @@ int sja1105_inhibit_tx(const struct sja1105_private *priv, unsigned long port_bitmap, bool tx_inhibited) { const struct sja1105_regs *regs = priv->info->regs; - u64 inhibit_cmd; + u32 inhibit_cmd; int rc; - rc = sja1105_spi_send_int(priv, SPI_READ, regs->port_control, - &inhibit_cmd, SJA1105_SIZE_PORT_CTRL); + rc = sja1105_xfer_u32(priv, SPI_READ, regs->port_control, + &inhibit_cmd); if (rc < 0) return rc; @@ -300,8 +316,8 @@ int sja1105_inhibit_tx(const struct sja1105_private *priv, else inhibit_cmd &= ~port_bitmap; - return sja1105_spi_send_int(priv, SPI_WRITE, regs->port_control, - &inhibit_cmd, SJA1105_SIZE_PORT_CTRL); + return sja1105_xfer_u32(priv, SPI_WRITE, regs->port_control, + &inhibit_cmd); } struct sja1105_status { @@ -339,9 +355,7 @@ static int sja1105_status_get(struct sja1105_private *priv, u8 packed_buf[4]; int rc; - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, - regs->status, - packed_buf, 4); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->status, packed_buf, 4); if (rc < 0) return rc; @@ -409,7 +423,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv) rc = static_config_buf_prepare_for_upload(priv, config_buf, buf_len); if (rc < 0) { dev_err(dev, "Invalid config, cannot upload\n"); - return -EINVAL; + rc = -EINVAL; + goto out; } /* Prevent PHY jabbering during switch reset by inhibiting * Tx on all ports and waiting for current packet to drain. @@ -418,7 +433,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv) rc = sja1105_inhibit_tx(priv, port_bitmap, true); if (rc < 0) { dev_err(dev, "Failed to inhibit Tx on ports\n"); - return -ENXIO; + rc = -ENXIO; + goto out; } /* Wait for an eventual egress packet to finish transmission * (reach IFG). It is guaranteed that a second one will not @@ -435,9 +451,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv) /* Wait for the switch to come out of reset */ usleep_range(1000, 5000); /* Upload the static config to the device */ - rc = sja1105_spi_send_long_packed_buf(priv, SPI_WRITE, - regs->config, - config_buf, buf_len); + rc = sja1105_xfer_long_buf(priv, SPI_WRITE, regs->config, + config_buf, buf_len); if (rc < 0) { dev_err(dev, "Failed to upload config, retrying...\n"); continue; diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 16553d92fad2..a3250dcf7d53 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -133,7 +133,7 @@ static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) u64 *ptr; int i, j; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { /* Tx stats */ ring = &adapter->tx_ring[i]; @@ -205,7 +205,7 @@ int ena_get_sset_count(struct net_device *netdev, int sset) if (sset != ETH_SS_STATS) return -EOPNOTSUPP; - return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + return adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; } @@ -214,7 +214,7 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) const struct ena_stats *ena_stats; int i, j; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { /* Tx stats */ for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { ena_stats = &ena_stats_tx_strings[j]; @@ -333,7 +333,7 @@ static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter) val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev); - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) adapter->tx_ring[i].smoothed_interval = val; } @@ -344,7 +344,7 @@ static void ena_update_rx_rings_intr_moderation(struct ena_adapter *adapter) val = ena_com_get_nonadaptive_moderation_interval_rx(adapter->ena_dev); - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) adapter->rx_ring[i].smoothed_interval = val; } @@ -612,7 +612,7 @@ static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info, switch (info->cmd) { case ETHTOOL_GRXRINGS: - info->data = adapter->num_queues; + info->data = adapter->num_io_queues; rc = 0; break; case ETHTOOL_GRXFH: @@ -734,14 +734,20 @@ static void ena_get_channels(struct net_device *netdev, { struct ena_adapter *adapter = netdev_priv(netdev); - channels->max_rx = adapter->num_queues; - channels->max_tx = adapter->num_queues; - channels->max_other = 0; - channels->max_combined = 0; - channels->rx_count = adapter->num_queues; - channels->tx_count = adapter->num_queues; - channels->other_count = 0; - channels->combined_count = 0; + channels->max_combined = adapter->max_num_io_queues; + channels->combined_count = adapter->num_io_queues; +} + +static int ena_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + u32 count = channels->combined_count; + /* The check for max value is already done in ethtool */ + if (count < ENA_MIN_NUM_IO_QUEUES) + return -EINVAL; + + return ena_update_queue_count(adapter, count); } static int ena_get_tunable(struct net_device *netdev, @@ -807,6 +813,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_rxfh = ena_get_rxfh, .set_rxfh = ena_set_rxfh, .get_channels = ena_get_channels, + .set_channels = ena_set_channels, .get_tunable = ena_get_tunable, .set_tunable = ena_set_tunable, }; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index c487d2a7d6dd..d46a912002ff 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -101,7 +101,7 @@ static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) adapter->rx_ring[i].mtu = mtu; } @@ -129,10 +129,10 @@ static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) u32 i; int rc; - adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues); + adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues); if (!adapter->netdev->rx_cpu_rmap) return -ENOMEM; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { int irq_idx = ENA_IO_IRQ_IDX(i); rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, @@ -172,7 +172,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_dev = adapter->ena_dev; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { txr = &adapter->tx_ring[i]; rxr = &adapter->rx_ring[i]; @@ -294,7 +294,7 @@ static int ena_setup_all_tx_resources(struct ena_adapter *adapter) { int i, rc = 0; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rc = ena_setup_tx_resources(adapter, i); if (rc) goto err_setup_tx; @@ -322,7 +322,7 @@ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) ena_free_tx_resources(adapter, i); } @@ -428,7 +428,7 @@ static int ena_setup_all_rx_resources(struct ena_adapter *adapter) { int i, rc = 0; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rc = ena_setup_rx_resources(adapter, i); if (rc) goto err_setup_rx; @@ -456,7 +456,7 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) ena_free_rx_resources(adapter, i); } @@ -600,7 +600,7 @@ static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) struct ena_ring *rx_ring; int i, rc, bufs_num; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rx_ring = &adapter->rx_ring[i]; bufs_num = rx_ring->ring_size - 1; rc = ena_refill_rx_bufs(rx_ring, bufs_num); @@ -616,7 +616,7 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) ena_free_rx_bufs(adapter, i); } @@ -688,7 +688,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter) struct ena_ring *tx_ring; int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { tx_ring = &adapter->tx_ring[i]; ena_free_tx_bufs(tx_ring); } @@ -699,7 +699,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) u16 ena_qid; int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { ena_qid = ENA_IO_TXQ_IDX(i); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } @@ -710,7 +710,7 @@ static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) u16 ena_qid; int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { ena_qid = ENA_IO_RXQ_IDX(i); cancel_work_sync(&adapter->ena_napi[i].dim.work); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); @@ -1331,7 +1331,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) * the number of potential io queues is the minimum of what the device * supports and the number of vCPUs. */ -static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) +static int ena_enable_msix(struct ena_adapter *adapter) { int msix_vecs, irq_cnt; @@ -1342,7 +1342,7 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) } /* Reserved the max msix vectors we might need */ - msix_vecs = ENA_MAX_MSIX_VEC(num_queues); + msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues); netif_dbg(adapter, probe, adapter->netdev, "trying to enable MSI-X, vectors %d\n", msix_vecs); @@ -1359,7 +1359,7 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) netif_notice(adapter, probe, adapter->netdev, "enable only %d MSI-X (out of %d), reduce the number of queues\n", irq_cnt, msix_vecs); - adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; + adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; } if (ena_init_rx_cpu_rmap(adapter)) @@ -1397,7 +1397,7 @@ static void ena_setup_io_intr(struct ena_adapter *adapter) netdev = adapter->netdev; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { irq_idx = ENA_IO_IRQ_IDX(i); cpu = i % num_online_cpus(); @@ -1529,7 +1529,7 @@ static void ena_del_napi(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) netif_napi_del(&adapter->ena_napi[i].napi); } @@ -1538,7 +1538,7 @@ static void ena_init_napi(struct ena_adapter *adapter) struct ena_napi *napi; int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { napi = &adapter->ena_napi[i]; netif_napi_add(adapter->netdev, @@ -1555,7 +1555,7 @@ static void ena_napi_disable_all(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) napi_disable(&adapter->ena_napi[i].napi); } @@ -1563,7 +1563,7 @@ static void ena_napi_enable_all(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) napi_enable(&adapter->ena_napi[i].napi); } @@ -1673,7 +1673,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rc = ena_create_io_tx_queue(adapter, i); if (rc) goto create_err; @@ -1741,7 +1741,7 @@ static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rc = ena_create_io_rx_queue(adapter, i); if (rc) goto create_err; @@ -1764,7 +1764,7 @@ static void set_io_rings_size(struct ena_adapter *adapter, { int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { adapter->tx_ring[i].ring_size = new_tx_size; adapter->rx_ring[i].ring_size = new_rx_size; } @@ -1902,14 +1902,14 @@ static int ena_up(struct ena_adapter *adapter) set_bit(ENA_FLAG_DEV_UP, &adapter->flags); /* Enable completion queues interrupt */ - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) ena_unmask_interrupt(&adapter->tx_ring[i], &adapter->rx_ring[i]); /* schedule napi in case we had pending packets * from the last time we disable napi */ - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) napi_schedule(&adapter->ena_napi[i].napi); return rc; @@ -1984,13 +1984,13 @@ static int ena_open(struct net_device *netdev) int rc; /* Notify the stack of the actual queue counts. */ - rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues); + rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues); if (rc) { netif_err(adapter, ifup, netdev, "Can't set num tx queues\n"); return rc; } - rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues); + rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues); if (rc) { netif_err(adapter, ifup, netdev, "Can't set num rx queues\n"); return rc; @@ -2043,14 +2043,30 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size) { - bool dev_up; + bool dev_was_up; - dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); ena_close(adapter->netdev); adapter->requested_tx_ring_size = new_tx_size; adapter->requested_rx_ring_size = new_rx_size; ena_init_io_rings(adapter); - return dev_up ? ena_up(adapter) : 0; + return dev_was_up ? ena_up(adapter) : 0; +} + +int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + bool dev_was_up; + + dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + ena_close(adapter->netdev); + adapter->num_io_queues = new_channel_count; + /* We need to destroy the rss table so that the indirection + * table will be reinitialized by ena_up() + */ + ena_com_rss_destroy(ena_dev); + ena_init_io_rings(adapter); + return dev_was_up ? ena_open(adapter->netdev) : 0; } static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) @@ -2495,7 +2511,7 @@ static void ena_get_stats64(struct net_device *netdev, if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) return; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { u64 bytes, packets; tx_ring = &adapter->tx_ring[i]; @@ -2682,14 +2698,13 @@ err_mmio_read_less: return rc; } -static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter, - int io_vectors) +static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter) { struct ena_com_dev *ena_dev = adapter->ena_dev; struct device *dev = &adapter->pdev->dev; int rc; - rc = ena_enable_msix(adapter, io_vectors); + rc = ena_enable_msix(adapter); if (rc) { dev_err(dev, "Can not reserve msix vectors\n"); return rc; @@ -2782,8 +2797,7 @@ static int ena_restore_device(struct ena_adapter *adapter) goto err_device_destroy; } - rc = ena_enable_msix_and_set_admin_interrupts(adapter, - adapter->num_queues); + rc = ena_enable_msix_and_set_admin_interrupts(adapter); if (rc) { dev_err(&pdev->dev, "Enable MSI-X failed\n"); goto err_device_destroy; @@ -2948,7 +2962,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) budget = ENA_MONITORED_TX_QUEUES; - for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { + for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) { tx_ring = &adapter->tx_ring[i]; rx_ring = &adapter->rx_ring[i]; @@ -2965,7 +2979,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) break; } - adapter->last_monitored_tx_qid = i % adapter->num_queues; + adapter->last_monitored_tx_qid = i % adapter->num_io_queues; } /* trigger napi schedule after 2 consecutive detections */ @@ -2995,7 +3009,7 @@ static void check_for_empty_rx_ring(struct ena_adapter *adapter) if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) return; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rx_ring = &adapter->rx_ring[i]; refill_required = @@ -3137,16 +3151,16 @@ static void ena_timer_service(struct timer_list *t) mod_timer(&adapter->timer_service, jiffies + HZ); } -static int ena_calc_io_queue_num(struct pci_dev *pdev, - struct ena_com_dev *ena_dev, - struct ena_com_dev_get_features_ctx *get_feat_ctx) +static int ena_calc_max_io_queue_num(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) { - int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num; + int io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { struct ena_admin_queue_ext_feature_fields *max_queue_ext = &get_feat_ctx->max_queue_ext.max_queue_ext; - io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num, + io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num, max_queue_ext->max_rx_cq_num); io_tx_sq_num = max_queue_ext->max_tx_sq_num; @@ -3156,25 +3170,25 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev, &get_feat_ctx->max_queues; io_tx_sq_num = max_queues->max_sq_num; io_tx_cq_num = max_queues->max_cq_num; - io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num); + io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num); } /* In case of LLQ use the llq fields for the tx SQ/CQ */ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) io_tx_sq_num = get_feat_ctx->llq.max_llq_num; - io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); - io_queue_num = min_t(int, io_queue_num, io_rx_num); - io_queue_num = min_t(int, io_queue_num, io_tx_sq_num); - io_queue_num = min_t(int, io_queue_num, io_tx_cq_num); + max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); + max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num); + max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num); + max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ - io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); - if (unlikely(!io_queue_num)) { + max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); + if (unlikely(!max_num_io_queues)) { dev_err(&pdev->dev, "The device doesn't have io queues\n"); return -EFAULT; } - return io_queue_num; + return max_num_io_queues; } static int ena_set_queues_placement_policy(struct pci_dev *pdev, @@ -3302,7 +3316,7 @@ static int ena_rss_init_default(struct ena_adapter *adapter) } for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { - val = ethtool_rxfh_indir_default(i, adapter->num_queues); + val = ethtool_rxfh_indir_default(i, adapter->num_io_queues); rc = ena_com_indirect_table_fill_entry(ena_dev, i, ENA_IO_RXQ_IDX(val)); if (unlikely(rc && (rc != -EOPNOTSUPP))) { @@ -3349,7 +3363,7 @@ static void set_default_llq_configurations(struct ena_llq_configurations *llq_co llq_config->llq_ring_entry_size_value = 128; } -static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx) +static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx) { struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; struct ena_com_dev *ena_dev = ctx->ena_dev; @@ -3358,7 +3372,7 @@ static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx) u32 max_tx_queue_size; u32 max_rx_queue_size; - if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { struct ena_admin_queue_ext_feature_fields *max_queue_ext = &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth, @@ -3432,11 +3446,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ena_llq_configurations llq_config; struct ena_com_dev *ena_dev = NULL; struct ena_adapter *adapter; - int io_queue_num, bars, rc; struct net_device *netdev; static int adapters_found; + u32 max_num_io_queues; char *queue_type_str; bool wd_state; + int bars, rc; dev_dbg(&pdev->dev, "%s\n", __func__); @@ -3497,27 +3512,20 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) calc_queue_ctx.pdev = pdev; /* Initial Tx and RX interrupt delay. Assumes 1 usec granularity. - * Updated during device initialization with the real granularity - */ + * Updated during device initialization with the real granularity + */ ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS; ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION; - io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); - rc = ena_calc_queue_size(&calc_queue_ctx); - if (rc || io_queue_num <= 0) { + max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx); + rc = ena_calc_io_queue_size(&calc_queue_ctx); + if (rc || !max_num_io_queues) { rc = -EFAULT; goto err_device_destroy; } - dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size. %d LLQ is %s\n", - io_queue_num, - calc_queue_ctx.rx_queue_size, - calc_queue_ctx.tx_queue_size, - (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ? - "ENABLED" : "DISABLED"); - /* dev zeroed in init_etherdev */ - netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num); + netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues); if (!netdev) { dev_err(&pdev->dev, "alloc_etherdev_mq failed\n"); rc = -ENOMEM; @@ -3545,7 +3553,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; - adapter->num_queues = io_queue_num; + adapter->num_io_queues = max_num_io_queues; + adapter->max_num_io_queues = max_num_io_queues; + adapter->last_monitored_tx_qid = 0; adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; @@ -3569,7 +3579,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u64_stats_init(&adapter->syncp); - rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num); + rc = ena_enable_msix_and_set_admin_interrupts(adapter); if (rc) { dev_err(&pdev->dev, "Failed to enable and set the admin interrupts\n"); @@ -3611,9 +3621,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) queue_type_str = "Low Latency"; dev_info(&pdev->dev, - "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n", + "%s found at mem %lx, mac addr %pM, Placement policy: %s\n", DEVICE_NAME, (long)pci_resource_start(pdev, 0), - netdev->dev_addr, io_queue_num, queue_type_str); + netdev->dev_addr, queue_type_str); set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 72ee51a82ec7..bffd778f2ce3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -82,6 +82,8 @@ #define ENA_DEFAULT_RING_SIZE (1024) #define ENA_MIN_RING_SIZE (256) +#define ENA_MIN_NUM_IO_QUEUES (1) + #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) #define ENA_DEFAULT_RX_COPYBREAK (256 - NET_IP_ALIGN) @@ -161,10 +163,10 @@ struct ena_calc_queue_size_ctx { struct ena_com_dev_get_features_ctx *get_feat_ctx; struct ena_com_dev *ena_dev; struct pci_dev *pdev; - u16 tx_queue_size; - u16 rx_queue_size; - u16 max_tx_queue_size; - u16 max_rx_queue_size; + u32 tx_queue_size; + u32 rx_queue_size; + u32 max_tx_queue_size; + u32 max_rx_queue_size; u16 max_tx_sgl_size; u16 max_rx_sgl_size; }; @@ -324,7 +326,8 @@ struct ena_adapter { u32 rx_copybreak; u32 max_mtu; - int num_queues; + u32 num_io_queues; + u32 max_num_io_queues; int msix_vecs; @@ -387,6 +390,7 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size); +int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); int ena_get_sset_count(struct net_device *netdev, int sset); diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 7548247455d7..1b1a09095c0d 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -526,7 +526,7 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) struct device *dev = &ag->pdev->dev; struct net_device *ndev = ag->ndev; static struct mii_bus *mii_bus; - struct device_node *np; + struct device_node *np, *mnp; int err; np = dev->of_node; @@ -571,7 +571,9 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) msleep(200); } - err = of_mdiobus_register(mii_bus, np); + mnp = of_get_child_by_name(np, "mdio"); + err = of_mdiobus_register(mii_bus, mnp); + of_node_put(mnp); if (err) goto mdio_err_put_clk; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index e664392dccc0..ff1bc0ec2e7c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -16,7 +16,8 @@ #include "bnxt_devlink.h" static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); struct bnxt_fw_health *health = bp->fw_health; @@ -66,7 +67,8 @@ static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = { }; static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); @@ -84,7 +86,8 @@ struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = { }; static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index f96a42af1014..af04a2c81adb 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1914,10 +1914,10 @@ static struct platform_driver xgmac_driver = { .driver = { .name = "calxedaxgmac", .of_match_table = xgmac_of_match, + .pm = &xgmac_pm_ops, }, .probe = xgmac_probe, .remove = xgmac_remove, - .driver.pm = &xgmac_pm_ops, }; module_platform_driver(xgmac_driver); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 5b602243d573..a4dead4ab0ed 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -137,13 +137,12 @@ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, static int alloc_uld_rxqs(struct adapter *adap, struct sge_uld_rxq_info *rxq_info, bool lro) { - struct sge *s = &adap->sge; unsigned int nq = rxq_info->nrxq + rxq_info->nciq; + int i, err, msi_idx, que_idx = 0, bmap_idx = 0; struct sge_ofld_rxq *q = rxq_info->uldrxq; unsigned short *ids = rxq_info->rspq_id; - unsigned int bmap_idx = 0; + struct sge *s = &adap->sge; unsigned int per_chan; - int i, err, msi_idx, que_idx = 0; per_chan = rxq_info->nrxq / adap->params.nports; @@ -161,6 +160,10 @@ static int alloc_uld_rxqs(struct adapter *adap, if (msi_idx >= 0) { bmap_idx = get_msix_idx_from_bmap(adap); + if (bmap_idx < 0) { + err = -ENOSPC; + goto freeout; + } msi_idx = adap->msix_info_ulds[bmap_idx].idx; } err = t4_sge_alloc_rxq(adap, &q->rspq, false, diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 9b7af94a40bb..824310253099 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -90,6 +90,9 @@ struct ftgmac100 { struct mii_bus *mii_bus; struct clk *clk; + /* AST2500/AST2600 RMII ref clock gate */ + struct clk *rclk; + /* Link management */ int cur_speed; int cur_duplex; @@ -1718,20 +1721,41 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd) nd->link_up ? "up" : "down"); } -static void ftgmac100_setup_clk(struct ftgmac100 *priv) +static int ftgmac100_setup_clk(struct ftgmac100 *priv) { - priv->clk = devm_clk_get(priv->dev, NULL); - if (IS_ERR(priv->clk)) - return; + struct clk *clk; + int rc; - clk_prepare_enable(priv->clk); + clk = devm_clk_get(priv->dev, NULL /* MACCLK */); + if (IS_ERR(clk)) + return PTR_ERR(clk); + priv->clk = clk; + rc = clk_prepare_enable(priv->clk); + if (rc) + return rc; /* Aspeed specifies a 100MHz clock is required for up to * 1000Mbit link speeds. As NCSI is limited to 100Mbit, 25MHz * is sufficient */ - clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ : - FTGMAC_100MHZ); + rc = clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ : + FTGMAC_100MHZ); + if (rc) + goto cleanup_clk; + + /* RCLK is for RMII, typically used for NCSI. Optional because its not + * necessary if it's the AST2400 MAC, or the MAC is configured for + * RGMII, or the controller is not an ASPEED-based controller. + */ + priv->rclk = devm_clk_get_optional(priv->dev, "RCLK"); + rc = clk_prepare_enable(priv->rclk); + if (!rc) + return 0; + +cleanup_clk: + clk_disable_unprepare(priv->clk); + + return rc; } static int ftgmac100_probe(struct platform_device *pdev) @@ -1853,8 +1877,11 @@ static int ftgmac100_probe(struct platform_device *pdev) goto err_setup_mdio; } - if (priv->is_aspeed) - ftgmac100_setup_clk(priv); + if (priv->is_aspeed) { + err = ftgmac100_setup_clk(priv); + if (err) + goto err_ncsi_dev; + } /* Default ring sizes */ priv->rx_q_entries = priv->new_rx_q_entries = DEF_RX_QUEUE_ENTRIES; @@ -1886,8 +1913,10 @@ static int ftgmac100_probe(struct platform_device *pdev) return 0; -err_ncsi_dev: err_register_netdev: + clk_disable_unprepare(priv->rclk); + clk_disable_unprepare(priv->clk); +err_ncsi_dev: ftgmac100_destroy_mdio(netdev); err_setup_mdio: iounmap(priv->base); @@ -1909,6 +1938,7 @@ static int ftgmac100_remove(struct platform_device *pdev) unregister_netdev(netdev); + clk_disable_unprepare(priv->rclk); clk_disable_unprepare(priv->clk); /* There's a small chance the reset task will have been re-queued, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 162d7d8fb295..1b959d7ec923 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -221,6 +221,7 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv, struct dpaa2_eth_channel *ch, dma_addr_t addr) { + int retries = 0; int err; ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr; @@ -229,8 +230,11 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv, while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid, ch->xdp.drop_bufs, - ch->xdp.drop_cnt)) == -EBUSY) + ch->xdp.drop_cnt)) == -EBUSY) { + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) + break; cpu_relax(); + } if (err) { free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt); @@ -458,7 +462,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch, struct dpaa2_eth_fq *fq = NULL; struct dpaa2_dq *dq; const struct dpaa2_fd *fd; - int cleaned = 0; + int cleaned = 0, retries = 0; int is_last; do { @@ -469,6 +473,11 @@ static int consume_frames(struct dpaa2_eth_channel *ch, * the store until we get some sort of valid response * token (either a valid frame or an "empty dequeue") */ + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) { + netdev_err_once(priv->net_dev, + "Unable to read a valid dequeue response\n"); + return -ETIMEDOUT; + } continue; } @@ -477,6 +486,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch, fq->consume(priv, ch, fd, fq); cleaned++; + retries = 0; } while (!is_last); if (!cleaned) @@ -949,6 +959,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv, u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; struct page *page; dma_addr_t addr; + int retries = 0; int i, err; for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) { @@ -980,8 +991,11 @@ static int add_bufs(struct dpaa2_eth_priv *priv, release_bufs: /* In case the portal is busy, retry until successful */ while ((err = dpaa2_io_service_release(ch->dpio, bpid, - buf_array, i)) == -EBUSY) + buf_array, i)) == -EBUSY) { + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) + break; cpu_relax(); + } /* If release command failed, clean up and bail out; * not much else we can do about it @@ -1032,16 +1046,21 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) static void drain_bufs(struct dpaa2_eth_priv *priv, int count) { u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; + int retries = 0; int ret; do { ret = dpaa2_io_service_acquire(NULL, priv->bpid, buf_array, count); if (ret < 0) { + if (ret == -EBUSY && + retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) + continue; netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n"); return; } free_bufs(priv, buf_array, ret); + retries = 0; } while (ret); } @@ -1094,7 +1113,7 @@ static int pull_channel(struct dpaa2_eth_channel *ch) ch->store); dequeues++; cpu_relax(); - } while (err == -EBUSY); + } while (err == -EBUSY && dequeues < DPAA2_ETH_SWP_BUSY_RETRIES); ch->stats.dequeue_portal_busy += dequeues; if (unlikely(err)) @@ -1118,6 +1137,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) struct netdev_queue *nq; int store_cleaned, work_done; struct list_head rx_list; + int retries = 0; int err; ch = container_of(napi, struct dpaa2_eth_channel, napi); @@ -1136,7 +1156,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) refill_pool(priv, ch, priv->bpid); store_cleaned = consume_frames(ch, &fq); - if (!store_cleaned) + if (store_cleaned <= 0) break; if (fq->type == DPAA2_RX_FQ) { rx_cleaned += store_cleaned; @@ -1163,7 +1183,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) do { err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx); cpu_relax(); - } while (err == -EBUSY); + } while (err == -EBUSY && retries++ < DPAA2_ETH_SWP_BUSY_RETRIES); WARN_ONCE(err, "CDAN notifications rearm failed on core %d", ch->nctx.desired_cpu); @@ -2043,7 +2063,6 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv) { struct fsl_mc_device *dpcon; struct device *dev = priv->net_dev->dev.parent; - struct dpcon_attr attrs; int err; err = fsl_mc_object_allocate(to_fsl_mc_device(dev), @@ -2068,12 +2087,6 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv) goto close; } - err = dpcon_get_attributes(priv->mc_io, 0, dpcon->mc_handle, &attrs); - if (err) { - dev_err(dev, "dpcon_get_attributes() failed\n"); - goto close; - } - err = dpcon_enable(priv->mc_io, 0, dpcon->mc_handle); if (err) { dev_err(dev, "dpcon_enable() failed\n"); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 8a0e65b3267f..686b651edcb2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -245,6 +245,14 @@ static inline struct dpaa2_faead *dpaa2_get_faead(void *buf_addr, bool swa) */ #define DPAA2_ETH_ENQUEUE_RETRIES 10 +/* Number of times to retry DPIO portal operations while waiting + * for portal to finish executing current command and become + * available. We want to avoid being stuck in a while loop in case + * hardware becomes unresponsive, but not give up too easily if + * the portal really is busy for valid reasons + */ +#define DPAA2_ETH_SWP_BUSY_RETRIES 1000 + /* Driver statistics, other than those in struct rtnl_link_stats64. * These are usually collected per-CPU and aggregated by ethtool. */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 0aa1c34019bb..dc9a6c36cac0 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -216,7 +216,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev, if (err == -EINVAL) /* Older firmware versions don't support all pages */ memset(&dpni_stats, 0, sizeof(dpni_stats)); - else + else if (err) netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j); num_cnt = dpni_stats_page_size[j] / sizeof(u64); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 3a14bbc26ea2..1c5243cc1dc6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -3049,7 +3049,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) u32 sl; u32 credit; int i; - const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { + static const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { {DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, {DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, {DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0}, @@ -3059,7 +3059,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, }; - const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { + static const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0}, {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1}, {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2}, diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index f8a87f8ca983..0059d440e1f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -45,6 +45,7 @@ enum HCLGE_MBX_OPCODE { HCLGE_MBX_GET_LINK_MODE, /* (VF -> PF) get the link mode of pf */ HCLGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */ HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */ + HCLGE_MBX_PUSH_PROMISC_INFO, /* (PF -> VF) push vf promisc info */ HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */ HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index c4b7bf851a28..c15d7fc31bb8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -364,6 +364,19 @@ struct hnae3_ae_dev { * Enable/disable HW GRO * add_arfs_entry * Check the 5-tuples of flow, and create flow director rule + * get_vf_config + * Get the VF configuration setting by the host + * set_vf_link_state + * Set VF link status + * set_vf_spoofchk + * Enable/disable spoof check for specified vf + * set_vf_trust + * Enable/disable trust for specified vf, if the vf being trusted, then + * it can enable promisc mode + * set_vf_rate + * Set the max tx rate of specified vf. + * set_vf_mac + * Configure the default MAC for specified VF */ struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); @@ -529,6 +542,16 @@ struct hnae3_ae_ops { int (*mac_connect_phy)(struct hnae3_handle *handle); void (*mac_disconnect_phy)(struct hnae3_handle *handle); void (*restore_vlan_table)(struct hnae3_handle *handle); + int (*get_vf_config)(struct hnae3_handle *handle, int vf, + struct ifla_vf_info *ivf); + int (*set_vf_link_state)(struct hnae3_handle *handle, int vf, + int link_state); + int (*set_vf_spoofchk)(struct hnae3_handle *handle, int vf, + bool enable); + int (*set_vf_trust)(struct hnae3_handle *handle, int vf, bool enable); + int (*set_vf_rate)(struct hnae3_handle *handle, int vf, + int min_tx_rate, int max_tx_rate, bool force); + int (*set_vf_mac)(struct hnae3_handle *handle, int vf, u8 *p); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 616cad0faa21..6e0b2612d92d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -681,7 +681,7 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, return 0; ret = skb_cow_head(skb, 0); - if (unlikely(ret)) + if (unlikely(ret < 0)) return ret; l3.hdr = skb_network_header(skb); @@ -962,14 +962,6 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, return 0; } -static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end) -{ - /* Config bd buffer end */ - if (!!frag_end) - hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, 1U); - hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1U); -} - static int hns3_handle_vtags(struct hns3_enet_ring *tx_ring, struct sk_buff *skb) { @@ -1062,7 +1054,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, skb_reset_mac_len(skb); ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_l4_proto_err++; u64_stats_update_end(&ring->syncp); @@ -1072,7 +1064,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, ret = hns3_set_l2l3l4(skb, ol4_proto, il4_proto, &type_cs_vlan_tso, &ol_type_vlan_len_msec); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_l2l3l4_err++; u64_stats_update_end(&ring->syncp); @@ -1081,7 +1073,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, ret = hns3_set_tso(skb, &paylen, &mss, &type_cs_vlan_tso); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_tso_err++; u64_stats_update_end(&ring->syncp); @@ -1102,9 +1094,10 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, } static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, - unsigned int size, int frag_end, - enum hns_desc_type type) + unsigned int size, enum hns_desc_type type) { +#define HNS3_LIKELY_BD_NUM 1 + struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct hns3_desc *desc = &ring->desc[ring->next_to_use]; struct device *dev = ring_to_dev(ring); @@ -1118,7 +1111,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, int ret; ret = hns3_fill_skb_desc(ring, skb, desc); - if (unlikely(ret)) + if (unlikely(ret < 0)) return ret; dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); @@ -1137,19 +1130,16 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc_cb->length = size; if (likely(size <= HNS3_MAX_BD_SIZE)) { - u16 bdtp_fe_sc_vld_ra_ri = 0; - desc_cb->priv = priv; desc_cb->dma = dma; desc_cb->type = type; desc->addr = cpu_to_le64(dma); desc->tx.send_size = cpu_to_le16(size); - hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end); desc->tx.bdtp_fe_sc_vld_ra_ri = - cpu_to_le16(bdtp_fe_sc_vld_ra_ri); + cpu_to_le16(BIT(HNS3_TXD_VLD_B)); ring_ptr_move_fw(ring, next_to_use); - return 0; + return HNS3_LIKELY_BD_NUM; } frag_buf_num = hns3_tx_bd_count(size); @@ -1158,8 +1148,6 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, /* When frag size is bigger than hardware limit, split this frag */ for (k = 0; k < frag_buf_num; k++) { - u16 bdtp_fe_sc_vld_ra_ri = 0; - /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */ desc_cb->priv = priv; desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k; @@ -1170,11 +1158,8 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k); desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ? (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE); - hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, - frag_end && (k == frag_buf_num - 1) ? - 1 : 0); desc->tx.bdtp_fe_sc_vld_ra_ri = - cpu_to_le16(bdtp_fe_sc_vld_ra_ri); + cpu_to_le16(BIT(HNS3_TXD_VLD_B)); /* move ring pointer to next */ ring_ptr_move_fw(ring, next_to_use); @@ -1183,23 +1168,78 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc = &ring->desc[ring->next_to_use]; } - return 0; + return frag_buf_num; } -static unsigned int hns3_nic_bd_num(struct sk_buff *skb) +static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size, + unsigned int bd_num) { - unsigned int bd_num; + unsigned int size; int i; - /* if the total len is within the max bd limit */ - if (likely(skb->len <= HNS3_MAX_BD_SIZE)) - return skb_shinfo(skb)->nr_frags + 1; + size = skb_headlen(skb); + while (size > HNS3_MAX_BD_SIZE) { + bd_size[bd_num++] = HNS3_MAX_BD_SIZE; + size -= HNS3_MAX_BD_SIZE; - bd_num = hns3_tx_bd_count(skb_headlen(skb)); + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + } + + if (size) { + bd_size[bd_num++] = size; + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - bd_num += hns3_tx_bd_count(skb_frag_size(frag)); + size = skb_frag_size(frag); + if (!size) + continue; + + while (size > HNS3_MAX_BD_SIZE) { + bd_size[bd_num++] = HNS3_MAX_BD_SIZE; + size -= HNS3_MAX_BD_SIZE; + + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + } + + bd_size[bd_num++] = size; + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + } + + return bd_num; +} + +static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size) +{ + struct sk_buff *frag_skb; + unsigned int bd_num = 0; + + /* If the total len is within the max bd limit */ + if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) && + skb_shinfo(skb)->nr_frags < HNS3_MAX_NON_TSO_BD_NUM)) + return skb_shinfo(skb)->nr_frags + 1U; + + /* The below case will always be linearized, return + * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized. + */ + if (unlikely(skb->len > HNS3_MAX_TSO_SIZE || + (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE))) + return HNS3_MAX_TSO_BD_NUM + 1U; + + bd_num = hns3_skb_bd_num(skb, bd_size, bd_num); + + if (!skb_has_frag_list(skb) || bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + + skb_walk_frags(skb, frag_skb) { + bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num); + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; } return bd_num; @@ -1218,26 +1258,26 @@ static unsigned int hns3_gso_hdr_len(struct sk_buff *skb) * 7 frags to to be larger than gso header len + mss, and the remaining * continuous 7 frags to be larger than MSS except the last 7 frags. */ -static bool hns3_skb_need_linearized(struct sk_buff *skb) +static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size, + unsigned int bd_num) { - int bd_limit = HNS3_MAX_BD_NUM_NORMAL - 1; unsigned int tot_len = 0; int i; - for (i = 0; i < bd_limit; i++) - tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); + for (i = 0; i < HNS3_MAX_NON_TSO_BD_NUM - 1U; i++) + tot_len += bd_size[i]; - /* ensure headlen + the first 7 frags is greater than mss + header - * and the first 7 frags is greater than mss. - */ - if (((tot_len + skb_headlen(skb)) < (skb_shinfo(skb)->gso_size + - hns3_gso_hdr_len(skb))) || (tot_len < skb_shinfo(skb)->gso_size)) + /* ensure the first 8 frags is greater than mss + header */ + if (tot_len + bd_size[HNS3_MAX_NON_TSO_BD_NUM - 1U] < + skb_shinfo(skb)->gso_size + hns3_gso_hdr_len(skb)) return true; - /* ensure the remaining continuous 7 buffer is greater than mss */ - for (i = 0; i < (skb_shinfo(skb)->nr_frags - bd_limit - 1); i++) { - tot_len -= skb_frag_size(&skb_shinfo(skb)->frags[i]); - tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i + bd_limit]); + /* ensure every continuous 7 buffer is greater than mss + * except the last one. + */ + for (i = 0; i < bd_num - HNS3_MAX_NON_TSO_BD_NUM; i++) { + tot_len -= bd_size[i]; + tot_len += bd_size[i + HNS3_MAX_NON_TSO_BD_NUM - 1U]; if (tot_len < skb_shinfo(skb)->gso_size) return true; @@ -1249,15 +1289,16 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb) static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, struct sk_buff **out_skb) { + unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U]; struct sk_buff *skb = *out_skb; unsigned int bd_num; - bd_num = hns3_nic_bd_num(skb); - if (unlikely(bd_num > HNS3_MAX_BD_NUM_NORMAL)) { + bd_num = hns3_tx_bd_num(skb, bd_size); + if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) { struct sk_buff *new_skb; - if (skb_is_gso(skb) && bd_num <= HNS3_MAX_BD_NUM_TSO && - !hns3_skb_need_linearized(skb)) + if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) && + !hns3_skb_need_linearized(skb, bd_size, bd_num)) goto out; /* manual split the send packet */ @@ -1267,9 +1308,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, dev_kfree_skb_any(skb); *out_skb = new_skb; - bd_num = hns3_nic_bd_num(new_skb); - if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_TSO) || - (!skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_NORMAL)) + bd_num = hns3_tx_bd_count(new_skb->len); + if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_TSO_BD_NUM) || + (!skb_is_gso(new_skb) && + bd_num > HNS3_MAX_NON_TSO_BD_NUM)) return -ENOMEM; u64_stats_update_begin(&ring->syncp); @@ -1314,6 +1356,37 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) } } +static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, + struct sk_buff *skb, enum hns_desc_type type) +{ + unsigned int size = skb_headlen(skb); + int i, ret, bd_num = 0; + + if (size) { + ret = hns3_fill_desc(ring, skb, size, type); + if (unlikely(ret < 0)) + return ret; + + bd_num += ret; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + size = skb_frag_size(frag); + if (!size) + continue; + + ret = hns3_fill_desc(ring, frag, size, DESC_TYPE_PAGE); + if (unlikely(ret < 0)) + return ret; + + bd_num += ret; + } + + return bd_num; +} + netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -1321,58 +1394,54 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) &tx_ring_data(priv, skb->queue_mapping); struct hns3_enet_ring *ring = ring_data->ring; struct netdev_queue *dev_queue; - skb_frag_t *frag; - int next_to_use_head; - int buf_num; - int seg_num; - int size; + int pre_ntu, next_to_use_head; + struct sk_buff *frag_skb; + int bd_num = 0; int ret; - int i; /* Prefetch the data used later */ prefetch(skb->data); - buf_num = hns3_nic_maybe_stop_tx(ring, &skb); - if (unlikely(buf_num <= 0)) { - if (buf_num == -EBUSY) { + ret = hns3_nic_maybe_stop_tx(ring, &skb); + if (unlikely(ret <= 0)) { + if (ret == -EBUSY) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_busy++; u64_stats_update_end(&ring->syncp); goto out_net_tx_busy; - } else if (buf_num == -ENOMEM) { + } else if (ret == -ENOMEM) { u64_stats_update_begin(&ring->syncp); ring->stats.sw_err_cnt++; u64_stats_update_end(&ring->syncp); } - hns3_rl_err(netdev, "xmit error: %d!\n", buf_num); + hns3_rl_err(netdev, "xmit error: %d!\n", ret); goto out_err_tx_ok; } - /* No. of segments (plus a header) */ - seg_num = skb_shinfo(skb)->nr_frags + 1; - /* Fill the first part */ - size = skb_headlen(skb); - next_to_use_head = ring->next_to_use; - ret = hns3_fill_desc(ring, skb, size, seg_num == 1 ? 1 : 0, - DESC_TYPE_SKB); - if (unlikely(ret)) + ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); + if (unlikely(ret < 0)) goto fill_err; - /* Fill the fragments */ - for (i = 1; i < seg_num; i++) { - frag = &skb_shinfo(skb)->frags[i - 1]; - size = skb_frag_size(frag); + bd_num += ret; - ret = hns3_fill_desc(ring, frag, size, - seg_num - 1 == i ? 1 : 0, - DESC_TYPE_PAGE); + if (!skb_has_frag_list(skb)) + goto out; - if (unlikely(ret)) + skb_walk_frags(skb, frag_skb) { + ret = hns3_fill_skb_to_desc(ring, frag_skb, DESC_TYPE_PAGE); + if (unlikely(ret < 0)) goto fill_err; + + bd_num += ret; } +out: + pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) : + (ring->desc_num - 1); + ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |= + cpu_to_le16(BIT(HNS3_TXD_FE_B)); /* Complete translate all packets */ dev_queue = netdev_get_tx_queue(netdev, ring_data->queue_index); @@ -1380,7 +1449,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) wmb(); /* Commit all data before submit */ - hnae3_queue_xmit(ring->tqp, buf_num); + hnae3_queue_xmit(ring->tqp, bd_num); return NETDEV_TX_OK; @@ -1413,6 +1482,16 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) return 0; } + /* For VF device, if there is a perm_addr, then the user will not + * be allowed to change the address. + */ + if (!hns3_is_phys_func(h->pdev) && + !is_zero_ether_addr(netdev->perm_addr)) { + netdev_err(netdev, "has permanent MAC %pM, user MAC %pM not allow\n", + netdev->perm_addr, mac_addr->sa_data); + return -EPERM; + } + ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data, false); if (ret) { netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret); @@ -1643,6 +1722,29 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, return ret; } +static int hns3_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + + if (hns3_nic_resetting(netdev)) + return -EBUSY; + + if (!handle->ae_algo->ops->set_vf_spoofchk) + return -EOPNOTSUPP; + + return handle->ae_algo->ops->set_vf_spoofchk(handle, vf, enable); +} + +static int hns3_set_vf_trust(struct net_device *netdev, int vf, bool enable) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + + if (!handle->ae_algo->ops->set_vf_trust) + return -EOPNOTSUPP; + + return handle->ae_algo->ops->set_vf_trust(handle, vf, enable); +} + static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) { struct hnae3_handle *h = hns3_get_handle(netdev); @@ -1805,6 +1907,57 @@ static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, } #endif +static int hns3_nic_get_vf_config(struct net_device *ndev, int vf, + struct ifla_vf_info *ivf) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (!h->ae_algo->ops->get_vf_config) + return -EOPNOTSUPP; + + return h->ae_algo->ops->get_vf_config(h, vf, ivf); +} + +static int hns3_nic_set_vf_link_state(struct net_device *ndev, int vf, + int link_state) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (!h->ae_algo->ops->set_vf_link_state) + return -EOPNOTSUPP; + + return h->ae_algo->ops->set_vf_link_state(h, vf, link_state); +} + +static int hns3_nic_set_vf_rate(struct net_device *ndev, int vf, + int min_tx_rate, int max_tx_rate) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (!h->ae_algo->ops->set_vf_rate) + return -EOPNOTSUPP; + + return h->ae_algo->ops->set_vf_rate(h, vf, min_tx_rate, max_tx_rate, + false); +} + +static int hns3_nic_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!h->ae_algo->ops->set_vf_mac) + return -EOPNOTSUPP; + + if (is_multicast_ether_addr(mac)) { + netdev_err(netdev, + "Invalid MAC:%pM specified. Could not set MAC\n", + mac); + return -EINVAL; + } + + return h->ae_algo->ops->set_vf_mac(h, vf_id, mac); +} + static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_open = hns3_nic_net_open, .ndo_stop = hns3_nic_net_stop, @@ -1820,10 +1973,15 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_vlan_rx_add_vid = hns3_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = hns3_vlan_rx_kill_vid, .ndo_set_vf_vlan = hns3_ndo_set_vf_vlan, + .ndo_set_vf_spoofchk = hns3_set_vf_spoofchk, + .ndo_set_vf_trust = hns3_set_vf_trust, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = hns3_rx_flow_steer, #endif - + .ndo_get_vf_config = hns3_nic_get_vf_config, + .ndo_set_vf_link_state = hns3_nic_set_vf_link_state, + .ndo_set_vf_rate = hns3_nic_set_vf_rate, + .ndo_set_vf_mac = hns3_nic_set_vf_mac, }; bool hns3_is_phys_func(struct pci_dev *pdev) @@ -2069,9 +2227,8 @@ static void hns3_set_default_feature(struct net_device *netdev) NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC; - - netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_TSO_MANGLEID | NETIF_F_FRAGLIST; netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; @@ -2081,21 +2238,24 @@ static void hns3_set_default_feature(struct net_device *netdev) NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC; + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_FRAGLIST; netdev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC; + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_FRAGLIST; netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC; + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_FRAGLIST; if (pdev->revision >= 0x21) { netdev->hw_features |= NETIF_F_GRO_HW; @@ -2358,7 +2518,7 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring) netdev_tx_completed_queue(dev_queue, pkts, bytes); if (unlikely(pkts && netif_carrier_ok(netdev) && - (ring_space(ring) > HNS3_MAX_BD_PER_PKT))) { + ring_space(ring) > HNS3_MAX_TSO_BD_NUM)) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ @@ -3743,23 +3903,24 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv) } /* Set mac addr if it is configured. or leave it to the AE driver */ -static int hns3_init_mac_addr(struct net_device *netdev, bool init) +static int hns3_init_mac_addr(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; u8 mac_addr_temp[ETH_ALEN]; int ret = 0; - if (h->ae_algo->ops->get_mac_addr && init) { + if (h->ae_algo->ops->get_mac_addr) h->ae_algo->ops->get_mac_addr(h, mac_addr_temp); - ether_addr_copy(netdev->dev_addr, mac_addr_temp); - } /* Check if the MAC address is valid, if not get a random one */ - if (!is_valid_ether_addr(netdev->dev_addr)) { + if (!is_valid_ether_addr(mac_addr_temp)) { eth_hw_addr_random(netdev); dev_warn(priv->dev, "using random MAC address %pM\n", netdev->dev_addr); + } else { + ether_addr_copy(netdev->dev_addr, mac_addr_temp); + ether_addr_copy(netdev->perm_addr, mac_addr_temp); } if (h->ae_algo->ops->set_mac_addr) @@ -3863,7 +4024,7 @@ static int hns3_client_init(struct hnae3_handle *handle) handle->kinfo.netdev = netdev; handle->priv = (void *)priv; - hns3_init_mac_addr(netdev, true); + hns3_init_mac_addr(netdev); hns3_set_default_feature(netdev); @@ -4331,7 +4492,7 @@ static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle) bool vlan_filter_enable; int ret; - ret = hns3_init_mac_addr(netdev, false); + ret = hns3_init_mac_addr(netdev); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 2110fa3b4479..c5b7c22263b1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -76,7 +76,7 @@ enum hns3_nic_state { #define HNS3_RING_NAME_LEN 16 #define HNS3_BUFFER_SIZE_2048 2048 #define HNS3_RING_MAX_PENDING 32760 -#define HNS3_RING_MIN_PENDING 24 +#define HNS3_RING_MIN_PENDING 72 #define HNS3_RING_BD_MULTIPLE 8 /* max frame size of mac */ #define HNS3_MAC_MAX_FRAME 9728 @@ -195,9 +195,13 @@ enum hns3_nic_state { #define HNS3_VECTOR_INITED 1 #define HNS3_MAX_BD_SIZE 65535 -#define HNS3_MAX_BD_NUM_NORMAL 8 -#define HNS3_MAX_BD_NUM_TSO 63 -#define HNS3_MAX_BD_PER_PKT MAX_SKB_FRAGS +#define HNS3_MAX_NON_TSO_BD_NUM 8U +#define HNS3_MAX_TSO_BD_NUM 63U +#define HNS3_MAX_TSO_SIZE \ + (HNS3_MAX_BD_SIZE * HNS3_MAX_TSO_BD_NUM) + +#define HNS3_MAX_NON_TSO_SIZE \ + (HNS3_MAX_BD_SIZE * HNS3_MAX_NON_TSO_BD_NUM) #define HNS3_VECTOR_GL0_OFFSET 0x100 #define HNS3_VECTOR_GL1_OFFSET 0x200 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 4821fe08b5e4..3578832067ff 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -244,7 +244,7 @@ enum hclge_opcode_type { /* QCN commands */ HCLGE_OPC_QCN_MOD_CFG = 0x1A01, HCLGE_OPC_QCN_GRP_TMPLT_CFG = 0x1A02, - HCLGE_OPC_QCN_SHAPPING_IR_CFG = 0x1A03, + HCLGE_OPC_QCN_SHAPPING_CFG = 0x1A03, HCLGE_OPC_QCN_SHAPPING_BS_CFG = 0x1A04, HCLGE_OPC_QCN_QSET_LINK_CFG = 0x1A05, HCLGE_OPC_QCN_RP_STATUS_GET = 0x1A06, @@ -1090,9 +1090,6 @@ void hclge_cmd_setup_basic_desc(struct hclge_desc *desc, enum hclge_opcode_type opcode, bool is_read); void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read); -int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, - struct hclge_promisc_param *param); - enum hclge_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw, struct hclge_desc *desc); enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index d0128d792717..0ccc8e7b19d0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -1110,6 +1110,82 @@ static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev) } } +static void hclge_dbg_dump_qs_shaper_single(struct hclge_dev *hdev, u16 qsid) +{ + struct hclge_qs_shapping_cmd *shap_cfg_cmd; + u8 ir_u, ir_b, ir_s, bs_b, bs_s; + struct hclge_desc desc; + u32 shapping_para; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, true); + + shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data; + shap_cfg_cmd->qs_id = cpu_to_le16(qsid); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "qs%u failed to get tx_rate, ret=%d\n", + qsid, ret); + return; + } + + shapping_para = le32_to_cpu(shap_cfg_cmd->qs_shapping_para); + ir_b = hclge_tm_get_field(shapping_para, IR_B); + ir_u = hclge_tm_get_field(shapping_para, IR_U); + ir_s = hclge_tm_get_field(shapping_para, IR_S); + bs_b = hclge_tm_get_field(shapping_para, BS_B); + bs_s = hclge_tm_get_field(shapping_para, BS_S); + + dev_info(&hdev->pdev->dev, + "qs%u ir_b:%u, ir_u:%u, ir_s:%u, bs_b:%u, bs_s:%u\n", + qsid, ir_b, ir_u, ir_s, bs_b, bs_s); +} + +static void hclge_dbg_dump_qs_shaper_all(struct hclge_dev *hdev) +{ + struct hnae3_knic_private_info *kinfo; + struct hclge_vport *vport; + int vport_id, i; + + for (vport_id = 0; vport_id <= pci_num_vf(hdev->pdev); vport_id++) { + vport = &hdev->vport[vport_id]; + kinfo = &vport->nic.kinfo; + + dev_info(&hdev->pdev->dev, "qs cfg of vport%d:\n", vport_id); + + for (i = 0; i < kinfo->num_tc; i++) { + u16 qsid = vport->qs_offset + i; + + hclge_dbg_dump_qs_shaper_single(hdev, qsid); + } + } +} + +static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev, + const char *cmd_buf) +{ +#define HCLGE_MAX_QSET_NUM 1024 + + u16 qsid; + int ret; + + ret = kstrtou16(cmd_buf, 0, &qsid); + if (ret) { + hclge_dbg_dump_qs_shaper_all(hdev); + return; + } + + if (qsid >= HCLGE_MAX_QSET_NUM) { + dev_err(&hdev->pdev->dev, "qsid(%u) out of range[0-1023]\n", + qsid); + return; + } + + hclge_dbg_dump_qs_shaper_single(hdev, qsid); +} + int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) { #define DUMP_REG "dump reg" @@ -1145,6 +1221,9 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) &cmd_buf[sizeof("dump ncl_config")]); } else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) { hclge_dbg_dump_mac_tnl_status(hdev); + } else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) { + hclge_dbg_dump_qs_shaper(hdev, + &cmd_buf[sizeof("dump qs shaper")]); } else { dev_info(&hdev->pdev->dev, "unknown command\n"); return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index fd7f94372ff0..8a3a4fdc12ed 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -55,6 +55,8 @@ #define HCLGE_LINK_STATUS_MS 10 +#define HCLGE_VF_VPORT_START_NUM 1 + static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps); static int hclge_init_vlan_config(struct hclge_dev *hdev); static void hclge_sync_vlan_filter(struct hclge_dev *hdev); @@ -1182,6 +1184,35 @@ static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability) hclge_parse_backplane_link_mode(hdev, speed_ability); } +static u32 hclge_get_max_speed(u8 speed_ability) +{ + if (speed_ability & HCLGE_SUPPORT_100G_BIT) + return HCLGE_MAC_SPEED_100G; + + if (speed_ability & HCLGE_SUPPORT_50G_BIT) + return HCLGE_MAC_SPEED_50G; + + if (speed_ability & HCLGE_SUPPORT_40G_BIT) + return HCLGE_MAC_SPEED_40G; + + if (speed_ability & HCLGE_SUPPORT_25G_BIT) + return HCLGE_MAC_SPEED_25G; + + if (speed_ability & HCLGE_SUPPORT_10G_BIT) + return HCLGE_MAC_SPEED_10G; + + if (speed_ability & HCLGE_SUPPORT_1G_BIT) + return HCLGE_MAC_SPEED_1G; + + if (speed_ability & HCLGE_SUPPORT_100M_BIT) + return HCLGE_MAC_SPEED_100M; + + if (speed_ability & HCLGE_SUPPORT_10M_BIT) + return HCLGE_MAC_SPEED_10M; + + return HCLGE_MAC_SPEED_1G; +} + static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) { struct hclge_cfg_param_cmd *req; @@ -1352,6 +1383,8 @@ static int hclge_configure(struct hclge_dev *hdev) hclge_parse_link_mode(hdev, cfg.speed_ability); + hdev->hw.mac.max_speed = hclge_get_max_speed(cfg.speed_ability); + if ((hdev->tc_max > HNAE3_MAX_TC) || (hdev->tc_max < 1)) { dev_warn(&hdev->pdev->dev, "TC num = %d.\n", @@ -1633,6 +1666,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) for (i = 0; i < num_vport; i++) { vport->back = hdev; vport->vport_id = i; + vport->vf_info.link_state = IFLA_VF_LINK_STATE_AUTO; vport->mps = HCLGE_MAC_DEFAULT_FRAME; vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE; vport->rxvlan_cfg.rx_vlan_offload_en = true; @@ -2853,6 +2887,62 @@ static int hclge_get_status(struct hnae3_handle *handle) return hdev->hw.mac.link; } +static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf) +{ + if (pci_num_vf(hdev->pdev) == 0) { + dev_err(&hdev->pdev->dev, + "SRIOV is disabled, can not get vport(%d) info.\n", vf); + return NULL; + } + + if (vf < 0 || vf >= pci_num_vf(hdev->pdev)) { + dev_err(&hdev->pdev->dev, + "vf id(%d) is out of range(0 <= vfid < %d)\n", + vf, pci_num_vf(hdev->pdev)); + return NULL; + } + + /* VF start from 1 in vport */ + vf += HCLGE_VF_VPORT_START_NUM; + return &hdev->vport[vf]; +} + +static int hclge_get_vf_config(struct hnae3_handle *handle, int vf, + struct ifla_vf_info *ivf) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + ivf->vf = vf; + ivf->linkstate = vport->vf_info.link_state; + ivf->spoofchk = vport->vf_info.spoofchk; + ivf->trusted = vport->vf_info.trusted; + ivf->min_tx_rate = 0; + ivf->max_tx_rate = vport->vf_info.max_tx_rate; + ether_addr_copy(ivf->mac, vport->vf_info.mac); + + return 0; +} + +static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf, + int link_state) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + vport->vf_info.link_state = link_state; + + return 0; +} + static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) { u32 rst_src_reg, cmdq_src_reg, msix_src_reg; @@ -4558,8 +4648,8 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, int vector, return ret; } -int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, - struct hclge_promisc_param *param) +static int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, + struct hclge_promisc_param *param) { struct hclge_promisc_cfg_cmd *req; struct hclge_desc desc; @@ -4586,8 +4676,9 @@ int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, return ret; } -void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, - bool en_mc, bool en_bc, int vport_id) +static void hclge_promisc_param_init(struct hclge_promisc_param *param, + bool en_uc, bool en_mc, bool en_bc, + int vport_id) { if (!param) return; @@ -4602,12 +4693,21 @@ void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, param->vf_id = vport_id; } +int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc, + bool en_mc_pmc, bool en_bc_pmc) +{ + struct hclge_dev *hdev = vport->back; + struct hclge_promisc_param param; + + hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc, + vport->vport_id); + return hclge_cmd_set_promisc_mode(hdev, ¶m); +} + static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, bool en_mc_pmc) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - struct hclge_promisc_param param; bool en_bc_pmc = true; /* For revision 0x20, if broadcast promisc enabled, vlan filter is @@ -4617,9 +4717,8 @@ static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, if (handle->pdev->revision == 0x20) en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false; - hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc, - vport->vport_id); - return hclge_cmd_set_promisc_mode(hdev, ¶m); + return hclge_set_vport_promisc_mode(vport, en_uc_pmc, en_mc_pmc, + en_bc_pmc); } static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode) @@ -7391,6 +7490,67 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, return return_status; } +static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx, + u8 *mac_addr) +{ + struct hclge_mac_vlan_tbl_entry_cmd req; + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + u16 egress_port = 0; + int i; + + if (is_zero_ether_addr(mac_addr)) + return false; + + memset(&req, 0, sizeof(req)); + hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M, + HCLGE_MAC_EPORT_VFID_S, vport->vport_id); + req.egress_port = cpu_to_le16(egress_port); + hclge_prepare_mac_addr(&req, mac_addr, false); + + if (hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false) != -ENOENT) + return true; + + vf_idx += HCLGE_VF_VPORT_START_NUM; + for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) + if (i != vf_idx && + ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac)) + return true; + + return false; +} + +static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, + u8 *mac_addr) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (ether_addr_equal(mac_addr, vport->vf_info.mac)) { + dev_info(&hdev->pdev->dev, + "Specified MAC(=%pM) is same as before, no change committed!\n", + mac_addr); + return 0; + } + + if (hclge_check_vf_mac_exist(vport, vf, mac_addr)) { + dev_err(&hdev->pdev->dev, "Specified MAC(=%pM) exists!\n", + mac_addr); + return -EEXIST; + } + + ether_addr_copy(vport->vf_info.mac, mac_addr); + dev_info(&hdev->pdev->dev, + "MAC of VF %d has been set to %pM, and it will be reinitialized!\n", + vf, mac_addr); + + return hclge_inform_reset_assert_to_vf(vport); +} + static int hclge_add_mgr_tbl(struct hclge_dev *hdev, const struct hclge_mac_mgr_tbl_entry_cmd *req) { @@ -7564,6 +7724,8 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, __be16 proto) { #define HCLGE_MAX_VF_BYTES 16 + + struct hclge_vport *vport = &hdev->vport[vfid]; struct hclge_vlan_filter_vf_cfg_cmd *req0; struct hclge_vlan_filter_vf_cfg_cmd *req1; struct hclge_desc desc[2]; @@ -7572,10 +7734,18 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, int ret; /* if vf vlan table is full, firmware will close vf vlan filter, it - * is unable and unnecessary to add new vlan id to vf vlan filter + * is unable and unnecessary to add new vlan id to vf vlan filter. + * If spoof check is enable, and vf vlan is full, it shouldn't add + * new vlan, because tx packets with these vlan id will be dropped. */ - if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) + if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) { + if (vport->vf_info.spoofchk && vlan) { + dev_err(&hdev->pdev->dev, + "Can't add vlan due to spoof check is on and vf vlan table is full\n"); + return -EPERM; + } return 0; + } hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_VLAN_FILTER_VF_CFG, false); @@ -8072,12 +8242,15 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) } list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { - if (vlan->hd_tbl_status) - hclge_set_vlan_filter_hw(hdev, - htons(ETH_P_8021Q), - vport->vport_id, - vlan->vlan_id, - false); + int ret; + + if (!vlan->hd_tbl_status) + continue; + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, + vlan->vlan_id, false); + if (ret) + break; } } @@ -9319,6 +9492,219 @@ static void hclge_stats_clear(struct hclge_dev *hdev) memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); } +static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable) +{ + return hclge_config_switch_param(hdev, vf, enable, + HCLGE_SWITCH_ANTI_SPOOF_MASK); +} + +static int hclge_set_vlan_spoofchk(struct hclge_dev *hdev, int vf, bool enable) +{ + return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, + HCLGE_FILTER_FE_NIC_INGRESS_B, + enable, vf); +} + +static int hclge_set_vf_spoofchk_hw(struct hclge_dev *hdev, int vf, bool enable) +{ + int ret; + + ret = hclge_set_mac_spoofchk(hdev, vf, enable); + if (ret) { + dev_err(&hdev->pdev->dev, + "Set vf %d mac spoof check %s failed, ret=%d\n", + vf, enable ? "on" : "off", ret); + return ret; + } + + ret = hclge_set_vlan_spoofchk(hdev, vf, enable); + if (ret) + dev_err(&hdev->pdev->dev, + "Set vf %d vlan spoof check %s failed, ret=%d\n", + vf, enable ? "on" : "off", ret); + + return ret; +} + +static int hclge_set_vf_spoofchk(struct hnae3_handle *handle, int vf, + bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 new_spoofchk = enable ? 1 : 0; + int ret; + + if (hdev->pdev->revision == 0x20) + return -EOPNOTSUPP; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (vport->vf_info.spoofchk == new_spoofchk) + return 0; + + if (enable && test_bit(vport->vport_id, hdev->vf_vlan_full)) + dev_warn(&hdev->pdev->dev, + "vf %d vlan table is full, enable spoof check may cause its packet send fail\n", + vf); + else if (enable && hclge_is_umv_space_full(vport)) + dev_warn(&hdev->pdev->dev, + "vf %d mac table is full, enable spoof check may cause its packet send fail\n", + vf); + + ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, enable); + if (ret) + return ret; + + vport->vf_info.spoofchk = new_spoofchk; + return 0; +} + +static int hclge_reset_vport_spoofchk(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + int ret; + int i; + + if (hdev->pdev->revision == 0x20) + return 0; + + /* resume the vf spoof check state after reset */ + for (i = 0; i < hdev->num_alloc_vport; i++) { + ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, + vport->vf_info.spoofchk); + if (ret) + return ret; + + vport++; + } + + return 0; +} + +static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 new_trusted = enable ? 1 : 0; + bool en_bc_pmc; + int ret; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (vport->vf_info.trusted == new_trusted) + return 0; + + /* Disable promisc mode for VF if it is not trusted any more. */ + if (!enable && vport->vf_info.promisc_enable) { + en_bc_pmc = hdev->pdev->revision != 0x20; + ret = hclge_set_vport_promisc_mode(vport, false, false, + en_bc_pmc); + if (ret) + return ret; + vport->vf_info.promisc_enable = 0; + hclge_inform_vf_promisc_info(vport); + } + + vport->vf_info.trusted = new_trusted; + + return 0; +} + +static void hclge_reset_vf_rate(struct hclge_dev *hdev) +{ + int ret; + int vf; + + /* reset vf rate to default value */ + for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) { + struct hclge_vport *vport = &hdev->vport[vf]; + + vport->vf_info.max_tx_rate = 0; + ret = hclge_tm_qs_shaper_cfg(vport, vport->vf_info.max_tx_rate); + if (ret) + dev_err(&hdev->pdev->dev, + "vf%d failed to reset to default, ret=%d\n", + vf - HCLGE_VF_VPORT_START_NUM, ret); + } +} + +static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf, + int min_tx_rate, int max_tx_rate) +{ + if (min_tx_rate != 0 || + max_tx_rate < 0 || max_tx_rate > hdev->hw.mac.max_speed) { + dev_err(&hdev->pdev->dev, + "min_tx_rate:%d [0], max_tx_rate:%d [0, %u]\n", + min_tx_rate, max_tx_rate, hdev->hw.mac.max_speed); + return -EINVAL; + } + + return 0; +} + +static int hclge_set_vf_rate(struct hnae3_handle *handle, int vf, + int min_tx_rate, int max_tx_rate, bool force) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate); + if (ret) + return ret; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (!force && max_tx_rate == vport->vf_info.max_tx_rate) + return 0; + + ret = hclge_tm_qs_shaper_cfg(vport, max_tx_rate); + if (ret) + return ret; + + vport->vf_info.max_tx_rate = max_tx_rate; + + return 0; +} + +static int hclge_resume_vf_rate(struct hclge_dev *hdev) +{ + struct hnae3_handle *handle = &hdev->vport->nic; + struct hclge_vport *vport; + int ret; + int vf; + + /* resume the vf max_tx_rate after reset */ + for (vf = 0; vf < pci_num_vf(hdev->pdev); vf++) { + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + /* zero means max rate, after reset, firmware already set it to + * max rate, so just continue. + */ + if (!vport->vf_info.max_tx_rate) + continue; + + ret = hclge_set_vf_rate(handle, vf, 0, + vport->vf_info.max_tx_rate, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "vf%d failed to resume tx_rate:%u, ret=%d\n", + vf, vport->vf_info.max_tx_rate, ret); + return ret; + } + } + + return 0; +} + static void hclge_reset_vport_state(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; @@ -9418,6 +9804,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) } hclge_reset_vport_state(hdev); + ret = hclge_reset_vport_spoofchk(hdev); + if (ret) + return ret; + + ret = hclge_resume_vf_rate(hdev); + if (ret) + return ret; dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n", HCLGE_DRIVER_NAME); @@ -9430,6 +9823,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) struct hclge_dev *hdev = ae_dev->priv; struct hclge_mac *mac = &hdev->hw.mac; + hclge_reset_vf_rate(hdev); hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev); @@ -10152,6 +10546,12 @@ static const struct hnae3_ae_ops hclge_ops = { .mac_connect_phy = hclge_mac_connect_phy, .mac_disconnect_phy = hclge_mac_disconnect_phy, .restore_vlan_table = hclge_restore_vlan_table, + .get_vf_config = hclge_get_vf_config, + .set_vf_link_state = hclge_set_vf_link_state, + .set_vf_spoofchk = hclge_set_vf_spoofchk, + .set_vf_trust = hclge_set_vf_trust, + .set_vf_rate = hclge_set_vf_rate, + .set_vf_mac = hclge_set_vf_mac, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 3e9574a9e22d..3153a96729d3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -258,6 +258,7 @@ struct hclge_mac { u8 support_autoneg; u8 speed_type; /* 0: sfp speed, 1: active speed */ u32 speed; + u32 max_speed; u32 speed_ability; /* speed ability supported by current media */ u32 module_type; /* sub media type, e.g. kr/cr/sr/lr */ u32 fec_mode; /* active fec mode */ @@ -885,6 +886,15 @@ struct hclge_port_base_vlan_config { struct hclge_vlan_info vlan_info; }; +struct hclge_vf_info { + int link_state; + u8 mac[ETH_ALEN]; + u32 spoofchk; + u32 max_tx_rate; + u32 trusted; + u16 promisc_enable; +}; + struct hclge_vport { u16 alloc_tqps; /* Allocated Tx/Rx queues */ @@ -916,15 +926,15 @@ struct hclge_vport { unsigned long state; unsigned long last_active_jiffies; u32 mps; /* Max packet size */ + struct hclge_vf_info vf_info; struct list_head uc_mac_list; /* Store VF unicast table */ struct list_head mc_mac_list; /* Store VF multicast table */ struct list_head vlan_list; /* Store VF vlan table */ }; -void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, - bool en_mc, bool en_bc, int vport_id); - +int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc, + bool en_mc_pmc, bool en_bc_pmc); int hclge_add_uc_addr_common(struct hclge_vport *vport, const unsigned char *addr); int hclge_rm_uc_addr_common(struct hclge_vport *vport, @@ -993,4 +1003,5 @@ int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc); void hclge_report_hw_error(struct hclge_dev *hdev, enum hnae3_hw_error_type type); +void hclge_inform_vf_promisc_info(struct hclge_vport *vport); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index f5da28a60d00..97463e11aca7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -205,12 +205,38 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en, static int hclge_set_vf_promisc_mode(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *req) { - bool en_bc = req->msg[1] ? true : false; - struct hclge_promisc_param param; +#define HCLGE_MBX_BC_INDEX 1 +#define HCLGE_MBX_UC_INDEX 2 +#define HCLGE_MBX_MC_INDEX 3 - /* vf is not allowed to enable unicast/multicast broadcast */ - hclge_promisc_param_init(¶m, false, false, en_bc, vport->vport_id); - return hclge_cmd_set_promisc_mode(vport->back, ¶m); + bool en_bc = req->msg[HCLGE_MBX_BC_INDEX] ? true : false; + bool en_uc = req->msg[HCLGE_MBX_UC_INDEX] ? true : false; + bool en_mc = req->msg[HCLGE_MBX_MC_INDEX] ? true : false; + int ret; + + if (!vport->vf_info.trusted) { + en_uc = false; + en_mc = false; + } + + ret = hclge_set_vport_promisc_mode(vport, en_uc, en_mc, en_bc); + if (req->mbx_need_resp) + hclge_gen_resp_to_vf(vport, req, ret, NULL, 0); + + vport->vf_info.promisc_enable = (en_uc || en_mc) ? 1 : 0; + + return ret; +} + +void hclge_inform_vf_promisc_info(struct hclge_vport *vport) +{ + u8 dest_vfid = (u8)vport->vport_id; + u8 msg_data[2]; + + memcpy(&msg_data[0], &vport->vf_info.promisc_enable, sizeof(u16)); + + hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data), + HCLGE_MBX_PUSH_PROMISC_INFO, dest_vfid); } static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, @@ -223,6 +249,20 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_MODIFY) { const u8 *old_addr = (const u8 *)(&mbx_req->msg[8]); + /* If VF MAC has been configured by the host then it + * cannot be overridden by the MAC specified by the VM. + */ + if (!is_zero_ether_addr(vport->vf_info.mac) && + !ether_addr_equal(mac_addr, vport->vf_info.mac)) { + status = -EPERM; + goto out; + } + + if (!is_valid_ether_addr(mac_addr)) { + status = -EINVAL; + goto out; + } + hclge_rm_uc_addr_common(vport, old_addr); status = hclge_add_uc_addr_common(vport, mac_addr); if (status) { @@ -250,6 +290,7 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, return -EIO; } +out: if (mbx_req->mbx_need_resp & HCLGE_MBX_NEED_RESP_BIT) hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0); @@ -324,6 +365,9 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, proto = msg_cmd->proto; status = hclge_set_vlan_filter(handle, cpu_to_be16(proto), vlan, is_kill); + if (mbx_req->mbx_need_resp) + return hclge_gen_resp_to_vf(vport, mbx_req, status, + NULL, 0); } else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) { struct hnae3_handle *handle = &vport->nic; bool en = msg_cmd->is_kill ? true : false; @@ -398,6 +442,13 @@ static int hclge_get_vf_queue_info(struct hclge_vport *vport, HCLGE_TQPS_RSS_INFO_LEN); } +static int hclge_get_vf_mac_addr(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req) +{ + return hclge_gen_resp_to_vf(vport, mbx_req, 0, vport->vf_info.mac, + ETH_ALEN); +} + static int hclge_get_vf_queue_depth(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req, bool gen_resp) @@ -428,6 +479,9 @@ static int hclge_get_vf_media_type(struct hclge_vport *vport, static int hclge_get_link_info(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req) { +#define HCLGE_VF_LINK_STATE_UP 1U +#define HCLGE_VF_LINK_STATE_DOWN 0U + struct hclge_dev *hdev = vport->back; u16 link_status; u8 msg_data[8]; @@ -435,7 +489,19 @@ static int hclge_get_link_info(struct hclge_vport *vport, u16 duplex; /* mac.link can only be 0 or 1 */ - link_status = (u16)hdev->hw.mac.link; + switch (vport->vf_info.link_state) { + case IFLA_VF_LINK_STATE_ENABLE: + link_status = HCLGE_VF_LINK_STATE_UP; + break; + case IFLA_VF_LINK_STATE_DISABLE: + link_status = HCLGE_VF_LINK_STATE_DOWN; + break; + case IFLA_VF_LINK_STATE_AUTO: + default: + link_status = (u16)hdev->hw.mac.link; + break; + } + duplex = hdev->hw.mac.duplex; memcpy(&msg_data[0], &link_status, sizeof(u16)); memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32)); @@ -749,6 +815,13 @@ void hclge_mbx_handler(struct hclge_dev *hdev) case HCLGE_MBX_PUSH_LINK_STATUS: hclge_handle_link_change_event(hdev, req); break; + case HCLGE_MBX_GET_MAC_ADDR: + ret = hclge_get_vf_mac_addr(vport, req); + if (ret) + dev_err(&hdev->pdev->dev, + "PF failed(%d) to get MAC for VF\n", + ret); + break; case HCLGE_MBX_NCSI_ERROR: hclge_handle_ncsi_error(hdev); break; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 9f0e35f27789..09349545c473 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -46,7 +46,7 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, #define DIVISOR_CLK (1000 * 8) #define DIVISOR_IR_B_126 (126 * DIVISOR_CLK) - const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = { + static const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = { 6 * 256, /* Prioriy level */ 6 * 32, /* Prioriy group level */ 6 * 8, /* Port level */ @@ -511,6 +511,49 @@ static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc, u8 grp_id, return hclge_cmd_send(&hdev->hw, &desc, 1); } +int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate) +{ + struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; + struct hclge_qs_shapping_cmd *shap_cfg_cmd; + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + u8 ir_b, ir_u, ir_s; + u32 shaper_para; + int ret, i; + + if (!max_tx_rate) + max_tx_rate = HCLGE_ETHER_MAX_RATE; + + ret = hclge_shaper_para_calc(max_tx_rate, HCLGE_SHAPER_LVL_QSET, + &ir_b, &ir_u, &ir_s); + if (ret) + return ret; + + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); + + for (i = 0; i < kinfo->num_tc; i++) { + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, + false); + + shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data; + shap_cfg_cmd->qs_id = cpu_to_le16(vport->qs_offset + i); + shap_cfg_cmd->qs_shapping_para = cpu_to_le32(shaper_para); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "vf%d, qs%u failed to set tx_rate:%d, ret=%d\n", + vport->vport_id, shap_cfg_cmd->qs_id, + max_tx_rate, ret); + return ret; + } + } + + return 0; +} + static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 818610988d34..95ef6e1204cf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -96,6 +96,12 @@ struct hclge_pg_shapping_cmd { __le32 pg_shapping_para; }; +struct hclge_qs_shapping_cmd { + __le16 qs_id; + u8 rsvd[2]; + __le32 qs_shapping_para; +}; + #define HCLGE_BP_GRP_NUM 32 #define HCLGE_BP_SUB_GRP_ID_S 0 #define HCLGE_BP_SUB_GRP_ID_M GENMASK(4, 0) @@ -154,4 +160,6 @@ int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx); int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr); int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats); int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats); +int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate); + #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index e3090b3dab1d..9c8fd971f9fd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1105,6 +1105,7 @@ static int hclgevf_put_vector(struct hnae3_handle *handle, int vector) } static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, + bool en_uc_pmc, bool en_mc_pmc, bool en_bc_pmc) { struct hclge_mbx_vf_to_pf_cmd *req; @@ -1112,10 +1113,11 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, int ret; req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data; - hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false); req->msg[0] = HCLGE_MBX_SET_PROMISC_MODE; req->msg[1] = en_bc_pmc ? 1 : 0; + req->msg[2] = en_uc_pmc ? 1 : 0; + req->msg[3] = en_mc_pmc ? 1 : 0; ret = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (ret) @@ -1125,9 +1127,17 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, return ret; } -static int hclgevf_set_promisc_mode(struct hclgevf_dev *hdev, bool en_bc_pmc) +static int hclgevf_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, + bool en_mc_pmc) { - return hclgevf_cmd_set_promisc_mode(hdev, en_bc_pmc); + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct pci_dev *pdev = hdev->pdev; + bool en_bc_pmc; + + en_bc_pmc = pdev->revision != 0x20; + + return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc, + en_bc_pmc); } static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id, @@ -1166,11 +1176,37 @@ static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle) } } +static int hclgevf_get_host_mac_addr(struct hclgevf_dev *hdev, u8 *p) +{ + u8 host_mac[ETH_ALEN]; + int status; + + status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_MAC_ADDR, 0, NULL, 0, + true, host_mac, ETH_ALEN); + if (status) { + dev_err(&hdev->pdev->dev, + "fail to get VF MAC from host %d", status); + return status; + } + + ether_addr_copy(p, host_mac); + + return 0; +} + static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + u8 host_mac_addr[ETH_ALEN]; - ether_addr_copy(p, hdev->hw.mac.mac_addr); + if (hclgevf_get_host_mac_addr(hdev, host_mac_addr)) + return; + + hdev->has_pf_mac = !is_zero_ether_addr(host_mac_addr); + if (hdev->has_pf_mac) + ether_addr_copy(p, host_mac_addr); + else + ether_addr_copy(p, hdev->hw.mac.mac_addr); } static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p, @@ -1267,7 +1303,7 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, memcpy(&msg_data[3], &proto, sizeof(proto)); ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN, HCLGE_MBX_VLAN_FILTER, msg_data, - HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0); + HCLGEVF_VLAN_MBX_MSG_LEN, true, NULL, 0); /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence @@ -2626,12 +2662,6 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) return ret; } - if (pdev->revision >= 0x21) { - ret = hclgevf_set_promisc_mode(hdev, true); - if (ret) - return ret; - } - dev_info(&hdev->pdev->dev, "Reset done\n"); return 0; @@ -2706,17 +2736,6 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) if (ret) goto err_config; - /* vf is not allowed to enable unicast/multicast promisc mode. - * For revision 0x20, default to disable broadcast promisc mode, - * firmware makes sure broadcast packets can be accepted. - * For revision 0x21, default to enable broadcast promisc mode. - */ - if (pdev->revision >= 0x21) { - ret = hclgevf_set_promisc_mode(hdev, true); - if (ret) - goto err_config; - } - /* Initialize RSS for this VF */ ret = hclgevf_rss_init_hw(hdev); if (ret) { @@ -3130,6 +3149,7 @@ static const struct hnae3_ae_ops hclgevf_ops = { .get_global_queue_id = hclgevf_get_qid_global, .set_timer_task = hclgevf_set_timer_task, .get_link_mode = hclgevf_get_link_mode, + .set_promisc_mode = hclgevf_set_promisc_mode, }; static struct hnae3_ae_algo ae_algovf = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index bdde3afc286b..ed839408850e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -266,6 +266,7 @@ struct hclgevf_dev { u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ u8 hw_tc_map; + u8 has_pf_mac; u16 num_msi; u16 num_msi_left; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index a108191c9e50..72bacf89f09c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -205,6 +205,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) case HCLGE_MBX_ASSERTING_RESET: case HCLGE_MBX_LINK_STAT_MODE: case HCLGE_MBX_PUSH_VLAN_INFO: + case HCLGE_MBX_PUSH_PROMISC_INFO: /* set this mbx event as pending. This is required as we * might loose interrupt event when mbx task is busy * handling. This shall be cleared when mbx task just @@ -248,6 +249,14 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) crq->next_to_use); } +static void hclgevf_parse_promisc_info(struct hclgevf_dev *hdev, + u16 promisc_info) +{ + if (!promisc_info) + dev_info(&hdev->pdev->dev, + "Promisc mode is closed by host for being untrusted.\n"); +} + void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) { enum hnae3_reset_type reset_type; @@ -313,6 +322,9 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) hclgevf_update_port_base_vlan_info(hdev, state, (u8 *)vlan_info, 8); break; + case HCLGE_MBX_PUSH_PROMISC_INFO: + hclgevf_parse_promisc_info(hdev, msg_q[1]); + break; default: dev_err(&hdev->pdev->dev, "fetched unsupported(%d) message from arq\n", diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 3e863a71c513..7df5d7d211d4 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -148,11 +148,15 @@ static int mdio_sc_cfg_reg_write(struct hns_mdio_device *mdio_dev, { u32 time_cnt; u32 reg_value; + int ret; regmap_write(mdio_dev->subctrl_vbase, cfg_reg, set_val); for (time_cnt = MDIO_TIMEOUT; time_cnt; time_cnt--) { - regmap_read(mdio_dev->subctrl_vbase, st_reg, ®_value); + ret = regmap_read(mdio_dev->subctrl_vbase, st_reg, ®_value); + if (ret) + return ret; + reg_value &= st_msk; if ((!!check_st) == (!!reg_value)) break; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 111b3b8239e1..5fea65256b9d 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2863,7 +2863,7 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status, skb->ip_summed = CHECKSUM_NONE; } -/* Reuse skb if possible, or allocate a new skb and add it to BM pool */ +/* Allocate a new skb and add it to BM pool */ static int mvpp2_rx_refill(struct mvpp2_port *port, struct mvpp2_bm_pool *bm_pool, int pool) { @@ -2871,7 +2871,6 @@ static int mvpp2_rx_refill(struct mvpp2_port *port, phys_addr_t phys_addr; void *buf; - /* No recycle or too many buffers are in use, so allocate a new skb */ buf = mvpp2_buf_alloc(port, bm_pool, &dma_addr, &phys_addr, GFP_ATOMIC); if (!buf) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index fce9b3a24347..22c72fb7206a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3935,13 +3935,17 @@ static void mlx4_restart_one_down(struct pci_dev *pdev); static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload, struct devlink *devlink); -static int mlx4_devlink_reload_down(struct devlink *devlink, +static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct mlx4_priv *priv = devlink_priv(devlink); struct mlx4_dev *dev = &priv->dev; struct mlx4_dev_persistent *persist = dev->persist; + if (netns_change) { + NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported"); + return -EOPNOTSUPP; + } if (persist->num_vfs) mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n"); mlx4_restart_one_down(persist->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index b860569d4247..6c72b592315b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -222,7 +222,8 @@ static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) } static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter, - void *context) + void *context, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_err_ctx *err_ctx = context; @@ -301,7 +302,8 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, } static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_params *params = &priv->channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index bfed558637c2..b468549e96ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -135,7 +135,8 @@ static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) } static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, - void *context) + void *context, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_err_ctx *err_ctx = context; @@ -205,7 +206,8 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, } static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d685122d9ff7..be3c3c704bfc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -390,7 +390,8 @@ static void print_health_info(struct mlx5_core_dev *dev) static int mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); struct mlx5_core_health *health = &dev->priv.health; @@ -491,7 +492,8 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, static int mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx) + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); int err; @@ -545,7 +547,8 @@ static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { static int mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); @@ -555,7 +558,8 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, #define MLX5_CR_DUMP_CHUNK_SIZE 256 static int mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx) + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); u32 crdump_size = dev->priv.health.crdump_size; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 5d20d615663e..13e2944b1274 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -248,9 +248,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct net_device *fib_dev; struct fib_info *fi; - if (!net_eq(info->net, &init_net)) - return NOTIFY_DONE; - if (info->family != AF_INET) return NOTIFY_DONE; @@ -311,8 +308,8 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) return 0; mp->fib_nb.notifier_call = mlx5_lag_fib_event; - err = register_fib_notifier(&mp->fib_nb, - mlx5_lag_fib_event_flush); + err = register_fib_notifier(&init_net, &mp->fib_nb, + mlx5_lag_fib_event_flush, NULL); if (err) mp->fib_nb.notifier_call = NULL; @@ -326,6 +323,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) if (!mp->fib_nb.notifier_call) return; - unregister_fib_notifier(&mp->fib_nb); + unregister_fib_notifier(&init_net, &mp->fib_nb); mp->fib_nb.notifier_call = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 913f1e5aaaf2..d7c7467e2d53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -137,7 +137,8 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool, icm_mr->icm_start_addr = icm_mr->dm.addr; - align_diff = icm_mr->icm_start_addr % align_base; + /* align_base is always a power of 2 */ + align_diff = icm_mr->icm_start_addr & (align_base - 1); if (align_diff) icm_mr->used_length = align_base - align_diff; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 14dcc786926d..2b59f84b14f9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -127,6 +127,16 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_res_query_enabled); +bool +mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, + const struct mlxsw_fw_rev *req_rev) +{ + return rev->minor > req_rev->minor || + (rev->minor == req_rev->minor && + rev->subminor >= req_rev->subminor); +} +EXPORT_SYMBOL(mlxsw_core_fw_rev_minor_subminor_validate); + struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; @@ -985,6 +995,7 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, static int mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, + bool netns_change, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); @@ -1005,7 +1016,7 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, return mlxsw_core_bus_device_register(mlxsw_core->bus_info, mlxsw_core->bus, mlxsw_core->bus_priv, true, - devlink); + devlink, extack); } static int mlxsw_devlink_flash_update(struct devlink *devlink, @@ -1098,7 +1109,8 @@ static int __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink) + struct devlink *devlink, + struct netlink_ext_ack *extack) { const char *device_kind = mlxsw_bus_info->device_kind; struct mlxsw_core *mlxsw_core; @@ -1172,7 +1184,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } if (mlxsw_driver->init) { - err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack); if (err) goto err_driver_init; } @@ -1223,14 +1235,16 @@ err_devlink_alloc: int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink) + struct devlink *devlink, + struct netlink_ext_ack *extack) { bool called_again = false; int err; again: err = __mlxsw_core_bus_device_register(mlxsw_bus_info, mlxsw_bus, - bus_priv, reload, devlink); + bus_priv, reload, + devlink, extack); /* -EAGAIN is returned in case the FW was updated. FW needs * a reset, so lets try to call __mlxsw_core_bus_device_register() * again. diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 5d7d2ab6d155..f25037074e2d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <linux/workqueue.h> +#include <linux/net_namespace.h> #include <net/devlink.h> #include "trap.h" @@ -23,6 +24,7 @@ struct mlxsw_core_port; struct mlxsw_driver; struct mlxsw_bus; struct mlxsw_bus_info; +struct mlxsw_fw_rev; unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); @@ -30,13 +32,18 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); +bool +mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, + const struct mlxsw_fw_rev *req_rev); + int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink); + struct devlink *devlink, + struct netlink_ext_ack *extack); void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, bool reload); struct mlxsw_tx_info { @@ -252,7 +259,8 @@ struct mlxsw_driver { const char *kind; size_t priv_size; int (*init)(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info); + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack); void (*fini)(struct mlxsw_core *mlxsw_core); int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core); int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port, @@ -350,6 +358,11 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, #define MLXSW_CORE_RES_GET(mlxsw_core, short_res_id) \ mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id) +static inline struct net *mlxsw_core_net(struct mlxsw_core *mlxsw_core) +{ + return devlink_net(priv_to_devlink(mlxsw_core)); +} + #define MLXSW_BUS_F_TXRX BIT(0) #define MLXSW_BUS_F_RESET BIT(1) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 5b00726c4346..9bf8da5f6daf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -41,7 +41,7 @@ struct mlxsw_hwmon { struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; u8 sensor_count; - u8 module_sensor_count; + u8 module_sensor_max; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -56,7 +56,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -79,7 +79,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -109,7 +109,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, return -EINVAL; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -336,7 +336,7 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; int index = mlwsw_hwmon_attr->type_index - - mlxsw_hwmon->module_sensor_count + 1; + mlxsw_hwmon->module_sensor_max + 1; return sprintf(buf, "gearbox %03u\n", index); } @@ -528,51 +528,45 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) { - unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core); - char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0}; - int i, index; - u8 width; - int err; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + u8 module_sensor_max; + int i, err; if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core)) return 0; + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &module_sensor_max); + /* Add extra attributes for module temperature. Sensor index is * assigned to sensor_count value, while all indexed before * sensor_count are already utilized by the sensors connected through * mtmp register by mlxsw_hwmon_temp_init(). */ - index = mlxsw_hwmon->sensor_count; - for (i = 1; i < module_count; i++) { - mlxsw_reg_pmlp_pack(pmlp_pl, i); - err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp), - pmlp_pl); - if (err) { - dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n", - i); - return err; - } - width = mlxsw_reg_pmlp_width_get(pmlp_pl); - if (!width) - continue; + mlxsw_hwmon->module_sensor_max = mlxsw_hwmon->sensor_count + + module_sensor_max; + for (i = mlxsw_hwmon->sensor_count; + i < mlxsw_hwmon->module_sensor_max; i++) { mlxsw_hwmon_attr_add(mlxsw_hwmon, - MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index, - index); + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, - index, index); + i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, - MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, - index, index); + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, i, + i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, - index, index); + i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, - index, index); - index++; + i, i); } - mlxsw_hwmon->module_sensor_count = index; return 0; } @@ -590,14 +584,14 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL); + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL, NULL); if (!gbox_num) return 0; - index = mlxsw_hwmon->module_sensor_count; - max_index = mlxsw_hwmon->module_sensor_count + gbox_num; + index = mlxsw_hwmon->module_sensor_max; + max_index = mlxsw_hwmon->module_sensor_max + gbox_num; while (index < max_index) { - sensor_index = index % mlxsw_hwmon->module_sensor_count + + sensor_index = index % mlxsw_hwmon->module_sensor_max + MLXSW_REG_MTMP_GBOX_INDEX_MIN; mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 35a1dc89c28a..c721b171bd8d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -112,6 +112,7 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; + u8 tz_module_num; struct mlxsw_thermal_module *tz_gearbox_arr; u8 tz_gearbox_num; unsigned int tz_highest_score; @@ -775,23 +776,10 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) static int mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal *thermal, u8 local_port) + struct mlxsw_thermal *thermal, u8 module) { struct mlxsw_thermal_module *module_tz; - char pmlp_pl[MLXSW_REG_PMLP_LEN]; - u8 width, module; - int err; - - mlxsw_reg_pmlp_pack(pmlp_pl, local_port); - err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl); - if (err) - return err; - width = mlxsw_reg_pmlp_width_get(pmlp_pl); - if (!width) - return 0; - - module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); module_tz = &thermal->tz_module_arr[module]; /* Skip if parent is already set (case of port split). */ if (module_tz->parent) @@ -819,26 +807,34 @@ static int mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal) { - unsigned int module_count = mlxsw_core_max_ports(core); struct mlxsw_thermal_module *module_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; int i, err; if (!mlxsw_core_res_query_enabled(core)) return 0; - thermal->tz_module_arr = kcalloc(module_count, + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &thermal->tz_module_num); + + thermal->tz_module_arr = kcalloc(thermal->tz_module_num, sizeof(*thermal->tz_module_arr), GFP_KERNEL); if (!thermal->tz_module_arr) return -ENOMEM; - for (i = 1; i < module_count; i++) { + for (i = 0; i < thermal->tz_module_num; i++) { err = mlxsw_thermal_module_init(dev, core, thermal, i); if (err) goto err_unreg_tz_module_arr; } - for (i = 0; i < module_count - 1; i++) { + for (i = 0; i < thermal->tz_module_num; i++) { module_tz = &thermal->tz_module_arr[i]; if (!module_tz->parent) continue; @@ -850,7 +846,7 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, return 0; err_unreg_tz_module_arr: - for (i = module_count - 1; i >= 0; i--) + for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); return err; @@ -859,13 +855,12 @@ err_unreg_tz_module_arr: static void mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) { - unsigned int module_count = mlxsw_core_max_ports(thermal->core); int i; if (!mlxsw_core_res_query_enabled(thermal->core)) return; - for (i = module_count - 1; i >= 0; i--) + for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); } @@ -913,7 +908,8 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL); + mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL, + NULL); if (!thermal->tz_gearbox_num) return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 95f408d0e103..34566eb62c47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -640,7 +640,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info, &mlxsw_i2c_bus, mlxsw_i2c, false, - NULL); + NULL, NULL); if (err) { dev_err(&client->dev, "Fail to register core bus\n"); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 471b0ca6d69a..2b543911ae00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -16,6 +16,14 @@ static const char mlxsw_m_driver_name[] = "mlxsw_minimal"; +#define MLXSW_M_FWREV_MINOR 2000 +#define MLXSW_M_FWREV_SUBMINOR 1886 + +static const struct mlxsw_fw_rev mlxsw_m_fw_rev = { + .minor = MLXSW_M_FWREV_MINOR, + .subminor = MLXSW_M_FWREV_SUBMINOR, +}; + struct mlxsw_m_port; struct mlxsw_m { @@ -172,6 +180,7 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) } SET_NETDEV_DEV(dev, mlxsw_m->bus_info->dev); + dev_net_set(dev, mlxsw_core_net(mlxsw_m->core)); mlxsw_m_port = netdev_priv(dev); mlxsw_m_port->dev = dev; mlxsw_m_port->mlxsw_m = mlxsw_m; @@ -325,8 +334,27 @@ static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m) kfree(mlxsw_m->ports); } +static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m) +{ + const struct mlxsw_fw_rev *rev = &mlxsw_m->bus_info->fw_rev; + + /* Validate driver and FW are compatible. + * Do not check major version, since it defines chip type, while + * driver is supposed to support any type. + */ + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, &mlxsw_m_fw_rev)) + return 0; + + dev_err(mlxsw_m->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, rev->major, + mlxsw_m_fw_rev.minor, mlxsw_m_fw_rev.subminor); + + return -EINVAL; +} + static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); int err; @@ -334,6 +362,10 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, mlxsw_m->core = mlxsw_core; mlxsw_m->bus_info = mlxsw_bus_info; + err = mlxsw_m_fw_rev_validate(mlxsw_m); + if (err) + return err; + err = mlxsw_m_base_mac_get(mlxsw_m); if (err) { dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 615455a21567..914c33e46fb4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -284,15 +284,18 @@ static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q, static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { + int tclass; int i; int err; q->producer_counter = 0; q->consumer_counter = 0; + tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC : + MLXSW_PCI_SDQ_CTL_TC; /* Set CQ of same number of this SDQ. */ mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num); - mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 3); + mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass); mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); @@ -963,6 +966,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox); if (num_sdqs + num_rdqs > num_cqs || + num_sdqs < MLXSW_PCI_SDQS_MIN || num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) { dev_err(&pdev->dev, "Unsupported number of queues\n"); return -EINVAL; @@ -1520,7 +1524,15 @@ static struct mlxsw_pci_queue * mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci, const struct mlxsw_tx_info *tx_info) { - u8 sdqn = tx_info->local_port % mlxsw_pci_sdq_count(mlxsw_pci); + u8 ctl_sdq_count = mlxsw_pci_sdq_count(mlxsw_pci) - 1; + u8 sdqn; + + if (tx_info->is_emad) { + sdqn = MLXSW_PCI_SDQ_EMAD_INDEX; + } else { + BUILD_BUG_ON(MLXSW_PCI_SDQ_EMAD_INDEX != 0); + sdqn = 1 + (tx_info->local_port % ctl_sdq_count); + } return mlxsw_pci_sdq_get(mlxsw_pci, sdqn); } @@ -1790,7 +1802,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus, mlxsw_pci, false, - NULL); + NULL, NULL); if (err) { dev_err(&pdev->dev, "cannot register bus device\n"); goto err_bus_device_register; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index e57e42e2d2b2..2b3aec482742 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -51,6 +51,11 @@ #define MLXSW_PCI_EQ_ASYNC_NUM 0 #define MLXSW_PCI_EQ_COMP_NUM 1 +#define MLXSW_PCI_SDQS_MIN 2 /* EMAD and control traffic */ +#define MLXSW_PCI_SDQ_EMAD_INDEX 0 +#define MLXSW_PCI_SDQ_EMAD_TC 0 +#define MLXSW_PCI_SDQ_CTL_TC 3 + #define MLXSW_PCI_AQ_PAGES 8 #define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES) #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5494cf93f34c..7b538e698a3d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -9531,6 +9531,12 @@ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8); */ MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8); +/* num_of_modules + * Number of modules. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, num_of_modules, 0x04, 0, 8); + static inline void mlxsw_reg_mgpir_pack(char *payload) { MLXSW_REG_ZERO(mgpir, payload); @@ -9539,7 +9545,7 @@ static inline void mlxsw_reg_mgpir_pack(char *payload) static inline void mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, enum mlxsw_reg_mgpir_device_type *device_type, - u8 *devices_per_flash) + u8 *devices_per_flash, u8 *num_of_modules) { if (num_of_devices) *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload); @@ -9548,6 +9554,8 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, if (devices_per_flash) *devices_per_flash = mlxsw_reg_mgpir_devices_per_flash_get(payload); + if (num_of_modules) + *num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload); } /* TNGCR - Tunneling NVE General Configuration Register diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index dcf9562bce8a..3c5154e559b2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -409,9 +409,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) } if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) == MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) && - (rev->minor > req_rev->minor || - (rev->minor == req_rev->minor && - rev->subminor >= req_rev->subminor))) + mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) return 0; dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n", @@ -3635,6 +3633,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_etherdev; } SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev); + dev_net_set(dev, mlxsw_sp_net(mlxsw_sp)); mlxsw_sp_port = netdev_priv(dev); mlxsw_sp_port->dev = dev; mlxsw_sp_port->mlxsw_sp = mlxsw_sp; @@ -4738,7 +4737,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr); static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); int err; @@ -4831,7 +4831,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_acl_init; } - err = mlxsw_sp_router_init(mlxsw_sp); + err = mlxsw_sp_router_init(mlxsw_sp, extack); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); goto err_router_init; @@ -4864,7 +4864,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, * respin. */ mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; - err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb); + err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n"); goto err_netdev_notifier; @@ -4887,7 +4888,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, err_ports_create: mlxsw_sp_dpipe_fini(mlxsw_sp); err_dpipe_init: - unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); err_netdev_notifier: if (mlxsw_sp->clock) mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); @@ -4924,7 +4926,8 @@ err_fids_init: } static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); @@ -4944,11 +4947,12 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->listeners = mlxsw_sp1_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); - return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); @@ -4964,7 +4968,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; - return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) @@ -4973,7 +4977,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); - unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); if (mlxsw_sp->clock) { mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index b2a0028b1694..8f99d70d6b8b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -14,6 +14,7 @@ #include <linux/dcbnl.h> #include <linux/in6.h> #include <linux/notifier.h> +#include <linux/net_namespace.h> #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> @@ -524,7 +525,8 @@ union mlxsw_sp_l3addr { struct in6_addr addr6; }; -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, unsigned long event, void *ptr); @@ -982,4 +984,9 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, const struct devlink_trap_group *group); +static inline struct net *mlxsw_sp_net(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_core_net(mlxsw_sp->core); +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index 17f334b46c40..2153bcc4b585 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -870,7 +870,7 @@ void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_vni(fid, &vni))) goto out; - nve_dev = dev_get_by_index(&init_net, nve_ifindex); + nve_dev = dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); if (!nve_dev) goto out; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a330b369e899..0e99b64450ca 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -16,6 +16,7 @@ #include <linux/if_macvlan.h> #include <linux/refcount.h> #include <linux/jhash.h> +#include <linux/net_namespace.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -2551,14 +2552,14 @@ static int mlxsw_sp_router_schedule_work(struct net *net, struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_router *router; - if (!net_eq(net, &init_net)) + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp))) return NOTIFY_DONE; net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); if (!net_work) return NOTIFY_BAD; - router = container_of(nb, struct mlxsw_sp_router, netevent_nb); INIT_WORK(&net_work->work, cb); net_work->mlxsw_sp = router->mlxsw_sp; mlxsw_core_schedule_work(&net_work->work); @@ -6019,12 +6020,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, @@ -6065,12 +6060,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) fib_work->fib6_work.nrt6); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; } rtnl_unlock(); kfree(fib_work); @@ -6112,12 +6101,6 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) &fib_work->ven_info); dev_put(fib_work->ven_info.dev); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; } rtnl_unlock(); kfree(fib_work); @@ -6213,7 +6196,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, rule = fr_info->rule; /* Rule only affects locally generated traffic */ - if (rule->iifindex == info->net->loopback_dev->ifindex) + if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex) return 0; switch (info->family) { @@ -6250,8 +6233,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_router *router; int err; - if (!net_eq(info->net, &init_net) || - (info->family != AF_INET && info->family != AF_INET6 && + if ((info->family != AF_INET && info->family != AF_INET6 && info->family != RTNL_FAMILY_IPMR && info->family != RTNL_FAMILY_IP6MR)) return NOTIFY_DONE; @@ -6263,9 +6245,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, case FIB_EVENT_RULE_DEL: err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); - if (!err || info->extack) - return notifier_from_errno(err); - break; + return notifier_from_errno(err); case FIB_EVENT_ENTRY_ADD: case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_APPEND: /* fall through */ @@ -7974,9 +7954,10 @@ static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field) mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true); } -static void mlxsw_sp_mp4_hash_init(char *recr2_pl) +static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl) { - bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy; + struct net *net = mlxsw_sp_net(mlxsw_sp); + bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy; mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP); @@ -7991,9 +7972,9 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl) mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT); } -static void mlxsw_sp_mp6_hash_init(char *recr2_pl) +static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl) { - bool only_l3 = !ip6_multipath_hash_policy(&init_net); + bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp)); mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); @@ -8021,8 +8002,8 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0); mlxsw_reg_recr2_pack(recr2_pl, seed); - mlxsw_sp_mp4_hash_init(recr2_pl); - mlxsw_sp_mp6_hash_init(recr2_pl); + mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl); + mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); } @@ -8053,7 +8034,8 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { - bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority; + struct net *net = mlxsw_sp_net(mlxsw_sp); + bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; int err; @@ -8079,7 +8061,8 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) { struct mlxsw_sp_router *router; int err; @@ -8155,8 +8138,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) goto err_dscp_init; mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; - err = register_fib_notifier(&mlxsw_sp->router->fib_nb, - mlxsw_sp_router_fib_dump_flush); + err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb, + mlxsw_sp_router_fib_dump_flush, extack); if (err) goto err_register_fib_notifier; @@ -8195,7 +8179,8 @@ err_register_inetaddr_notifier: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { - unregister_fib_notifier(&mlxsw_sp->router->fib_nb); + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb); unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 5ecb45118400..a3af171c6358 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2591,7 +2591,7 @@ __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp, if (err) return err; - dev = __dev_get_by_index(&init_net, nve_ifindex); + dev = __dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); if (!dev) return -EINVAL; *nve_dev = dev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c index 0d9356b3f65d..4ff1e623aa76 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c @@ -446,7 +446,8 @@ static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) } static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core); int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 1c14c051ee52..de6cb22f68b1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -992,6 +992,7 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port, if (!dev) return -ENOMEM; SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev); + dev_net_set(dev, mlxsw_core_net(mlxsw_sx->core)); mlxsw_sx_port = netdev_priv(dev); mlxsw_sx_port->dev = dev; mlxsw_sx_port->mlxsw_sx = mlxsw_sx; @@ -1563,7 +1564,8 @@ static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) } static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core); int err; diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index b063eb78fa0c..aac115136720 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -388,13 +388,14 @@ static int mscc_ocelot_probe(struct platform_device *pdev) continue; phy = of_phy_find_device(phy_node); + of_node_put(phy_node); if (!phy) continue; err = ocelot_probe_port(ocelot, port, regs, phy); if (err) { of_node_put(portnp); - return err; + goto out_put_ports; } phy_mode = of_get_phy_mode(portnp); @@ -422,7 +423,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) "invalid phy mode for port%d, (Q)SGMII only\n", port); of_node_put(portnp); - return -EINVAL; + err = -EINVAL; + goto out_put_ports; } serdes = devm_of_phy_get(ocelot->dev, portnp, NULL); @@ -435,7 +437,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) "missing SerDes phys for port%d\n", port); - goto err_probe_ports; + of_node_put(portnp); + goto out_put_ports; } ocelot->ports[port]->serdes = serdes; @@ -447,9 +450,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Ocelot switch probed\n"); - return 0; - -err_probe_ports: +out_put_ports: + of_node_put(ports); return err; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 5afcb3c4c2ef..c80bb83c8ac9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -3952,7 +3952,7 @@ static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog) static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta1, *meta2; - const s32 exp_mask[] = { + static const s32 exp_mask[] = { [BPF_B] = 0x000000ffU, [BPF_H] = 0x0000ffffU, [BPF_W] = 0xffffffffU, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 61aabffc8888..bcdcd6de7dea 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -872,7 +872,8 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, /* jump forward, a TX may have gotten lost, need to sync TX */ if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4) - tls_offload_tx_resync_request(nskb->sk); + tls_offload_tx_resync_request(nskb->sk, seq, + ntls->next_seq); *nr_frags = 0; return nskb; diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index bd0583e409df..d25b88f53de4 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -20,6 +20,7 @@ if NET_VENDOR_PENSANDO config IONIC tristate "Pensando Ethernet IONIC Support" depends on 64BIT && PCI + select NET_DEVLINK help This enables the support for the Pensando family of Ethernet adapters. More specific information on this driver can be diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index af1647afa4e8..6fb27dcc5787 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -19,31 +19,30 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, err = devlink_info_driver_name_put(req, IONIC_DRV_NAME); if (err) - goto info_out; + return err; err = devlink_info_version_running_put(req, DEVLINK_INFO_VERSION_GENERIC_FW, idev->dev_info.fw_version); if (err) - goto info_out; + return err; snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_type); err = devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, buf); if (err) - goto info_out; + return err; snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_rev); err = devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, buf); if (err) - goto info_out; + return err; err = devlink_info_serial_number_put(req, idev->dev_info.serial_num); -info_out: return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 7d10265f782a..f778fff034f5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -254,12 +254,9 @@ static int ionic_set_link_ksettings(struct net_device *netdev, struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; struct ionic_dev *idev; - u32 req_rs, req_fc; - u8 fec_type; int err = 0; idev = &lif->ionic->idev; - fec_type = IONIC_PORT_FEC_TYPE_NONE; /* set autoneg */ if (ks->base.autoneg != idev->port_info->config.an_enable) { @@ -281,29 +278,6 @@ static int ionic_set_link_ksettings(struct net_device *netdev, return err; } - /* set FEC */ - req_rs = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_RS); - req_fc = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_BASER); - if (req_rs && req_fc) { - netdev_info(netdev, "Only select one FEC mode at a time\n"); - return -EINVAL; - } else if (req_fc) { - fec_type = IONIC_PORT_FEC_TYPE_FC; - } else if (req_rs) { - fec_type = IONIC_PORT_FEC_TYPE_RS; - } else if (!(req_rs | req_fc)) { - fec_type = IONIC_PORT_FEC_TYPE_NONE; - } - - if (fec_type != idev->port_info->config.fec_type) { - mutex_lock(&ionic->dev_cmd_lock); - ionic_dev_cmd_port_fec(idev, fec_type); - err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); - mutex_unlock(&ionic->dev_cmd_lock); - if (err) - return err; - } - return 0; } @@ -353,6 +327,70 @@ static int ionic_set_pauseparam(struct net_device *netdev, return 0; } +static int ionic_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + switch (lif->ionic->idev.port_info->config.fec_type) { + case IONIC_PORT_FEC_TYPE_NONE: + fec->active_fec = ETHTOOL_FEC_OFF; + break; + case IONIC_PORT_FEC_TYPE_RS: + fec->active_fec = ETHTOOL_FEC_RS; + break; + case IONIC_PORT_FEC_TYPE_FC: + fec->active_fec = ETHTOOL_FEC_BASER; + break; + } + + fec->fec = ETHTOOL_FEC_OFF | ETHTOOL_FEC_RS | ETHTOOL_FEC_BASER; + + return 0; +} + +static int ionic_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct ionic_lif *lif = netdev_priv(netdev); + u8 fec_type; + int ret = 0; + + if (lif->ionic->idev.port_info->config.an_enable) { + netdev_err(netdev, "FEC request not allowed while autoneg is enabled\n"); + return -EINVAL; + } + + switch (fec->fec) { + case ETHTOOL_FEC_NONE: + fec_type = IONIC_PORT_FEC_TYPE_NONE; + break; + case ETHTOOL_FEC_OFF: + fec_type = IONIC_PORT_FEC_TYPE_NONE; + break; + case ETHTOOL_FEC_RS: + fec_type = IONIC_PORT_FEC_TYPE_RS; + break; + case ETHTOOL_FEC_BASER: + fec_type = IONIC_PORT_FEC_TYPE_FC; + break; + case ETHTOOL_FEC_AUTO: + default: + netdev_err(netdev, "FEC request 0x%04x not supported\n", + fec->fec); + return -EINVAL; + } + + if (fec_type != lif->ionic->idev.port_info->config.fec_type) { + mutex_lock(&lif->ionic->dev_cmd_lock); + ionic_dev_cmd_port_fec(&lif->ionic->idev, fec_type); + ret = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT); + mutex_unlock(&lif->ionic->dev_cmd_lock); + } + + return ret; +} + static int ionic_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce) { @@ -372,7 +410,6 @@ static int ionic_set_coalesce(struct net_device *netdev, struct ionic_identity *ident; struct ionic_qcq *qcq; unsigned int i; - u32 usecs; u32 coal; if (coalesce->rx_max_coalesced_frames || @@ -410,26 +447,27 @@ static int ionic_set_coalesce(struct net_device *netdev, return -EINVAL; } + /* Convert the usec request to a HW useable value. If they asked + * for non-zero and it resolved to zero, bump it up + */ coal = ionic_coal_usec_to_hw(lif->ionic, coalesce->rx_coalesce_usecs); - - if (coal > IONIC_INTR_CTRL_COAL_MAX) - return -ERANGE; - - /* If they asked for non-zero and it resolved to zero, bump it up */ if (!coal && coalesce->rx_coalesce_usecs) coal = 1; - /* Convert it back to get device resolution */ - usecs = ionic_coal_hw_to_usec(lif->ionic, coal); + if (coal > IONIC_INTR_CTRL_COAL_MAX) + return -ERANGE; - if (usecs != lif->rx_coalesce_usecs) { - lif->rx_coalesce_usecs = usecs; + /* Save the new value */ + lif->rx_coalesce_usecs = coalesce->rx_coalesce_usecs; + if (coal != lif->rx_coalesce_hw) { + lif->rx_coalesce_hw = coal; if (test_bit(IONIC_LIF_UP, lif->state)) { for (i = 0; i < lif->nxqs; i++) { qcq = lif->rxqcqs[i].qcq; ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - qcq->intr.index, coal); + qcq->intr.index, + lif->rx_coalesce_hw); } } } @@ -453,6 +491,7 @@ static int ionic_set_ringparam(struct net_device *netdev, { struct ionic_lif *lif = netdev_priv(netdev); bool running; + int err; if (ring->rx_mini_pending || ring->rx_jumbo_pending) { netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n"); @@ -470,8 +509,9 @@ static int ionic_set_ringparam(struct net_device *netdev, ring->rx_pending == lif->nrxq_descs) return 0; - if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) - return -EBUSY; + err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET); + if (err) + return err; running = test_bit(IONIC_LIF_UP, lif->state); if (running) @@ -504,6 +544,7 @@ static int ionic_set_channels(struct net_device *netdev, { struct ionic_lif *lif = netdev_priv(netdev); bool running; + int err; if (!ch->combined_count || ch->other_count || ch->rx_count || ch->tx_count) @@ -512,8 +553,9 @@ static int ionic_set_channels(struct net_device *netdev, if (ch->combined_count == lif->nxqs) return 0; - if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) - return -EBUSY; + err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET); + if (err) + return err; running = test_bit(IONIC_LIF_UP, lif->state); if (running) @@ -747,6 +789,7 @@ static const struct ethtool_ops ionic_ethtool_ops = { .get_regs = ionic_get_regs, .get_link = ethtool_op_get_link, .get_link_ksettings = ionic_get_link_ksettings, + .set_link_ksettings = ionic_set_link_ksettings, .get_coalesce = ionic_get_coalesce, .set_coalesce = ionic_set_coalesce, .get_ringparam = ionic_get_ringparam, @@ -769,7 +812,8 @@ static const struct ethtool_ops ionic_ethtool_ops = { .get_module_eeprom = ionic_get_module_eeprom, .get_pauseparam = ionic_get_pauseparam, .set_pauseparam = ionic_set_pauseparam, - .set_link_ksettings = ionic_set_link_ksettings, + .get_fecparam = ionic_get_fecparam, + .set_fecparam = ionic_set_fecparam, .nway_reset = ionic_nway_reset, }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 72107a0627a9..559b96ae48f5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -242,6 +242,21 @@ static int ionic_qcq_disable(struct ionic_qcq *qcq) return ionic_adminq_post_wait(lif, &ctx); } +static void ionic_lif_quiesce(struct ionic_lif *lif) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .attr = IONIC_LIF_ATTR_STATE, + .index = lif->index, + .state = IONIC_LIF_DISABLE + }, + }; + + ionic_adminq_post_wait(lif, &ctx); +} + static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq) { struct ionic_dev *idev = &lif->ionic->idev; @@ -1430,7 +1445,6 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) unsigned int flags; unsigned int i; int err = 0; - u32 coal; flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG; for (i = 0; i < lif->nxqs; i++) { @@ -1447,7 +1461,6 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) } flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_INTR; - coal = ionic_coal_usec_to_hw(lif->ionic, lif->rx_coalesce_usecs); for (i = 0; i < lif->nxqs; i++) { err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, lif->nrxq_descs, @@ -1460,7 +1473,8 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats; ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[i].qcq->intr.index, coal); + lif->rxqcqs[i].qcq->intr.index, + lif->rx_coalesce_hw); ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq, lif->txqcqs[i].qcq); } @@ -1590,6 +1604,7 @@ int ionic_stop(struct net_device *netdev) netif_tx_disable(netdev); ionic_txrx_disable(lif); + ionic_lif_quiesce(lif); ionic_txrx_deinit(lif); ionic_txrx_free(lif); @@ -1619,8 +1634,9 @@ int ionic_reset_queues(struct ionic_lif *lif) /* Put off the next watchdog timeout */ netif_trans_update(lif->netdev); - if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) - return -EBUSY; + err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET); + if (err) + return err; running = netif_running(lif->netdev); if (running) @@ -1639,7 +1655,6 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index struct net_device *netdev; struct ionic_lif *lif; int tbl_sz; - u32 coal; int err; netdev = alloc_etherdev_mqs(sizeof(*lif), @@ -1670,8 +1685,9 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index lif->nrxq_descs = IONIC_DEF_TXRX_DESC; /* Convert the default coalesce value to actual hw resolution */ - coal = ionic_coal_usec_to_hw(lif->ionic, IONIC_ITR_COAL_USEC_DEFAULT); - lif->rx_coalesce_usecs = ionic_coal_hw_to_usec(lif->ionic, coal); + lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT; + lif->rx_coalesce_hw = ionic_coal_hw_to_usec(lif->ionic, + lif->rx_coalesce_usecs); snprintf(lif->name, sizeof(lif->name), "lif%u", index); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 812190e729c2..cf243a9d0168 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -175,7 +175,9 @@ struct ionic_lif { unsigned long *dbid_inuse; unsigned int dbid_count; struct dentry *dentry; - u32 rx_coalesce_usecs; + u32 rx_coalesce_usecs; /* what the user asked for */ + u32 rx_coalesce_hw; /* what the hw is using */ + u32 flags; struct work_struct tx_timeout_work; }; @@ -185,15 +187,10 @@ struct ionic_lif { #define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q) #define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q) +/* return 0 if successfully set the bit, else non-zero */ static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname) { - unsigned long tlimit = jiffies + HZ; - - while (test_and_set_bit(bitname, lif->state) && - time_before(jiffies, tlimit)) - usleep_range(100, 200); - - return test_bit(bitname, lif->state); + return wait_on_bit_lock(lif->state, bitname, TASK_INTERRUPTIBLE); } static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 457444894d80..b4b8ba00ee01 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2787,6 +2787,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) netdev_err(qdev->ndev, "PCI mapping failed with error: %d\n", err); + dev_kfree_skb_irq(skb); ql_free_large_buffers(qdev); return -ENOMEM; } diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 786b158bd305..bc4f951315da 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2189,9 +2189,6 @@ static int rocker_router_fib_event(struct notifier_block *nb, struct rocker_fib_event_work *fib_work; struct fib_notifier_info *info = ptr; - if (!net_eq(info->net, &init_net)) - return NOTIFY_DONE; - if (info->family != AF_INET) return NOTIFY_DONE; @@ -2994,7 +2991,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) * the device, so no need to pass a callback. */ rocker->fib_nb.notifier_call = rocker_router_fib_event; - err = register_fib_notifier(&rocker->fib_nb, NULL); + err = register_fib_notifier(&init_net, &rocker->fib_nb, NULL, NULL); if (err) goto err_register_fib_notifier; @@ -3021,7 +3018,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_register_switchdev_blocking_notifier: unregister_switchdev_notifier(&rocker_switchdev_notifier); err_register_switchdev_notifier: - unregister_fib_notifier(&rocker->fib_nb); + unregister_fib_notifier(&init_net, &rocker->fib_nb); err_register_fib_notifier: rocker_remove_ports(rocker); err_probe_ports: @@ -3057,7 +3054,7 @@ static void rocker_remove(struct pci_dev *pdev) unregister_switchdev_blocking_notifier(nb); unregister_switchdev_notifier(&rocker_switchdev_notifier); - unregister_fib_notifier(&rocker->fib_nb); + unregister_fib_notifier(&init_net, &rocker->fib_nb); rocker_remove_ports(rocker); rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); destroy_workqueue(rocker->rocker_owq); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 55db7fbd43cc..f9e6744d8fd6 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -282,7 +282,6 @@ struct netsec_desc_ring { void *vaddr; u16 head, tail; u16 xdp_xmit; /* netsec_xdp_xmit packets */ - bool is_xdp; struct page_pool *page_pool; struct xdp_rxq_info xdp_rxq; spinlock_t lock; /* XDP tx queue locking */ @@ -634,8 +633,7 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) unsigned int bytes; int cnt = 0; - if (dring->is_xdp) - spin_lock(&dring->lock); + spin_lock(&dring->lock); bytes = 0; entry = dring->vaddr + DESC_SZ * tail; @@ -682,8 +680,8 @@ next: entry = dring->vaddr + DESC_SZ * tail; cnt++; } - if (dring->is_xdp) - spin_unlock(&dring->lock); + + spin_unlock(&dring->lock); if (!cnt) return false; @@ -799,9 +797,6 @@ static void netsec_set_tx_de(struct netsec_priv *priv, de->data_buf_addr_lw = lower_32_bits(desc->dma_addr); de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len; de->attr = attr; - /* under spin_lock if using XDP */ - if (!dring->is_xdp) - dma_wmb(); dring->desc[idx] = *desc; if (desc->buf_type == TYPE_NETSEC_SKB) @@ -1123,12 +1118,10 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, u16 tso_seg_len = 0; int filled; - if (dring->is_xdp) - spin_lock_bh(&dring->lock); + spin_lock_bh(&dring->lock); filled = netsec_desc_used(dring); if (netsec_check_stop_tx(priv, filled)) { - if (dring->is_xdp) - spin_unlock_bh(&dring->lock); + spin_unlock_bh(&dring->lock); net_warn_ratelimited("%s %s Tx queue full\n", dev_name(priv->dev), ndev->name); return NETDEV_TX_BUSY; @@ -1161,8 +1154,7 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, tx_desc.dma_addr = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) { - if (dring->is_xdp) - spin_unlock_bh(&dring->lock); + spin_unlock_bh(&dring->lock); netif_err(priv, drv, priv->ndev, "%s: DMA mapping failed\n", __func__); ndev->stats.tx_dropped++; @@ -1177,8 +1169,7 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, netdev_sent_queue(priv->ndev, skb->len); netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb); - if (dring->is_xdp) - spin_unlock_bh(&dring->lock); + spin_unlock_bh(&dring->lock); netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */ return NETDEV_TX_OK; @@ -1262,7 +1253,6 @@ err: static void netsec_setup_tx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; - struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog); int i; for (i = 0; i < DESC_NUM; i++) { @@ -1275,12 +1265,6 @@ static void netsec_setup_tx_dring(struct netsec_priv *priv) */ de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; } - - if (xdp_prog) - dring->is_xdp = true; - else - dring->is_xdp = false; - } static int netsec_setup_rx_dring(struct netsec_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 79f2ee37afed..cea7a0c7ce68 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -130,6 +130,31 @@ static void mt2712_delay_ps2stage(struct mediatek_dwmac_plat_data *plat) } } +static void mt2712_delay_stage2ps(struct mediatek_dwmac_plat_data *plat) +{ + struct mac_delay_struct *mac_delay = &plat->mac_delay; + + switch (plat->phy_mode) { + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_RMII: + /* 550ps per stage for MII/RMII */ + mac_delay->tx_delay *= 550; + mac_delay->rx_delay *= 550; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: + /* 170ps per stage for RGMII */ + mac_delay->tx_delay *= 170; + mac_delay->rx_delay *= 170; + break; + default: + dev_err(plat->dev, "phy interface not supported\n"); + break; + } +} + static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat) { struct mac_delay_struct *mac_delay = &plat->mac_delay; @@ -199,6 +224,8 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat) regmap_write(plat->peri_regmap, PERI_ETH_DLY, delay_val); regmap_write(plat->peri_regmap, PERI_ETH_DLY_FINE, fine_val); + mt2712_delay_stage2ps(plat); + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 3d69da112625..d0356fbd1e43 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -130,7 +130,6 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits, writel(mcfilterbits[0], ioaddr + GMAC_HASH_LOW); writel(mcfilterbits[1], ioaddr + GMAC_HASH_HIGH); return; - break; case 7: numhashregs = 4; break; @@ -140,7 +139,6 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits, default: pr_debug("STMMAC: err in setting multicast filter\n"); return; - break; } for (regs = 0; regs < numhashregs; regs++) writel(mcfilterbits[regs], diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 89a3420eba42..07e97f45755d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -43,6 +43,10 @@ #define GMAC_ARP_ADDR 0x00000210 #define GMAC_ADDR_HIGH(reg) (0x300 + reg * 8) #define GMAC_ADDR_LOW(reg) (0x304 + reg * 8) +#define GMAC_L3L4_CTRL(reg) (0x900 + (reg) * 0x30) +#define GMAC_L4_ADDR(reg) (0x904 + (reg) * 0x30) +#define GMAC_L3_ADDR0(reg) (0x910 + (reg) * 0x30) +#define GMAC_L3_ADDR1(reg) (0x914 + (reg) * 0x30) /* RX Queues Routing */ #define GMAC_RXQCTRL_AVCPQ_MASK GENMASK(2, 0) @@ -67,6 +71,7 @@ #define GMAC_PACKET_FILTER_PCF BIT(7) #define GMAC_PACKET_FILTER_HPF BIT(10) #define GMAC_PACKET_FILTER_VTFE BIT(16) +#define GMAC_PACKET_FILTER_IPFE BIT(20) #define GMAC_MAX_PERFECT_ADDRESSES 128 @@ -202,9 +207,11 @@ enum power_event { #define GMAC_HW_FEAT_MIISEL BIT(0) /* MAC HW features1 bitmap */ +#define GMAC_HW_FEAT_L3L4FNUM GENMASK(30, 27) #define GMAC_HW_HASH_TB_SZ GENMASK(25, 24) #define GMAC_HW_FEAT_AVSEL BIT(20) #define GMAC_HW_TSOEN BIT(18) +#define GMAC_HW_ADDR64 GENMASK(15, 14) #define GMAC_HW_TXFIFOSIZE GENMASK(10, 6) #define GMAC_HW_RXFIFOSIZE GENMASK(4, 0) @@ -227,6 +234,21 @@ enum power_event { #define GMAC_HI_DCS_SHIFT 16 #define GMAC_HI_REG_AE BIT(31) +/* L3/L4 Filters regs */ +#define GMAC_L4DPIM0 BIT(21) +#define GMAC_L4DPM0 BIT(20) +#define GMAC_L4SPIM0 BIT(19) +#define GMAC_L4SPM0 BIT(18) +#define GMAC_L4PEN0 BIT(16) +#define GMAC_L3DAIM0 BIT(5) +#define GMAC_L3DAM0 BIT(4) +#define GMAC_L3SAIM0 BIT(3) +#define GMAC_L3SAM0 BIT(2) +#define GMAC_L3PEN0 BIT(0) +#define GMAC_L4DP0 GENMASK(31, 16) +#define GMAC_L4DP0_SHIFT 16 +#define GMAC_L4SP0 GENMASK(15, 0) + /* MTL registers */ #define MTL_OPERATION_MODE 0x00000c00 #define MTL_FRPE BIT(15) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 9b4b5f69fc02..e552d7958114 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -401,8 +401,11 @@ static void dwmac4_set_filter(struct mac_device_info *hw, int numhashregs = (hw->multicast_filter_bins >> 5); int mcbitslog2 = hw->mcast_bits_log2; unsigned int value; + u32 mc_filter[8]; int i; + memset(mc_filter, 0, sizeof(mc_filter)); + value = readl(ioaddr + GMAC_PACKET_FILTER); value &= ~GMAC_PACKET_FILTER_HMC; value &= ~GMAC_PACKET_FILTER_HPF; @@ -416,16 +419,13 @@ static void dwmac4_set_filter(struct mac_device_info *hw, /* Pass all multi */ value |= GMAC_PACKET_FILTER_PM; /* Set all the bits of the HASH tab */ - for (i = 0; i < numhashregs; i++) - writel(0xffffffff, ioaddr + GMAC_HASH_TAB(i)); + memset(mc_filter, 0xff, sizeof(mc_filter)); } else if (!netdev_mc_empty(dev)) { struct netdev_hw_addr *ha; - u32 mc_filter[8]; /* Hash filter for multicast */ value |= GMAC_PACKET_FILTER_HMC; - memset(mc_filter, 0, sizeof(mc_filter)); netdev_for_each_mc_addr(ha, dev) { /* The upper n bits of the calculated CRC are used to * index the contents of the hash table. The number of @@ -440,10 +440,11 @@ static void dwmac4_set_filter(struct mac_device_info *hw, */ mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1f)); } - for (i = 0; i < numhashregs; i++) - writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i)); } + for (i = 0; i < numhashregs; i++) + writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i)); + value |= GMAC_PACKET_FILTER_HPF; /* Handle multiple unicast addresses */ @@ -732,7 +733,7 @@ static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable) } static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash, - bool is_double) + u16 perfect_match, bool is_double) { void __iomem *ioaddr = hw->pcsr; @@ -747,6 +748,16 @@ static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash, } writel(value, ioaddr + GMAC_VLAN_TAG); + } else if (perfect_match) { + u32 value = GMAC_VLAN_ETV; + + if (is_double) { + value |= GMAC_VLAN_EDVLP; + value |= GMAC_VLAN_ESVL; + value |= GMAC_VLAN_DOVLTC; + } + + writel(value | perfect_match, ioaddr + GMAC_VLAN_TAG); } else { u32 value = readl(ioaddr + GMAC_VLAN_TAG); @@ -798,6 +809,106 @@ static void dwmac4_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + GMAC_CONFIG); } +static int dwmac4_config_l3_filter(struct mac_device_info *hw, u32 filter_no, + bool en, bool ipv6, bool sa, bool inv, + u32 match) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + value = readl(ioaddr + GMAC_PACKET_FILTER); + value |= GMAC_PACKET_FILTER_IPFE; + writel(value, ioaddr + GMAC_PACKET_FILTER); + + value = readl(ioaddr + GMAC_L3L4_CTRL(filter_no)); + + /* For IPv6 not both SA/DA filters can be active */ + if (ipv6) { + value |= GMAC_L3PEN0; + value &= ~(GMAC_L3SAM0 | GMAC_L3SAIM0); + value &= ~(GMAC_L3DAM0 | GMAC_L3DAIM0); + if (sa) { + value |= GMAC_L3SAM0; + if (inv) + value |= GMAC_L3SAIM0; + } else { + value |= GMAC_L3DAM0; + if (inv) + value |= GMAC_L3DAIM0; + } + } else { + value &= ~GMAC_L3PEN0; + if (sa) { + value |= GMAC_L3SAM0; + if (inv) + value |= GMAC_L3SAIM0; + } else { + value |= GMAC_L3DAM0; + if (inv) + value |= GMAC_L3DAIM0; + } + } + + writel(value, ioaddr + GMAC_L3L4_CTRL(filter_no)); + + if (sa) { + writel(match, ioaddr + GMAC_L3_ADDR0(filter_no)); + } else { + writel(match, ioaddr + GMAC_L3_ADDR1(filter_no)); + } + + if (!en) + writel(0, ioaddr + GMAC_L3L4_CTRL(filter_no)); + + return 0; +} + +static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no, + bool en, bool udp, bool sa, bool inv, + u32 match) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + value = readl(ioaddr + GMAC_PACKET_FILTER); + value |= GMAC_PACKET_FILTER_IPFE; + writel(value, ioaddr + GMAC_PACKET_FILTER); + + value = readl(ioaddr + GMAC_L3L4_CTRL(filter_no)); + if (udp) { + value |= GMAC_L4PEN0; + } else { + value &= ~GMAC_L4PEN0; + } + + value &= ~(GMAC_L4SPM0 | GMAC_L4SPIM0); + value &= ~(GMAC_L4DPM0 | GMAC_L4DPIM0); + if (sa) { + value |= GMAC_L4SPM0; + if (inv) + value |= GMAC_L4SPIM0; + } else { + value |= GMAC_L4DPM0; + if (inv) + value |= GMAC_L4DPIM0; + } + + writel(value, ioaddr + GMAC_L3L4_CTRL(filter_no)); + + if (sa) { + value = match & GMAC_L4SP0; + } else { + value = (match << GMAC_L4DP0_SHIFT) & GMAC_L4DP0; + } + + writel(value, ioaddr + GMAC_L4_ADDR(filter_no)); + + if (!en) + writel(0, ioaddr + GMAC_L3L4_CTRL(filter_no)); + + return 0; +} + const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_set_mac, @@ -827,11 +938,14 @@ const struct stmmac_ops dwmac4_ops = { .pcs_get_adv_lp = dwmac4_get_adv_lp, .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, + .flex_pps_config = dwmac5_flex_pps_config, .set_mac_loopback = dwmac4_set_mac_loopback, .update_vlan_hash = dwmac4_update_vlan_hash, .sarc_configure = dwmac4_sarc_configure, .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, + .config_l3_filter = dwmac4_config_l3_filter, + .config_l4_filter = dwmac4_config_l4_filter, }; const struct stmmac_ops dwmac410_ops = { @@ -868,6 +982,8 @@ const struct stmmac_ops dwmac410_ops = { .sarc_configure = dwmac4_sarc_configure, .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, + .config_l3_filter = dwmac4_config_l3_filter, + .config_l4_filter = dwmac4_config_l4_filter, }; const struct stmmac_ops dwmac510_ops = { @@ -909,6 +1025,8 @@ const struct stmmac_ops dwmac510_ops = { .sarc_configure = dwmac4_sarc_configure, .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, + .config_l3_filter = dwmac4_config_l3_filter, + .config_l4_filter = dwmac4_config_l4_filter, }; int dwmac4_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 15eb1abba91d..707ab5eba8da 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -431,8 +431,8 @@ static void dwmac4_get_addr(struct dma_desc *p, unsigned int *addr) static void dwmac4_set_addr(struct dma_desc *p, dma_addr_t addr) { - p->des0 = cpu_to_le32(addr); - p->des1 = 0; + p->des0 = cpu_to_le32(lower_32_bits(addr)); + p->des1 = cpu_to_le32(upper_32_bits(addr)); } static void dwmac4_clear(struct dma_desc *p) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 68c157979b94..b24c89572745 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -79,6 +79,10 @@ static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr, value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT); writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan)); + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame)) + writel(upper_32_bits(dma_rx_phy), + ioaddr + DMA_CHAN_RX_BASE_ADDR_HI(chan)); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RX_BASE_ADDR(chan)); } @@ -97,6 +101,10 @@ static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr, writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame)) + writel(upper_32_bits(dma_tx_phy), + ioaddr + DMA_CHAN_TX_BASE_ADDR_HI(chan)); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan)); } @@ -132,6 +140,9 @@ static void dwmac4_dma_init(void __iomem *ioaddr, if (dma_cfg->aal) value |= DMA_SYS_BUS_AAL; + if (dma_cfg->eame) + value |= DMA_SYS_BUS_EAME; + writel(value, ioaddr + DMA_SYS_BUS_MODE); } @@ -353,9 +364,27 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature1 */ hw_cap = readl(ioaddr + GMAC_HW_FEATURE1); + dma_cap->l3l4fnum = (hw_cap & GMAC_HW_FEAT_L3L4FNUM) >> 27; dma_cap->hash_tb_sz = (hw_cap & GMAC_HW_HASH_TB_SZ) >> 24; dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20; dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18; + + dma_cap->addr64 = (hw_cap & GMAC_HW_ADDR64) >> 14; + switch (dma_cap->addr64) { + case 0: + dma_cap->addr64 = 32; + break; + case 1: + dma_cap->addr64 = 40; + break; + case 2: + dma_cap->addr64 = 48; + break; + default: + dma_cap->addr64 = 32; + break; + } + /* RX and TX FIFO sizes are encoded as log2(n / 128). Undo that by * shifting and store the sizes in bytes. */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index b66da0237d2a..5299fa1001a3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -65,6 +65,7 @@ #define DMA_SYS_BUS_MB BIT(14) #define DMA_AXI_1KBBE BIT(13) #define DMA_SYS_BUS_AAL BIT(12) +#define DMA_SYS_BUS_EAME BIT(11) #define DMA_AXI_BLEN256 BIT(7) #define DMA_AXI_BLEN128 BIT(6) #define DMA_AXI_BLEN64 BIT(5) @@ -91,7 +92,9 @@ #define DMA_CHAN_CONTROL(x) DMA_CHANX_BASE_ADDR(x) #define DMA_CHAN_TX_CONTROL(x) (DMA_CHANX_BASE_ADDR(x) + 0x4) #define DMA_CHAN_RX_CONTROL(x) (DMA_CHANX_BASE_ADDR(x) + 0x8) +#define DMA_CHAN_TX_BASE_ADDR_HI(x) (DMA_CHANX_BASE_ADDR(x) + 0x10) #define DMA_CHAN_TX_BASE_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x14) +#define DMA_CHAN_RX_BASE_ADDR_HI(x) (DMA_CHANX_BASE_ADDR(x) + 0x18) #define DMA_CHAN_RX_BASE_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x1c) #define DMA_CHAN_TX_END_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x20) #define DMA_CHAN_RX_END_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x28) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 5923ca62d793..99037386080a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -84,7 +84,7 @@ #define XGMAC_TSIE BIT(12) #define XGMAC_LPIIE BIT(5) #define XGMAC_PMTIE BIT(4) -#define XGMAC_INT_DEFAULT_EN (XGMAC_LPIIE | XGMAC_PMTIE | XGMAC_TSIE) +#define XGMAC_INT_DEFAULT_EN (XGMAC_LPIIE | XGMAC_PMTIE) #define XGMAC_Qx_TX_FLOW_CTRL(x) (0x00000070 + (x) * 4) #define XGMAC_PT GENMASK(31, 16) #define XGMAC_PT_SHIFT 16 @@ -122,6 +122,7 @@ #define XGMAC_HWFEAT_GMIISEL BIT(1) #define XGMAC_HW_FEATURE1 0x00000120 #define XGMAC_HWFEAT_L3L4FNUM GENMASK(30, 27) +#define XGMAC_HWFEAT_HASHTBLSZ GENMASK(25, 24) #define XGMAC_HWFEAT_RSSEN BIT(20) #define XGMAC_HWFEAT_TSOEN BIT(18) #define XGMAC_HWFEAT_SPHEN BIT(17) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 2b277b2c586b..5cda360d5d07 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -472,7 +472,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, dwxgmac2_set_mchash(ioaddr, mc_filter, mcbitslog2); /* Handle multiple unicast addresses */ - if (netdev_uc_count(dev) > XGMAC_ADDR_MAX) { + if (netdev_uc_count(dev) > hw->unicast_filter_entries) { value |= XGMAC_FILTER_PR; } else { struct netdev_hw_addr *ha; @@ -523,8 +523,8 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw, struct stmmac_rss *cfg, u32 num_rxq) { void __iomem *ioaddr = hw->pcsr; + u32 value, *key; int i, ret; - u32 value; value = readl(ioaddr + XGMAC_RSS_CTRL); if (!cfg || !cfg->enable) { @@ -533,8 +533,9 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw, return 0; } - for (i = 0; i < (sizeof(cfg->key) / sizeof(u32)); i++) { - ret = dwxgmac2_rss_write_reg(ioaddr, true, i, cfg->key[i]); + key = (u32 *)cfg->key; + for (i = 0; i < (ARRAY_SIZE(cfg->key) / sizeof(u32)); i++) { + ret = dwxgmac2_rss_write_reg(ioaddr, true, i, key[i]); if (ret) return ret; } @@ -554,7 +555,7 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw, } static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, - bool is_double) + u16 perfect_match, bool is_double) { void __iomem *ioaddr = hw->pcsr; @@ -575,6 +576,21 @@ static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, } writel(value, ioaddr + XGMAC_VLAN_TAG); + } else if (perfect_match) { + u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); + + value |= XGMAC_FILTER_VTFE; + + writel(value, ioaddr + XGMAC_PACKET_FILTER); + + value = XGMAC_VLAN_ETV; + if (is_double) { + value |= XGMAC_VLAN_EDVLP; + value |= XGMAC_VLAN_ESVL; + value |= XGMAC_VLAN_DOVLTC; + } + + writel(value | perfect_match, ioaddr + XGMAC_VLAN_TAG); } else { u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 53c4a40d8386..7cc331996cd8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -27,7 +27,10 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr, if (dma_cfg->aal) value |= XGMAC_AAL; - writel(value | XGMAC_EAME, ioaddr + XGMAC_DMA_SYSBUS_MODE); + if (dma_cfg->eame) + value |= XGMAC_EAME; + + writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); } static void dwxgmac2_dma_init_chan(void __iomem *ioaddr, @@ -380,6 +383,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 1 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1); dma_cap->l3l4fnum = (hw_cap & XGMAC_HWFEAT_L3L4FNUM) >> 27; + dma_cap->hash_tb_sz = (hw_cap & XGMAC_HWFEAT_HASHTBLSZ) >> 24; dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20; dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18; dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index ddb851d99618..1303d1e9a18f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -357,7 +357,7 @@ struct stmmac_ops { struct stmmac_rss *cfg, u32 num_rxq); /* VLAN */ void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash, - bool is_double); + u16 perfect_match, bool is_double); void (*enable_vlan)(struct mac_device_info *hw, u32 type); /* TX Timestamp */ int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d3232738fb25..8b76745a7ec4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -629,6 +629,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + ts_event_en = PTP_TCR_TSEVNTENA; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; ptp_over_ethernet = PTP_TCR_TSIPENA; @@ -4206,15 +4207,25 @@ static u32 stmmac_vid_crc32_le(__le16 vid_le) static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double) { u32 crc, hash = 0; - u16 vid; + int count = 0; + u16 vid = 0; for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) { __le16 vid_le = cpu_to_le16(vid); crc = bitrev32(~stmmac_vid_crc32_le(vid_le)) >> 28; hash |= (1 << crc); + count++; + } + + if (!priv->dma_cap.vlhash) { + if (count > 2) /* VID = 0 always passes filter */ + return -EOPNOTSUPP; + + vid = cpu_to_le16(vid); + hash = 0; } - return stmmac_update_vlan_hash(priv, priv->hw, hash, is_double); + return stmmac_update_vlan_hash(priv, priv->hw, hash, vid, is_double); } static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) @@ -4223,8 +4234,6 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid bool is_double = false; int ret; - if (!priv->dma_cap.vlhash) - return -EOPNOTSUPP; if (be16_to_cpu(proto) == ETH_P_8021AD) is_double = true; @@ -4243,8 +4252,6 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi struct stmmac_priv *priv = netdev_priv(ndev); bool is_double = false; - if (!priv->dma_cap.vlhash) - return -EOPNOTSUPP; if (be16_to_cpu(proto) == ETH_P_8021AD) is_double = true; @@ -4514,6 +4521,13 @@ int stmmac_dvr_probe(struct device *device, if (!ret) { dev_info(priv->device, "Using %d bits DMA width\n", priv->dma_cap.addr64); + + /* + * If more than 32 bits can be addressed, make sure to + * enable enhanced addressing mode. + */ + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)) + priv->plat->dma_cfg->eame = true; } else { ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32)); if (ret) { @@ -4715,11 +4729,9 @@ int stmmac_suspend(struct device *dev) if (!ndev || !netif_running(ndev)) return 0; - mutex_lock(&priv->lock); + phylink_mac_change(priv->phylink, false); - rtnl_lock(); - phylink_stop(priv->phylink); - rtnl_unlock(); + mutex_lock(&priv->lock); netif_device_detach(ndev); stmmac_stop_all_queues(priv); @@ -4734,6 +4746,12 @@ int stmmac_suspend(struct device *dev) stmmac_pmt(priv, priv->hw, priv->wolopts); priv->irq_wake = 1; } else { + mutex_unlock(&priv->lock); + rtnl_lock(); + phylink_stop(priv->phylink); + rtnl_unlock(); + mutex_lock(&priv->lock); + stmmac_mac_set(priv, priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ @@ -4824,12 +4842,16 @@ int stmmac_resume(struct device *dev) stmmac_start_all_queues(priv); - rtnl_lock(); - phylink_start(priv->phylink); - rtnl_unlock(); - mutex_unlock(&priv->lock); + if (!device_may_wakeup(priv->device)) { + rtnl_lock(); + phylink_start(priv->phylink); + rtnl_unlock(); + } + + phylink_mac_change(priv->phylink, true); + return 0; } EXPORT_SYMBOL_GPL(stmmac_resume); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index 5f66f6161629..68c59cfb8f70 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -816,16 +816,13 @@ out: return 0; } -static int stmmac_test_vlanfilt(struct stmmac_priv *priv) +static int __stmmac_test_vlanfilt(struct stmmac_priv *priv) { struct stmmac_packet_attrs attr = { }; struct stmmac_test_priv *tpriv; struct sk_buff *skb = NULL; int ret = 0, i; - if (!priv->dma_cap.vlhash) - return -EOPNOTSUPP; - tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); if (!tpriv) return -ENOMEM; @@ -891,16 +888,32 @@ cleanup: return ret; } -static int stmmac_test_dvlanfilt(struct stmmac_priv *priv) +static int stmmac_test_vlanfilt(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.vlhash) + return -EOPNOTSUPP; + + return __stmmac_test_vlanfilt(priv); +} + +static int stmmac_test_vlanfilt_perfect(struct stmmac_priv *priv) +{ + int ret, prev_cap = priv->dma_cap.vlhash; + + priv->dma_cap.vlhash = 0; + ret = __stmmac_test_vlanfilt(priv); + priv->dma_cap.vlhash = prev_cap; + + return ret; +} + +static int __stmmac_test_dvlanfilt(struct stmmac_priv *priv) { struct stmmac_packet_attrs attr = { }; struct stmmac_test_priv *tpriv; struct sk_buff *skb = NULL; int ret = 0, i; - if (!priv->dma_cap.vlhash) - return -EOPNOTSUPP; - tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); if (!tpriv) return -ENOMEM; @@ -967,6 +980,25 @@ cleanup: return ret; } +static int stmmac_test_dvlanfilt(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.vlhash) + return -EOPNOTSUPP; + + return __stmmac_test_dvlanfilt(priv); +} + +static int stmmac_test_dvlanfilt_perfect(struct stmmac_priv *priv) +{ + int ret, prev_cap = priv->dma_cap.vlhash; + + priv->dma_cap.vlhash = 0; + ret = __stmmac_test_dvlanfilt(priv); + priv->dma_cap.vlhash = prev_cap; + + return ret; +} + #ifdef CONFIG_NET_CLS_ACT static int stmmac_test_rxp(struct stmmac_priv *priv) { @@ -1564,10 +1596,6 @@ static int __stmmac_test_jumbo(struct stmmac_priv *priv, u16 queue) struct stmmac_packet_attrs attr = { }; int size = priv->dma_buf_sz; - /* Only XGMAC has SW support for multiple RX descs in same packet */ - if (priv->plat->has_xgmac) - size = priv->dev->max_mtu; - attr.dst = priv->dev->dev_addr; attr.max_size = size - ETH_FCS_LEN; attr.queue_mapping = queue; @@ -1645,119 +1673,127 @@ static const struct stmmac_test { int (*fn)(struct stmmac_priv *priv); } stmmac_selftests[] = { { - .name = "MAC Loopback ", + .name = "MAC Loopback ", .lb = STMMAC_LOOPBACK_MAC, .fn = stmmac_test_mac_loopback, }, { - .name = "PHY Loopback ", + .name = "PHY Loopback ", .lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */ .fn = stmmac_test_phy_loopback, }, { - .name = "MMC Counters ", + .name = "MMC Counters ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_mmc, }, { - .name = "EEE ", + .name = "EEE ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_eee, }, { - .name = "Hash Filter MC ", + .name = "Hash Filter MC ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_hfilt, }, { - .name = "Perfect Filter UC ", + .name = "Perfect Filter UC ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_pfilt, }, { - .name = "MC Filter ", + .name = "MC Filter ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_mcfilt, }, { - .name = "UC Filter ", + .name = "UC Filter ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_ucfilt, }, { - .name = "Flow Control ", + .name = "Flow Control ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_flowctrl, }, { - .name = "RSS ", + .name = "RSS ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_rss, }, { - .name = "VLAN Filtering ", + .name = "VLAN Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_vlanfilt, }, { - .name = "Double VLAN Filtering", + .name = "VLAN Filtering (perf) ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_vlanfilt_perfect, + }, { + .name = "Double VLAN Filter ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_dvlanfilt, }, { - .name = "Flexible RX Parser ", + .name = "Double VLAN Filter (perf) ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_dvlanfilt_perfect, + }, { + .name = "Flexible RX Parser ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_rxp, }, { - .name = "SA Insertion (desc) ", + .name = "SA Insertion (desc) ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_desc_sai, }, { - .name = "SA Replacement (desc)", + .name = "SA Replacement (desc) ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_desc_sar, }, { - .name = "SA Insertion (reg) ", + .name = "SA Insertion (reg) ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_reg_sai, }, { - .name = "SA Replacement (reg)", + .name = "SA Replacement (reg) ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_reg_sar, }, { - .name = "VLAN TX Insertion ", + .name = "VLAN TX Insertion ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_vlanoff, }, { - .name = "SVLAN TX Insertion ", + .name = "SVLAN TX Insertion ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_svlanoff, }, { - .name = "L3 DA Filtering ", + .name = "L3 DA Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l3filt_da, }, { - .name = "L3 SA Filtering ", + .name = "L3 SA Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l3filt_sa, }, { - .name = "L4 DA TCP Filtering ", + .name = "L4 DA TCP Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l4filt_da_tcp, }, { - .name = "L4 SA TCP Filtering ", + .name = "L4 SA TCP Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l4filt_sa_tcp, }, { - .name = "L4 DA UDP Filtering ", + .name = "L4 DA UDP Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l4filt_da_udp, }, { - .name = "L4 SA UDP Filtering ", + .name = "L4 SA UDP Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l4filt_sa_udp, }, { - .name = "ARP Offload ", + .name = "ARP Offload ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_arpoffload, }, { - .name = "Jumbo Frame ", + .name = "Jumbo Frame ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_jumbo, }, { - .name = "Multichannel Jumbo ", + .name = "Multichannel Jumbo ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_mjumbo, }, { - .name = "Split Header ", + .name = "Split Header ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_sph, }, diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index ceddb424f887..0dd0ba915ab9 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -1137,10 +1137,11 @@ static void atusb_disconnect(struct usb_interface *interface) ieee802154_unregister_hw(atusb->hw); + usb_put_dev(atusb->usb_dev); + ieee802154_free_hw(atusb->hw); usb_set_intfdata(interface, NULL); - usb_put_dev(atusb->usb_dev); pr_debug("%s done\n", __func__); } diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 11402dc347db..430c93786153 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3145,12 +3145,12 @@ static int ca8210_probe(struct spi_device *spi_device) goto error; } + priv->spi->dev.platform_data = pdata; ret = ca8210_get_platform_data(priv->spi, pdata); if (ret) { dev_crit(&spi_device->dev, "ca8210_get_platform_data failed\n"); goto error; } - priv->spi->dev.platform_data = pdata; ret = ca8210_dev_com_init(priv); if (ret) { diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 17f2300e63ee..8dc04e2590b1 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -800,7 +800,7 @@ mcr20a_handle_rx_read_buf_complete(void *context) if (!skb) return; - memcpy(skb_put(skb, len), lp->rx_buf, len); + __skb_put_data(skb, lp->rx_buf, len); ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]); print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1, diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 887bbba4631e..b0ac557f8e60 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -108,8 +108,8 @@ static void ipvlan_port_destroy(struct net_device *dev) #define IPVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_CSUM_MASK | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ - NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \ - NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ + NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_GSO_ROBUST | \ + NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) #define IPVLAN_STATE_MASK \ diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index 09f1315d2f2a..f4d8f62f28c2 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ - netdev.o dev.o fib.o bus.o + netdev.o dev.o fib.o bus.o health.o ifeq ($(CONFIG_BPF_SYSCALL),y) netdevsim-objs += \ diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 1a0ff3d7747b..6aeed0c600f8 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -283,6 +283,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.bus = &nsim_bus; nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; + nsim_bus_dev->initial_net = current->nsproxy->net_ns; err = device_register(&nsim_bus_dev->dev); if (err) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 56576d4f34a5..468e157a7cb1 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -90,6 +90,10 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) &nsim_dev->test1); debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev, &nsim_dev_take_snapshot_fops); + debugfs_create_bool("dont_allow_reload", 0600, nsim_dev->ddir, + &nsim_dev->dont_allow_reload); + debugfs_create_bool("fail_reload", 0600, nsim_dev->ddir, + &nsim_dev->fail_reload); return 0; } @@ -123,39 +127,6 @@ static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port) debugfs_remove_recursive(nsim_dev_port->ddir); } -static struct net *nsim_devlink_net(struct devlink *devlink) -{ - return &init_net; -} - -static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); -} - -static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); -} - -static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); -} - -static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); -} - static int nsim_dev_resources_register(struct devlink *devlink) { struct devlink_resource_size_params params = { @@ -163,9 +134,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) .size_granularity = 1, .unit = DEVLINK_RESOURCE_UNIT_ENTRY }; - struct net *net = nsim_devlink_net(devlink); int err; - u64 n; /* Resources for IPv4 */ err = devlink_resource_register(devlink, "IPv4", (u64)-1, @@ -177,8 +146,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); - err = devlink_resource_register(devlink, "fib", n, + err = devlink_resource_register(devlink, "fib", (u64)-1, NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4, ¶ms); if (err) { @@ -186,8 +154,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); - err = devlink_resource_register(devlink, "fib-rules", n, + err = devlink_resource_register(devlink, "fib-rules", (u64)-1, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV4, ¶ms); if (err) { @@ -205,8 +172,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); - err = devlink_resource_register(devlink, "fib", n, + err = devlink_resource_register(devlink, "fib", (u64)-1, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6, ¶ms); if (err) { @@ -214,8 +180,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); - err = devlink_resource_register(devlink, "fib-rules", n, + err = devlink_resource_register(devlink, "fib-rules", (u64)-1, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_IPV6, ¶ms); if (err) { @@ -223,22 +188,6 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB, - nsim_dev_ipv4_fib_resource_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB_RULES, - nsim_dev_ipv4_fib_rules_res_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB, - nsim_dev_ipv6_fib_resource_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB_RULES, - nsim_dev_ipv6_fib_rules_res_occ_get, - net); out: return err; } @@ -524,36 +473,48 @@ static void nsim_dev_traps_exit(struct devlink *devlink) kfree(nsim_dev->trap_data); } -static int nsim_dev_reload_down(struct devlink *devlink, +static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack); +static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev); + +static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + if (nsim_dev->dont_allow_reload) { + /* For testing purposes, user set debugfs dont_allow_reload + * value to true. So forbid it. + */ + NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); + return -EOPNOTSUPP; + } + + nsim_dev_reload_destroy(nsim_dev); return 0; } static int nsim_dev_reload_up(struct devlink *devlink, struct netlink_ext_ack *extack) { - enum nsim_resource_id res_ids[] = { - NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES - }; - struct net *net = nsim_devlink_net(devlink); - int i; - - for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { - int err; - u64 val; + struct nsim_dev *nsim_dev = devlink_priv(devlink); - err = devlink_resource_size_get(devlink, res_ids[i], &val); - if (!err) { - err = nsim_fib_set_max(net, res_ids[i], val, extack); - if (err) - return err; - } + if (nsim_dev->fail_reload) { + /* For testing purposes, user set debugfs fail_reload + * value to true. Fail right away. + */ + NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); + return -EINVAL; } - nsim_devlink_param_load_driverinit_values(devlink); - return 0; + return nsim_dev_reload_create(nsim_dev, extack); +} + +static int nsim_dev_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + return devlink_info_driver_name_put(req, DRV_NAME); } #define NSIM_DEV_FLASH_SIZE 500000 @@ -649,6 +610,7 @@ nsim_dev_devlink_trap_action_set(struct devlink *devlink, static const struct devlink_ops nsim_dev_devlink_ops = { .reload_down = nsim_dev_reload_down, .reload_up = nsim_dev_reload_up, + .info_get = nsim_dev_info_get, .flash_update = nsim_dev_flash_update, .trap_init = nsim_dev_devlink_trap_init, .trap_action_set = nsim_dev_devlink_trap_action_set, @@ -657,8 +619,139 @@ static const struct devlink_ops nsim_dev_devlink_ops = { #define NSIM_DEV_MAX_MACS_DEFAULT 32 #define NSIM_DEV_TEST1_DEFAULT true -static struct nsim_dev * -nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) +static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, + unsigned int port_index) +{ + struct nsim_dev_port *nsim_dev_port; + struct devlink_port *devlink_port; + int err; + + nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); + if (!nsim_dev_port) + return -ENOMEM; + nsim_dev_port->port_index = port_index; + + devlink_port = &nsim_dev_port->devlink_port; + devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_index + 1, 0, 0, + nsim_dev->switch_id.id, + nsim_dev->switch_id.id_len); + err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, + port_index); + if (err) + goto err_port_free; + + err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port); + if (err) + goto err_dl_port_unregister; + + nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port); + if (IS_ERR(nsim_dev_port->ns)) { + err = PTR_ERR(nsim_dev_port->ns); + goto err_port_debugfs_exit; + } + + devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev); + list_add(&nsim_dev_port->list, &nsim_dev->port_list); + + return 0; + +err_port_debugfs_exit: + nsim_dev_port_debugfs_exit(nsim_dev_port); +err_dl_port_unregister: + devlink_port_unregister(devlink_port); +err_port_free: + kfree(nsim_dev_port); + return err; +} + +static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) +{ + struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; + + list_del(&nsim_dev_port->list); + devlink_port_type_clear(devlink_port); + nsim_destroy(nsim_dev_port->ns); + nsim_dev_port_debugfs_exit(nsim_dev_port); + devlink_port_unregister(devlink_port); + kfree(nsim_dev_port); +} + +static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_port *nsim_dev_port, *tmp; + + list_for_each_entry_safe(nsim_dev_port, tmp, + &nsim_dev->port_list, list) + __nsim_dev_port_del(nsim_dev_port); +} + +static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, + unsigned int port_count) +{ + int i, err; + + for (i = 0; i < port_count; i++) { + err = __nsim_dev_port_add(nsim_dev, i); + if (err) + goto err_port_del_all; + } + return 0; + +err_port_del_all: + nsim_dev_port_del_all(nsim_dev); + return err; +} + +static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack) +{ + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + struct devlink *devlink; + int err; + + devlink = priv_to_devlink(nsim_dev); + nsim_dev = devlink_priv(devlink); + INIT_LIST_HEAD(&nsim_dev->port_list); + mutex_init(&nsim_dev->port_list_lock); + nsim_dev->fw_update_status = true; + + nsim_dev->fib_data = nsim_fib_create(devlink, extack); + if (IS_ERR(nsim_dev->fib_data)) + return PTR_ERR(nsim_dev->fib_data); + + nsim_devlink_param_load_driverinit_values(devlink); + + err = nsim_dev_dummy_region_init(nsim_dev, devlink); + if (err) + goto err_fib_destroy; + + err = nsim_dev_traps_init(devlink); + if (err) + goto err_dummy_region_exit; + + err = nsim_dev_health_init(nsim_dev, devlink); + if (err) + goto err_traps_exit; + + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_health_exit; + + return 0; + +err_health_exit: + nsim_dev_health_exit(nsim_dev); +err_traps_exit: + nsim_dev_traps_exit(devlink); +err_dummy_region_exit: + nsim_dev_dummy_region_exit(nsim_dev); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); + return err; +} + +static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev; struct devlink *devlink; @@ -667,6 +760,7 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev)); if (!devlink) return ERR_PTR(-ENOMEM); + devlink_net_set(devlink, nsim_bus_dev->initial_net); nsim_dev = devlink_priv(devlink); nsim_dev->nsim_bus_dev = nsim_bus_dev; nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id); @@ -681,9 +775,15 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) if (err) goto err_devlink_free; + nsim_dev->fib_data = nsim_fib_create(devlink, NULL); + if (IS_ERR(nsim_dev->fib_data)) { + err = PTR_ERR(nsim_dev->fib_data); + goto err_resources_unregister; + } + err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) - goto err_resources_unregister; + goto err_fib_destroy; err = devlink_params_register(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); @@ -703,13 +803,25 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) if (err) goto err_traps_exit; - err = nsim_bpf_dev_init(nsim_dev); + err = nsim_dev_health_init(nsim_dev, devlink); if (err) goto err_debugfs_exit; + err = nsim_bpf_dev_init(nsim_dev); + if (err) + goto err_health_exit; + + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_bpf_dev_exit; + devlink_params_publish(devlink); return nsim_dev; +err_bpf_dev_exit: + nsim_bpf_dev_exit(nsim_dev); +err_health_exit: + nsim_dev_health_exit(nsim_dev); err_debugfs_exit: nsim_dev_debugfs_exit(nsim_dev); err_traps_exit: @@ -721,6 +833,8 @@ err_params_unregister: ARRAY_SIZE(nsim_devlink_params)); err_dl_unregister: devlink_unregister(devlink); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); err_resources_unregister: devlink_resources_unregister(devlink, NULL); err_devlink_free: @@ -728,118 +842,51 @@ err_devlink_free: return ERR_PTR(err); } -static void nsim_dev_destroy(struct nsim_dev *nsim_dev) +static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) { struct devlink *devlink = priv_to_devlink(nsim_dev); - nsim_bpf_dev_exit(nsim_dev); - nsim_dev_debugfs_exit(nsim_dev); + if (devlink_is_reload_failed(devlink)) + return; + nsim_dev_port_del_all(nsim_dev); + nsim_dev_health_exit(nsim_dev); nsim_dev_traps_exit(devlink); nsim_dev_dummy_region_exit(nsim_dev); - devlink_params_unregister(devlink, nsim_devlink_params, - ARRAY_SIZE(nsim_devlink_params)); - devlink_unregister(devlink); - devlink_resources_unregister(devlink, NULL); mutex_destroy(&nsim_dev->port_list_lock); - devlink_free(devlink); + nsim_fib_destroy(devlink, nsim_dev->fib_data); } -static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, - unsigned int port_index) -{ - struct nsim_dev_port *nsim_dev_port; - struct devlink_port *devlink_port; - int err; - - nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); - if (!nsim_dev_port) - return -ENOMEM; - nsim_dev_port->port_index = port_index; - - devlink_port = &nsim_dev_port->devlink_port; - devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_index + 1, 0, 0, - nsim_dev->switch_id.id, - nsim_dev->switch_id.id_len); - err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, - port_index); - if (err) - goto err_port_free; - - err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port); - if (err) - goto err_dl_port_unregister; - - nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port); - if (IS_ERR(nsim_dev_port->ns)) { - err = PTR_ERR(nsim_dev_port->ns); - goto err_port_debugfs_exit; - } - - devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev); - list_add(&nsim_dev_port->list, &nsim_dev->port_list); - - return 0; - -err_port_debugfs_exit: - nsim_dev_port_debugfs_exit(nsim_dev_port); -err_dl_port_unregister: - devlink_port_unregister(devlink_port); -err_port_free: - kfree(nsim_dev_port); - return err; -} - -static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) +static void nsim_dev_destroy(struct nsim_dev *nsim_dev) { - struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; - - list_del(&nsim_dev_port->list); - devlink_port_type_clear(devlink_port); - nsim_destroy(nsim_dev_port->ns); - nsim_dev_port_debugfs_exit(nsim_dev_port); - devlink_port_unregister(devlink_port); - kfree(nsim_dev_port); -} + struct devlink *devlink = priv_to_devlink(nsim_dev); -static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) -{ - struct nsim_dev_port *nsim_dev_port, *tmp; + nsim_dev_reload_destroy(nsim_dev); - list_for_each_entry_safe(nsim_dev_port, tmp, - &nsim_dev->port_list, list) - __nsim_dev_port_del(nsim_dev_port); + nsim_bpf_dev_exit(nsim_dev); + nsim_dev_debugfs_exit(nsim_dev); + devlink_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); + devlink_unregister(devlink); + devlink_resources_unregister(devlink, NULL); + devlink_free(devlink); } int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev; - int i; - int err; - nsim_dev = nsim_dev_create(nsim_bus_dev, nsim_bus_dev->port_count); + nsim_dev = nsim_dev_create(nsim_bus_dev); if (IS_ERR(nsim_dev)) return PTR_ERR(nsim_dev); dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); - for (i = 0; i < nsim_bus_dev->port_count; i++) { - err = __nsim_dev_port_add(nsim_dev, i); - if (err) - goto err_port_del_all; - } return 0; - -err_port_del_all: - nsim_dev_port_del_all(nsim_dev); - nsim_dev_destroy(nsim_dev); - return err; } void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); - nsim_dev_port_del_all(nsim_dev); nsim_dev_destroy(nsim_dev); } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index f61d094746c0..13540dee7364 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -18,7 +18,7 @@ #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/fib_rules.h> -#include <net/netns/generic.h> +#include <net/net_namespace.h> #include "netdevsim.h" @@ -33,15 +33,14 @@ struct nsim_per_fib_data { }; struct nsim_fib_data { + struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; }; -static unsigned int nsim_fib_net_id; - -u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max) +u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, bool max) { - struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; switch (res_id) { @@ -64,12 +63,10 @@ u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max) return max ? entry->max : entry->num; } -int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, - struct netlink_ext_ack *extack) +static void nsim_fib_set_max(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, u64 val) { - struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; - int err = 0; switch (res_id) { case NSIM_RESOURCE_IPV4_FIB: @@ -85,20 +82,10 @@ int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, entry = &fib_data->ipv6.rules; break; default: - return 0; - } - - /* not allowing a new max to be less than curren occupancy - * --> no means of evicting entries - */ - if (val < entry->num) { - NL_SET_ERR_MSG_MOD(extack, "New size is less than current occupancy"); - err = -EINVAL; - } else { - entry->max = val; + WARN_ON(1); + return; } - - return err; + entry->max = val; } static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, @@ -120,9 +107,9 @@ static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, return err; } -static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add) +static int nsim_fib_rule_event(struct nsim_fib_data *data, + struct fib_notifier_info *info, bool add) { - struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -157,9 +144,9 @@ static int nsim_fib_account(struct nsim_fib_entry *entry, bool add, return err; } -static int nsim_fib_event(struct fib_notifier_info *info, bool add) +static int nsim_fib_event(struct nsim_fib_data *data, + struct fib_notifier_info *info, bool add) { - struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -178,18 +165,22 @@ static int nsim_fib_event(struct fib_notifier_info *info, bool add) static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, void *ptr) { + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + fib_nb); struct fib_notifier_info *info = ptr; int err = 0; switch (event) { case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - err = nsim_fib_rule_event(info, event == FIB_EVENT_RULE_ADD); + err = nsim_fib_rule_event(data, info, + event == FIB_EVENT_RULE_ADD); break; case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - err = nsim_fib_event(info, event == FIB_EVENT_ENTRY_ADD); + err = nsim_fib_event(data, info, + event == FIB_EVENT_ENTRY_ADD); break; } @@ -199,68 +190,116 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, /* inconsistent dump, trying again */ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) { - struct nsim_fib_data *data; - struct net *net; + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + fib_nb); - rcu_read_lock(); - for_each_net_rcu(net) { - data = net_generic(net, nsim_fib_net_id); + data->ipv4.fib.num = 0ULL; + data->ipv4.rules.num = 0ULL; + data->ipv6.fib.num = 0ULL; + data->ipv6.rules.num = 0ULL; +} - data->ipv4.fib.num = 0ULL; - data->ipv4.rules.num = 0ULL; +static u64 nsim_fib_ipv4_resource_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; - data->ipv6.fib.num = 0ULL; - data->ipv6.rules.num = 0ULL; - } - rcu_read_unlock(); + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB, false); } -static struct notifier_block nsim_fib_nb = { - .notifier_call = nsim_fib_event_nb, -}; - -/* Initialize per network namespace state */ -static int __net_init nsim_fib_netns_init(struct net *net) +static u64 nsim_fib_ipv4_rules_res_occ_get(void *priv) { - struct nsim_fib_data *data = net_generic(net, nsim_fib_net_id); + struct nsim_fib_data *data = priv; - data->ipv4.fib.max = (u64)-1; - data->ipv4.rules.max = (u64)-1; + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB_RULES, false); +} - data->ipv6.fib.max = (u64)-1; - data->ipv6.rules.max = (u64)-1; +static u64 nsim_fib_ipv6_resource_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; - return 0; + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB, false); } -static struct pernet_operations nsim_fib_net_ops = { - .init = nsim_fib_netns_init, - .id = &nsim_fib_net_id, - .size = sizeof(struct nsim_fib_data), -}; +static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; + + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false); +} -void nsim_fib_exit(void) +static void nsim_fib_set_max_all(struct nsim_fib_data *data, + struct devlink *devlink) { - unregister_pernet_subsys(&nsim_fib_net_ops); - unregister_fib_notifier(&nsim_fib_nb); + enum nsim_resource_id res_ids[] = { + NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, + NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES + }; + int i; + + for (i = 0; i < ARRAY_SIZE(res_ids); i++) { + int err; + u64 val; + + err = devlink_resource_size_get(devlink, res_ids[i], &val); + if (err) + val = (u64) -1; + nsim_fib_set_max(data, res_ids[i], val); + } } -int nsim_fib_init(void) +struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, + struct netlink_ext_ack *extack) { + struct nsim_fib_data *data; int err; - err = register_pernet_subsys(&nsim_fib_net_ops); - if (err < 0) { - pr_err("Failed to register pernet subsystem\n"); - goto err_out; - } + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + nsim_fib_set_max_all(data, devlink); - err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent); - if (err < 0) { + data->fib_nb.notifier_call = nsim_fib_event_nb; + err = register_fib_notifier(devlink_net(devlink), &data->fib_nb, + nsim_fib_dump_inconsistent, extack); + if (err) { pr_err("Failed to register fib notifier\n"); goto err_out; } + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV4_FIB, + nsim_fib_ipv4_resource_occ_get, + data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV4_FIB_RULES, + nsim_fib_ipv4_rules_res_occ_get, + data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV6_FIB, + nsim_fib_ipv6_resource_occ_get, + data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV6_FIB_RULES, + nsim_fib_ipv6_rules_res_occ_get, + data); + return data; + err_out: - return err; + kfree(data); + return ERR_PTR(err); +} + +void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) +{ + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV6_FIB_RULES); + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV6_FIB); + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV4_FIB_RULES); + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV4_FIB); + unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); + kfree(data); } diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c new file mode 100644 index 000000000000..2716235a0336 --- /dev/null +++ b/drivers/net/netdevsim/health.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "netdevsim.h" + +static int +nsim_dev_empty_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int +nsim_dev_empty_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static const +struct devlink_health_reporter_ops nsim_dev_empty_reporter_ops = { + .name = "empty", + .dump = nsim_dev_empty_reporter_dump, + .diagnose = nsim_dev_empty_reporter_diagnose, +}; + +struct nsim_dev_dummy_reporter_ctx { + char *break_msg; +}; + +static int +nsim_dev_dummy_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx; + + if (health->fail_recover) { + /* For testing purposes, user set debugfs fail_recover + * value to true. Fail right away. + */ + NL_SET_ERR_MSG_MOD(extack, "User setup the recover to fail for testing purposes"); + return -EINVAL; + } + if (ctx) { + kfree(health->recovered_break_msg); + health->recovered_break_msg = kstrdup(ctx->break_msg, + GFP_KERNEL); + if (!health->recovered_break_msg) + return -ENOMEM; + } + return 0; +} + +static int nsim_dev_dummy_fmsg_put(struct devlink_fmsg *fmsg, u32 binary_len) +{ + char *binary; + int err; + int i; + + err = devlink_fmsg_bool_pair_put(fmsg, "test_bool", true); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "test_u8", 1); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "test_u32", 3); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "test_u64", 4); + if (err) + return err; + err = devlink_fmsg_string_pair_put(fmsg, "test_string", "somestring"); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_binary"); + if (err) + return err; + binary = kmalloc(binary_len, GFP_KERNEL); + if (!binary) + return -ENOMEM; + get_random_bytes(binary, binary_len); + err = devlink_fmsg_binary_put(fmsg, binary, binary_len); + kfree(binary); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_start(fmsg, "test_nest"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_bool_pair_put(fmsg, "nested_test_bool", false); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "nested_test_u8", false); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_bool_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_bool_put(fmsg, true); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u8_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u8_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u32_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u32_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u64_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u64_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_array_of_objects"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_bool_pair_put(fmsg, + "in_array_nested_test_bool", + false); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, + "in_array_nested_test_u8", + i); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + } + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int +nsim_dev_dummy_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx; + int err; + + if (ctx) { + err = devlink_fmsg_string_pair_put(fmsg, "break_message", + ctx->break_msg); + if (err) + return err; + } + return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len); +} + +static int +nsim_dev_dummy_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + int err; + + if (health->recovered_break_msg) { + err = devlink_fmsg_string_pair_put(fmsg, + "recovered_break_message", + health->recovered_break_msg); + if (err) + return err; + } + return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len); +} + +static const +struct devlink_health_reporter_ops nsim_dev_dummy_reporter_ops = { + .name = "dummy", + .recover = nsim_dev_dummy_reporter_recover, + .dump = nsim_dev_dummy_reporter_dump, + .diagnose = nsim_dev_dummy_reporter_diagnose, +}; + +static ssize_t nsim_dev_health_break_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_health *health = file->private_data; + struct nsim_dev_dummy_reporter_ctx ctx; + char *break_msg; + int err; + + break_msg = kmalloc(count + 1, GFP_KERNEL); + if (!break_msg) + return -ENOMEM; + + if (copy_from_user(break_msg, data, count)) { + err = -EFAULT; + goto out; + } + break_msg[count] = '\0'; + if (break_msg[count - 1] == '\n') + break_msg[count - 1] = '\0'; + + ctx.break_msg = break_msg; + err = devlink_health_report(health->dummy_reporter, break_msg, &ctx); + if (err) + goto out; + +out: + kfree(break_msg); + return err ?: count; +} + +static const struct file_operations nsim_dev_health_break_fops = { + .open = simple_open, + .write = nsim_dev_health_break_write, + .llseek = generic_file_llseek, +}; + +int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) +{ + struct nsim_dev_health *health = &nsim_dev->health; + int err; + + health->empty_reporter = + devlink_health_reporter_create(devlink, + &nsim_dev_empty_reporter_ops, + 0, false, health); + if (IS_ERR(health->empty_reporter)) + return PTR_ERR(health->empty_reporter); + + health->dummy_reporter = + devlink_health_reporter_create(devlink, + &nsim_dev_dummy_reporter_ops, + 0, false, health); + if (IS_ERR(health->dummy_reporter)) { + err = PTR_ERR(health->dummy_reporter); + goto err_empty_reporter_destroy; + } + + health->ddir = debugfs_create_dir("health", nsim_dev->ddir); + if (IS_ERR_OR_NULL(health->ddir)) { + err = PTR_ERR_OR_ZERO(health->ddir) ?: -EINVAL; + goto err_dummy_reporter_destroy; + } + + health->recovered_break_msg = NULL; + debugfs_create_file("break_health", 0200, health->ddir, health, + &nsim_dev_health_break_fops); + health->binary_len = 16; + debugfs_create_u32("binary_len", 0600, health->ddir, + &health->binary_len); + health->fail_recover = false; + debugfs_create_bool("fail_recover", 0600, health->ddir, + &health->fail_recover); + return 0; + +err_dummy_reporter_destroy: + devlink_health_reporter_destroy(health->dummy_reporter); +err_empty_reporter_destroy: + devlink_health_reporter_destroy(health->empty_reporter); + return err; +} + +void nsim_dev_health_exit(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_health *health = &nsim_dev->health; + + debugfs_remove_recursive(health->ddir); + kfree(health->recovered_break_msg); + devlink_health_reporter_destroy(health->dummy_reporter); + devlink_health_reporter_destroy(health->empty_reporter); +} diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 55f57f76d01b..2908e0a0d6e1 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -290,6 +290,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) if (!dev) return ERR_PTR(-ENOMEM); + dev_net_set(dev, nsim_dev_net(nsim_dev)); ns = netdev_priv(dev); ns->netdev = dev; ns->nsim_dev = nsim_dev; @@ -357,18 +358,12 @@ static int __init nsim_module_init(void) if (err) goto err_dev_exit; - err = nsim_fib_init(); - if (err) - goto err_bus_exit; - err = rtnl_link_register(&nsim_link_ops); if (err) - goto err_fib_exit; + goto err_bus_exit; return 0; -err_fib_exit: - nsim_fib_exit(); err_bus_exit: nsim_bus_exit(); err_dev_exit: @@ -379,7 +374,6 @@ err_dev_exit: static void __exit nsim_module_exit(void) { rtnl_link_unregister(&nsim_link_ops); - nsim_fib_exit(); nsim_bus_exit(); nsim_dev_exit(); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 66bf13765ad0..94df795ef4d3 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -134,6 +134,18 @@ enum nsim_resource_id { NSIM_RESOURCE_IPV6_FIB_RULES, }; +struct nsim_dev_health { + struct devlink_health_reporter *empty_reporter; + struct devlink_health_reporter *dummy_reporter; + struct dentry *ddir; + char *recovered_break_msg; + u32 binary_len; + bool fail_recover; +}; + +int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink); +void nsim_dev_health_exit(struct nsim_dev *nsim_dev); + struct nsim_dev_port { struct list_head list; struct devlink_port devlink_port; @@ -161,9 +173,17 @@ struct nsim_dev { bool fw_update_status; u32 max_macs; bool test1; + bool dont_allow_reload; + bool fail_reload; struct devlink_region *dummy_region; + struct nsim_dev_health health; }; +static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev) +{ + return devlink_net(priv_to_devlink(nsim_dev)); +} + int nsim_dev_init(void); void nsim_dev_exit(void); int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev); @@ -173,11 +193,11 @@ int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_index); -int nsim_fib_init(void); -void nsim_fib_exit(void); -u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max); -int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, - struct netlink_ext_ack *extack); +struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, + struct netlink_ext_ack *extack); +void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *fib_data); +u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, bool max); #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) void nsim_ipsec_init(struct netdevsim *ns); @@ -215,6 +235,9 @@ struct nsim_bus_dev { struct device dev; struct list_head list; unsigned int port_count; + struct net *initial_net; /* Purpose of this is to carry net pointer + * during the probe time only. + */ unsigned int num_vfs; struct nsim_vf_config *vfconfigs; }; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 2aa7b2e60046..8e30db28fd7d 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -15,6 +15,15 @@ #include <linux/of_gpio.h> #include <linux/gpio/consumer.h> +#define AT803X_SPECIFIC_STATUS 0x11 +#define AT803X_SS_SPEED_MASK (3 << 14) +#define AT803X_SS_SPEED_1000 (2 << 14) +#define AT803X_SS_SPEED_100 (1 << 14) +#define AT803X_SS_SPEED_10 (0 << 14) +#define AT803X_SS_DUPLEX BIT(13) +#define AT803X_SS_SPEED_DUPLEX_RESOLVED BIT(11) +#define AT803X_SS_MDIX BIT(6) + #define AT803X_INTR_ENABLE 0x12 #define AT803X_INTR_ENABLE_AUTONEG_ERR BIT(15) #define AT803X_INTR_ENABLE_SPEED_CHANGED BIT(14) @@ -53,6 +62,7 @@ #define AT803X_DEBUG_REG_5 0x05 #define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8) +#define ATH9331_PHY_ID 0x004dd041 #define ATH8030_PHY_ID 0x004dd076 #define ATH8031_PHY_ID 0x004dd074 #define ATH8035_PHY_ID 0x004dd072 @@ -62,10 +72,6 @@ MODULE_DESCRIPTION("Atheros 803x PHY driver"); MODULE_AUTHOR("Matus Ujhelyi"); MODULE_LICENSE("GPL"); -struct at803x_priv { - bool phy_reset:1; -}; - struct at803x_context { u16 bmcr; u16 advertise; @@ -231,20 +237,6 @@ static int at803x_resume(struct phy_device *phydev) return phy_modify(phydev, MII_BMCR, BMCR_PDOWN | BMCR_ISOLATE, 0); } -static int at803x_probe(struct phy_device *phydev) -{ - struct device *dev = &phydev->mdio.dev; - struct at803x_priv *priv; - - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - phydev->priv = priv; - - return 0; -} - static int at803x_config_init(struct phy_device *phydev) { int ret; @@ -357,19 +349,77 @@ static int at803x_aneg_done(struct phy_device *phydev) return aneg_done; } +static int at803x_read_status(struct phy_device *phydev) +{ + int ss, err, old_link = phydev->link; + + /* Update the link, but return if there was an error */ + err = genphy_update_link(phydev); + if (err) + return err; + + /* why bother the PHY if nothing can have changed */ + if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) + return 0; + + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + + err = genphy_read_lpa(phydev); + if (err < 0) + return err; + + /* Read the AT8035 PHY-Specific Status register, which indicates the + * speed and duplex that the PHY is actually using, irrespective of + * whether we are in autoneg mode or not. + */ + ss = phy_read(phydev, AT803X_SPECIFIC_STATUS); + if (ss < 0) + return ss; + + if (ss & AT803X_SS_SPEED_DUPLEX_RESOLVED) { + switch (ss & AT803X_SS_SPEED_MASK) { + case AT803X_SS_SPEED_10: + phydev->speed = SPEED_10; + break; + case AT803X_SS_SPEED_100: + phydev->speed = SPEED_100; + break; + case AT803X_SS_SPEED_1000: + phydev->speed = SPEED_1000; + break; + } + if (ss & AT803X_SS_DUPLEX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + if (ss & AT803X_SS_MDIX) + phydev->mdix = ETH_TP_MDI_X; + else + phydev->mdix = ETH_TP_MDI; + } + + if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) + phy_resolve_aneg_pause(phydev); + + return 0; +} + static struct phy_driver at803x_driver[] = { { /* ATHEROS 8035 */ .phy_id = ATH8035_PHY_ID, .name = "Atheros 8035 ethernet", .phy_id_mask = AT803X_PHY_ID_MASK, - .probe = at803x_probe, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, .suspend = at803x_suspend, .resume = at803x_resume, /* PHY_GBIT_FEATURES */ + .read_status = at803x_read_status, .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, }, { @@ -377,7 +427,6 @@ static struct phy_driver at803x_driver[] = { .phy_id = ATH8030_PHY_ID, .name = "Atheros 8030 ethernet", .phy_id_mask = AT803X_PHY_ID_MASK, - .probe = at803x_probe, .config_init = at803x_config_init, .link_change_notify = at803x_link_change_notify, .set_wol = at803x_set_wol, @@ -392,16 +441,26 @@ static struct phy_driver at803x_driver[] = { .phy_id = ATH8031_PHY_ID, .name = "Atheros 8031 ethernet", .phy_id_mask = AT803X_PHY_ID_MASK, - .probe = at803x_probe, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, .suspend = at803x_suspend, .resume = at803x_resume, /* PHY_GBIT_FEATURES */ + .read_status = at803x_read_status, .aneg_done = at803x_aneg_done, .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, +}, { + /* ATHEROS AR9331 */ + PHY_ID_MATCH_EXACT(ATH9331_PHY_ID), + .name = "Atheros AR9331 built-in PHY", + .config_init = at803x_config_init, + .suspend = at803x_suspend, + .resume = at803x_resume, + /* PHY_BASIC_FEATURES */ + .ack_interrupt = &at803x_ack_interrupt, + .config_intr = &at803x_config_intr, } }; module_phy_driver(at803x_driver); @@ -410,6 +469,7 @@ static struct mdio_device_id __maybe_unused atheros_tbl[] = { { ATH8030_PHY_ID, AT803X_PHY_ID_MASK }, { ATH8031_PHY_ID, AT803X_PHY_ID_MASK }, { ATH8035_PHY_ID, AT803X_PHY_ID_MASK }, + { PHY_ID_MATCH_EXACT(ATH9331_PHY_ID) }, { } }; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 937d0059e8ac..4313c74b4fd8 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -26,18 +26,13 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); +static int bcm54xx_config_clock_delay(struct phy_device *phydev); + static int bcm54210e_config_init(struct phy_device *phydev) { int val; - val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); - val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; - val |= MII_BCM54XX_AUXCTL_MISC_WREN; - bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, val); - - val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL); - val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; - bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); + bcm54xx_config_clock_delay(phydev); if (phydev->dev_flags & PHY_BRCM_EN_MASTER_MODE) { val = phy_read(phydev, MII_CTRL1000); @@ -52,26 +47,7 @@ static int bcm54612e_config_init(struct phy_device *phydev) { int reg; - /* Clear TX internal delay unless requested. */ - if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) && - (phydev->interface != PHY_INTERFACE_MODE_RGMII_TXID)) { - /* Disable TXD to GTXCLK clock delay (default set) */ - /* Bit 9 is the only field in shadow register 00011 */ - bcm_phy_write_shadow(phydev, 0x03, 0); - } - - /* Clear RX internal delay unless requested. */ - if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) && - (phydev->interface != PHY_INTERFACE_MODE_RGMII_RXID)) { - reg = bcm54xx_auxctl_read(phydev, - MII_BCM54XX_AUXCTL_SHDWSEL_MISC); - /* Disable RXD to RXC delay (default set) */ - reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; - /* Clear shadow selector field */ - reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MASK; - bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, - MII_BCM54XX_AUXCTL_MISC_WREN | reg); - } + bcm54xx_config_clock_delay(phydev); /* Enable CLK125 MUX on LED4 if ref clock is enabled. */ if (!(phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED)) { diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index e282600bd83e..c1d345c3cab3 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -121,7 +121,7 @@ void mdio_device_reset(struct mdio_device *mdiodev, int value) return; if (mdiodev->reset_gpio) - gpiod_set_value(mdiodev->reset_gpio, value); + gpiod_set_value_cansleep(mdiodev->reset_gpio, value); if (mdiodev->reset_ctrl) { if (value) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 7ada1fd9ca71..805cda3465d7 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -895,7 +895,7 @@ static void vsc85xx_tr_write(struct phy_device *phydev, u16 addr, u32 val) static int vsc8531_pre_init_seq_set(struct phy_device *phydev) { int rc; - const struct reg_val init_seq[] = { + static const struct reg_val init_seq[] = { {0x0f90, 0x00688980}, {0x0696, 0x00000003}, {0x07fa, 0x0050100f}, @@ -939,7 +939,7 @@ out_unlock: static int vsc85xx_eee_init_seq_set(struct phy_device *phydev) { - const struct reg_val init_eee[] = { + static const struct reg_val init_eee[] = { {0x0f82, 0x0012b00a}, {0x1686, 0x00000004}, {0x168c, 0x00d2c46f}, @@ -1224,7 +1224,7 @@ out: /* bus->mdio_lock should be locked when using this function */ static int vsc8574_config_pre_init(struct phy_device *phydev) { - const struct reg_val pre_init1[] = { + static const struct reg_val pre_init1[] = { {0x0fae, 0x000401bd}, {0x0fac, 0x000f000f}, {0x17a0, 0x00a0f147}, @@ -1272,7 +1272,7 @@ static int vsc8574_config_pre_init(struct phy_device *phydev) {0x0fee, 0x0004a6a1}, {0x0ffe, 0x00b01807}, }; - const struct reg_val pre_init2[] = { + static const struct reg_val pre_init2[] = { {0x0486, 0x0008a518}, {0x0488, 0x006dc696}, {0x048a, 0x00000912}, @@ -1427,7 +1427,7 @@ out: /* bus->mdio_lock should be locked when using this function */ static int vsc8584_config_pre_init(struct phy_device *phydev) { - const struct reg_val pre_init1[] = { + static const struct reg_val pre_init1[] = { {0x07fa, 0x0050100f}, {0x1688, 0x00049f81}, {0x0f90, 0x00688980}, @@ -1451,7 +1451,7 @@ static int vsc8584_config_pre_init(struct phy_device *phydev) {0x16b2, 0x00007000}, {0x16b4, 0x00000814}, }; - const struct reg_val pre_init2[] = { + static const struct reg_val pre_init2[] = { {0x0486, 0x0008a518}, {0x0488, 0x006dc696}, {0x048a, 0x00000912}, @@ -1786,7 +1786,7 @@ static int vsc8514_config_pre_init(struct phy_device *phydev) * values to handle hardware performance of PHY. They * are set at Power-On state and remain until PHY Reset. */ - const struct reg_val pre_init1[] = { + static const struct reg_val pre_init1[] = { {0x0f90, 0x00688980}, {0x0786, 0x00000003}, {0x07fa, 0x0050100f}, diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 369903d9b6ec..9412669b579c 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -283,6 +283,18 @@ void of_set_phy_eee_broken(struct phy_device *phydev) phydev->eee_broken_modes = broken; } +void phy_resolve_aneg_pause(struct phy_device *phydev) +{ + if (phydev->duplex == DUPLEX_FULL) { + phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->lp_advertising); + phydev->asym_pause = linkmode_test_bit( + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->lp_advertising); + } +} +EXPORT_SYMBOL_GPL(phy_resolve_aneg_pause); + /** * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings * @phydev: The phy_device struct @@ -305,13 +317,7 @@ void phy_resolve_aneg_linkmode(struct phy_device *phydev) break; } - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->lp_advertising); - phydev->asym_pause = linkmode_test_bit( - ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->lp_advertising); - } + phy_resolve_aneg_pause(phydev); } EXPORT_SYMBOL_GPL(phy_resolve_aneg_linkmode); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 7c92afd36bbe..119e6f466056 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -457,6 +457,11 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) val); change_autoneg = true; break; + case MII_CTRL1000: + mii_ctrl1000_mod_linkmode_adv_t(phydev->advertising, + val); + change_autoneg = true; + break; default: /* do nothing */ break; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d347ddcac45b..9d2bbb13293e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1783,32 +1783,9 @@ done: } EXPORT_SYMBOL(genphy_update_link); -/** - * genphy_read_status - check the link status and update current link state - * @phydev: target phy_device struct - * - * Description: Check the link, then figure out the current state - * by comparing what we advertise with what the link partner - * advertises. Start by checking the gigabit possibilities, - * then move on to 10/100. - */ -int genphy_read_status(struct phy_device *phydev) +int genphy_read_lpa(struct phy_device *phydev) { - int lpa, lpagb, err, old_link = phydev->link; - - /* Update the link, but return if there was an error */ - err = genphy_update_link(phydev); - if (err) - return err; - - /* why bother the PHY if nothing can have changed */ - if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) - return 0; - - phydev->speed = SPEED_UNKNOWN; - phydev->duplex = DUPLEX_UNKNOWN; - phydev->pause = 0; - phydev->asym_pause = 0; + int lpa, lpagb; if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { if (phydev->is_gigabit_capable) { @@ -1838,6 +1815,44 @@ int genphy_read_status(struct phy_device *phydev) return lpa; mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); + } + + return 0; +} +EXPORT_SYMBOL(genphy_read_lpa); + +/** + * genphy_read_status - check the link status and update current link state + * @phydev: target phy_device struct + * + * Description: Check the link, then figure out the current state + * by comparing what we advertise with what the link partner + * advertises. Start by checking the gigabit possibilities, + * then move on to 10/100. + */ +int genphy_read_status(struct phy_device *phydev) +{ + int err, old_link = phydev->link; + + /* Update the link, but return if there was an error */ + err = genphy_update_link(phydev); + if (err) + return err; + + /* why bother the PHY if nothing can have changed */ + if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) + return 0; + + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + + err = genphy_read_lpa(phydev); + if (err < 0) + return err; + + if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { phy_resolve_aneg_linkmode(phydev); } else if (phydev->autoneg == AUTONEG_DISABLE) { int bmcr = phy_read(phydev, MII_BMCR); diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 734de7de03f7..e1fabb3e3246 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -238,7 +238,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); - nf_reset(skb); + nf_reset_ct(skb); skb->ip_summed = CHECKSUM_NONE; ip_select_ident(net, skb, NULL); @@ -358,7 +358,7 @@ static int pptp_rcv(struct sk_buff *skb) po = lookup_chan(htons(header->call_id), iph->saddr); if (po) { skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); return sk_receive_skb(sk_pppox(po), skb, 0); } drop: diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e8089def5a46..cb1d5fe60c31 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2066,7 +2066,8 @@ static int team_ethtool_get_link_ksettings(struct net_device *dev, cmd->base.duplex = DUPLEX_UNKNOWN; cmd->base.port = PORT_OTHER; - list_for_each_entry(port, &team->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &team->port_list, list) { if (team_port_txable(port)) { if (port->state.speed != SPEED_UNKNOWN) speed += port->state.speed; @@ -2075,6 +2076,8 @@ static int team_ethtool_get_link_ksettings(struct net_device *dev, cmd->base.duplex = port->state.duplex; } } + rcu_read_unlock(); + cmd->base.speed = speed ? : SPEED_UNKNOWN; return 0; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index aab0be40d443..0413d182d782 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1104,7 +1104,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) */ skb_orphan(skb); - nf_reset(skb); + nf_reset_ct(skb); if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop; @@ -2290,7 +2290,13 @@ static void tun_free_netdev(struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); BUG_ON(!(list_empty(&tun->disabled))); + free_percpu(tun->pcpu_stats); + /* We clear pcpu_stats so that tun_set_iff() can tell if + * tun_free_netdev() has been called from register_netdevice(). + */ + tun->pcpu_stats = NULL; + tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); __tun_set_ebpf(tun, &tun->steering_prog, NULL); @@ -2782,9 +2788,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!dev) return -ENOMEM; - err = dev_get_valid_name(net, dev, name); - if (err < 0) - goto err_free_dev; dev_net_set(dev, net); dev->rtnl_link_ops = &tun_link_ops; @@ -2859,8 +2862,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err_detach: tun_detach_all(dev); - /* register_netdevice() already called tun_free_netdev() */ - goto err_free_dev; + /* We are here because register_netdevice() has failed. + * If register_netdevice() already called tun_free_netdev() + * while dealing with the error, tun->pcpu_stats has been cleared. + */ + if (!tun->pcpu_stats) + goto err_free_dev; err_free_flow: tun_flow_uninit(tun); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 5a587663e7dc..c5a6e75c24e3 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1235,6 +1235,9 @@ static void ax88179_get_mac_addr(struct usbnet *dev) netdev_info(dev->net, "invalid MAC address, using random\n"); eth_hw_addr_random(dev->net); } + + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, + dev->net->dev_addr); } static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index ce78714f536f..a505b2ab88b8 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2620,14 +2620,18 @@ static struct hso_device *hso_create_bulk_serial_device( */ if (serial->tiocmget) { tiocmget = serial->tiocmget; + tiocmget->endp = hso_get_ep(interface, + USB_ENDPOINT_XFER_INT, + USB_DIR_IN); + if (!tiocmget->endp) { + dev_err(&interface->dev, "Failed to find INT IN ep\n"); + goto exit; + } + tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL); if (tiocmget->urb) { mutex_init(&tiocmget->mutex); init_waitqueue_head(&tiocmget->waitq); - tiocmget->endp = hso_get_ep( - interface, - USB_ENDPOINT_XFER_INT, - USB_DIR_IN); } else hso_free_tiomget(serial); } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b6dc5d714b5e..3d77cd402ba9 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1350,6 +1350,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ + {QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */ {QMI_FIXED_INTF(0x413c, 0x81a2, 8)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a3, 8)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 30a6df73a955..54a83f734ede 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -736,16 +736,16 @@ struct r8152 { struct tasklet_struct tx_tl; struct rtl_ops { - void (*init)(struct r8152 *); - int (*enable)(struct r8152 *); - void (*disable)(struct r8152 *); - void (*up)(struct r8152 *); - void (*down)(struct r8152 *); - void (*unload)(struct r8152 *); - int (*eee_get)(struct r8152 *, struct ethtool_eee *); - int (*eee_set)(struct r8152 *, struct ethtool_eee *); - bool (*in_nway)(struct r8152 *); - void (*hw_phy_cfg)(struct r8152 *); + void (*init)(struct r8152 *tp); + int (*enable)(struct r8152 *tp); + void (*disable)(struct r8152 *tp); + void (*up)(struct r8152 *tp); + void (*down)(struct r8152 *tp); + void (*unload)(struct r8152 *tp); + int (*eee_get)(struct r8152 *tp, struct ethtool_eee *eee); + int (*eee_set)(struct r8152 *tp, struct ethtool_eee *eee); + bool (*in_nway)(struct r8152 *tp); + void (*hw_phy_cfg)(struct r8152 *tp); void (*autosuspend_en)(struct r8152 *tp, bool enable); } rtl_ops; @@ -1688,7 +1688,7 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp) } /* r8152_csum_workaround() - * The hw limites the value the transport offset. When the offset is out of the + * The hw limits the value of the transport offset. When the offset is out of * range, calculate the checksum by sw. */ static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb, @@ -2178,6 +2178,7 @@ static void tx_bottom(struct r8152 *tp) int res; do { + struct net_device *netdev = tp->netdev; struct tx_agg *agg; if (skb_queue_empty(&tp->tx_queue)) @@ -2188,24 +2189,23 @@ static void tx_bottom(struct r8152 *tp) break; res = r8152_tx_agg_fill(tp, agg); - if (res) { - struct net_device *netdev = tp->netdev; + if (!res) + continue; - if (res == -ENODEV) { - rtl_set_unplug(tp); - netif_device_detach(netdev); - } else { - struct net_device_stats *stats = &netdev->stats; - unsigned long flags; + if (res == -ENODEV) { + rtl_set_unplug(tp); + netif_device_detach(netdev); + } else { + struct net_device_stats *stats = &netdev->stats; + unsigned long flags; - netif_warn(tp, tx_err, netdev, - "failed tx_urb %d\n", res); - stats->tx_dropped += agg->skb_num; + netif_warn(tp, tx_err, netdev, + "failed tx_urb %d\n", res); + stats->tx_dropped += agg->skb_num; - spin_lock_irqsave(&tp->tx_lock, flags); - list_add_tail(&agg->list, &tp->tx_free); - spin_unlock_irqrestore(&tp->tx_lock, flags); - } + spin_lock_irqsave(&tp->tx_lock, flags); + list_add_tail(&agg->list, &tp->tx_free); + spin_unlock_irqrestore(&tp->tx_lock, flags); } } while (res == 0); } @@ -4768,10 +4768,9 @@ static int rtl8152_reset_resume(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); clear_bit(SELECTIVE_SUSPEND, &tp->flags); - mutex_lock(&tp->control); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - mutex_unlock(&tp->control); + set_ethernet_addr(tp); return rtl8152_resume(intf); } @@ -5603,7 +5602,8 @@ static int rtl8152_probe(struct usb_interface *intf, } if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial && - (!strcmp(udev->serial, "000001000000") || !strcmp(udev->serial, "000002000000"))) { + (!strcmp(udev->serial, "000001000000") || + !strcmp(udev->serial, "000002000000"))) { dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation"); set_bit(DELL_TB_RX_AGG_BUG, &tp->flags); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ba98e0971b84..5a635f028bdc 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1585,7 +1585,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Don't wait up for transmitted skbs to be freed. */ if (!use_napi) { skb_orphan(skb); - nf_reset(skb); + nf_reset_ct(skb); } /* If running out of space, stop queue to avoid getting packets that we diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index a4b38a980c3c..ee52bde058df 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -366,7 +366,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, struct neighbour *neigh; int ret; - nf_reset(skb); + nf_reset_ct(skb); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -459,7 +459,7 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); + nf_reset_ct(skb); else skb = NULL; @@ -560,7 +560,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s bool is_v6gw = false; int ret = -EINVAL; - nf_reset(skb); + nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -670,7 +670,7 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); + nf_reset_ct(skb); else skb = NULL; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index a82ad739ab80..791f6633667c 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1674,7 +1674,7 @@ static int ath9k_htc_ampdu_action(struct ieee80211_hw *hw, case IEEE80211_AMPDU_TX_START: ret = ath9k_htc_tx_aggr_oper(priv, vif, sta, action, tid); if (!ret) - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 34121fbf32e3..0548aa3702e3 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1921,7 +1921,7 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw, ath9k_ps_wakeup(sc); ret = ath_tx_aggr_start(sc, sta, tid, ssn); if (!ret) - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; ath9k_ps_restore(sc); break; case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 40a8054f8aa6..5914926a5c5b 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1449,8 +1449,7 @@ static int carl9170_op_ampdu_action(struct ieee80211_hw *hw, rcu_assign_pointer(sta_info->agg[tid], tid_info); spin_unlock_bh(&ar->tx_ampdu_list_lock); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 79998a3ddb7a..a276dae30887 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1084,6 +1084,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, enum ieee80211_ampdu_mlme_action action = params->action; u16 tid = params->tid; u16 *ssn = ¶ms->ssn; + int ret = 0; wcn36xx_dbg(WCN36XX_DBG_MAC, "mac ampdu action action %d tid %d\n", action, tid); @@ -1106,7 +1107,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, sta_priv->ampdu_state[tid] = WCN36XX_AMPDU_START; spin_unlock_bh(&sta_priv->ampdu_lock); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_OPERATIONAL: spin_lock_bh(&sta_priv->ampdu_lock); @@ -1131,7 +1132,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, mutex_unlock(&wcn->conf_mutex); - return 0; + return ret; } static const struct ieee80211_ops wcn36xx_ops = { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 6188275b17e5..8e8b685cfe09 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -850,8 +850,7 @@ brcms_ops_ampdu_action(struct ieee80211_hw *hw, "START: tid %d is not agg\'able\n", tid); return -EINVAL; } - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index ffb705b18fb1..51fdd7ce30af 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -2265,7 +2265,7 @@ il4965_tx_agg_start(struct il_priv *il, struct ieee80211_vif *vif, if (tid_data->tfds_in_queue == 0) { D_HT("HW queue is empty\n"); tid_data->agg.state = IL_AGG_ON; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; } else { D_HT("HW queue is NOT empty: %d packets in HW queue\n", tid_data->tfds_in_queue); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c index 3029e3f6de63..cd73fc5cfcbb 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c @@ -621,7 +621,7 @@ int iwlagn_tx_agg_start(struct iwl_priv *priv, struct ieee80211_vif *vif, IWL_DEBUG_TX_QUEUES(priv, "Can proceed: ssn = next_recl = %d\n", tid_data->agg.ssn); tid_data->agg.state = IWL_AGG_STARTING; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; } else { IWL_DEBUG_TX_QUEUES(priv, "Can't proceed: ssn %d, " "next_reclaimed = %d\n", diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 0bedba4c61f2..1d6bc62b104c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2818,13 +2818,12 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (normalized_ssn == tid_data->next_reclaimed) { tid_data->state = IWL_AGG_STARTING; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; } else { tid_data->state = IWL_EMPTYING_HW_QUEUE_ADDBA; + ret = 0; } - ret = 0; - out: spin_unlock_bh(&mvmsta->lock); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 635956024e88..3888ad0f797b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -148,23 +148,25 @@ static const char *hwsim_alpha2s[] = { }; static const struct ieee80211_regdomain hwsim_world_regdom_custom_01 = { - .n_reg_rules = 4, + .n_reg_rules = 5, .alpha2 = "99", .reg_rules = { REG_RULE(2412-10, 2462+10, 40, 0, 20, 0), REG_RULE(2484-10, 2484+10, 40, 0, 20, 0), REG_RULE(5150-10, 5240+10, 40, 0, 30, 0), REG_RULE(5745-10, 5825+10, 40, 0, 30, 0), + REG_RULE(5855-10, 5925+10, 40, 0, 33, 0), } }; static const struct ieee80211_regdomain hwsim_world_regdom_custom_02 = { - .n_reg_rules = 2, + .n_reg_rules = 3, .alpha2 = "99", .reg_rules = { REG_RULE(2412-10, 2462+10, 40, 0, 20, 0), REG_RULE(5725-10, 5850+10, 40, 0, 30, NL80211_RRF_NO_IR), + REG_RULE(5855-10, 5925+10, 40, 0, 33, 0), } }; @@ -354,6 +356,24 @@ static const struct ieee80211_channel hwsim_channels_5ghz[] = { CHAN5G(5805), /* Channel 161 */ CHAN5G(5825), /* Channel 165 */ CHAN5G(5845), /* Channel 169 */ + + CHAN5G(5855), /* Channel 171 */ + CHAN5G(5860), /* Channel 172 */ + CHAN5G(5865), /* Channel 173 */ + CHAN5G(5870), /* Channel 174 */ + + CHAN5G(5875), /* Channel 175 */ + CHAN5G(5880), /* Channel 176 */ + CHAN5G(5885), /* Channel 177 */ + CHAN5G(5890), /* Channel 178 */ + CHAN5G(5895), /* Channel 179 */ + CHAN5G(5900), /* Channel 180 */ + CHAN5G(5905), /* Channel 181 */ + + CHAN5G(5910), /* Channel 182 */ + CHAN5G(5915), /* Channel 183 */ + CHAN5G(5920), /* Channel 184 */ + CHAN5G(5925), /* Channel 185 */ }; static const struct ieee80211_rate hwsim_rates[] = { @@ -1261,8 +1281,8 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw, skb_orphan(skb); skb_dst_drop(skb); skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); /* * Get absolute mactime here so all HWs RX at the "same time", and @@ -1550,7 +1570,8 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, if (vif->type != NL80211_IFTYPE_AP && vif->type != NL80211_IFTYPE_MESH_POINT && - vif->type != NL80211_IFTYPE_ADHOC) + vif->type != NL80211_IFTYPE_ADHOC && + vif->type != NL80211_IFTYPE_OCB) return; skb = ieee80211_beacon_get(hw, vif); @@ -1604,6 +1625,8 @@ mac80211_hwsim_beacon(struct hrtimer *timer) } static const char * const hwsim_chanwidths[] = { + [NL80211_CHAN_WIDTH_5] = "ht5", + [NL80211_CHAN_WIDTH_10] = "ht10", [NL80211_CHAN_WIDTH_20_NOHT] = "noht", [NL80211_CHAN_WIDTH_20] = "ht20", [NL80211_CHAN_WIDTH_40] = "ht40", @@ -1979,8 +2002,7 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw, switch (action) { case IEEE80211_AMPDU_TX_START: - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: @@ -2723,7 +2745,8 @@ static void mac80211_hwsim_he_capab(struct ieee80211_supported_band *sband) BIT(NL80211_IFTYPE_P2P_CLIENT) | \ BIT(NL80211_IFTYPE_P2P_GO) | \ BIT(NL80211_IFTYPE_ADHOC) | \ - BIT(NL80211_IFTYPE_MESH_POINT)) + BIT(NL80211_IFTYPE_MESH_POINT) | \ + BIT(NL80211_IFTYPE_OCB)) static int mac80211_hwsim_new_radio(struct genl_info *info, struct hwsim_new_radio_params *param) @@ -2847,6 +2870,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, } else { data->if_combination.num_different_channels = 1; data->if_combination.radar_detect_widths = + BIT(NL80211_CHAN_WIDTH_5) | + BIT(NL80211_CHAN_WIDTH_10) | BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index c4db6417748f..d55f229abeea 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -5520,7 +5520,7 @@ mwl8k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, rc = -EBUSY; break; } - ieee80211_start_tx_ba_cb_irqsafe(vif, addr, tid); + rc = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 25d5b1608bc9..4b3217b43a04 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -582,8 +582,7 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 87c748715b5d..b6d78212306a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -477,8 +477,7 @@ mt7615_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; mt7615_mcu_set_tx_ba(dev, params, 0); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index aec73a0295e8..414b22399d93 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -393,8 +393,7 @@ int mt76x02_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index 72e608cc53af..671d8897ae76 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -372,8 +372,7 @@ mt76_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_START: msta->agg_ssn[tid] = ssn << 4; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index f1cdcd61c54a..25466454b73e 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -10476,7 +10476,7 @@ int rt2800_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, * when the hw reorders frames due to aggregation. */ if (sta_priv->wcid > WCID_END) - return 1; + return -ENOSPC; switch (action) { case IEEE80211_AMPDU_RX_START: @@ -10489,7 +10489,7 @@ int rt2800_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, */ break; case IEEE80211_AMPDU_TX_START: - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index ac746c322554..c75192c4447f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1776,8 +1776,7 @@ int rtl_tx_agg_start(struct ieee80211_hw *hw, struct ieee80211_vif *vif, tid_data->agg.agg_state = RTL_AGG_START; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - return 0; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; } int rtl_tx_agg_stop(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index e5e3605bb693..a203b4705b94 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -437,8 +437,7 @@ static int rtw_ops_ampdu_action(struct ieee80211_hw *hw, switch (params->action) { case IEEE80211_AMPDU_TX_START: - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index ce5e92d82efc..440088293aff 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1140,8 +1140,7 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, else if ((vif->type == NL80211_IFTYPE_AP) || (vif->type == NL80211_IFTYPE_P2P_GO)) rsta->seq_start[tid] = seq_no; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - status = 0; + status = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_STOP_CONT: diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e14ec75b61d6..482c6c8b0fb7 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -887,9 +887,9 @@ static int xennet_set_skb_gso(struct sk_buff *skb, return 0; } -static RING_IDX xennet_fill_frags(struct netfront_queue *queue, - struct sk_buff *skb, - struct sk_buff_head *list) +static int xennet_fill_frags(struct netfront_queue *queue, + struct sk_buff *skb, + struct sk_buff_head *list) { RING_IDX cons = queue->rx.rsp_cons; struct sk_buff *nskb; @@ -908,7 +908,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { queue->rx.rsp_cons = ++cons + skb_queue_len(list); kfree_skb(nskb); - return ~0U; + return -ENOENT; } skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, @@ -919,7 +919,9 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, kfree_skb(nskb); } - return cons; + queue->rx.rsp_cons = cons; + + return 0; } static int checksum_setup(struct net_device *dev, struct sk_buff *skb) @@ -1045,8 +1047,7 @@ err: skb->data_len = rx->status; skb->len += rx->status; - i = xennet_fill_frags(queue, skb, &tmpq); - if (unlikely(i == ~0U)) + if (unlikely(xennet_fill_frags(queue, skb, &tmpq))) goto err; if (rx->flags & XEN_NETRXF_csum_blank) @@ -1056,7 +1057,7 @@ err: __skb_queue_tail(&rxq, skb); - queue->rx.rsp_cons = ++i; + i = ++queue->rx.rsp_cons; work_done++; } diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 0f22379887ca..18cd96284b77 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -278,7 +278,6 @@ static struct i2c_driver nfcmrvl_i2c_driver = { .remove = nfcmrvl_i2c_remove, .driver = { .name = "nfcmrvl_i2c", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(of_nfcmrvl_i2c_match), }, }; diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c index e4f7fa00862d..b4eb926d220a 100644 --- a/drivers/nfc/s3fwrn5/i2c.c +++ b/drivers/nfc/s3fwrn5/i2c.c @@ -279,7 +279,6 @@ MODULE_DEVICE_TABLE(of, of_s3fwrn5_i2c_match); static struct i2c_driver s3fwrn5_i2c_driver = { .driver = { - .owner = THIS_MODULE, .name = S3FWRN5_I2C_DRIVER_NAME, .of_match_table = of_match_ptr(of_s3fwrn5_i2c_match), }, diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index a8d56887ec88..3e9f45aec8d1 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -392,9 +392,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; if (++(arena->freelist[lane].seq) == 4) arena->freelist[lane].seq = 1; - if (ent_e_flag(ent->old_map)) + if (ent_e_flag(le32_to_cpu(ent->old_map))) arena->freelist[lane].has_err = 1; - arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map)); + arena->freelist[lane].block = ent_lba(le32_to_cpu(ent->old_map)); return ret; } @@ -560,8 +560,8 @@ static int btt_freelist_init(struct arena_info *arena) * FIXME: if error clearing fails during init, we want to make * the BTT read-only */ - if (ent_e_flag(log_new.old_map) && - !ent_normal(log_new.old_map)) { + if (ent_e_flag(le32_to_cpu(log_new.old_map)) && + !ent_normal(le32_to_cpu(log_new.old_map))) { arena->freelist[i].has_err = 1; ret = arena_clear_freelist_error(arena, i); if (ret) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 75a58a6e9615..d47412dcdf38 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -180,7 +180,7 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) sector_t sector; /* make sure device is a region */ - if (!is_nd_pmem(dev)) + if (!is_memory(dev)) return 0; nd_region = to_nd_region(dev); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 43401325c874..cca0a3ba1d2c 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1987,7 +1987,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region, nd_mapping = &nd_region->mapping[i]; label_ent = list_first_entry_or_null(&nd_mapping->labels, typeof(*label_ent), list); - label0 = label_ent ? label_ent->label : 0; + label0 = label_ent ? label_ent->label : NULL; if (!label0) { WARN_ON(1); @@ -2322,8 +2322,9 @@ static struct device **scan_labels(struct nd_region *nd_region) continue; /* skip labels that describe extents outside of the region */ - if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end) - continue; + if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start || + __le64_to_cpu(nd_label->dpa) > map_end) + continue; i = add_namespace_resource(nd_region, nd_label, devs, count); if (i < 0) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index e89af4b2d8e9..ee5c04070ef9 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -289,11 +289,7 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) struct nd_pfn *to_nd_pfn(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_PFN) -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE -#else -#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE -#endif +#define MAX_NVDIMM_ALIGN 4 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index bb9cc5cf0873..60d81fae06ee 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -103,39 +103,42 @@ static ssize_t align_show(struct device *dev, return sprintf(buf, "%ld\n", nd_pfn->align); } -static const unsigned long *nd_pfn_supported_alignments(void) +static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments) { - /* - * This needs to be a non-static variable because the *_SIZE - * macros aren't always constants. - */ - const unsigned long supported_alignments[] = { - PAGE_SIZE, -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - HPAGE_PMD_SIZE, -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD - HPAGE_PUD_SIZE, -#endif -#endif - 0, - }; - static unsigned long data[ARRAY_SIZE(supported_alignments)]; - memcpy(data, supported_alignments, sizeof(data)); + alignments[0] = PAGE_SIZE; + + if (has_transparent_hugepage()) { + alignments[1] = HPAGE_PMD_SIZE; + if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) + alignments[2] = HPAGE_PUD_SIZE; + } + + return alignments; +} + +/* + * Use pmd mapping if supported as default alignment + */ +static unsigned long nd_pfn_default_alignment(void) +{ - return data; + if (has_transparent_hugepage()) + return HPAGE_PMD_SIZE; + return PAGE_SIZE; } static ssize_t align_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); + unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; ssize_t rc; nd_device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_pfn->align, - nd_pfn_supported_alignments()); + nd_pfn_supported_alignments(aligns)); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); @@ -259,7 +262,10 @@ static DEVICE_ATTR_RO(size); static ssize_t supported_alignments_show(struct device *dev, struct device_attribute *attr, char *buf) { - return nd_size_select_show(0, nd_pfn_supported_alignments(), buf); + unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; + + return nd_size_select_show(0, + nd_pfn_supported_alignments(aligns), buf); } static DEVICE_ATTR_RO(supported_alignments); @@ -302,7 +308,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, return NULL; nd_pfn->mode = PFN_MODE_NONE; - nd_pfn->align = PFN_DEFAULT_ALIGNMENT; + nd_pfn->align = nd_pfn_default_alignment(); dev = &nd_pfn->dev; device_initialize(&nd_pfn->dev); if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { @@ -412,6 +418,21 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) return 0; } +static bool nd_supported_alignment(unsigned long align) +{ + int i; + unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, }; + + if (align == 0) + return false; + + nd_pfn_supported_alignments(supported); + for (i = 0; supported[i]; i++) + if (align == supported[i]) + return true; + return false; +} + /** * nd_pfn_validate - read and validate info-block * @nd_pfn: fsdax namespace runtime state / properties @@ -496,6 +517,18 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EOPNOTSUPP; } + /* + * Check whether the we support the alignment. For Dax if the + * superblock alignment is not matching, we won't initialize + * the device. + */ + if (!nd_supported_alignment(align) && + !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) { + dev_err(&nd_pfn->dev, "init failed, alignment mismatch: " + "%ld:%ld\n", nd_pfn->align, align); + return -EOPNOTSUPP; + } + if (!nd_pfn->uuid) { /* * When probing a namepace via nd_pfn_probe() the uuid @@ -639,9 +672,11 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; + resource_size_t end = nsio->res.end - end_trunc; struct vmem_altmap __altmap = { .base_pfn = init_altmap_base(base), .reserve = init_altmap_reserve(base), + .end_pfn = PHYS_PFN(end), }; memcpy(res, &nsio->res, sizeof(*res)); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 37bf8719a2a4..0f6978e72e7c 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -34,7 +34,7 @@ static int nd_region_probe(struct device *dev) if (rc) return rc; - if (is_nd_pmem(&nd_region->dev)) { + if (is_memory(&nd_region->dev)) { struct resource ndr_res; if (devm_init_badblocks(dev, &nd_region->bb)) @@ -123,7 +123,7 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event) struct nd_region *nd_region = to_nd_region(dev); struct resource res; - if (is_nd_pmem(&nd_region->dev)) { + if (is_memory(&nd_region->dev)) { res.start = nd_region->ndr_start; res.end = nd_region->ndr_start + nd_region->ndr_size - 1; diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 3fd6b59abd33..ef423ba1a711 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -632,11 +632,11 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) if (!is_memory(dev) && a == &dev_attr_dax_seed.attr) return 0; - if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) + if (!is_memory(dev) && a == &dev_attr_badblocks.attr) return 0; if (a == &dev_attr_resource.attr) { - if (is_nd_pmem(dev)) + if (is_memory(dev)) return 0400; else return 0; @@ -1168,6 +1168,9 @@ EXPORT_SYMBOL_GPL(nvdimm_has_cache); bool is_nvdimm_sync(struct nd_region *nd_region) { + if (is_nd_volatile(&nd_region->dev)) + return true; + return is_nd_pmem(&nd_region->dev) && !test_bit(ND_REGION_ASYNC, &nd_region->flags); } diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 9e45b207ff01..89b85970912d 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -177,6 +177,10 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) || !nvdimm->sec.flags) return -EIO; + /* No need to go further if security is disabled */ + if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) + return 0; + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { dev_dbg(dev, "Security operation in progress.\n"); return -EBUSY; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 108f60b46804..fd7dea36c3b6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) */ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; - revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); + /* + * Revalidate after unblocking dispatchers that may be holding bd_butex + */ + revalidate_disk(ns->disk); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -847,7 +850,7 @@ out: static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u32 *result, unsigned timeout) + u32 meta_seed, u64 *result, unsigned timeout) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -888,7 +891,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, else ret = nvme_req(req)->status; if (result) - *result = le32_to_cpu(nvme_req(req)->result.u32); + *result = le64_to_cpu(nvme_req(req)->result.u64); if (meta && !ret && !write) { if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; @@ -1335,6 +1338,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_command c; unsigned timeout = 0; u32 effects; + u64 result; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + if (cmd.flags) + return -EINVAL; + + memset(&c, 0, sizeof(c)); + c.common.opcode = cmd.opcode; + c.common.flags = cmd.flags; + c.common.nsid = cpu_to_le32(cmd.nsid); + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); + c.common.cdw10 = cpu_to_le32(cmd.cdw10); + c.common.cdw11 = cpu_to_le32(cmd.cdw11); + c.common.cdw12 = cpu_to_le32(cmd.cdw12); + c.common.cdw13 = cpu_to_le32(cmd.cdw13); + c.common.cdw14 = cpu_to_le32(cmd.cdw14); + c.common.cdw15 = cpu_to_le32(cmd.cdw15); + + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); + + effects = nvme_passthru_start(ctrl, ns, cmd.opcode); + status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, + (void __user *)(uintptr_t)cmd.addr, cmd.data_len, + (void __user *)(uintptr_t)cmd.metadata, + cmd.metadata_len, 0, &result, timeout); + nvme_passthru_end(ctrl, effects); + + if (status >= 0) { + if (put_user(result, &ucmd->result)) + return -EFAULT; + } + + return status; +} + +static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd64 __user *ucmd) +{ + struct nvme_passthru_cmd64 cmd; + struct nvme_command c; + unsigned timeout = 0; + u32 effects; int status; if (!capable(CAP_SYS_ADMIN)) @@ -1405,6 +1456,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) srcu_read_unlock(&head->srcu, idx); } +static bool is_ctrl_ioctl(unsigned int cmd) +{ + if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) + return true; + if (is_sed_ioctl(cmd)) + return true; + return false; +} + +static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, + void __user *argp, + struct nvme_ns_head *head, + int srcu_idx) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + int ret; + + nvme_get_ctrl(ns->ctrl); + nvme_put_ns_from_disk(head, srcu_idx); + + switch (cmd) { + case NVME_IOCTL_ADMIN_CMD: + ret = nvme_user_cmd(ctrl, NULL, argp); + break; + case NVME_IOCTL_ADMIN64_CMD: + ret = nvme_user_cmd64(ctrl, NULL, argp); + break; + default: + ret = sed_ioctl(ctrl->opal_dev, cmd, argp); + break; + } + nvme_put_ctrl(ctrl); + return ret; +} + static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1422,20 +1508,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, * seperately and drop the ns SRCU reference early. This avoids a * deadlock when deleting namespaces using the passthrough interface. */ - if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) { - struct nvme_ctrl *ctrl = ns->ctrl; - - nvme_get_ctrl(ns->ctrl); - nvme_put_ns_from_disk(head, srcu_idx); - - if (cmd == NVME_IOCTL_ADMIN_CMD) - ret = nvme_user_cmd(ctrl, NULL, argp); - else - ret = sed_ioctl(ctrl->opal_dev, cmd, argp); - - nvme_put_ctrl(ctrl); - return ret; - } + if (is_ctrl_ioctl(cmd)) + return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); switch (cmd) { case NVME_IOCTL_ID: @@ -1448,6 +1522,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, case NVME_IOCTL_SUBMIT_IO: ret = nvme_submit_io(ns, argp); break; + case NVME_IOCTL_IO64_CMD: + ret = nvme_user_cmd64(ns->ctrl, ns, argp); + break; default: if (ns->ndev) ret = nvme_nvm_ioctl(ns, cmd, arg); @@ -2289,6 +2366,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x14a4, .fr = "22301111", .quirks = NVME_QUIRK_SIMPLE_SUSPEND, + }, + { + /* + * This Kingston E8FK11.T firmware version has no interrupt + * after resume with actions related to suspend to idle + * https://bugzilla.kernel.org/show_bug.cgi?id=204887 + */ + .vid = 0x2646, + .fr = "E8FK11.T", + .quirks = NVME_QUIRK_SIMPLE_SUSPEND, } }; @@ -2540,8 +2627,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) list_add_tail(&subsys->entry, &nvme_subsystems); } - if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, - dev_name(ctrl->device))) { + ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, + dev_name(ctrl->device)); + if (ret) { dev_err(ctrl->device, "failed to create sysfs link from subsystem.\n"); goto out_put_subsystem; @@ -2838,6 +2926,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); + case NVME_IOCTL_ADMIN64_CMD: + return nvme_user_cmd64(ctrl, NULL, argp); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: @@ -3045,6 +3135,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); nvme_show_int_function(cntlid); nvme_show_int_function(numa_node); +nvme_show_int_function(queue_count); +nvme_show_int_function(sqsize); static ssize_t nvme_sysfs_delete(struct device *dev, struct device_attribute *attr, const char *buf, @@ -3125,6 +3217,8 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_address.attr, &dev_attr_state.attr, &dev_attr_numa_node.attr, + &dev_attr_queue_count.attr, + &dev_attr_sqsize.attr, NULL }; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b5013c101b35..38a83ef5bcd3 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -221,6 +221,7 @@ struct nvme_ctrl { u16 oacs; u16 nssa; u16 nr_streams; + u16 sqsize; u32 max_namespaces; atomic_t abort_limit; u8 vwc; @@ -269,7 +270,6 @@ struct nvme_ctrl { u16 hmmaxd; /* Fabrics only */ - u16 sqsize; u32 ioccsz; u32 iorcsz; u16 icdoff; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c0808f9eb8ab..bb88681f4dc3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2946,11 +2946,21 @@ static int nvme_suspend(struct device *dev) if (ret < 0) goto unfreeze; + /* + * A saved state prevents pci pm from generically controlling the + * device's power. If we're using protocol specific settings, we don't + * want pci interfering. + */ + pci_save_state(pdev); + ret = nvme_set_power_state(ctrl, ctrl->npss); if (ret < 0) goto unfreeze; if (ret) { + /* discard the saved state */ + pci_load_saved_state(pdev, NULL); + /* * Clearing npss forces a controller reset on resume. The * correct value will be resdicovered then. @@ -2958,14 +2968,7 @@ static int nvme_suspend(struct device *dev) nvme_dev_disable(ndev, true); ctrl->npss = 0; ret = 0; - goto unfreeze; } - /* - * A saved state prevents pci pm from generically controlling the - * device's power. If we're using protocol specific settings, we don't - * want pci interfering. - */ - pci_save_state(pdev); unfreeze: nvme_unfreeze(ctrl); return ret; @@ -3090,6 +3093,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dfa07bb9dfeb..4d280160dd3f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -427,7 +427,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) { return min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ibdev->attrs.max_fast_reg_page_list_len); + ibdev->attrs.max_fast_reg_page_list_len - 1); } static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) @@ -437,7 +437,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) const int cq_factor = send_wr_factor + 1; /* + RECV */ int comp_vector, idx = nvme_rdma_queue_idx(queue); enum ib_poll_context poll_ctx; - int ret; + int ret, pages_per_mr; queue->device = nvme_rdma_find_get_device(queue->cm_id); if (!queue->device) { @@ -479,10 +479,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_qp; } + /* + * Currently we don't use SG_GAPS MR's so if the first entry is + * misaligned we'll end up using two entries for a single data page, + * so one additional entry is required. + */ + pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1; ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, queue->queue_size, IB_MR_TYPE_MEM_REG, - nvme_rdma_get_max_fr_pages(ibdev), 0); + pages_per_mr, 0); if (ret) { dev_err(queue->ctrl->ctrl.device, "failed to initialize MR pool sized %d for QID %d\n", @@ -614,7 +620,8 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) if (!ret) { set_bit(NVME_RDMA_Q_LIVE, &queue->flags); } else { - __nvme_rdma_stop_queue(queue); + if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) + __nvme_rdma_stop_queue(queue); dev_info(ctrl->ctrl.device, "failed to connect queue: %d ret=%d\n", idx, ret); } @@ -820,8 +827,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) goto out_stop_queue; - ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); + ctrl->ctrl.max_segments = ctrl->max_fr_pages; + ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4ffd5957637a..385a5212c10f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1042,7 +1042,7 @@ static void nvme_tcp_io_work(struct work_struct *w) { struct nvme_tcp_queue *queue = container_of(w, struct nvme_tcp_queue, io_work); - unsigned long start = jiffies + msecs_to_jiffies(1); + unsigned long deadline = jiffies + msecs_to_jiffies(1); do { bool pending = false; @@ -1067,7 +1067,7 @@ static void nvme_tcp_io_work(struct work_struct *w) if (!pending) return; - } while (time_after(jiffies, start)); /* quota is exhausted */ + } while (!time_after(jiffies, deadline)); /* quota is exhausted */ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index de0bff70ebb6..32008d85172b 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -11,10 +11,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) { const struct queue_limits *ql = &bdev_get_queue(bdev)->limits; - /* Number of physical blocks per logical block. */ - const u32 ppl = ql->physical_block_size / ql->logical_block_size; - /* Physical blocks per logical block, 0's based. */ - const __le16 ppl0b = to0based(ppl); + /* Number of logical blocks per physical block. */ + const u32 lpp = ql->physical_block_size / ql->logical_block_size; + /* Logical blocks per physical block, 0's based. */ + const __le16 lpp0b = to0based(lpp); /* * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN, @@ -25,9 +25,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) * field from the identify controller data structure should be used. */ id->nsfeat |= 1 << 1; - id->nawun = ppl0b; - id->nawupf = ppl0b; - id->nacwu = ppl0b; + id->nawun = lpp0b; + id->nawupf = lpp0b; + id->nacwu = lpp0b; /* * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and @@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) */ id->nsfeat |= 1 << 4; /* NPWG = Namespace Preferred Write Granularity. 0's based */ - id->npwg = ppl0b; + id->npwg = lpp0b; /* NPWA = Namespace Preferred Write Alignment. 0's based */ id->npwa = id->npwg; /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */ diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index bf4f03474e89..d535080b781f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -348,8 +348,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) return 0; err: - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); return NVME_SC_INTERNAL; } @@ -554,8 +553,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) if (queue->nvme_sq.sqhd_disabled) { kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); } return 1; @@ -586,8 +584,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, return -EAGAIN; kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); return 1; @@ -1310,8 +1307,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) nvmet_req_uninit(&cmd->req); nvmet_tcp_unmap_pdu_iovec(cmd); kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); } static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) diff --git a/drivers/ptp/ptp_dte.c b/drivers/ptp/ptp_dte.c index 0dcfdc806f57..82d31ba32690 100644 --- a/drivers/ptp/ptp_dte.c +++ b/drivers/ptp/ptp_dte.c @@ -240,14 +240,12 @@ static int ptp_dte_probe(struct platform_device *pdev) { struct ptp_dte *ptp_dte; struct device *dev = &pdev->dev; - struct resource *res; ptp_dte = devm_kzalloc(dev, sizeof(struct ptp_dte), GFP_KERNEL); if (!ptp_dte) return -ENOMEM; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ptp_dte->regs = devm_ioremap_resource(dev, res); + ptp_dte->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ptp_dte->regs)) return PTR_ERR(ptp_dte->regs); diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index c61f00b72e15..a577218d1ab7 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -507,6 +507,8 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, ptp_qoriq->regs.etts_regs = base + ETTS_REGS_OFFSET; } + spin_lock_init(&ptp_qoriq->lock); + ktime_get_real_ts64(&now); ptp_qoriq_settime(&ptp_qoriq->caps, &now); @@ -514,7 +516,6 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT; - spin_lock_init(&ptp_qoriq->lock); spin_lock_irqsave(&ptp_qoriq->lock, flags); regs = &ptp_qoriq->regs; diff --git a/drivers/reset/reset-scmi.c b/drivers/reset/reset-scmi.c index c6d3c8427f14..b46df80ec6c3 100644 --- a/drivers/reset/reset-scmi.c +++ b/drivers/reset/reset-scmi.c @@ -102,6 +102,7 @@ static int scmi_reset_probe(struct scmi_device *sdev) data->rcdev.owner = THIS_MODULE; data->rcdev.of_node = np; data->rcdev.nr_resets = handle->reset_ops->num_domains_get(handle); + data->handle = handle; return devm_reset_controller_register(dev, &data->rcdev); } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index fc53e1e221f0..c94184d080f8 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1553,8 +1553,8 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device) if (rc == 0) { memcpy(&private->vsq, vsq, sizeof(*vsq)); } else { - dev_warn(&device->cdev->dev, - "Reading the volume storage information failed with rc=%d\n", rc); + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Reading the volume storage information failed with rc=%d", rc); } if (useglobal) @@ -1737,8 +1737,8 @@ static int dasd_eckd_read_ext_pool_info(struct dasd_device *device) if (rc == 0) { dasd_eckd_cpy_ext_pool_data(device, lcq); } else { - dev_warn(&device->cdev->dev, - "Reading the logical configuration failed with rc=%d\n", rc); + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Reading the logical configuration failed with rc=%d", rc); } dasd_sfree_request(cqr, cqr->memdev); @@ -2020,14 +2020,10 @@ dasd_eckd_check_characteristics(struct dasd_device *device) dasd_eckd_read_features(device); /* Read Volume Information */ - rc = dasd_eckd_read_vol_info(device); - if (rc) - goto out_err3; + dasd_eckd_read_vol_info(device); /* Read Extent Pool Information */ - rc = dasd_eckd_read_ext_pool_info(device); - if (rc) - goto out_err3; + dasd_eckd_read_ext_pool_info(device); /* Read Device Characteristics */ rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, @@ -2059,9 +2055,6 @@ dasd_eckd_check_characteristics(struct dasd_device *device) if (readonly) set_bit(DASD_FLAG_DEVICE_RO, &device->flags); - if (dasd_eckd_is_ese(device)) - dasd_set_feature(device->cdev, DASD_FEATURE_DISCARD, 1); - dev_info(&device->cdev->dev, "New DASD %04X/%02X (CU %04X/%02X) " "with %d cylinders, %d heads, %d sectors%s\n", private->rdc_data.dev_type, @@ -3695,14 +3688,6 @@ static int dasd_eckd_release_space(struct dasd_device *device, return -EINVAL; } -static struct dasd_ccw_req * -dasd_eckd_build_cp_discard(struct dasd_device *device, struct dasd_block *block, - struct request *req, sector_t first_trk, - sector_t last_trk) -{ - return dasd_eckd_dso_ras(device, block, req, first_trk, last_trk, 1); -} - static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( struct dasd_device *startdev, struct dasd_block *block, @@ -4447,10 +4432,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev, cmdwtd = private->features.feature[12] & 0x40; use_prefix = private->features.feature[8] & 0x01; - if (req_op(req) == REQ_OP_DISCARD) - return dasd_eckd_build_cp_discard(startdev, block, req, - first_trk, last_trk); - cqr = NULL; if (cdlspecial || dasd_page_cache) { /* do nothing, just fall through to the cmd mode single case */ @@ -4729,14 +4710,12 @@ static struct dasd_ccw_req *dasd_eckd_build_alias_cp(struct dasd_device *base, struct dasd_block *block, struct request *req) { - struct dasd_device *startdev = NULL; struct dasd_eckd_private *private; - struct dasd_ccw_req *cqr; + struct dasd_device *startdev; unsigned long flags; + struct dasd_ccw_req *cqr; - /* Discard requests can only be processed on base devices */ - if (req_op(req) != REQ_OP_DISCARD) - startdev = dasd_alias_get_start_dev(base); + startdev = dasd_alias_get_start_dev(base); if (!startdev) startdev = base; private = startdev->private; @@ -5663,14 +5642,10 @@ static int dasd_eckd_restore_device(struct dasd_device *device) dasd_eckd_read_features(device); /* Read Volume Information */ - rc = dasd_eckd_read_vol_info(device); - if (rc) - goto out_err2; + dasd_eckd_read_vol_info(device); /* Read Extent Pool Information */ - rc = dasd_eckd_read_ext_pool_info(device); - if (rc) - goto out_err2; + dasd_eckd_read_ext_pool_info(device); /* Read Device Characteristics */ rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, @@ -6521,20 +6496,8 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block) unsigned int logical_block_size = block->bp_block; struct request_queue *q = block->request_queue; struct dasd_device *device = block->base; - struct dasd_eckd_private *private; - unsigned int max_discard_sectors; - unsigned int max_bytes; - unsigned int ext_bytes; /* Extent Size in Bytes */ - int recs_per_trk; - int trks_per_cyl; - int ext_limit; - int ext_size; /* Extent Size in Cylinders */ int max; - private = device->private; - trks_per_cyl = private->rdc_data.trk_per_cyl; - recs_per_trk = recs_per_track(&private->rdc_data, 0, logical_block_size); - if (device->features & DASD_FEATURE_USERAW) { /* * the max_blocks value for raw_track access is 256 @@ -6555,28 +6518,6 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block) /* With page sized segments each segment can be translated into one idaw/tidaw */ blk_queue_max_segment_size(q, PAGE_SIZE); blk_queue_segment_boundary(q, PAGE_SIZE - 1); - - if (dasd_eckd_is_ese(device)) { - /* - * Depending on the extent size, up to UINT_MAX bytes can be - * accepted. However, neither DASD_ECKD_RAS_EXTS_MAX nor the - * device limits should be exceeded. - */ - ext_size = dasd_eckd_ext_size(device); - ext_limit = min(private->real_cyl / ext_size, DASD_ECKD_RAS_EXTS_MAX); - ext_bytes = ext_size * trks_per_cyl * recs_per_trk * - logical_block_size; - max_bytes = UINT_MAX - (UINT_MAX % ext_bytes); - if (max_bytes / ext_bytes > ext_limit) - max_bytes = ext_bytes * ext_limit; - - max_discard_sectors = max_bytes / 512; - - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = ext_bytes; - q->limits.discard_alignment = ext_bytes; - } } static struct ccw_driver dasd_eckd_driver = { diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index f4ca1d29d61b..cd164886132f 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -113,7 +113,7 @@ static void set_impl_params(struct qdio_irq *irq_ptr, irq_ptr->qib.pfmt = qib_param_field_format; if (qib_param_field) memcpy(irq_ptr->qib.parm, qib_param_field, - QDIO_MAX_BUFFERS_PER_Q); + sizeof(irq_ptr->qib.parm)); if (!input_slib_elements) goto output; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a7868c8133ee..dda274351c21 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4715,8 +4715,7 @@ static int qeth_qdio_establish(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "qdioest"); - qib_param_field = kzalloc(QDIO_MAX_BUFFERS_PER_Q, - GFP_KERNEL); + qib_param_field = kzalloc(FIELD_SIZEOF(struct qib, parm), GFP_KERNEL); if (!qib_param_field) { rc = -ENOMEM; goto out_free_nothing; diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index da00ca5fa5dc..401743e2b429 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1923,6 +1923,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, struct fcoe_fcp_rsp_payload *fcp_rsp; struct bnx2fc_rport *tgt = io_req->tgt; struct scsi_cmnd *sc_cmd; + u16 scope = 0, qualifier = 0; /* scsi_cmd_cmpl is called with tgt lock held */ @@ -1990,12 +1991,30 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, if (io_req->cdb_status == SAM_STAT_TASK_SET_FULL || io_req->cdb_status == SAM_STAT_BUSY) { - /* Set the jiffies + retry_delay_timer * 100ms - for the rport/tgt */ - tgt->retry_delay_timestamp = jiffies + - fcp_rsp->retry_delay_timer * HZ / 10; + /* Newer array firmware with BUSY or + * TASK_SET_FULL may return a status that needs + * the scope bits masked. + * Or a huge delay timestamp up to 27 minutes + * can result. + */ + if (fcp_rsp->retry_delay_timer) { + /* Upper 2 bits */ + scope = fcp_rsp->retry_delay_timer + & 0xC000; + /* Lower 14 bits */ + qualifier = fcp_rsp->retry_delay_timer + & 0x3FFF; + } + if (scope > 0 && qualifier > 0 && + qualifier <= 0x3FEF) { + /* Set the jiffies + + * retry_delay_timer * 100ms + * for the rport/tgt + */ + tgt->retry_delay_timestamp = jiffies + + (qualifier * HZ / 10); + } } - } if (io_req->fcp_resid) scsi_set_resid(sc_cmd, io_req->fcp_resid); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index d1513fdf1e00..0847e682797b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -3683,7 +3683,7 @@ void hisi_sas_debugfs_work_handler(struct work_struct *work) } EXPORT_SYMBOL_GPL(hisi_sas_debugfs_work_handler); -void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) +static void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; int i; @@ -3705,7 +3705,7 @@ void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) devm_kfree(dev, hisi_hba->debugfs_port_reg[i]); } -int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba) +static int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba) { const struct hisi_sas_hw *hw = hisi_hba->hw; struct device *dev = hisi_hba->dev; @@ -3796,7 +3796,7 @@ fail: return -ENOMEM; } -void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba) +static void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba) { hisi_hba->debugfs_bist_dentry = debugfs_create_dir("bist", hisi_hba->debugfs_dir); diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 45a66048801b..ff6d4aa92421 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4183,11 +4183,11 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ && pdev->subsystem_device == 0xC000) - return -ENODEV; + goto out_disable_device; /* Now check the magic signature byte */ pci_read_config_word(pdev, PCI_CONF_AMISIG, &magic); if (magic != HBA_SIGNATURE_471 && magic != HBA_SIGNATURE) - return -ENODEV; + goto out_disable_device; /* Ok it is probably a megaraid */ } diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 1659d35cd37b..59ca98f12afd 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -596,7 +596,7 @@ static void qedf_dcbx_handler(void *dev, struct qed_dcbx_get *get, u32 mib_type) tmp_prio = get->operational.app_prio.fcoe; if (qedf_default_prio > -1) qedf->prio = qedf_default_prio; - else if (tmp_prio < 0 || tmp_prio > 7) { + else if (tmp_prio > 7) { QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "FIP/FCoE prio %d out of range, setting to %d.\n", tmp_prio, QEDF_DEFAULT_PRIO); diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 8190c2a27584..30bafd9d21e9 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2920,6 +2920,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) struct qla_hw_data *ha = vha->hw; uint16_t id = vha->vp_idx; + set_bit(VPORT_DELETE, &vha->dpc_flags); + while (test_bit(LOOP_RESYNC_ACTIVE, &vha->dpc_flags) || test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags)) msleep(1000); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 873a6aef1c5c..6ffa9877c28b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2396,6 +2396,7 @@ typedef struct fc_port { unsigned int query:1; unsigned int id_changed:1; unsigned int scan_needed:1; + unsigned int n2n_flag:1; struct completion nvme_del_done; uint32_t nvme_prli_service_param; @@ -2446,7 +2447,6 @@ typedef struct fc_port { uint8_t fc4_type; uint8_t fc4f_nvme; uint8_t scan_state; - uint8_t n2n_flag; unsigned long last_queue_full; unsigned long last_ramp_up; @@ -3036,6 +3036,7 @@ enum scan_flags_t { enum fc4type_t { FS_FC4TYPE_FCP = BIT_0, FS_FC4TYPE_NVME = BIT_1, + FS_FCP_IS_N2N = BIT_7, }; struct fab_scan_rp { @@ -4394,6 +4395,7 @@ typedef struct scsi_qla_host { #define IOCB_WORK_ACTIVE 31 #define SET_ZIO_THRESHOLD_NEEDED 32 #define ISP_ABORT_TO_ROM 33 +#define VPORT_DELETE 34 unsigned long pci_flags; #define PFLG_DISCONNECTED 0 /* PCI device removed */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index dc0e36676313..5298ed10059f 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3102,7 +3102,8 @@ int qla24xx_post_gpnid_work(struct scsi_qla_host *vha, port_id_t *id) { struct qla_work_evt *e; - if (test_bit(UNLOADING, &vha->dpc_flags)) + if (test_bit(UNLOADING, &vha->dpc_flags) || + (vha->vp_idx && test_bit(VPORT_DELETE, &vha->dpc_flags))) return 0; e = qla2x00_alloc_work(vha, QLA_EVT_GPNID); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 643d2324082e..1d041313ec52 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -746,12 +746,15 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, break; default: if ((id.b24 != fcport->d_id.b24 && - fcport->d_id.b24) || + fcport->d_id.b24 && + fcport->loop_id != FC_NO_LOOP_ID) || (fcport->loop_id != FC_NO_LOOP_ID && fcport->loop_id != loop_id)) { ql_dbg(ql_dbg_disc, vha, 0x20e3, "%s %d %8phC post del sess\n", __func__, __LINE__, fcport->port_name); + if (fcport->n2n_flag) + fcport->d_id.b24 = 0; qlt_schedule_sess_for_deletion(fcport); return; } @@ -759,6 +762,8 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, } fcport->loop_id = loop_id; + if (fcport->n2n_flag) + fcport->d_id.b24 = id.b24; wwn = wwn_to_u64(fcport->port_name); qlt_find_sess_invalidate_other(vha, wwn, @@ -972,7 +977,7 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) wwn = wwn_to_u64(e->port_name); ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x20e8, - "%s %8phC %02x:%02x:%02x state %d/%d lid %x \n", + "%s %8phC %02x:%02x:%02x CLS %x/%x lid %x \n", __func__, (void *)&wwn, e->port_id[2], e->port_id[1], e->port_id[0], e->current_login_state, e->last_login_state, (loop_id & 0x7fff)); @@ -1499,7 +1504,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) (fcport->fw_login_state == DSC_LS_PRLI_PEND))) return 0; - if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP && + !N2N_TOPO(vha->hw)) { if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) { set_bit(RELOGIN_NEEDED, &vha->dpc_flags); return 0; @@ -1570,8 +1576,9 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) qla24xx_post_gpdb_work(vha, fcport, 0); } else { ql_dbg(ql_dbg_disc, vha, 0x2118, - "%s %d %8phC post NVMe PRLI\n", - __func__, __LINE__, fcport->port_name); + "%s %d %8phC post %s PRLI\n", + __func__, __LINE__, fcport->port_name, + fcport->fc4f_nvme ? "NVME" : "FC"); qla24xx_post_prli_work(vha, fcport); } break; @@ -1853,17 +1860,38 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) break; } - if (ea->fcport->n2n_flag) { + if (ea->fcport->fc4f_nvme) { ql_dbg(ql_dbg_disc, vha, 0x2118, "%s %d %8phC post fc4 prli\n", __func__, __LINE__, ea->fcport->port_name); ea->fcport->fc4f_nvme = 0; - ea->fcport->n2n_flag = 0; qla24xx_post_prli_work(vha, ea->fcport); + return; + } + + /* at this point both PRLI NVME & PRLI FCP failed */ + if (N2N_TOPO(vha->hw)) { + if (ea->fcport->n2n_link_reset_cnt < 3) { + ea->fcport->n2n_link_reset_cnt++; + /* + * remote port is not sending Plogi. Reset + * link to kick start his state machine + */ + set_bit(N2N_LINK_RESET, &vha->dpc_flags); + } else { + ql_log(ql_log_warn, vha, 0x2119, + "%s %d %8phC Unable to reconnect\n", + __func__, __LINE__, ea->fcport->port_name); + } + } else { + /* + * switch connect. login failed. Take connection + * down and allow relogin to retrigger + */ + ea->fcport->flags &= ~FCF_ASYNC_SENT; + ea->fcport->keep_nport_handle = 0; + qlt_schedule_sess_for_deletion(ea->fcport); } - ql_dbg(ql_dbg_disc, vha, 0x2119, - "%s %d %8phC unhandle event of %x\n", - __func__, __LINE__, ea->fcport->port_name, ea->data[0]); break; } } @@ -3190,7 +3218,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) for (j = 0; j < 2; j++, fwdt++) { if (!fwdt->template) { - ql_log(ql_log_warn, vha, 0x00ba, + ql_dbg(ql_dbg_init, vha, 0x00ba, "-> fwdt%u no template\n", j); continue; } @@ -4986,28 +5014,47 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) unsigned long flags; /* Inititae N2N login. */ - if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) { - /* borrowing */ - u32 *bp, i, sz; - - memset(ha->init_cb, 0, ha->init_cb_size); - sz = min_t(int, sizeof(struct els_plogi_payload), - ha->init_cb_size); - rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma, - (void *)ha->init_cb, sz); - if (rval == QLA_SUCCESS) { - bp = (uint32_t *)ha->init_cb; - for (i = 0; i < sz/4 ; i++, bp++) - *bp = cpu_to_be32(*bp); + if (N2N_TOPO(ha)) { + if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) { + /* borrowing */ + u32 *bp, i, sz; + + memset(ha->init_cb, 0, ha->init_cb_size); + sz = min_t(int, sizeof(struct els_plogi_payload), + ha->init_cb_size); + rval = qla24xx_get_port_login_templ(vha, + ha->init_cb_dma, (void *)ha->init_cb, sz); + if (rval == QLA_SUCCESS) { + bp = (uint32_t *)ha->init_cb; + for (i = 0; i < sz/4 ; i++, bp++) + *bp = cpu_to_be32(*bp); - memcpy(&ha->plogi_els_payld.data, (void *)ha->init_cb, - sizeof(ha->plogi_els_payld.data)); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - } else { - ql_dbg(ql_dbg_init, vha, 0x00d1, - "PLOGI ELS param read fail.\n"); + memcpy(&ha->plogi_els_payld.data, + (void *)ha->init_cb, + sizeof(ha->plogi_els_payld.data)); + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + } else { + ql_dbg(ql_dbg_init, vha, 0x00d1, + "PLOGI ELS param read fail.\n"); + goto skip_login; + } + } + + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->n2n_flag) { + qla24xx_fcport_handle_login(vha, fcport); + return QLA_SUCCESS; + } + } +skip_login: + spin_lock_irqsave(&vha->work_lock, flags); + vha->scan.scan_retry++; + spin_unlock_irqrestore(&vha->work_lock, flags); + + if (vha->scan.scan_retry < MAX_SCAN_RETRIES) { + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); } - return QLA_SUCCESS; } found_devs = 0; diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index e92e52aa6e9b..518eb954cf42 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2656,9 +2656,10 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->port_id[0] = sp->fcport->d_id.b.al_pa; els_iocb->port_id[1] = sp->fcport->d_id.b.area; els_iocb->port_id[2] = sp->fcport->d_id.b.domain; - els_iocb->s_id[0] = vha->d_id.b.al_pa; - els_iocb->s_id[1] = vha->d_id.b.area; - els_iocb->s_id[2] = vha->d_id.b.domain; + /* For SID the byte order is different than DID */ + els_iocb->s_id[1] = vha->d_id.b.al_pa; + els_iocb->s_id[2] = vha->d_id.b.area; + els_iocb->s_id[0] = vha->d_id.b.domain; if (elsio->u.els_logo.els_cmd == ELS_DCMD_PLOGI) { els_iocb->control_flags = 0; diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 4c858e2d0ea8..1cc6913f76c4 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2249,7 +2249,7 @@ qla2x00_lip_reset(scsi_qla_host_t *vha) mbx_cmd_t mc; mbx_cmd_t *mcp = &mc; - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x105a, + ql_dbg(ql_dbg_disc, vha, 0x105a, "Entered %s.\n", __func__); if (IS_CNA_CAPABLE(vha->hw)) { @@ -3883,14 +3883,24 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, case TOPO_N2N: ha->current_topology = ISP_CFG_N; spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + fcport->scan_state = QLA_FCPORT_SCAN; + fcport->n2n_flag = 0; + } + fcport = qla2x00_find_fcport_by_wwpn(vha, rptid_entry->u.f1.port_name, 1); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); if (fcport) { fcport->plogi_nack_done_deadline = jiffies + HZ; - fcport->dm_login_expire = jiffies + 3*HZ; + fcport->dm_login_expire = jiffies + 2*HZ; fcport->scan_state = QLA_FCPORT_FOUND; + fcport->n2n_flag = 1; + fcport->keep_nport_handle = 1; + if (vha->flags.nvme_enabled) + fcport->fc4f_nvme = 1; + switch (fcport->disc_state) { case DSC_DELETED: set_bit(RELOGIN_NEEDED, @@ -3924,7 +3934,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, rptid_entry->u.f1.port_name, rptid_entry->u.f1.node_name, NULL, - FC4_TYPE_UNKNOWN); + FS_FCP_IS_N2N); } /* if our portname is higher then initiate N2N login */ @@ -4023,6 +4033,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, list_for_each_entry(fcport, &vha->vp_fcports, list) { fcport->scan_state = QLA_FCPORT_SCAN; + fcport->n2n_flag = 0; } fcport = qla2x00_find_fcport_by_wwpn(vha, @@ -4032,6 +4043,14 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, fcport->login_retry = vha->hw->login_retry_count; fcport->plogi_nack_done_deadline = jiffies + HZ; fcport->scan_state = QLA_FCPORT_FOUND; + fcport->keep_nport_handle = 1; + fcport->n2n_flag = 1; + fcport->d_id.b.domain = + rptid_entry->u.f2.remote_nport_id[2]; + fcport->d_id.b.area = + rptid_entry->u.f2.remote_nport_id[1]; + fcport->d_id.b.al_pa = + rptid_entry->u.f2.remote_nport_id[0]; } } } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 1a9a11ae7285..6afad68e5ba2 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -66,6 +66,7 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) uint16_t vp_id; struct qla_hw_data *ha = vha->hw; unsigned long flags = 0; + u8 i; mutex_lock(&ha->vport_lock); /* @@ -75,8 +76,9 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) * ensures no active vp_list traversal while the vport is removed * from the queue) */ - wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count), - 10*HZ); + for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++) + wait_event_timeout(vha->vref_waitq, + atomic_read(&vha->vref_count), HZ); spin_lock_irqsave(&ha->vport_slock, flags); if (atomic_read(&vha->vref_count)) { @@ -262,6 +264,9 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) spin_lock_irqsave(&ha->vport_slock, flags); list_for_each_entry(vha, &ha->vp_list, list) { if (vha->vp_idx) { + if (test_bit(VPORT_DELETE, &vha->dpc_flags)) + continue; + atomic_inc(&vha->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); @@ -300,6 +305,20 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) int qla2x00_vp_abort_isp(scsi_qla_host_t *vha) { + fc_port_t *fcport; + + /* + * To exclusively reset vport, we need to log it out first. + * Note: This control_vp can fail if ISP reset is already + * issued, this is expected, as the vp would be already + * logged out due to ISP reset. + */ + if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) { + qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); + list_for_each_entry(fcport, &vha->vp_fcports, list) + fcport->logout_on_delete = 0; + } + /* * Physical port will do most of the abort and recovery work. We can * just treat it as a loop down @@ -312,16 +331,9 @@ qla2x00_vp_abort_isp(scsi_qla_host_t *vha) atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); } - /* - * To exclusively reset vport, we need to log it out first. Note: this - * control_vp can fail if ISP reset is already issued, this is - * expected, as the vp would be already logged out due to ISP reset. - */ - if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) - qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); - ql_dbg(ql_dbg_taskm, vha, 0x801d, "Scheduling enable of Vport %d.\n", vha->vp_idx); + return qla24xx_enable_vp(vha); } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 73db01e3b4e4..3568031c6504 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1115,9 +1115,15 @@ static inline int test_fcport_count(scsi_qla_host_t *vha) void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) { + u8 i; + qla2x00_mark_all_devices_lost(vha, 0); - wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), 10*HZ); + for (i = 0; i < 10; i++) + wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), + HZ); + + flush_workqueue(vha->hw->wq); } /* @@ -5036,6 +5042,10 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) memcpy(fcport->port_name, e->u.new_sess.port_name, WWN_SIZE); + + if (e->u.new_sess.fc4_type & FS_FCP_IS_N2N) + fcport->n2n_flag = 1; + } else { ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %8phC mem alloc fail.\n", @@ -5134,11 +5144,9 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) if (dfcp) qlt_schedule_sess_for_deletion(tfcp); - - if (N2N_TOPO(vha->hw)) - fcport->flags &= ~FCF_FABRIC_DEVICE; - if (N2N_TOPO(vha->hw)) { + fcport->flags &= ~FCF_FABRIC_DEVICE; + fcport->keep_nport_handle = 1; if (vha->flags.nvme_enabled) { fcport->fc4f_nvme = 1; fcport->n2n_flag = 1; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 0ffda6171614..a06e56224a55 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -953,7 +953,7 @@ void qlt_free_session_done(struct work_struct *work) struct qla_hw_data *ha = vha->hw; unsigned long flags; bool logout_started = false; - scsi_qla_host_t *base_vha; + scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); struct qlt_plogi_ack_t *own = sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]; @@ -1020,6 +1020,7 @@ void qlt_free_session_done(struct work_struct *work) if (logout_started) { bool traced = false; + u16 cnt = 0; while (!READ_ONCE(sess->logout_completed)) { if (!traced) { @@ -1029,6 +1030,9 @@ void qlt_free_session_done(struct work_struct *work) traced = true; } msleep(100); + cnt++; + if (cnt > 200) + break; } ql_dbg(ql_dbg_disc, vha, 0xf087, @@ -1101,6 +1105,7 @@ void qlt_free_session_done(struct work_struct *work) } spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + sess->free_pending = 0; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, "Unregistration of sess %p %8phC finished fcp_cnt %d\n", @@ -1109,17 +1114,9 @@ void qlt_free_session_done(struct work_struct *work) if (tgt && (tgt->sess_count == 0)) wake_up_all(&tgt->waitQ); - if (vha->fcport_count == 0) - wake_up_all(&vha->fcport_waitQ); - - base_vha = pci_get_drvdata(ha->pdev); - - sess->free_pending = 0; - - if (test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags)) - return; - - if ((!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) { + if (!test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags) && + !(vha->vp_idx && test_bit(VPORT_DELETE, &vha->dpc_flags)) && + (!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) { switch (vha->host->active_mode) { case MODE_INITIATOR: case MODE_DUAL: @@ -1132,6 +1129,9 @@ void qlt_free_session_done(struct work_struct *work) break; } } + + if (vha->fcport_count == 0) + wake_up_all(&vha->fcport_waitQ); } /* ha->tgt.sess_lock supposed to be held on entry */ @@ -1161,7 +1161,7 @@ void qlt_unreg_sess(struct fc_port *sess) sess->last_login_gen = sess->login_gen; INIT_WORK(&sess->free_work, qlt_free_session_done); - schedule_work(&sess->free_work); + queue_work(sess->vha->hw->wq, &sess->free_work); } EXPORT_SYMBOL(qlt_unreg_sess); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ed8b9ac805e6..542d2bac2922 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1837,8 +1837,7 @@ static int storvsc_probe(struct hv_device *device, /* * Set the number of HW queues we are supporting. */ - if (stor_device->num_sc != 0) - host->nr_hw_queues = stor_device->num_sc + 1; + host->nr_hw_queues = num_present_cpus(); /* * Set the error handler work queue. diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 034dd9cb9ec8..11a87f51c442 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8143,6 +8143,9 @@ int ufshcd_shutdown(struct ufs_hba *hba) { int ret = 0; + if (!hba->is_powered) + goto out; + if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) goto out; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f9502dbbb5c1..9bb36c32cbf9 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1171,6 +1171,11 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, spi_statistics_add_transfer_stats(statm, xfer, ctlr); spi_statistics_add_transfer_stats(stats, xfer, ctlr); + if (!ctlr->ptp_sts_supported) { + xfer->ptp_sts_word_pre = 0; + ptp_read_system_prets(xfer->ptp_sts); + } + if (xfer->tx_buf || xfer->rx_buf) { reinit_completion(&ctlr->xfer_completion); @@ -1197,6 +1202,11 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, xfer->len); } + if (!ctlr->ptp_sts_supported) { + ptp_read_system_postts(xfer->ptp_sts); + xfer->ptp_sts_word_post = xfer->len; + } + trace_spi_transfer_stop(msg, xfer); if (msg->status != -EINPROGRESS) @@ -1265,6 +1275,7 @@ EXPORT_SYMBOL_GPL(spi_finalize_current_transfer); */ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) { + struct spi_transfer *xfer; struct spi_message *msg; bool was_busy = false; unsigned long flags; @@ -1391,6 +1402,13 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) goto out; } + if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) { + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + xfer->ptp_sts_word_pre = 0; + ptp_read_system_prets(xfer->ptp_sts); + } + } + ret = ctlr->transfer_one_message(ctlr, msg); if (ret) { dev_err(&ctlr->dev, @@ -1419,6 +1437,99 @@ static void spi_pump_messages(struct kthread_work *work) } /** + * spi_take_timestamp_pre - helper for drivers to collect the beginning of the + * TX timestamp for the requested byte from the SPI + * transfer. The frequency with which this function + * must be called (once per word, once for the whole + * transfer, once per batch of words etc) is arbitrary + * as long as the @tx buffer offset is greater than or + * equal to the requested byte at the time of the + * call. The timestamp is only taken once, at the + * first such call. It is assumed that the driver + * advances its @tx buffer pointer monotonically. + * @ctlr: Pointer to the spi_controller structure of the driver + * @xfer: Pointer to the transfer being timestamped + * @tx: Pointer to the current word within the xfer->tx_buf that the driver is + * preparing to transmit right now. + * @irqs_off: If true, will disable IRQs and preemption for the duration of the + * transfer, for less jitter in time measurement. Only compatible + * with PIO drivers. If true, must follow up with + * spi_take_timestamp_post or otherwise system will crash. + * WARNING: for fully predictable results, the CPU frequency must + * also be under control (governor). + */ +void spi_take_timestamp_pre(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off) +{ + u8 bytes_per_word = DIV_ROUND_UP(xfer->bits_per_word, 8); + + if (!xfer->ptp_sts) + return; + + if (xfer->timestamped_pre) + return; + + if (tx < (xfer->tx_buf + xfer->ptp_sts_word_pre * bytes_per_word)) + return; + + /* Capture the resolution of the timestamp */ + xfer->ptp_sts_word_pre = (tx - xfer->tx_buf) / bytes_per_word; + + xfer->timestamped_pre = true; + + if (irqs_off) { + local_irq_save(ctlr->irq_flags); + preempt_disable(); + } + + ptp_read_system_prets(xfer->ptp_sts); +} +EXPORT_SYMBOL_GPL(spi_take_timestamp_pre); + +/** + * spi_take_timestamp_post - helper for drivers to collect the end of the + * TX timestamp for the requested byte from the SPI + * transfer. Can be called with an arbitrary + * frequency: only the first call where @tx exceeds + * or is equal to the requested word will be + * timestamped. + * @ctlr: Pointer to the spi_controller structure of the driver + * @xfer: Pointer to the transfer being timestamped + * @tx: Pointer to the current word within the xfer->tx_buf that the driver has + * just transmitted. + * @irqs_off: If true, will re-enable IRQs and preemption for the local CPU. + */ +void spi_take_timestamp_post(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off) +{ + u8 bytes_per_word = DIV_ROUND_UP(xfer->bits_per_word, 8); + + if (!xfer->ptp_sts) + return; + + if (xfer->timestamped_post) + return; + + if (tx < (xfer->tx_buf + xfer->ptp_sts_word_post * bytes_per_word)) + return; + + ptp_read_system_postts(xfer->ptp_sts); + + if (irqs_off) { + local_irq_restore(ctlr->irq_flags); + preempt_enable(); + } + + /* Capture the resolution of the timestamp */ + xfer->ptp_sts_word_post = (tx - xfer->tx_buf) / bytes_per_word; + + xfer->timestamped_post = true; +} +EXPORT_SYMBOL_GPL(spi_take_timestamp_post); + +/** * spi_set_thread_rt - set the controller to pump at realtime priority * @ctlr: controller to boost priority of * @@ -1503,6 +1614,7 @@ EXPORT_SYMBOL_GPL(spi_get_next_queued_message); */ void spi_finalize_current_message(struct spi_controller *ctlr) { + struct spi_transfer *xfer; struct spi_message *mesg; unsigned long flags; int ret; @@ -1511,6 +1623,13 @@ void spi_finalize_current_message(struct spi_controller *ctlr) mesg = ctlr->cur_msg; spin_unlock_irqrestore(&ctlr->queue_lock, flags); + if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) { + list_for_each_entry(xfer, &mesg->transfers, transfer_list) { + ptp_read_system_postts(xfer->ptp_sts); + xfer->ptp_sts_word_post = xfer->len; + } + } + spi_unmap_msg(ctlr, mesg); if (ctlr->cur_msg_prepared && ctlr->unprepare_message) { @@ -3273,6 +3392,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) static int __spi_async(struct spi_device *spi, struct spi_message *message) { struct spi_controller *ctlr = spi->controller; + struct spi_transfer *xfer; /* * Some controllers do not support doing regular SPI transfers. Return @@ -3288,6 +3408,13 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message) trace_spi_message_submit(message); + if (!ctlr->ptp_sts_supported) { + list_for_each_entry(xfer, &message->transfers, transfer_list) { + xfer->ptp_sts_word_pre = 0; + ptp_read_system_prets(xfer->ptp_sts); + } + } + return ctlr->transfer(spi, message); } diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index c64728fc21f2..a62057555d1b 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -349,10 +349,8 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) */ dst_release(skb_dst(skb)); skb_dst_set(skb, NULL); -#ifdef CONFIG_XFRM - secpath_reset(skb); -#endif - nf_reset(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); #ifdef CONFIG_NET_SCHED skb->tc_index = 0; diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 9966364a6deb..001a21abcc28 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -310,7 +310,7 @@ config DOVE_THERMAL config DB8500_THERMAL tristate "DB8500 thermal management" - depends on MFD_DB8500_PRCMU + depends on MFD_DB8500_PRCMU && OF default y help Adds DB8500 thermal management implementation according to the thermal diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index b71a999d17d6..372dbbaaafb8 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -3,9 +3,9 @@ * db8500_thermal.c - DB8500 Thermal Management Implementation * * Copyright (C) 2012 ST-Ericsson - * Copyright (C) 2012 Linaro Ltd. + * Copyright (C) 2012-2019 Linaro Ltd. * - * Author: Hongbo Zhang <[email protected]> + * Authors: Hongbo Zhang, Linus Walleij */ #include <linux/cpu_cooling.h> @@ -13,7 +13,6 @@ #include <linux/mfd/dbx500-prcmu.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/platform_data/db8500_thermal.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/thermal.h> @@ -21,453 +20,201 @@ #define PRCMU_DEFAULT_MEASURE_TIME 0xFFF #define PRCMU_DEFAULT_LOW_TEMP 0 +/** + * db8500_thermal_points - the interpolation points that trigger + * interrupts + */ +static const unsigned long db8500_thermal_points[] = { + 15000, + 20000, + 25000, + 30000, + 35000, + 40000, + 45000, + 50000, + 55000, + 60000, + 65000, + 70000, + 75000, + 80000, + /* + * This is where things start to get really bad for the + * SoC and the thermal zones should be set up to trigger + * critical temperature at 85000 mC so we don't get above + * this point. + */ + 85000, + 90000, + 95000, + 100000, +}; + struct db8500_thermal_zone { - struct thermal_zone_device *therm_dev; - struct mutex th_lock; - struct work_struct therm_work; - struct db8500_thsens_platform_data *trip_tab; - enum thermal_device_mode mode; + struct thermal_zone_device *tz; enum thermal_trend trend; - unsigned long cur_temp_pseudo; + unsigned long interpolated_temp; unsigned int cur_index; }; -/* Local function to check if thermal zone matches cooling devices */ -static int db8500_thermal_match_cdev(struct thermal_cooling_device *cdev, - struct db8500_trip_point *trip_point) -{ - int i; - - if (!strlen(cdev->type)) - return -EINVAL; - - for (i = 0; i < COOLING_DEV_MAX; i++) { - if (!strcmp(trip_point->cdev_name[i], cdev->type)) - return 0; - } - - return -ENODEV; -} - -/* Callback to bind cooling device to thermal zone */ -static int db8500_cdev_bind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - unsigned long max_state, upper, lower; - int i, ret = -EINVAL; - - cdev->ops->get_max_state(cdev, &max_state); - - for (i = 0; i < ptrips->num_trips; i++) { - if (db8500_thermal_match_cdev(cdev, &ptrips->trip_points[i])) - continue; - - upper = lower = i > max_state ? max_state : i; - - ret = thermal_zone_bind_cooling_device(thermal, i, cdev, - upper, lower, THERMAL_WEIGHT_DEFAULT); - - dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type, - i, ret, ret ? "fail" : "succeed"); - } - - return ret; -} - -/* Callback to unbind cooling device from thermal zone */ -static int db8500_cdev_unbind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - int i, ret = -EINVAL; - - for (i = 0; i < ptrips->num_trips; i++) { - if (db8500_thermal_match_cdev(cdev, &ptrips->trip_points[i])) - continue; - - ret = thermal_zone_unbind_cooling_device(thermal, i, cdev); - - dev_info(&cdev->device, "%s unbind from %d: %s\n", cdev->type, - i, ret ? "fail" : "succeed"); - } - - return ret; -} - /* Callback to get current temperature */ -static int db8500_sys_get_temp(struct thermal_zone_device *thermal, int *temp) +static int db8500_thermal_get_temp(void *data, int *temp) { - struct db8500_thermal_zone *pzone = thermal->devdata; + struct db8500_thermal_zone *th = data; /* * TODO: There is no PRCMU interface to get temperature data currently, * so a pseudo temperature is returned , it works for thermal framework * and this will be fixed when the PRCMU interface is available. */ - *temp = pzone->cur_temp_pseudo; + *temp = th->interpolated_temp; return 0; } /* Callback to get temperature changing trend */ -static int db8500_sys_get_trend(struct thermal_zone_device *thermal, - int trip, enum thermal_trend *trend) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - - *trend = pzone->trend; - - return 0; -} - -/* Callback to get thermal zone mode */ -static int db8500_sys_get_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode *mode) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - - mutex_lock(&pzone->th_lock); - *mode = pzone->mode; - mutex_unlock(&pzone->th_lock); - - return 0; -} - -/* Callback to set thermal zone mode */ -static int db8500_sys_set_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode mode) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - - mutex_lock(&pzone->th_lock); - - pzone->mode = mode; - if (mode == THERMAL_DEVICE_ENABLED) - schedule_work(&pzone->therm_work); - - mutex_unlock(&pzone->th_lock); - - return 0; -} - -/* Callback to get trip point type */ -static int db8500_sys_get_trip_type(struct thermal_zone_device *thermal, - int trip, enum thermal_trip_type *type) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - - if (trip >= ptrips->num_trips) - return -EINVAL; - - *type = ptrips->trip_points[trip].type; - - return 0; -} - -/* Callback to get trip point temperature */ -static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal, - int trip, int *temp) +static int db8500_thermal_get_trend(void *data, int trip, enum thermal_trend *trend) { - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; + struct db8500_thermal_zone *th = data; - if (trip >= ptrips->num_trips) - return -EINVAL; - - *temp = ptrips->trip_points[trip].temp; + *trend = th->trend; return 0; } -/* Callback to get critical trip point temperature */ -static int db8500_sys_get_crit_temp(struct thermal_zone_device *thermal, - int *temp) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - int i; - - for (i = ptrips->num_trips - 1; i > 0; i--) { - if (ptrips->trip_points[i].type == THERMAL_TRIP_CRITICAL) { - *temp = ptrips->trip_points[i].temp; - return 0; - } - } - - return -EINVAL; -} - -static struct thermal_zone_device_ops thdev_ops = { - .bind = db8500_cdev_bind, - .unbind = db8500_cdev_unbind, - .get_temp = db8500_sys_get_temp, - .get_trend = db8500_sys_get_trend, - .get_mode = db8500_sys_get_mode, - .set_mode = db8500_sys_set_mode, - .get_trip_type = db8500_sys_get_trip_type, - .get_trip_temp = db8500_sys_get_trip_temp, - .get_crit_temp = db8500_sys_get_crit_temp, +static struct thermal_zone_of_device_ops thdev_ops = { + .get_temp = db8500_thermal_get_temp, + .get_trend = db8500_thermal_get_trend, }; -static void db8500_thermal_update_config(struct db8500_thermal_zone *pzone, - unsigned int idx, enum thermal_trend trend, - unsigned long next_low, unsigned long next_high) +static void db8500_thermal_update_config(struct db8500_thermal_zone *th, + unsigned int idx, + enum thermal_trend trend, + unsigned long next_low, + unsigned long next_high) { prcmu_stop_temp_sense(); - pzone->cur_index = idx; - pzone->cur_temp_pseudo = (next_low + next_high)/2; - pzone->trend = trend; + th->cur_index = idx; + th->interpolated_temp = (next_low + next_high)/2; + th->trend = trend; + /* + * The PRCMU accept absolute temperatures in celsius so divide + * down the millicelsius with 1000 + */ prcmu_config_hotmon((u8)(next_low/1000), (u8)(next_high/1000)); prcmu_start_temp_sense(PRCMU_DEFAULT_MEASURE_TIME); } static irqreturn_t prcmu_low_irq_handler(int irq, void *irq_data) { - struct db8500_thermal_zone *pzone = irq_data; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - unsigned int idx = pzone->cur_index; + struct db8500_thermal_zone *th = irq_data; + unsigned int idx = th->cur_index; unsigned long next_low, next_high; - if (unlikely(idx == 0)) + if (idx == 0) /* Meaningless for thermal management, ignoring it */ return IRQ_HANDLED; if (idx == 1) { - next_high = ptrips->trip_points[0].temp; + next_high = db8500_thermal_points[0]; next_low = PRCMU_DEFAULT_LOW_TEMP; } else { - next_high = ptrips->trip_points[idx-1].temp; - next_low = ptrips->trip_points[idx-2].temp; + next_high = db8500_thermal_points[idx - 1]; + next_low = db8500_thermal_points[idx - 2]; } idx -= 1; - db8500_thermal_update_config(pzone, idx, THERMAL_TREND_DROPPING, - next_low, next_high); - - dev_dbg(&pzone->therm_dev->device, + db8500_thermal_update_config(th, idx, THERMAL_TREND_DROPPING, + next_low, next_high); + dev_dbg(&th->tz->device, "PRCMU set max %ld, min %ld\n", next_high, next_low); - schedule_work(&pzone->therm_work); + thermal_zone_device_update(th->tz, THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } static irqreturn_t prcmu_high_irq_handler(int irq, void *irq_data) { - struct db8500_thermal_zone *pzone = irq_data; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - unsigned int idx = pzone->cur_index; + struct db8500_thermal_zone *th = irq_data; + unsigned int idx = th->cur_index; unsigned long next_low, next_high; + int num_points = ARRAY_SIZE(db8500_thermal_points); - if (idx < ptrips->num_trips - 1) { - next_high = ptrips->trip_points[idx+1].temp; - next_low = ptrips->trip_points[idx].temp; + if (idx < num_points - 1) { + next_high = db8500_thermal_points[idx+1]; + next_low = db8500_thermal_points[idx]; idx += 1; - db8500_thermal_update_config(pzone, idx, THERMAL_TREND_RAISING, - next_low, next_high); + db8500_thermal_update_config(th, idx, THERMAL_TREND_RAISING, + next_low, next_high); - dev_dbg(&pzone->therm_dev->device, - "PRCMU set max %ld, min %ld\n", next_high, next_low); - } else if (idx == ptrips->num_trips - 1) - pzone->cur_temp_pseudo = ptrips->trip_points[idx].temp + 1; + dev_info(&th->tz->device, + "PRCMU set max %ld, min %ld\n", next_high, next_low); + } else if (idx == num_points - 1) + /* So we roof out 1 degree over the max point */ + th->interpolated_temp = db8500_thermal_points[idx] + 1; - schedule_work(&pzone->therm_work); + thermal_zone_device_update(th->tz, THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } -static void db8500_thermal_work(struct work_struct *work) -{ - enum thermal_device_mode cur_mode; - struct db8500_thermal_zone *pzone; - - pzone = container_of(work, struct db8500_thermal_zone, therm_work); - - mutex_lock(&pzone->th_lock); - cur_mode = pzone->mode; - mutex_unlock(&pzone->th_lock); - - if (cur_mode == THERMAL_DEVICE_DISABLED) - return; - - thermal_zone_device_update(pzone->therm_dev, THERMAL_EVENT_UNSPECIFIED); - dev_dbg(&pzone->therm_dev->device, "thermal work finished.\n"); -} - -#ifdef CONFIG_OF -static struct db8500_thsens_platform_data* - db8500_thermal_parse_dt(struct platform_device *pdev) -{ - struct db8500_thsens_platform_data *ptrips; - struct device_node *np = pdev->dev.of_node; - char prop_name[32]; - const char *tmp_str; - u32 tmp_data; - int i, j; - - ptrips = devm_kzalloc(&pdev->dev, sizeof(*ptrips), GFP_KERNEL); - if (!ptrips) - return NULL; - - if (of_property_read_u32(np, "num-trips", &tmp_data)) - goto err_parse_dt; - - if (tmp_data > THERMAL_MAX_TRIPS) - goto err_parse_dt; - - ptrips->num_trips = tmp_data; - - for (i = 0; i < ptrips->num_trips; i++) { - sprintf(prop_name, "trip%d-temp", i); - if (of_property_read_u32(np, prop_name, &tmp_data)) - goto err_parse_dt; - - ptrips->trip_points[i].temp = tmp_data; - - sprintf(prop_name, "trip%d-type", i); - if (of_property_read_string(np, prop_name, &tmp_str)) - goto err_parse_dt; - - if (!strcmp(tmp_str, "active")) - ptrips->trip_points[i].type = THERMAL_TRIP_ACTIVE; - else if (!strcmp(tmp_str, "passive")) - ptrips->trip_points[i].type = THERMAL_TRIP_PASSIVE; - else if (!strcmp(tmp_str, "hot")) - ptrips->trip_points[i].type = THERMAL_TRIP_HOT; - else if (!strcmp(tmp_str, "critical")) - ptrips->trip_points[i].type = THERMAL_TRIP_CRITICAL; - else - goto err_parse_dt; - - sprintf(prop_name, "trip%d-cdev-num", i); - if (of_property_read_u32(np, prop_name, &tmp_data)) - goto err_parse_dt; - - if (tmp_data > COOLING_DEV_MAX) - goto err_parse_dt; - - for (j = 0; j < tmp_data; j++) { - sprintf(prop_name, "trip%d-cdev-name%d", i, j); - if (of_property_read_string(np, prop_name, &tmp_str)) - goto err_parse_dt; - - if (strlen(tmp_str) >= THERMAL_NAME_LENGTH) - goto err_parse_dt; - - strcpy(ptrips->trip_points[i].cdev_name[j], tmp_str); - } - } - return ptrips; - -err_parse_dt: - dev_err(&pdev->dev, "Parsing device tree data error.\n"); - return NULL; -} -#else -static inline struct db8500_thsens_platform_data* - db8500_thermal_parse_dt(struct platform_device *pdev) -{ - return NULL; -} -#endif - static int db8500_thermal_probe(struct platform_device *pdev) { - struct db8500_thermal_zone *pzone = NULL; - struct db8500_thsens_platform_data *ptrips = NULL; - struct device_node *np = pdev->dev.of_node; + struct db8500_thermal_zone *th = NULL; + struct device *dev = &pdev->dev; int low_irq, high_irq, ret = 0; - unsigned long dft_low, dft_high; - if (np) - ptrips = db8500_thermal_parse_dt(pdev); - else - ptrips = dev_get_platdata(&pdev->dev); - - if (!ptrips) - return -EINVAL; - - pzone = devm_kzalloc(&pdev->dev, sizeof(*pzone), GFP_KERNEL); - if (!pzone) + th = devm_kzalloc(dev, sizeof(*th), GFP_KERNEL); + if (!th) return -ENOMEM; - mutex_init(&pzone->th_lock); - mutex_lock(&pzone->th_lock); - - pzone->mode = THERMAL_DEVICE_DISABLED; - pzone->trip_tab = ptrips; - - INIT_WORK(&pzone->therm_work, db8500_thermal_work); - low_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_LOW"); if (low_irq < 0) { - dev_err(&pdev->dev, "Get IRQ_HOTMON_LOW failed.\n"); - ret = low_irq; - goto out_unlock; + dev_err(dev, "Get IRQ_HOTMON_LOW failed\n"); + return low_irq; } - ret = devm_request_threaded_irq(&pdev->dev, low_irq, NULL, + ret = devm_request_threaded_irq(dev, low_irq, NULL, prcmu_low_irq_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT, - "dbx500_temp_low", pzone); + "dbx500_temp_low", th); if (ret < 0) { - dev_err(&pdev->dev, "Failed to allocate temp low irq.\n"); - goto out_unlock; + dev_err(dev, "failed to allocate temp low irq\n"); + return ret; } high_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_HIGH"); if (high_irq < 0) { - dev_err(&pdev->dev, "Get IRQ_HOTMON_HIGH failed.\n"); - ret = high_irq; - goto out_unlock; + dev_err(dev, "Get IRQ_HOTMON_HIGH failed\n"); + return high_irq; } - ret = devm_request_threaded_irq(&pdev->dev, high_irq, NULL, + ret = devm_request_threaded_irq(dev, high_irq, NULL, prcmu_high_irq_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT, - "dbx500_temp_high", pzone); + "dbx500_temp_high", th); if (ret < 0) { - dev_err(&pdev->dev, "Failed to allocate temp high irq.\n"); - goto out_unlock; + dev_err(dev, "failed to allocate temp high irq\n"); + return ret; } - pzone->therm_dev = thermal_zone_device_register("db8500_thermal_zone", - ptrips->num_trips, 0, pzone, &thdev_ops, NULL, 0, 0); - - if (IS_ERR(pzone->therm_dev)) { - dev_err(&pdev->dev, "Register thermal zone device failed.\n"); - ret = PTR_ERR(pzone->therm_dev); - goto out_unlock; + /* register of thermal sensor and get info from DT */ + th->tz = devm_thermal_zone_of_sensor_register(dev, 0, th, &thdev_ops); + if (IS_ERR(th->tz)) { + dev_err(dev, "register thermal zone sensor failed\n"); + return PTR_ERR(th->tz); } - dev_info(&pdev->dev, "Thermal zone device registered.\n"); - - dft_low = PRCMU_DEFAULT_LOW_TEMP; - dft_high = ptrips->trip_points[0].temp; - - db8500_thermal_update_config(pzone, 0, THERMAL_TREND_STABLE, - dft_low, dft_high); - - platform_set_drvdata(pdev, pzone); - pzone->mode = THERMAL_DEVICE_ENABLED; + dev_info(dev, "thermal zone sensor registered\n"); -out_unlock: - mutex_unlock(&pzone->th_lock); + /* Start measuring at the lowest point */ + db8500_thermal_update_config(th, 0, THERMAL_TREND_STABLE, + PRCMU_DEFAULT_LOW_TEMP, + db8500_thermal_points[0]); - return ret; -} - -static int db8500_thermal_remove(struct platform_device *pdev) -{ - struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev); - - thermal_zone_device_unregister(pzone->therm_dev); - cancel_work_sync(&pzone->therm_work); - mutex_destroy(&pzone->th_lock); + platform_set_drvdata(pdev, th); return 0; } @@ -475,9 +222,6 @@ static int db8500_thermal_remove(struct platform_device *pdev) static int db8500_thermal_suspend(struct platform_device *pdev, pm_message_t state) { - struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev); - - flush_work(&pzone->therm_work); prcmu_stop_temp_sense(); return 0; @@ -485,26 +229,21 @@ static int db8500_thermal_suspend(struct platform_device *pdev, static int db8500_thermal_resume(struct platform_device *pdev) { - struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev); - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - unsigned long dft_low, dft_high; - - dft_low = PRCMU_DEFAULT_LOW_TEMP; - dft_high = ptrips->trip_points[0].temp; + struct db8500_thermal_zone *th = platform_get_drvdata(pdev); - db8500_thermal_update_config(pzone, 0, THERMAL_TREND_STABLE, - dft_low, dft_high); + /* Resume and start measuring at the lowest point */ + db8500_thermal_update_config(th, 0, THERMAL_TREND_STABLE, + PRCMU_DEFAULT_LOW_TEMP, + db8500_thermal_points[0]); return 0; } -#ifdef CONFIG_OF static const struct of_device_id db8500_thermal_match[] = { { .compatible = "stericsson,db8500-thermal" }, {}, }; MODULE_DEVICE_TABLE(of, db8500_thermal_match); -#endif static struct platform_driver db8500_thermal_driver = { .driver = { @@ -514,7 +253,6 @@ static struct platform_driver db8500_thermal_driver = { .probe = db8500_thermal_probe, .suspend = db8500_thermal_suspend, .resume = db8500_thermal_resume, - .remove = db8500_thermal_remove, }; module_platform_driver(db8500_thermal_driver); diff --git a/drivers/thermal/thermal_mmio.c b/drivers/thermal/thermal_mmio.c index de3cceea23bc..40524fa13533 100644 --- a/drivers/thermal/thermal_mmio.c +++ b/drivers/thermal/thermal_mmio.c @@ -53,13 +53,6 @@ static int thermal_mmio_probe(struct platform_device *pdev) return -ENOMEM; resource = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (IS_ERR(resource)) { - dev_err(&pdev->dev, - "fail to get platform memory resource (%ld)\n", - PTR_ERR(resource)); - return PTR_ERR(resource); - } - sensor->mmio_base = devm_ioremap_resource(&pdev->dev, resource); if (IS_ERR(sensor->mmio_base)) { dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n", diff --git a/drivers/video/logo/Makefile b/drivers/video/logo/Makefile index 228a89b9bdd1..16f60c1e1766 100644 --- a/drivers/video/logo/Makefile +++ b/drivers/video/logo/Makefile @@ -18,23 +18,6 @@ obj-$(CONFIG_SPU_BASE) += logo_spe_clut224.o # How to generate logo's -# Use logo-cfiles to retrieve list of .c files to be built -logo-cfiles = $(notdir $(patsubst %.$(2), %.c, \ - $(wildcard $(srctree)/$(src)/*$(1).$(2)))) - - -# Mono logos -extra-y += $(call logo-cfiles,_mono,pbm) - -# VGA16 logos -extra-y += $(call logo-cfiles,_vga16,ppm) - -# 224 Logos -extra-y += $(call logo-cfiles,_clut224,ppm) - -# Gray 256 -extra-y += $(call logo-cfiles,_gray256,pgm) - pnmtologo := scripts/pnmtologo # Create commands like "pnmtologo -t mono -n logo_mac_mono -o ..." @@ -55,5 +38,5 @@ $(obj)/%_clut224.c: $(src)/%_clut224.ppm $(pnmtologo) FORCE $(obj)/%_gray256.c: $(src)/%_gray256.pgm $(pnmtologo) FORCE $(call if_changed,logo) -# Files generated that shall be removed upon make clean -clean-files := *.o *_mono.c *_vga16.c *_clut224.c *_gray256.c +# generated C files +targets += *_mono.c *_vga16.c *_clut224.c *_gray256.c diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 4e11de6cde81..5bae515c8e25 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -156,8 +156,10 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) /* balloon_append: add the given page to the balloon. */ -static void __balloon_append(struct page *page) +static void balloon_append(struct page *page) { + __SetPageOffline(page); + /* Lowmem is re-populated first, so highmem pages go at list tail. */ if (PageHighMem(page)) { list_add_tail(&page->lru, &ballooned_pages); @@ -169,11 +171,6 @@ static void __balloon_append(struct page *page) wake_up(&balloon_wq); } -static void balloon_append(struct page *page) -{ - __balloon_append(page); -} - /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ static struct page *balloon_retrieve(bool require_lowmem) { @@ -192,6 +189,7 @@ static struct page *balloon_retrieve(bool require_lowmem) else balloon_stats.balloon_low--; + __ClearPageOffline(page); return page; } @@ -377,8 +375,7 @@ static void xen_online_page(struct page *page, unsigned int order) for (i = 0; i < size; i++) { p = pfn_to_page(start_pfn + i); __online_page_set_limits(p); - __SetPageOffline(p); - __balloon_append(p); + balloon_append(p); } mutex_unlock(&balloon_mutex); } @@ -444,7 +441,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages) xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]); /* Relinquish the page back to the allocator. */ - __ClearPageOffline(page); free_reserved_page(page); } @@ -471,7 +467,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) state = BP_EAGAIN; break; } - __SetPageOffline(page); adjust_managed_page_count(page, -1); xenmem_reservation_scrub_page(page); list_add(&page->lru, &pages); @@ -611,7 +606,6 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages) while (pgno < nr_pages) { page = balloon_retrieve(true); if (page) { - __ClearPageOffline(page); pages[pgno++] = page; #ifdef CONFIG_XEN_HAVE_PVMMU /* @@ -653,10 +647,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) mutex_lock(&balloon_mutex); for (i = 0; i < nr_pages; i++) { - if (pages[i]) { - __SetPageOffline(pages[i]); + if (pages[i]) balloon_append(pages[i]); - } } balloon_stats.target_unpopulated -= nr_pages; @@ -674,7 +666,6 @@ static void __init balloon_add_region(unsigned long start_pfn, unsigned long pages) { unsigned long pfn, extra_pfn_end; - struct page *page; /* * If the amount of usable memory has been limited (e.g., with @@ -684,11 +675,10 @@ static void __init balloon_add_region(unsigned long start_pfn, extra_pfn_end = min(max_pfn, start_pfn + pages); for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) { - page = pfn_to_page(pfn); /* totalram_pages and totalhigh_pages do not include the boot-time balloon extension, so don't subtract from it. */ - __balloon_append(page); + balloon_append(pfn_to_page(pfn)); } balloon_stats.total_pages += extra_pfn_end - start_pfn; diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index 89d60f8e3c18..d1ff2186ebb4 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -40,7 +40,7 @@ #define efi_data(op) (op.u.efi_runtime_call) -efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { struct xen_platform_op op = INIT_EFI_OP(get_time); @@ -61,9 +61,8 @@ efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_time); -efi_status_t xen_efi_set_time(efi_time_t *tm) +static efi_status_t xen_efi_set_time(efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(set_time); @@ -75,10 +74,10 @@ efi_status_t xen_efi_set_time(efi_time_t *tm) return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_set_time); -efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, - efi_time_t *tm) +static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, + efi_bool_t *pending, + efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time); @@ -98,9 +97,8 @@ efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_wakeup_time); -efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(set_wakeup_time); @@ -117,11 +115,10 @@ efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_set_wakeup_time); -efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, - u32 *attr, unsigned long *data_size, - void *data) +static efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, + void *data) { struct xen_platform_op op = INIT_EFI_OP(get_variable); @@ -141,11 +138,10 @@ efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_variable); -efi_status_t xen_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) +static efi_status_t xen_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) { struct xen_platform_op op = INIT_EFI_OP(get_next_variable_name); @@ -165,11 +161,10 @@ efi_status_t xen_efi_get_next_variable(unsigned long *name_size, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_next_variable); -efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, - void *data) +static efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data) { struct xen_platform_op op = INIT_EFI_OP(set_variable); @@ -186,11 +181,10 @@ efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_set_variable); -efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size) +static efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) { struct xen_platform_op op = INIT_EFI_OP(query_variable_info); @@ -208,9 +202,8 @@ efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_query_variable_info); -efi_status_t xen_efi_get_next_high_mono_count(u32 *count) +static efi_status_t xen_efi_get_next_high_mono_count(u32 *count) { struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count); @@ -221,10 +214,9 @@ efi_status_t xen_efi_get_next_high_mono_count(u32 *count) return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_next_high_mono_count); -efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, unsigned long sg_list) +static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) { struct xen_platform_op op = INIT_EFI_OP(update_capsule); @@ -241,11 +233,9 @@ efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_update_capsule); -efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, u64 *max_size, - int *reset_type) +static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, int *reset_type) { struct xen_platform_op op = INIT_EFI_OP(query_capsule_capabilities); @@ -264,10 +254,9 @@ efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps); -void xen_efi_reset_system(int reset_type, efi_status_t status, - unsigned long data_size, efi_char16_t *data) +static void xen_efi_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) { switch (reset_type) { case EFI_RESET_COLD: @@ -281,4 +270,25 @@ void xen_efi_reset_system(int reset_type, efi_status_t status, BUG(); } } -EXPORT_SYMBOL_GPL(xen_efi_reset_system); + +/* + * Set XEN EFI runtime services function pointers. Other fields of struct efi, + * e.g. efi.systab, will be set like normal EFI. + */ +void __init xen_efi_runtime_setup(void) +{ + efi.get_time = xen_efi_get_time; + efi.set_time = xen_efi_set_time; + efi.get_wakeup_time = xen_efi_get_wakeup_time; + efi.set_wakeup_time = xen_efi_set_wakeup_time; + efi.get_variable = xen_efi_get_variable; + efi.get_next_variable = xen_efi_get_next_variable; + efi.set_variable = xen_efi_set_variable; + efi.set_variable_nonblocking = xen_efi_set_variable; + efi.query_variable_info = xen_efi_query_variable_info; + efi.query_variable_info_nonblocking = xen_efi_query_variable_info; + efi.update_capsule = xen_efi_update_capsule; + efi.query_capsule_caps = xen_efi_query_capsule_caps; + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; + efi.reset_system = xen_efi_reset_system; +} diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 08adc590f631..597af455a522 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -55,6 +55,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/miscdevice.h> +#include <linux/workqueue.h> #include <xen/xenbus.h> #include <xen/xen.h> @@ -116,6 +117,8 @@ struct xenbus_file_priv { wait_queue_head_t read_waitq; struct kref kref; + + struct work_struct wq; }; /* Read out any raw xenbus messages queued up. */ @@ -300,14 +303,14 @@ static void watch_fired(struct xenbus_watch *watch, mutex_unlock(&adap->dev_data->reply_mutex); } -static void xenbus_file_free(struct kref *kref) +static void xenbus_worker(struct work_struct *wq) { struct xenbus_file_priv *u; struct xenbus_transaction_holder *trans, *tmp; struct watch_adapter *watch, *tmp_watch; struct read_buffer *rb, *tmp_rb; - u = container_of(kref, struct xenbus_file_priv, kref); + u = container_of(wq, struct xenbus_file_priv, wq); /* * No need for locking here because there are no other users, @@ -333,6 +336,18 @@ static void xenbus_file_free(struct kref *kref) kfree(u); } +static void xenbus_file_free(struct kref *kref) +{ + struct xenbus_file_priv *u; + + /* + * We might be called in xenbus_thread(). + * Use workqueue to avoid deadlock. + */ + u = container_of(kref, struct xenbus_file_priv, kref); + schedule_work(&u->wq); +} + static struct xenbus_transaction_holder *xenbus_get_transaction( struct xenbus_file_priv *u, uint32_t tx_id) { @@ -650,6 +665,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) INIT_LIST_HEAD(&u->watches); INIT_LIST_HEAD(&u->read_buffers); init_waitqueue_head(&u->read_waitq); + INIT_WORK(&u->wq, xenbus_worker); mutex_init(&u->reply_mutex); mutex_init(&u->msgbuffer_mutex); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index bcd1bafb0278..4150280509ff 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -10,13 +10,6 @@ #include <linux/dns_resolver.h> #include "internal.h" -const struct file_operations afs_dynroot_file_operations = { - .open = dcache_dir_open, - .release = dcache_dir_close, - .iterate_shared = dcache_readdir, - .llseek = dcache_dir_lseek, -}; - /* * Probe to see if a cell may exist. This prevents positive dentries from * being created unnecessarily. diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 7b1c18c32f48..46d2d7cb461d 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -443,7 +443,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; if (root) { inode->i_op = &afs_dynroot_inode_operations; - inode->i_fop = &afs_dynroot_file_operations; + inode->i_fop = &simple_dir_operations; } else { inode->i_op = &afs_autocell_inode_operations; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 9cdfabaeaa0b..759e0578012c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -910,7 +910,6 @@ extern int afs_silly_iput(struct dentry *, struct inode *); /* * dynroot.c */ -extern const struct file_operations afs_dynroot_file_operations; extern const struct inode_operations afs_dynroot_inode_operations; extern const struct dentry_operations afs_dynroot_dentry_operations; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7b32b6af322d..cceaf05aada2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3745,12 +3745,21 @@ err_unlock: static void set_btree_ioerr(struct page *page) { struct extent_buffer *eb = (struct extent_buffer *)page->private; + struct btrfs_fs_info *fs_info; SetPageError(page); if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) return; /* + * If we error out, we should add back the dirty_metadata_bytes + * to make it consistent. + */ + fs_info = eb->fs_info; + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, + eb->len, fs_info->dirty_metadata_batch); + + /* * If writeback for a btree extent that doesn't belong to a log tree * failed, increment the counter transaction->eb_write_errors. * We do this because while the transaction is running and before it's @@ -3986,6 +3995,10 @@ retry: if (!ret) { free_extent_buffer(eb); continue; + } else if (ret < 0) { + done = 1; + free_extent_buffer(eb); + break; } ret = write_one_eb(eb, wbc, &epd); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 8d3bd799ac7d..c4bb69941c77 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3166,9 +3166,6 @@ out: btrfs_free_path(path); mutex_lock(&fs_info->qgroup_rescan_lock); - if (!btrfs_fs_closing(fs_info)) - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; - if (err > 0 && fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; @@ -3184,16 +3181,30 @@ out: trans = btrfs_start_transaction(fs_info->quota_root, 1); if (IS_ERR(trans)) { err = PTR_ERR(trans); + trans = NULL; btrfs_err(fs_info, "fail to start transaction for status update: %d", err); - goto done; } - ret = update_qgroup_status_item(trans); - if (ret < 0) { - err = ret; - btrfs_err(fs_info, "fail to update qgroup status: %d", err); + + mutex_lock(&fs_info->qgroup_rescan_lock); + if (!btrfs_fs_closing(fs_info)) + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + if (trans) { + ret = update_qgroup_status_item(trans); + if (ret < 0) { + err = ret; + btrfs_err(fs_info, "fail to update qgroup status: %d", + err); + } } + fs_info->qgroup_rescan_running = false; + complete_all(&fs_info->qgroup_rescan_completion); + mutex_unlock(&fs_info->qgroup_rescan_lock); + + if (!trans) + return; + btrfs_end_transaction(trans); if (btrfs_fs_closing(fs_info)) { @@ -3204,12 +3215,6 @@ out: } else { btrfs_err(fs_info, "qgroup scan failed with %d", err); } - -done: - mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_rescan_running = false; - mutex_unlock(&fs_info->qgroup_rescan_lock); - complete_all(&fs_info->qgroup_rescan_completion); } /* @@ -3437,6 +3442,9 @@ cleanup: while ((unode = ulist_next(&reserved->range_changed, &uiter))) clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL); + /* Also free data bytes of already reserved one */ + btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, + orig_reserved, BTRFS_QGROUP_RSV_DATA); extent_changeset_release(reserved); return ret; } @@ -3481,7 +3489,7 @@ static int qgroup_free_reserved_data(struct inode *inode, * EXTENT_QGROUP_RESERVED, we won't double free. * So not need to rush. */ - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree, + ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, free_start, free_start + free_len - 1, EXTENT_QGROUP_RESERVED, &changeset); if (ret < 0) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2f0e25afa486..00504657b602 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1435,6 +1435,13 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, int clear_rsv = 0; int ret; + /* + * The subvolume has reloc tree but the swap is finished, no need to + * create/update the dead reloc tree + */ + if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)) + return 0; + if (root->reloc_root) { reloc_root = root->reloc_root; reloc_root->last_trans = trans->transid; @@ -2187,7 +2194,6 @@ static int clean_dirty_subvols(struct reloc_control *rc) /* Merged subvolume, cleanup its reloc root */ struct btrfs_root *reloc_root = root->reloc_root; - clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); list_del_init(&root->reloc_dirty_list); root->reloc_root = NULL; if (reloc_root) { @@ -2196,6 +2202,7 @@ static int clean_dirty_subvols(struct reloc_control *rc) if (ret2 < 0 && !ret) ret = ret2; } + clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); btrfs_put_fs_root(root); } else { /* Orphan reloc tree, just clean it up */ diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index b5e80563efaa..99fe9bf3fdac 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -52,7 +52,13 @@ static struct file_system_type test_type = { struct inode *btrfs_new_test_inode(void) { - return new_inode(test_mnt->mnt_sb); + struct inode *inode; + + inode = new_inode(test_mnt->mnt_sb); + if (inode) + inode_init_owner(inode, NULL, S_IFREG); + + return inode; } static int btrfs_init_test_fs(void) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a324480bc88b..cdd7af424033 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4063,7 +4063,13 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, } num_devices = btrfs_num_devices(fs_info); - allowed = 0; + + /* + * SINGLE profile on-disk has no profile bit, but in-memory we have a + * special bit for it, to make it easier to distinguish. Thus we need + * to set it manually, or balance would refuse the profile. + */ + allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) if (num_devices >= btrfs_raid_array[i].devs_min) allowed |= btrfs_raid_array[i].bg_flag; diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index 6c3bd07868d7..0f0dc1c1fe41 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -57,9 +57,18 @@ struct smb_query_info { /* char buffer[]; */ } __packed; +struct smb3_key_debug_info { + __u64 Suid; + __u16 cipher_type; + __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */ + __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; + __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; +} __packed; + #define CIFS_IOCTL_MAGIC 0xCF #define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) #define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4) #define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info) #define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array) #define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info) +#define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info) diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h index eb428349f29a..439b99cefeb0 100644 --- a/fs/cifs/cifsacl.h +++ b/fs/cifs/cifsacl.h @@ -90,8 +90,39 @@ struct cifs_acl { __le32 num_aces; } __attribute__((packed)); +/* ACE types - see MS-DTYP 2.4.4.1 */ +#define ACCESS_ALLOWED_ACE_TYPE 0x00 +#define ACCESS_DENIED_ACE_TYPE 0x01 +#define SYSTEM_AUDIT_ACE_TYPE 0x02 +#define SYSTEM_ALARM_ACE_TYPE 0x03 +#define ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04 +#define ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 +#define ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 +#define SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 +#define SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 +#define ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09 +#define ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A +#define ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B +#define ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C +#define SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D +#define SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E /* Reserved */ +#define SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F +#define SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 /* reserved */ +#define SYSTEM_MANDATORY_LABEL_ACE_TYPE 0x11 +#define SYSTEM_RESOURCE_ATTRIBUTE_ACE_TYPE 0x12 +#define SYSTEM_SCOPED_POLICY_ID_ACE_TYPE 0x13 + +/* ACE flags */ +#define OBJECT_INHERIT_ACE 0x01 +#define CONTAINER_INHERIT_ACE 0x02 +#define NO_PROPAGATE_INHERIT_ACE 0x04 +#define INHERIT_ONLY_ACE 0x08 +#define INHERITED_ACE 0x10 +#define SUCCESSFUL_ACCESS_ACE_FLAG 0x40 +#define FAILED_ACCESS_ACE_FLAG 0x80 + struct cifs_ace { - __u8 type; + __u8 type; /* see above and MS-DTYP 2.4.4.1 */ __u8 flags; __le16 size; __le32 access_req; @@ -99,6 +130,54 @@ struct cifs_ace { } __attribute__((packed)); /* + * The current SMB3 form of security descriptor is similar to what was used for + * cifs (see above) but some fields are split, and fields in the struct below + * matches names of fields to the the spec, MS-DTYP (see sections 2.4.5 and + * 2.4.6). Note that "CamelCase" fields are used in this struct in order to + * match the MS-DTYP and MS-SMB2 specs which define the wire format. + */ +struct smb3_sd { + __u8 Revision; /* revision level, MUST be one */ + __u8 Sbz1; /* only meaningful if 'RM' flag set below */ + __le16 Control; + __le32 OffsetOwner; + __le32 OffsetGroup; + __le32 OffsetSacl; + __le32 OffsetDacl; +} __packed; + +/* Meaning of 'Control' field flags */ +#define ACL_CONTROL_SR 0x0001 /* Self relative */ +#define ACL_CONTROL_RM 0x0002 /* Resource manager control bits */ +#define ACL_CONTROL_PS 0x0004 /* SACL protected from inherits */ +#define ACL_CONTROL_PD 0x0008 /* DACL protected from inherits */ +#define ACL_CONTROL_SI 0x0010 /* SACL Auto-Inherited */ +#define ACL_CONTROL_DI 0x0020 /* DACL Auto-Inherited */ +#define ACL_CONTROL_SC 0x0040 /* SACL computed through inheritance */ +#define ACL_CONTROL_DC 0x0080 /* DACL computed through inheritence */ +#define ACL_CONTROL_SS 0x0100 /* Create server ACL */ +#define ACL_CONTROL_DT 0x0200 /* DACL provided by trusteed source */ +#define ACL_CONTROL_SD 0x0400 /* SACL defaulted */ +#define ACL_CONTROL_SP 0x0800 /* SACL is present on object */ +#define ACL_CONTROL_DD 0x1000 /* DACL defaulted */ +#define ACL_CONTROL_DP 0x2000 /* DACL is present on object */ +#define ACL_CONTROL_GD 0x4000 /* Group was defaulted */ +#define ACL_CONTROL_OD 0x8000 /* User was defaulted */ + +/* Meaning of AclRevision flags */ +#define ACL_REVISION 0x02 /* See section 2.4.4.1 of MS-DTYP */ +#define ACL_REVISION_DS 0x04 /* Additional AceTypes allowed */ + +struct smb3_acl { + u8 AclRevision; /* revision level */ + u8 Sbz1; /* MBZ */ + __le16 AclSize; + __le16 AceCount; + __le16 Sbz2; /* MBZ */ +} __packed; + + +/* * Minimum security identifier can be one for system defined Users * and Groups such as NULL SID and World or Built-in accounts such * as Administrator and Guest and consists of diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 54e204589cb9..2e960e1049db 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -331,8 +331,9 @@ struct smb_version_operations { umode_t mode, struct cifs_tcon *tcon, const char *full_path, struct cifs_sb_info *cifs_sb); - int (*mkdir)(const unsigned int, struct cifs_tcon *, const char *, - struct cifs_sb_info *); + int (*mkdir)(const unsigned int xid, struct inode *inode, umode_t mode, + struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *sb); /* set info on created directory */ void (*mkdir_setinfo)(struct inode *, const char *, struct cifs_sb_info *, struct cifs_tcon *, @@ -1209,6 +1210,7 @@ struct cifs_search_info { bool smallBuf:1; /* so we know which buf_release function to call */ }; +#define ACL_NO_MODE -1 struct cifs_open_parms { struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 99b1b1ef558c..e53e9f62b87b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -372,7 +372,8 @@ extern int CIFSSMBUnixSetPathInfo(const unsigned int xid, const struct nls_table *nls_codepage, int remap); -extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, +extern int CIFSSMBMkDir(const unsigned int xid, struct inode *inode, + umode_t mode, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index dbee2132e419..4f554f019a98 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1078,7 +1078,8 @@ RmDirRetry: } int -CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, +CIFSSMBMkDir(const unsigned int xid, struct inode *inode, umode_t mode, + struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { int rc = 0; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 26cdfbf1e164..3bae2e53f0b8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1622,13 +1622,14 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) } /* BB add setting the equivalent of mode via CreateX w/ACLs */ - rc = server->ops->mkdir(xid, tcon, full_path, cifs_sb); + rc = server->ops->mkdir(xid, inode, mode, tcon, full_path, cifs_sb); if (rc) { cifs_dbg(FYI, "cifs_mkdir returned 0x%x\n", rc); d_drop(direntry); goto mkdir_out; } + /* TODO: skip this for smb2/smb3 */ rc = cifs_mkdir_qinfo(inode, direntry, mode, full_path, cifs_sb, tcon, xid); mkdir_out: diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 76ddd98b6298..1a01e108d75e 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -164,6 +164,7 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { struct inode *inode = file_inode(filep); + struct smb3_key_debug_info pkey_inf; int rc = -ENOTTY; /* strange error - but the precedent */ unsigned int xid; struct cifsFileInfo *pSMBFile = filep->private_data; @@ -270,6 +271,34 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) else rc = -EOPNOTSUPP; break; + case CIFS_DUMP_KEY: + if (pSMBFile == NULL) + break; + if (!capable(CAP_SYS_ADMIN)) { + rc = -EACCES; + break; + } + + tcon = tlink_tcon(pSMBFile->tlink); + if (!smb3_encryption_required(tcon)) { + rc = -EOPNOTSUPP; + break; + } + pkey_inf.cipher_type = + le16_to_cpu(tcon->ses->server->cipher_type); + pkey_inf.Suid = tcon->ses->Suid; + memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response, + 16 /* SMB2_NTLMV2_SESSKEY_SIZE */); + memcpy(pkey_inf.smb3decryptionkey, + tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE); + memcpy(pkey_inf.smb3encryptionkey, + tcon->ses->smb3encryptionkey, SMB3_SIGN_KEY_SIZE); + if (copy_to_user((void __user *)arg, &pkey_inf, + sizeof(struct smb3_key_debug_info))) + rc = -EFAULT; + else + rc = 0; + break; default: cifs_dbg(FYI, "unsupported ioctl\n"); break; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 4c764ff7edd2..85bd644f9773 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -698,7 +698,6 @@ sess_auth_lanman(struct sess_data *sess_data) char *bcc_ptr; struct cifs_ses *ses = sess_data->ses; char lnm_session_key[CIFS_AUTH_RESP_SIZE]; - __u32 capabilities; __u16 bytes_remaining; /* lanman 2 style sessionsetup */ @@ -709,7 +708,7 @@ sess_auth_lanman(struct sess_data *sess_data) pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; bcc_ptr = sess_data->iov[2].iov_base; - capabilities = cifs_ssetup_hdr(ses, pSMB); + (void)cifs_ssetup_hdr(ses, pSMB); pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index d2a3fb7e5c8d..4121ac1163ca 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -51,7 +51,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, __u32 desired_access, __u32 create_disposition, - __u32 create_options, void *ptr, int command, + __u32 create_options, umode_t mode, void *ptr, int command, struct cifsFileInfo *cfile) { int rc; @@ -103,6 +103,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; oparms.fid = &fid; oparms.reconnect = false; + oparms.mode = mode; memset(&open_iov, 0, sizeof(open_iov)); rqst[num_rqst].rq_iov = open_iov; @@ -478,7 +479,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, - smb2_data, SMB2_OP_QUERY_INFO, cfile); + ACL_NO_MODE, smb2_data, SMB2_OP_QUERY_INFO, cfile); if (rc == -EOPNOTSUPP) { *symlink = true; create_options |= OPEN_REPARSE_POINT; @@ -486,8 +487,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, /* Failed on a symbolic link - query a reparse point info */ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, - create_options, smb2_data, - SMB2_OP_QUERY_INFO, NULL); + create_options, ACL_NO_MODE, + smb2_data, SMB2_OP_QUERY_INFO, NULL); } if (rc) goto out; @@ -499,12 +500,14 @@ out: } int -smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, +smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode, + struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, - CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR, NULL); + CREATE_NOT_FILE, mode, NULL, SMB2_OP_MKDIR, + NULL); } void @@ -525,8 +528,8 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, cifs_get_writable_path(tcon, name, &cfile); tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, - CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO, - cfile); + CREATE_NOT_FILE, ACL_NO_MODE, + &data, SMB2_OP_SET_INFO, cfile); if (tmprc == 0) cifs_i->cifsAttrs = dosattrs; } @@ -536,7 +539,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, - CREATE_NOT_FILE, + CREATE_NOT_FILE, ACL_NO_MODE, NULL, SMB2_OP_RMDIR, NULL); } @@ -546,7 +549,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, { return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, - NULL, SMB2_OP_DELETE, NULL); + ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL); } static int @@ -564,7 +567,8 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, goto smb2_rename_path; } rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access, - FILE_OPEN, 0, smb2_to_name, command, cfile); + FILE_OPEN, 0, ACL_NO_MODE, smb2_to_name, + command, cfile); smb2_rename_path: kfree(smb2_to_name); return rc; @@ -601,8 +605,8 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, __le64 eof = cpu_to_le64(size); return smb2_compound_op(xid, tcon, cifs_sb, full_path, - FILE_WRITE_DATA, FILE_OPEN, 0, &eof, - SMB2_OP_SET_EOF, NULL); + FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE, + &eof, SMB2_OP_SET_EOF, NULL); } int @@ -623,8 +627,8 @@ smb2_set_file_info(struct inode *inode, const char *full_path, return PTR_ERR(tlink); rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path, - FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf, - SMB2_OP_SET_INFO, NULL); + FILE_WRITE_ATTRIBUTES, FILE_OPEN, + 0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, NULL); cifs_put_tlink(tlink); return rc; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index eaed18061314..4c0922596467 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -751,6 +751,8 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) goto oshr_exit; } + atomic_inc(&tcon->num_remote_opens); + o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; oparms.fid->persistent_fid = o_rsp->PersistentFileId; oparms.fid->volatile_fid = o_rsp->VolatileFileId; @@ -1176,6 +1178,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rc = compound_send_recv(xid, ses, flags, 3, rqst, resp_buftype, rsp_iov); + /* no need to bump num_remote_opens because handle immediately closed */ sea_exit: kfree(ea); @@ -1518,6 +1521,8 @@ smb2_ioctl_query_info(const unsigned int xid, resp_buftype, rsp_iov); if (rc) goto iqinf_exit; + + /* No need to bump num_remote_opens since handle immediately closed */ if (qi.flags & PASSTHRU_FSCTL) { pqi = (struct smb_query_info __user *)arg; io_rsp = (struct smb2_ioctl_rsp *)rsp_iov[1].iov_base; @@ -3328,6 +3333,11 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) return; + /* Check if the server granted an oplock rather than a lease */ + if (oplock & SMB2_OPLOCK_LEVEL_EXCLUSIVE) + return smb2_set_oplock_level(cinode, oplock, epoch, + purge_cache); + if (oplock & SMB2_LEASE_READ_CACHING_HE) { new_oplock |= CIFS_CACHE_READ_FLG; strcat(message, "R"); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 87066f1af12c..85f9d614d968 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -751,6 +751,8 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode) unsigned int num = *num_iovec; iov[num].iov_base = create_posix_buf(mode); + if (mode == -1) + cifs_dbg(VFS, "illegal mode\n"); /* BB REMOVEME */ if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = sizeof(struct create_posix); @@ -2352,6 +2354,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, rqst.rq_iov = iov; rqst.rq_nvec = n_iov; + /* no need to inc num_remote_opens because we close it just below */ trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES); /* resource #4: response buffer */ @@ -2416,6 +2419,7 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, /* File attributes ignored on open (used in create though) */ req->FileAttributes = cpu_to_le32(file_attributes); req->ShareAccess = FILE_SHARE_ALL_LE; + req->CreateDisposition = cpu_to_le32(oparms->disposition); req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req)); @@ -2517,6 +2521,23 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, return rc; } + /* TODO: add handling for the mode on create */ + if (oparms->disposition == FILE_CREATE) + cifs_dbg(VFS, "mode is 0x%x\n", oparms->mode); /* BB REMOVEME */ + + if ((oparms->disposition == FILE_CREATE) && (oparms->mode != -1)) { + if (n_iov > 2) { + struct create_context *ccontext = + (struct create_context *)iov[n_iov-1].iov_base; + ccontext->Next = + cpu_to_le32(iov[n_iov-1].iov_len); + } + + /* rc = add_sd_context(iov, &n_iov, oparms->mode); */ + if (rc) + return rc; + } + if (n_iov > 2) { struct create_context *ccontext = (struct create_context *)iov[n_iov-1].iov_base; @@ -3180,7 +3201,7 @@ SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, * See MS-SMB2 2.2.35 and 2.2.36 */ -int +static int SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 completion_filter, bool watch_tree) diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 67a91b11fd59..da3a6d580808 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -84,7 +84,8 @@ extern int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char *full_path, struct cifs_sb_info *cifs_sb); -extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, +extern int smb2_mkdir(const unsigned int xid, struct inode *inode, + umode_t mode, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, struct cifs_sb_info *cifs_sb, diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 08628e6a42ac..1ff28529cf4b 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -144,6 +144,17 @@ #define IO_REPARSE_APPXSTREAM 0xC0000014 /* NFS symlinks, Win 8/SMB3 and later */ #define IO_REPARSE_TAG_NFS 0x80000014 +/* + * AzureFileSync - see + * https://docs.microsoft.com/en-us/azure/storage/files/storage-sync-cloud-tiering + */ +#define IO_REPARSE_TAG_AZ_FILE_SYNC 0x8000001e +/* WSL reparse tags */ +#define IO_REPARSE_TAG_LX_SYMLINK 0xA000001D +#define IO_REPARSE_TAG_AF_UNIX 0x80000023 +#define IO_REPARSE_TAG_LX_FIFO 0x80000024 +#define IO_REPARSE_TAG_LX_CHR 0x80000025 +#define IO_REPARSE_TAG_LX_BLK 0x80000026 /* fsctl flags */ /* If Flags is set to this value, the request is an FSCTL not ioctl request */ diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 9076150758d8..db4ba8f6077e 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -31,7 +31,7 @@ #include "cifs_fs_sb.h" #include "cifs_unicode.h" -#define MAX_EA_VALUE_SIZE 65535 +#define MAX_EA_VALUE_SIZE CIFSMaxBufSize #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" #define CIFS_XATTR_ATTRIB "cifs.dosattrib" /* full name: user.cifs.dosattrib */ #define CIFS_XATTR_CREATETIME "cifs.creationtime" /* user.cifs.creationtime */ diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 8a9fcbd0e8ac..fc3a8d8064f8 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -34,11 +34,15 @@ static void erofs_readendio(struct bio *bio) struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) { - struct inode *const bd_inode = sb->s_bdev->bd_inode; - struct address_space *const mapping = bd_inode->i_mapping; + struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; + struct page *page; - return read_cache_page_gfp(mapping, blkaddr, + page = read_cache_page_gfp(mapping, blkaddr, mapping_gfp_constraint(mapping, ~__GFP_FS)); + /* should already be PageUptodate */ + if (!IS_ERR(page)) + lock_page(page); + return page; } static int erofs_map_blocks_flatmode(struct inode *inode, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index caf9a95173b0..0e369494f2f2 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -105,9 +105,9 @@ static int erofs_read_superblock(struct super_block *sb) int ret; page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); - if (!page) { + if (IS_ERR(page)) { erofs_err(sb, "cannot read erofs superblock"); - return -EIO; + return PTR_ERR(page); } sbi = EROFS_SB(sb); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 96e34c90f814..fad80c97d247 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -575,7 +575,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct erofs_map_blocks *const map = &fe->map; struct z_erofs_collector *const clt = &fe->clt; const loff_t offset = page_offset(page); - bool tight = (clt->mode >= COLLECT_PRIMARY_HOOKED); + bool tight = true; enum z_erofs_cache_alloctype cache_strategy; enum z_erofs_page_type page_type; @@ -628,8 +628,16 @@ restart_now: preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy, pagepool); - tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED); hitted: + /* + * Ensure the current partial page belongs to this submit chain rather + * than other concurrent submit chains or the noio(bypass) chain since + * those chains are handled asynchronously thus the page cannot be used + * for inplace I/O or pagevec (should be processed in strict order.) + */ + tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && + clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); + cur = end - min_t(unsigned int, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, cur, end); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 123e3dee7733..516faa280ced 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4551,6 +4551,7 @@ static int __ext4_get_inode_loc(struct inode *inode, struct buffer_head *bh; struct super_block *sb = inode->i_sb; ext4_fsblk_t block; + struct blk_plug plug; int inodes_per_block, inode_offset; iloc->bh = NULL; @@ -4639,6 +4640,7 @@ make_io: * If we need to do any I/O, try to pre-readahead extra * blocks from the inode table. */ + blk_start_plug(&plug); if (EXT4_SB(sb)->s_inode_readahead_blks) { ext4_fsblk_t b, end, table; unsigned num; @@ -4669,6 +4671,7 @@ make_io: get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); + blk_finish_plug(&plug); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, diff --git a/fs/fhandle.c b/fs/fhandle.c index 0ee727485615..01263ffbc4c0 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -246,7 +246,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, * sys_open_by_handle_at: Open the file handle * @mountdirfd: directory file descriptor * @handle: file handle to be opened - * @flag: open flags. + * @flags: open flags. * * @mountdirfd indicate the directory file descriptor * of the mount point. file handle is decoded relative diff --git a/fs/io_uring.c b/fs/io_uring.c index aa8ac557493c..8a0381f1a43b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1892,15 +1892,15 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) unsigned count, req_dist, tail_index; struct io_ring_ctx *ctx = req->ctx; struct list_head *entry; - struct timespec ts; + struct timespec64 ts; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags || sqe->len != 1) return -EINVAL; - if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr, - sizeof(ts))) + + if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr))) return -EFAULT; /* @@ -1934,7 +1934,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); req->timeout.timer.function = io_timeout_fn; - hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts), + hrtimer_start(&req->timeout.timer, timespec64_to_ktime(ts), HRTIMER_MODE_REL); return 0; } diff --git a/fs/readdir.c b/fs/readdir.c index 2f6a4534e0df..19bea591c3f1 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -20,9 +20,63 @@ #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/compat.h> - #include <linux/uaccess.h> +#include <asm/unaligned.h> + +/* + * Note the "unsafe_put_user() semantics: we goto a + * label for errors. + * + * Also note how we use a "while()" loop here, even though + * only the biggest size needs to loop. The compiler (well, + * at least gcc) is smart enough to turn the smaller sizes + * into just if-statements, and this way we don't need to + * care whether 'u64' or 'u32' is the biggest size. + */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(get_unaligned((type *)src), \ + (type __user *)dst, label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +/* + * We avoid doing 64-bit copies on 32-bit architectures. They + * might be better, but the component names are mostly small, + * and the 64-bit cases can end up being much more complex and + * put much more register pressure on the code, so it's likely + * not worth the pain of unaligned accesses etc. + * + * So limit the copies to "unsigned long" size. I did verify + * that at least the x86-32 case is ok without this limiting, + * but I worry about random other legacy 32-bit cases that + * might not do as well. + */ +#define unsafe_copy_type(dst, src, len, type, label) do { \ + if (sizeof(type) <= sizeof(unsigned long)) \ + unsafe_copy_loop(dst, src, len, type, label); \ +} while (0) + +/* + * Copy the dirent name to user space, and NUL-terminate + * it. This should not be a function call, since we're doing + * the copy inside a "user_access_begin/end()" section. + */ +#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ + char __user *dst = (_dst); \ + const char *src = (_src); \ + size_t len = (_len); \ + unsafe_copy_type(dst, src, len, u64, label); \ + unsafe_copy_type(dst, src, len, u32, label); \ + unsafe_copy_type(dst, src, len, u16, label); \ + unsafe_copy_type(dst, src, len, u8, label); \ + unsafe_put_user(0, dst, label); \ +} while (0) + + int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); @@ -65,6 +119,40 @@ out: EXPORT_SYMBOL(iterate_dir); /* + * POSIX says that a dirent name cannot contain NULL or a '/'. + * + * It's not 100% clear what we should really do in this case. + * The filesystem is clearly corrupted, but returning a hard + * error means that you now don't see any of the other names + * either, so that isn't a perfect alternative. + * + * And if you return an error, what error do you use? Several + * filesystems seem to have decided on EUCLEAN being the error + * code for EFSCORRUPTED, and that may be the error to use. Or + * just EIO, which is perhaps more obvious to users. + * + * In order to see the other file names in the directory, the + * caller might want to make this a "soft" error: skip the + * entry, and return the error at the end instead. + * + * Note that this should likely do a "memchr(name, 0, len)" + * check too, since that would be filesystem corruption as + * well. However, that case can't actually confuse user space, + * which has to do a strlen() on the name anyway to find the + * filename length, and the above "soft error" worry means + * that it's probably better left alone until we have that + * issue clarified. + */ +static int verify_dirent_name(const char *name, int len) +{ + if (WARN_ON_ONCE(!len)) + return -EIO; + if (WARN_ON_ONCE(memchr(name, '/', len))) + return -EIO; + return 0; +} + +/* * Traditional linux readdir() handling.. * * "count=1" is a special case, meaning that the buffer is one @@ -173,6 +261,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); + buf->error = verify_dirent_name(name, namlen); + if (unlikely(buf->error)) + return buf->error; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; @@ -182,28 +273,31 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen, return -EOVERFLOW; } dirent = buf->previous; - if (dirent) { - if (signal_pending(current)) - return -EINTR; - if (__put_user(offset, &dirent->d_off)) - goto efault; - } - dirent = buf->current_dir; - if (__put_user(d_ino, &dirent->d_ino)) - goto efault; - if (__put_user(reclen, &dirent->d_reclen)) - goto efault; - if (copy_to_user(dirent->d_name, name, namlen)) - goto efault; - if (__put_user(0, dirent->d_name + namlen)) - goto efault; - if (__put_user(d_type, (char __user *) dirent + reclen - 1)) + if (dirent && signal_pending(current)) + return -EINTR; + + /* + * Note! This range-checks 'previous' (which may be NULL). + * The real range was checked in getdents + */ + if (!user_access_begin(dirent, sizeof(*dirent))) goto efault; + if (dirent) + unsafe_put_user(offset, &dirent->d_off, efault_end); + dirent = buf->current_dir; + unsafe_put_user(d_ino, &dirent->d_ino, efault_end); + unsafe_put_user(reclen, &dirent->d_reclen, efault_end); + unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); + unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); + user_access_end(); + buf->previous = dirent; dirent = (void __user *)dirent + reclen; buf->current_dir = dirent; buf->count -= reclen; return 0; +efault_end: + user_access_end(); efault: buf->error = -EFAULT; return -EFAULT; @@ -259,34 +353,38 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); + buf->error = verify_dirent_name(name, namlen); + if (unlikely(buf->error)) + return buf->error; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; dirent = buf->previous; - if (dirent) { - if (signal_pending(current)) - return -EINTR; - if (__put_user(offset, &dirent->d_off)) - goto efault; - } - dirent = buf->current_dir; - if (__put_user(ino, &dirent->d_ino)) - goto efault; - if (__put_user(0, &dirent->d_off)) - goto efault; - if (__put_user(reclen, &dirent->d_reclen)) - goto efault; - if (__put_user(d_type, &dirent->d_type)) - goto efault; - if (copy_to_user(dirent->d_name, name, namlen)) - goto efault; - if (__put_user(0, dirent->d_name + namlen)) + if (dirent && signal_pending(current)) + return -EINTR; + + /* + * Note! This range-checks 'previous' (which may be NULL). + * The real range was checked in getdents + */ + if (!user_access_begin(dirent, sizeof(*dirent))) goto efault; + if (dirent) + unsafe_put_user(offset, &dirent->d_off, efault_end); + dirent = buf->current_dir; + unsafe_put_user(ino, &dirent->d_ino, efault_end); + unsafe_put_user(reclen, &dirent->d_reclen, efault_end); + unsafe_put_user(d_type, &dirent->d_type, efault_end); + unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); + user_access_end(); + buf->previous = dirent; dirent = (void __user *)dirent + reclen; buf->current_dir = dirent; buf->count -= reclen; return 0; +efault_end: + user_access_end(); efault: buf->error = -EFAULT; return -EFAULT; diff --git a/fs/statfs.c b/fs/statfs.c index eea7af6f2f22..2616424012ea 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -318,19 +318,10 @@ COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) { struct compat_statfs64 buf; - if (sizeof(ubuf->f_bsize) == 4) { - if ((kbuf->f_type | kbuf->f_bsize | kbuf->f_namelen | - kbuf->f_frsize | kbuf->f_flags) & 0xffffffff00000000ULL) - return -EOVERFLOW; - /* f_files and f_ffree may be -1; it's okay - * to stuff that into 32 bits */ - if (kbuf->f_files != 0xffffffffffffffffULL - && (kbuf->f_files & 0xffffffff00000000ULL)) - return -EOVERFLOW; - if (kbuf->f_ffree != 0xffffffffffffffffULL - && (kbuf->f_ffree & 0xffffffff00000000ULL)) - return -EOVERFLOW; - } + + if ((kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) + return -EOVERFLOW; + memset(&buf, 0, sizeof(struct compat_statfs64)); buf.f_type = kbuf->f_type; buf.f_bsize = kbuf->f_bsize; diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cf074bce3eb3..c94a9ff9f082 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,6 +4,13 @@ #include <asm/types.h> #include <linux/bits.h> +/* Set bits in the first 'n' bytes when loaded from memory */ +#ifdef __LITTLE_ENDIAN +# define aligned_byte_mask(n) ((1UL << 8*(n))-1) +#else +# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) +#endif + #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) diff --git a/include/linux/dim.h b/include/linux/dim.h index 9fa4b3f88c39..b698266d0035 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -4,22 +4,26 @@ #ifndef DIM_H #define DIM_H +#include <linux/bits.h> +#include <linux/kernel.h> #include <linux/module.h> +#include <linux/types.h> +#include <linux/workqueue.h> -/** +/* * Number of events between DIM iterations. * Causes a moderation of the algorithm run. */ #define DIM_NEVENTS 64 -/** +/* * Is a difference between values justifies taking an action. * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ (((100UL * abs((val) - (ref))) / (ref)) > 10) -/** +/* * Calculate the gap between two values. * Take wrap-around and variable size into consideration. */ @@ -27,12 +31,13 @@ & (BIT_ULL(bits) - 1)) /** - * Structure for CQ moderation values. + * struct dim_cq_moder - Structure for CQ moderation values. * Used for communications between DIM and its consumer. * * @usec: CQ timer suggestion (by DIM) * @pkts: CQ packet counter suggestion (by DIM) - * @cq_period_mode: CQ priod count mode (from CQE/EQE) + * @comps: Completion counter + * @cq_period_mode: CQ period count mode (from CQE/EQE) */ struct dim_cq_moder { u16 usec; @@ -42,13 +47,14 @@ struct dim_cq_moder { }; /** - * Structure for DIM sample data. + * struct dim_sample - Structure for DIM sample data. * Used for communications between DIM and its consumer. * * @time: Sample timestamp * @pkt_ctr: Number of packets * @byte_ctr: Number of bytes * @event_ctr: Number of events + * @comp_ctr: Current completion counter */ struct dim_sample { ktime_t time; @@ -59,12 +65,14 @@ struct dim_sample { }; /** - * Structure for DIM stats. + * struct dim_stats - Structure for DIM stats. * Used for holding current measured rates. * * @ppms: Packets per msec * @bpms: Bytes per msec * @epms: Events per msec + * @cpms: Completions per msec + * @cpe_ratio: Ratio of completions to events */ struct dim_stats { int ppms; /* packets per msec */ @@ -75,12 +83,13 @@ struct dim_stats { }; /** - * Main structure for dynamic interrupt moderation (DIM). + * struct dim - Main structure for dynamic interrupt moderation (DIM). * Used for holding all information about a specific DIM instance. * * @state: Algorithm state (see below) * @prev_stats: Measured rates from previous iteration (for comparison) * @start_sample: Sampled data at start of current iteration + * @measuring_sample: A &dim_sample that is used to update the current events * @work: Work to perform on action required * @priv: A pointer to the struct that points to dim * @profile_ix: Current moderation profile @@ -106,24 +115,21 @@ struct dim { }; /** - * enum dim_cq_period_mode - * - * These are the modes for CQ period count. + * enum dim_cq_period_mode - Modes for CQ period count * * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset) * @DIM_CQ_PERIOD_NUM_MODES: Number of modes */ -enum { +enum dim_cq_period_mode { DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, DIM_CQ_PERIOD_NUM_MODES }; /** - * enum dim_state + * enum dim_state - DIM algorithm states * - * These are the DIM algorithm states. * These will determine if the algorithm is in a valid state to start an iteration. * * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile) @@ -131,16 +137,15 @@ enum { * need to perform an action * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure */ -enum { +enum dim_state { DIM_START_MEASURE, DIM_MEASURE_IN_PROGRESS, DIM_APPLY_NEW_PROFILE, }; /** - * enum dim_tune_state + * enum dim_tune_state - DIM algorithm tune states * - * These are the DIM algorithm tune states. * These will determine which action the algorithm should perform. * * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference @@ -148,7 +153,7 @@ enum { * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels */ -enum { +enum dim_tune_state { DIM_PARKING_ON_TOP, DIM_PARKING_TIRED, DIM_GOING_RIGHT, @@ -156,25 +161,23 @@ enum { }; /** - * enum dim_stats_state + * enum dim_stats_state - DIM algorithm statistics states * - * These are the DIM algorithm statistics states. * These will determine the verdict of current iteration. * * @DIM_STATS_WORSE: Current iteration shows worse performance than before - * @DIM_STATS_WORSE: Current iteration shows same performance than before - * @DIM_STATS_WORSE: Current iteration shows better performance than before + * @DIM_STATS_SAME: Current iteration shows same performance than before + * @DIM_STATS_BETTER: Current iteration shows better performance than before */ -enum { +enum dim_stats_state { DIM_STATS_WORSE, DIM_STATS_SAME, DIM_STATS_BETTER, }; /** - * enum dim_step_result + * enum dim_step_result - DIM algorithm step results * - * These are the DIM algorithm step results. * These describe the result of a step. * * @DIM_STEPPED: Performed a regular step @@ -182,7 +185,7 @@ enum { * tired parking * @DIM_ON_EDGE: Stepped to the most left/right profile */ -enum { +enum dim_step_result { DIM_STEPPED, DIM_TOO_TIRED, DIM_ON_EDGE, @@ -199,7 +202,7 @@ enum { bool dim_on_top(struct dim *dim); /** - * dim_turn - change profile alterning direction + * dim_turn - change profile altering direction * @dim: DIM context * * Go left if we were going right and vice-versa. @@ -238,7 +241,7 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, struct dim_stats *curr_stats); /** - * dim_update_sample - set a sample's fields with give values + * dim_update_sample - set a sample's fields with given values * @event_ctr: number of events to set * @packets: number of packets to set * @bytes: number of bytes to set @@ -304,8 +307,8 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); * @end_sample: Current data measurement * * Called by the consumer. - * This is the main logic of the algorithm, where data is processed in order to decide on next - * required action. + * This is the main logic of the algorithm, where data is processed in order + * to decide on next required action. */ void net_dim(struct dim *dim, struct dim_sample end_sample); diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 79435cfc20eb..897e799dbcb9 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -31,6 +31,8 @@ #define SJA1105_META_SMAC 0x222222222222ull #define SJA1105_META_DMAC 0x0180C200000Eull +#define SJA1105_HWTS_RX_EN 0 + /* Global tagger data: each struct sja1105_port has a reference to * the structure defined in struct sja1105_private. */ @@ -42,7 +44,7 @@ struct sja1105_tagger_data { * from taggers running on multiple ports on SMP systems */ spinlock_t meta_lock; - bool hwts_rx_en; + unsigned long state; }; struct sja1105_skb_cb { diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 61c9ffd89b05..93d5cf0bc716 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -108,7 +108,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) return true; - + /* + * For dax vmas, try to always use hugepage mappings. If the kernel does + * not support hugepages, fsdax mappings will fallback to PAGE_SIZE + * mappings, and device-dax namespaces, that try to guarantee a given + * mapping size, will fail to enable + */ if (vma_is_dax(vma)) return true; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fcb46b3374c6..719fc3e15ea4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1090,6 +1090,7 @@ enum kvm_stat_kind { struct kvm_stat_data { int offset; + int mode; struct kvm *kvm; }; @@ -1097,6 +1098,7 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; + int mode; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index bef51e35d8d2..6fefb09af7c3 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -17,6 +17,7 @@ struct device; */ struct vmem_altmap { const unsigned long base_pfn; + const unsigned long end_pfn; const unsigned long reserve; unsigned long free; unsigned long align; diff --git a/include/linux/mii.h b/include/linux/mii.h index 5cd824c1c0ca..4ce8901a1af6 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -455,6 +455,15 @@ static inline void mii_lpa_mod_linkmode_lpa_t(unsigned long *lp_advertising, lp_advertising, lpa & LPA_LPACK); } +static inline void mii_ctrl1000_mod_linkmode_adv_t(unsigned long *advertising, + u32 ctrl1000) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, advertising, + ctrl1000 & ADVERTISE_1000HALF); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, advertising, + ctrl1000 & ADVERTISE_1000FULL); +} + /** * linkmode_adv_to_lcl_adv_t * @advertising:pointer to linkmode advertising diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 34de06b426ef..8071148f29a6 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -47,16 +47,16 @@ struct vif_entry_notifier_info { }; static inline int mr_call_vif_notifier(struct notifier_block *nb, - struct net *net, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, - unsigned short vif_index, u32 tb_id) + unsigned short vif_index, u32 tb_id, + struct netlink_ext_ack *extack) { struct vif_entry_notifier_info info = { .info = { .family = family, - .net = net, + .extack = extack, }, .dev = vif->dev, .vif_index = vif_index, @@ -64,7 +64,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, .tb_id = tb_id, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib_notifier(nb, event_type, &info.info); } static inline int mr_call_vif_notifiers(struct net *net, @@ -77,7 +77,6 @@ static inline int mr_call_vif_notifiers(struct net *net, struct vif_entry_notifier_info info = { .info = { .family = family, - .net = net, }, .dev = vif->dev, .vif_index = vif_index, @@ -173,21 +172,21 @@ struct mfc_entry_notifier_info { }; static inline int mr_call_mfc_notifier(struct notifier_block *nb, - struct net *net, unsigned short family, enum fib_event_type event_type, - struct mr_mfc *mfc, u32 tb_id) + struct mr_mfc *mfc, u32 tb_id, + struct netlink_ext_ack *extack) { struct mfc_entry_notifier_info info = { .info = { .family = family, - .net = net, + .extack = extack, }, .mfc = mfc, .tb_id = tb_id }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib_notifier(nb, event_type, &info.info); } static inline int mr_call_mfc_notifiers(struct net *net, @@ -199,7 +198,6 @@ static inline int mr_call_mfc_notifiers(struct net *net, struct mfc_entry_notifier_info info = { .info = { .family = family, - .net = net, }, .mfc = mfc, .tb_id = tb_id @@ -301,10 +299,11 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, - struct notifier_block *nb), + struct notifier_block *nb, + struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock); + rwlock_t *mrt_lock, struct netlink_ext_ack *extack); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -355,10 +354,11 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, static inline int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, - struct notifier_block *nb), + struct notifier_block *nb, + struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock) + rwlock_t *mrt_lock, struct netlink_ext_ack *extack) { return -EINVAL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 48cc71aae466..3207e0b9ec4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2504,6 +2504,9 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd); int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); +int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); +int unregister_netdevice_notifier_net(struct net *net, + struct notifier_block *nb); struct netdev_notifier_info { struct net_device *dev; @@ -2574,6 +2577,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_continue_reverse(net, d) \ + list_for_each_entry_continue_reverse(d, &(net)->dev_base_head, \ + dev_list) #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ @@ -4107,9 +4113,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); -int dev_get_valid_name(struct net *net, struct net_device *dev, - const char *name); - #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) diff --git a/include/linux/phy.h b/include/linux/phy.h index a7ecbe0e55aa..9a0e981df502 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -678,6 +678,7 @@ static inline bool phy_is_started(struct phy_device *phydev) return phydev->state >= PHY_UP; } +void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); /** @@ -1076,6 +1077,7 @@ int genphy_config_eee_advert(struct phy_device *phydev); int __genphy_config_aneg(struct phy_device *phydev, bool changed); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); +int genphy_read_lpa(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); diff --git a/include/linux/platform_data/db8500_thermal.h b/include/linux/platform_data/db8500_thermal.h deleted file mode 100644 index 55e55750a165..000000000000 --- a/include/linux/platform_data/db8500_thermal.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * db8500_thermal.h - DB8500 Thermal Management Implementation - * - * Copyright (C) 2012 ST-Ericsson - * Copyright (C) 2012 Linaro Ltd. - * - * Author: Hongbo Zhang <[email protected]> - */ - -#ifndef _DB8500_THERMAL_H_ -#define _DB8500_THERMAL_H_ - -#include <linux/thermal.h> - -#define COOLING_DEV_MAX 8 - -struct db8500_trip_point { - unsigned long temp; - enum thermal_trip_type type; - char cdev_name[COOLING_DEV_MAX][THERMAL_NAME_LENGTH]; -}; - -struct db8500_thsens_platform_data { - struct db8500_trip_point trip_points[THERMAL_MAX_TRIPS]; - int num_trips; -}; - -#endif /* _DB8500_THERMAL_H_ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e7d3b1a513ef..0a58402a166e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2261,12 +2261,12 @@ static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); } -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) +static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { if (likely(len <= skb_headlen(skb))) - return 1; + return true; if (unlikely(len > skb->len)) - return 0; + return false; return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; } @@ -4160,15 +4160,12 @@ static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {} static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {} #endif /* CONFIG_SKB_EXTENSIONS */ -static inline void nf_reset(struct sk_buff *skb) +static inline void nf_reset_ct(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(skb)); skb->_nfct = 0; #endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - skb_ext_del(skb, SKB_EXT_BRIDGE_NF); -#endif } static inline void nf_reset_trace(struct sk_buff *skb) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e4b3fb4bb77c..fe80d537945d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -28,13 +28,14 @@ struct sk_msg_sg { u32 end; u32 size; u32 copybreak; - bool copy[MAX_MSG_FRAGS]; + unsigned long copy; /* The extra element is used for chaining the front and sections when * the list becomes partitioned (e.g. end < start). The crypto APIs * require the chaining. */ struct scatterlist data[MAX_MSG_FRAGS + 1]; }; +static_assert(BITS_PER_LONG >= MAX_MSG_FRAGS); /* UAPI in filter.c depends on struct sk_msg_sg being first element. */ struct sk_msg { @@ -227,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) { struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); - if (msg->sg.copy[msg->sg.start]) { + if (test_bit(msg->sg.start, &msg->sg.copy)) { msg->data = NULL; msg->data_end = NULL; } else { @@ -246,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sg_set_page(sge, page, len, offset); sg_unmark_end(sge); - msg->sg.copy[msg->sg.end] = true; + __set_bit(msg->sg.end, &msg->sg.copy); msg->sg.size += len; sk_msg_iter_next(msg, end); } @@ -254,7 +255,10 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { - msg->sg.copy[i] = copy_state; + if (copy_state) + __set_bit(i, &msg->sg.copy); + else + __clear_bit(i, &msg->sg.copy); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index af4f265d0f67..27f6b046cf92 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -13,6 +13,7 @@ #include <linux/completion.h> #include <linux/scatterlist.h> #include <linux/gpio/consumer.h> +#include <linux/ptp_clock_kernel.h> struct dma_chan; struct property_entry; @@ -409,6 +410,12 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @fw_translate_cs: If the boot firmware uses different numbering scheme * what Linux expects, this optional hook can be used to translate * between the two. + * @ptp_sts_supported: If the driver sets this to true, it must provide a + * time snapshot in @spi_transfer->ptp_sts as close as possible to the + * moment in time when @spi_transfer->ptp_sts_word_pre and + * @spi_transfer->ptp_sts_word_post were transmitted. + * If the driver does not set this, the SPI core takes the snapshot as + * close to the driver hand-over as possible. * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -604,6 +611,15 @@ struct spi_controller { void *dummy_tx; int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs); + + /* + * Driver sets this field to indicate it is able to snapshot SPI + * transfers (needed e.g. for reading the time of POSIX clocks) + */ + bool ptp_sts_supported; + + /* Interrupt enable state during PTP system timestamping */ + unsigned long irq_flags; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) @@ -644,6 +660,14 @@ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ct extern void spi_finalize_current_message(struct spi_controller *ctlr); extern void spi_finalize_current_transfer(struct spi_controller *ctlr); +/* Helper calls for driver to timestamp transfer */ +void spi_take_timestamp_pre(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off); +void spi_take_timestamp_post(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off); + /* the spi driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); @@ -753,6 +777,35 @@ extern void spi_res_release(struct spi_controller *ctlr, * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use + * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset + * within @tx_buf for which the SPI device is requesting that the time + * snapshot for this transfer begins. Upon completing the SPI transfer, + * this value may have changed compared to what was requested, depending + * on the available snapshotting resolution (DMA transfer, + * @ptp_sts_supported is false, etc). + * @ptp_sts_word_post: See @ptp_sts_word_post. The two can be equal (meaning + * that a single byte should be snapshotted). + * If the core takes care of the timestamp (if @ptp_sts_supported is false + * for this controller), it will set @ptp_sts_word_pre to 0, and + * @ptp_sts_word_post to the length of the transfer. This is done + * purposefully (instead of setting to spi_transfer->len - 1) to denote + * that a transfer-level snapshot taken from within the driver may still + * be of higher quality. + * @ptp_sts: Pointer to a memory location held by the SPI slave device where a + * PTP system timestamp structure may lie. If drivers use PIO or their + * hardware has some sort of assist for retrieving exact transfer timing, + * they can (and should) assert @ptp_sts_supported and populate this + * structure using the ptp_read_system_*ts helper functions. + * The timestamp must represent the time at which the SPI slave device has + * processed the word, i.e. the "pre" timestamp should be taken before + * transmitting the "pre" word, and the "post" timestamp after receiving + * transmit confirmation from the controller for the "post" word. + * @timestamped_pre: Set by the SPI controller driver to denote it has acted + * upon the @ptp_sts request. Not set when the SPI core has taken care of + * the task. SPI device drivers are free to print a warning if this comes + * back unset and they need the better resolution. + * @timestamped_post: See above. The reason why both exist is that these + * booleans are also used to keep state in the core SPI logic. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. @@ -842,6 +895,14 @@ struct spi_transfer { u32 effective_speed_hz; + unsigned int ptp_sts_word_pre; + unsigned int ptp_sts_word_post; + + struct ptp_system_timestamp *ptp_sts; + + bool timestamped_pre; + bool timestamped_post; + struct list_head transfer_list; }; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index dc60d03c4b60..86f9464c3f5d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -92,6 +92,7 @@ struct stmmac_dma_cfg { int fixed_burst; int mixed_burst; bool aal; + bool eame; }; #define AXI_BLEN 7 diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 70bbdc38dc37..e47d0522a1f4 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -231,6 +231,76 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, #endif /* ARCH_HAS_NOCACHE_UACCESS */ +extern __must_check int check_zeroed_user(const void __user *from, size_t size); + +/** + * copy_struct_from_user: copy a struct from userspace + * @dst: Destination address, in kernel space. This buffer must be @ksize + * bytes long. + * @ksize: Size of @dst struct. + * @src: Source address, in userspace. + * @usize: (Alleged) size of @src struct. + * + * Copies a struct from userspace to kernel space, in a way that guarantees + * backwards-compatibility for struct syscall arguments (as long as future + * struct extensions are made such that all new fields are *appended* to the + * old struct, and zeroed-out new fields have the same meaning as the old + * struct). + * + * @ksize is just sizeof(*dst), and @usize should've been passed by userspace. + * The recommended usage is something like the following: + * + * SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize) + * { + * int err; + * struct foo karg = {}; + * + * if (usize > PAGE_SIZE) + * return -E2BIG; + * if (usize < FOO_SIZE_VER0) + * return -EINVAL; + * + * err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); + * if (err) + * return err; + * + * // ... + * } + * + * There are three cases to consider: + * * If @usize == @ksize, then it's copied verbatim. + * * If @usize < @ksize, then the userspace has passed an old struct to a + * newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize) + * are to be zero-filled. + * * If @usize > @ksize, then the userspace has passed a new struct to an + * older kernel. The trailing bytes unknown to the kernel (@usize - @ksize) + * are checked to ensure they are zeroed, otherwise -E2BIG is returned. + * + * Returns (in all cases, some data may have been copied): + * * -E2BIG: (@usize > @ksize) and there are non-zero trailing bytes in @src. + * * -EFAULT: access to userspace failed. + */ +static __always_inline __must_check int +copy_struct_from_user(void *dst, size_t ksize, const void __user *src, + size_t usize) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + /* Deal with trailing bytes. */ + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + int ret = check_zeroed_user(src + size, rest); + if (ret <= 0) + return ret ?: -E2BIG; + } + /* Copy the interoperable parts of the struct. */ + if (copy_from_user(dst, src, size)) + return -EFAULT; + return 0; +} + /* * probe_kernel_read(): safely attempt to read from a location * @dst: pointer to the buffer that shall take the data diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 3f62b347b04a..1bab88184d3c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -202,11 +202,11 @@ u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, /* * multicast prototypes (mcast.c) */ -static inline int ipv6_mc_may_pull(struct sk_buff *skb, - unsigned int len) +static inline bool ipv6_mc_may_pull(struct sk_buff *skb, + unsigned int len) { if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) - return 0; + return false; return pskb_may_pull(skb, len); } diff --git a/include/net/devlink.h b/include/net/devlink.h index 23e4b65ec9df..6bf3b9e0595a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -39,6 +39,7 @@ struct devlink { possible_net_t _net; struct mutex lock; bool reload_failed; + bool registered; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -506,11 +507,13 @@ enum devlink_health_reporter_state { struct devlink_health_reporter_ops { char *name; int (*recover)(struct devlink_health_reporter *reporter, - void *priv_ctx); + void *priv_ctx, struct netlink_ext_ack *extack); int (*dump)(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx); + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack); int (*diagnose)(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg); + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack); }; /** @@ -643,7 +646,7 @@ enum devlink_trap_group_generic_id { } struct devlink_ops { - int (*reload_down)(struct devlink *devlink, + int (*reload_down)(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack); int (*reload_up)(struct devlink *devlink, struct netlink_ext_ack *extack); @@ -771,6 +774,8 @@ static inline struct devlink *netdev_to_devlink(struct net_device *dev) struct ib_device; +struct net *devlink_net(const struct devlink *devlink); +void devlink_net_set(struct devlink *devlink, struct net *net); struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); diff --git a/include/net/dsa.h b/include/net/dsa.h index 541fb514e31d..8c3ea0530f65 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -94,8 +94,6 @@ struct __dsa_skb_cb { u8 priv[48 - sizeof(struct dsa_skb_cb)]; }; -#define __DSA_SKB_CB(skb) ((struct __dsa_skb_cb *)((skb)->cb)) - #define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb)) #define DSA_SKB_CB_PRIV(skb) \ diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index c49d7bfb5c30..6d59221ff05a 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -8,7 +8,6 @@ struct module; struct fib_notifier_info { - struct net *net; int family; struct netlink_ext_ack *extack; }; @@ -30,19 +29,21 @@ struct fib_notifier_ops { int family; struct list_head list; unsigned int (*fib_seq_read)(struct net *net); - int (*fib_dump)(struct net *net, struct notifier_block *nb); + int (*fib_dump)(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); struct module *owner; struct rcu_head rcu; }; -int call_fib_notifier(struct notifier_block *nb, struct net *net, +int call_fib_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)); -int unregister_fib_notifier(struct notifier_block *nb); +int register_fib_notifier(struct net *net, struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + struct netlink_ext_ack *extack); +int unregister_fib_notifier(struct net *net, struct notifier_block *nb); struct fib_notifier_ops * fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net); void fib_notifier_ops_unregister(struct fib_notifier_ops *ops); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 20dcadd8eed9..54e227e6b06a 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -194,7 +194,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); -int fib_rules_dump(struct net *net, struct notifier_block *nb, int family); +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, + struct netlink_ext_ack *extack); unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9292f1c588b7..74950663bb00 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -75,8 +75,6 @@ struct genl_family { struct module *module; }; -struct nlattr **genl_family_attrbuf(const struct genl_family *family); - /** * struct genl_info - receiving information * @snd_seq: sending sequence number @@ -128,6 +126,24 @@ enum genl_validate_flags { }; /** + * struct genl_info - info that is available during dumpit op call + * @family: generic netlink family - for internal genl code usage + * @ops: generic netlink ops - for internal genl code usage + * @attrs: netlink attributes + */ +struct genl_dumpit_info { + const struct genl_family *family; + const struct genl_ops *ops; + struct nlattr **attrs; +}; + +static inline const struct genl_dumpit_info * +genl_dumpit_info(struct netlink_callback *cb) +{ + return cb->data; +} + +/** * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 4b5656c71abc..5d1615463138 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -478,7 +478,7 @@ struct ipv6_route_iter { extern const struct seq_operations ipv6_route_seq_ops; -int call_fib6_notifier(struct notifier_block *nb, struct net *net, +int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, @@ -488,7 +488,8 @@ int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); -int fib6_tables_dump(struct net *net, struct notifier_block *nb); +int fib6_tables_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); @@ -504,7 +505,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); -int fib6_rules_dump(struct net *net, struct notifier_block *nb); +int fib6_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); unsigned int fib6_rules_seq_read(struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, @@ -537,7 +539,8 @@ static inline bool fib6_rule_default(const struct fib_rule *rule) { return true; } -static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb) +static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ab1ca9e238d2..52b2406a5dfc 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -219,7 +219,7 @@ struct fib_nh_notifier_info { struct fib_nh *fib_nh; }; -int call_fib4_notifier(struct notifier_block *nb, struct net *net, +int call_fib4_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, @@ -229,7 +229,8 @@ int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_info_notify_update(struct net *net, struct nl_info *info); -void fib_notify(struct net *net, struct notifier_block *nb); +int fib_notify(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); struct fib_table { struct hlist_node tb_hlist; @@ -315,7 +316,8 @@ static inline bool fib4_rule_default(const struct fib_rule *rule) return true; } -static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb) +static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } @@ -377,7 +379,8 @@ out: } bool fib4_rule_default(const struct fib_rule *rule); -int fib4_rules_dump(struct net *net, struct notifier_block *nb); +int fib4_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); unsigned int fib4_rules_seq_read(struct net *net); static inline bool fib4_rules_early_flow_dissect(struct net *net, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 523c6a09e1c8..d69081c38788 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3095,7 +3095,9 @@ enum ieee80211_filter_flags { * * @IEEE80211_AMPDU_RX_START: start RX aggregation * @IEEE80211_AMPDU_RX_STOP: stop RX aggregation - * @IEEE80211_AMPDU_TX_START: start TX aggregation + * @IEEE80211_AMPDU_TX_START: start TX aggregation, the driver must either + * call ieee80211_start_tx_ba_cb_irqsafe() or return the special + * status %IEEE80211_AMPDU_TX_START_IMMEDIATE. * @IEEE80211_AMPDU_TX_OPERATIONAL: TX aggregation has become operational * @IEEE80211_AMPDU_TX_STOP_CONT: stop TX aggregation but continue transmitting * queued packets, now unaggregated. After all packets are transmitted the @@ -3119,6 +3121,8 @@ enum ieee80211_ampdu_mlme_action { IEEE80211_AMPDU_TX_OPERATIONAL, }; +#define IEEE80211_AMPDU_TX_START_IMMEDIATE 1 + /** * struct ieee80211_ampdu_params - AMPDU action parameters * @@ -3896,7 +3900,10 @@ struct ieee80211_ops { * * Even ``189`` would be wrong since 1 could be lost again. * - * Returns a negative error code on failure. + * Returns a negative error code on failure. The driver may return + * %IEEE80211_AMPDU_TX_START_IMMEDIATE for %IEEE80211_AMPDU_TX_START + * if the session can start immediately. + * * The callback can sleep. */ int (*ampdu_action)(struct ieee80211_hw *hw, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f8712bbeb2e0..5ac2bb16d4b3 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -36,6 +36,7 @@ #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> +#include <linux/notifier.h> struct user_namespace; struct proc_dir_entry; @@ -96,6 +97,8 @@ struct net { struct list_head dev_base_head; struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; + struct raw_notifier_head netdev_chain; + unsigned int dev_base_seq; /* protected by rtnl_mutex */ int ifindex; unsigned int dev_unreg_count; @@ -317,7 +320,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet) /* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) - +#define for_each_net_continue_reverse(VAR) \ + list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list) #define for_each_net_rcu(VAR) \ list_for_each_entry_rcu(VAR, &net_namespace_list, list) diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 830bdf345b17..b5fdb108d602 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -24,6 +24,9 @@ struct netns_mib { #ifdef CONFIG_XFRM_STATISTICS DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); #endif +#if IS_ENABLED(CONFIG_TLS) + DEFINE_SNMP_STAT(struct linux_tls_mib, tls_statistics); +#endif }; #endif diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index e1a92c4610f3..0b032b92da0b 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -80,13 +80,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( struct sctp_chunk *chunk, gfp_t gfp); -struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( - const struct sctp_association *asoc, - const struct sockaddr_storage *aaddr, - int flags, - int state, - int error, - gfp_t gfp); +void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport, + int state, int error); struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, @@ -100,6 +95,13 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( __u32 error, gfp_t gfp); +struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event( + const struct sctp_association *asoc, + struct sctp_chunk *chunk, + __u16 flags, + __u32 error, + gfp_t gfp); + struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, __u16 flags, diff --git a/include/net/smc.h b/include/net/smc.h index bd9c0fb3b577..05174ae4f325 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -75,6 +75,9 @@ struct smcd_dev { struct workqueue_struct *event_wq; u8 pnetid[SMC_MAX_PNETID_LEN]; bool pnetid_by_user; + struct list_head lgr_list; + spinlock_t lgr_lock; + u8 going_away : 1; }; struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, diff --git a/include/net/snmp.h b/include/net/snmp.h index cb8ced4380a6..468a67836e2f 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -111,6 +111,12 @@ struct linux_xfrm_mib { unsigned long mibs[LINUX_MIB_XFRMMAX]; }; +/* Linux TLS */ +#define LINUX_MIB_TLSMAX __LINUX_MIB_TLSMAX +struct linux_tls_mib { + unsigned long mibs[LINUX_MIB_TLSMAX]; +}; + #define DEFINE_SNMP_STAT(type, name) \ __typeof__(type) __percpu *name #define DEFINE_SNMP_STAT_ATOMIC(type, name) \ diff --git a/include/net/sock.h b/include/net/sock.h index 2c53f1a1d905..ab905c4b1f0e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2512,7 +2512,7 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } -void sock_enable_timestamp(struct sock *sk, int flag); +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); diff --git a/include/net/tls.h b/include/net/tls.h index c664e6dba0d1..41265e542e71 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -43,6 +43,7 @@ #include <linux/netdevice.h> #include <linux/rcupdate.h> +#include <net/net_namespace.h> #include <net/tcp.h> #include <net/strparser.h> #include <crypto/aead.h> @@ -60,7 +61,6 @@ #define TLS_RECORD_TYPE_DATA 0x17 #define TLS_AAD_SPACE_SIZE 13 -#define TLS_DEVICE_NAME_MAX 32 #define MAX_IV_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 @@ -74,36 +74,14 @@ */ #define TLS_AES_CCM_IV_B0_BYTE 2 -/* - * This structure defines the routines for Inline TLS driver. - * The following routines are optional and filled with a - * null pointer if not defined. - * - * @name: Its the name of registered Inline tls device - * @dev_list: Inline tls device list - * int (*feature)(struct tls_device *device); - * Called to return Inline TLS driver capability - * - * int (*hash)(struct tls_device *device, struct sock *sk); - * This function sets Inline driver for listen and program - * device specific functioanlity as required - * - * void (*unhash)(struct tls_device *device, struct sock *sk); - * This function cleans listen state set by Inline TLS driver - * - * void (*release)(struct kref *kref); - * Release the registered device and allocated resources - * @kref: Number of reference to tls_device - */ -struct tls_device { - char name[TLS_DEVICE_NAME_MAX]; - struct list_head dev_list; - int (*feature)(struct tls_device *device); - int (*hash)(struct tls_device *device, struct sock *sk); - void (*unhash)(struct tls_device *device, struct sock *sk); - void (*release)(struct kref *kref); - struct kref kref; -}; +#define __TLS_INC_STATS(net, field) \ + __SNMP_INC_STATS((net)->mib.tls_statistics, field) +#define TLS_INC_STATS(net, field) \ + SNMP_INC_STATS((net)->mib.tls_statistics, field) +#define __TLS_DEC_STATS(net, field) \ + __SNMP_DEC_STATS((net)->mib.tls_statistics, field) +#define TLS_DEC_STATS(net, field) \ + SNMP_DEC_STATS((net)->mib.tls_statistics, field) enum { TLS_BASE, @@ -158,7 +136,7 @@ struct tls_sw_context_tx { struct list_head tx_list; atomic_t encrypt_pending; int async_notify; - int async_capable; + u8 async_capable:1; #define BIT_TX_SCHEDULED 0 #define BIT_TX_CLOSING 1 @@ -174,8 +152,8 @@ struct tls_sw_context_rx { struct sk_buff *recv_pkt; u8 control; - int async_capable; - bool decrypted; + u8 async_capable:1; + u8 decrypted:1; atomic_t decrypt_pending; bool async_notify; }; @@ -340,7 +318,10 @@ struct tls_offload_context_rx { #define TLS_OFFLOAD_CONTEXT_SIZE_RX \ (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) +struct tls_context *tls_ctx_create(struct sock *sk); void tls_ctx_free(struct sock *sk, struct tls_context *ctx); +void update_sk_prot(struct sock *sk, struct tls_context *ctx); + int wait_on_pending_writer(struct sock *sk, long *timeo); int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); @@ -623,13 +604,6 @@ tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) tls_offload_ctx_rx(tls_ctx)->resync_type = type; } -static inline void tls_offload_tx_resync_request(struct sock *sk) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - - WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags)); -} - /* Driver's seq tracking has to be disabled until resync succeeded */ static inline bool tls_offload_tx_resync_pending(struct sock *sk) { @@ -641,10 +615,11 @@ static inline bool tls_offload_tx_resync_pending(struct sock *sk) return ret; } +int __net_init tls_proc_init(struct net *net); +void __net_exit tls_proc_fini(struct net *net); + int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, unsigned char *record_type); -void tls_register_device(struct tls_device *device); -void tls_unregister_device(struct tls_device *device); int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout); struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); @@ -665,7 +640,9 @@ void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); void tls_device_offload_cleanup_rx(struct sock *sk); void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq); -int tls_device_decrypted(struct sock *sk, struct sk_buff *skb); +void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq); +int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm); #else static inline void tls_device_init(void) {} static inline void tls_device_cleanup(void) {} @@ -688,7 +665,9 @@ static inline void tls_device_offload_cleanup_rx(struct sock *sk) {} static inline void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {} -static inline int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) +static inline int +tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm) { return 0; } diff --git a/include/net/tls_toe.h b/include/net/tls_toe.h new file mode 100644 index 000000000000..b3aa7593ce2c --- /dev/null +++ b/include/net/tls_toe.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <[email protected]>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kref.h> +#include <linux/list.h> + +struct sock; + +#define TLS_TOE_DEVICE_NAME_MAX 32 + +/* + * This structure defines the routines for Inline TLS driver. + * The following routines are optional and filled with a + * null pointer if not defined. + * + * @name: Its the name of registered Inline tls device + * @dev_list: Inline tls device list + * int (*feature)(struct tls_toe_device *device); + * Called to return Inline TLS driver capability + * + * int (*hash)(struct tls_toe_device *device, struct sock *sk); + * This function sets Inline driver for listen and program + * device specific functioanlity as required + * + * void (*unhash)(struct tls_toe_device *device, struct sock *sk); + * This function cleans listen state set by Inline TLS driver + * + * void (*release)(struct kref *kref); + * Release the registered device and allocated resources + * @kref: Number of reference to tls_toe_device + */ +struct tls_toe_device { + char name[TLS_TOE_DEVICE_NAME_MAX]; + struct list_head dev_list; + int (*feature)(struct tls_toe_device *device); + int (*hash)(struct tls_toe_device *device, struct sock *sk); + void (*unhash)(struct tls_toe_device *device, struct sock *sk); + void (*release)(struct kref *kref); + struct kref kref; +}; + +int tls_toe_bypass(struct sock *sk); +int tls_toe_hash(struct sock *sk); +void tls_toe_unhash(struct sock *sk); + +void tls_toe_register_device(struct tls_toe_device *device); +void tls_toe_unregister_device(struct tls_toe_device *device); diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index eb57e3037deb..69e8bb8963db 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -35,8 +35,8 @@ DECLARE_EVENT_CLASS(kmem_alloc, __entry->gfp_flags = gfp_flags; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", - __entry->call_site, + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", + (void *)__entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, @@ -131,7 +131,8 @@ DECLARE_EVENT_CLASS(kmem_free, __entry->ptr = ptr; ), - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) + TP_printk("call_site=%pS ptr=%p", + (void *)__entry->call_site, __entry->ptr) ); DEFINE_EVENT(kmem_free, kfree, diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index a13a62db3565..edc5c887a44c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1068,7 +1068,7 @@ TRACE_EVENT(rxrpc_recvmsg, ), TP_fast_assign( - __entry->call = call->debug_id; + __entry->call = call ? call->debug_id : 0; __entry->why = why; __entry->seq = seq; __entry->offset = offset; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c99b4f2482c6..4fe35d600ab8 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1003,6 +1003,8 @@ struct drm_amdgpu_info_device { __u64 high_va_max; /* gfx10 pa_sc_tile_steering_override */ __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; }; struct drm_amdgpu_info_hw_ip { diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 580b7a2e40e1..b558ea88b766 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -421,6 +421,10 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_FAILED, /* u8 0 or 1 */ + DEVLINK_ATTR_NETNS_FD, /* u32 */ + DEVLINK_ATTR_NETNS_PID, /* u32 */ + DEVLINK_ATTR_NETNS_ID, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/netfilter_arp/arp_tables.h b/include/uapi/linux/netfilter_arp/arp_tables.h index a2a0927d9bd6..bbf5af2b67a8 100644 --- a/include/uapi/linux/netfilter_arp/arp_tables.h +++ b/include/uapi/linux/netfilter_arp/arp_tables.h @@ -199,7 +199,7 @@ struct arpt_get_entries { /* Helper functions */ static __inline__ struct xt_entry_target *arpt_get_target(struct arpt_entry *e) { - return (void *)e + e->target_offset; + return (struct xt_entry_target *)((char *)e + e->target_offset); } /* diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h index 8076c940ffeb..a494cf43a755 100644 --- a/include/uapi/linux/netfilter_bridge/ebtables.h +++ b/include/uapi/linux/netfilter_bridge/ebtables.h @@ -194,7 +194,7 @@ struct ebt_entry { static __inline__ struct ebt_entry_target * ebt_get_target(struct ebt_entry *e) { - return (void *)e + e->target_offset; + return (struct ebt_entry_target *)((char *)e + e->target_offset); } /* {g,s}etsockopt numbers */ diff --git a/include/uapi/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h index 6aaeb14bfce1..50c7fee625ae 100644 --- a/include/uapi/linux/netfilter_ipv4/ip_tables.h +++ b/include/uapi/linux/netfilter_ipv4/ip_tables.h @@ -222,7 +222,7 @@ struct ipt_get_entries { static __inline__ struct xt_entry_target * ipt_get_target(struct ipt_entry *e) { - return (void *)e + e->target_offset; + return (struct xt_entry_target *)((char *)e + e->target_offset); } /* diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h index 031d0a43bed2..d9e364f96a5c 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6_tables.h +++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h @@ -262,7 +262,7 @@ struct ip6t_get_entries { static __inline__ struct xt_entry_target * ip6t_get_target(struct ip6t_entry *e) { - return (void *)e + e->target_offset; + return (struct xt_entry_target *)((char *)e + e->target_offset); } /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index beee59c831a7..64135ab3a7ac 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -571,6 +571,14 @@ * set of BSSID,frequency parameters is used (i.e., either the enforcing * %NL80211_ATTR_MAC,%NL80211_ATTR_WIPHY_FREQ or the less strict * %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT). + * Driver shall not modify the IEs specified through %NL80211_ATTR_IE if + * %NL80211_ATTR_MAC is included. However, if %NL80211_ATTR_MAC_HINT is + * included, these IEs through %NL80211_ATTR_IE are specified by the user + * space based on the best possible BSS selected. Thus, if the driver ends + * up selecting a different BSS, it can modify these IEs accordingly (e.g. + * userspace asks the driver to perform PMKSA caching with BSS1 and the + * driver ends up selecting BSS2 with different PMKSA cache entry; RSNIE + * has to get updated with the apt PMKID). * %NL80211_ATTR_PREV_BSSID can be used to request a reassociation within * the ESS in case the device is already associated and an association with * a different BSS is desired. diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 1c215ea1798e..e168dc59e9a0 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -45,6 +45,27 @@ struct nvme_passthru_cmd { __u32 result; }; +struct nvme_passthru_cmd64 { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u64 result; +}; + #define nvme_admin_cmd nvme_passthru_cmd #define NVME_IOCTL_ID _IO('N', 0x40) @@ -54,5 +75,7 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_RESET _IO('N', 0x44) #define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) #define NVME_IOCTL_RESCAN _IO('N', 0x46) +#define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ diff --git a/include/uapi/linux/pg.h b/include/uapi/linux/pg.h index 364c350e85cd..62b6f69bd9fb 100644 --- a/include/uapi/linux/pg.h +++ b/include/uapi/linux/pg.h @@ -35,6 +35,9 @@ */ +#ifndef _UAPI_LINUX_PG_H +#define _UAPI_LINUX_PG_H + #define PG_MAGIC 'P' #define PG_RESET 'Z' #define PG_COMMAND 'C' @@ -61,4 +64,4 @@ struct pg_read_hdr { }; -/* end of pg.h */ +#endif /* _UAPI_LINUX_PG_H */ diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index b3105ac1381a..99335e1f4a27 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -33,8 +33,31 @@ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ -/* - * Arguments for the clone3 syscall +#ifndef __ASSEMBLY__ +/** + * struct clone_args - arguments for the clone3 syscall + * @flags: Flags for the new process as listed above. + * All flags are valid except for CSIGNAL and + * CLONE_DETACHED. + * @pidfd: If CLONE_PIDFD is set, a pidfd will be + * returned in this argument. + * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the + * child process will be returned in the child's + * memory. + * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of + * the child process will be returned in the + * parent's memory. + * @exit_signal: The exit_signal the parent process will be + * sent when the child exits. + * @stack: Specify the location of the stack for the + * child process. + * @stack_size: The size of the stack for the child process. + * @tls: If CLONE_SETTLS is set, the tls descriptor + * is set to tls. + * + * The structure is versioned by size and thus extensible. + * New struct members must go at the end of the struct and + * must be properly 64bit aligned. */ struct clone_args { __aligned_u64 flags; @@ -46,6 +69,9 @@ struct clone_args { __aligned_u64 stack_size; __aligned_u64 tls; }; +#endif + +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ /* * Scheduling policies diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6d5b164af55c..6bce7f9837a9 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -449,6 +449,16 @@ struct sctp_send_failed { __u8 ssf_data[0]; }; +struct sctp_send_failed_event { + __u16 ssf_type; + __u16 ssf_flags; + __u32 ssf_length; + __u32 ssf_error; + struct sctp_sndinfo ssfe_info; + sctp_assoc_t ssf_assoc_id; + __u8 ssf_data[0]; +}; + /* * ssf_flags: 16 bits (unsigned integer) * @@ -605,6 +615,7 @@ struct sctp_event_subscribe { __u8 sctp_stream_reset_event; __u8 sctp_assoc_reset_event; __u8 sctp_stream_change_event; + __u8 sctp_send_failure_event_event; }; /* @@ -632,6 +643,7 @@ union sctp_notification { struct sctp_stream_reset_event sn_strreset_event; struct sctp_assoc_reset_event sn_assocreset_event; struct sctp_stream_change_event sn_strchange_event; + struct sctp_send_failed_event sn_send_failed_event; }; /* Section 5.3.1 @@ -667,7 +679,9 @@ enum sctp_sn_type { #define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT SCTP_STREAM_CHANGE_EVENT, #define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT - SCTP_SN_TYPE_MAX = SCTP_STREAM_CHANGE_EVENT, + SCTP_SEND_FAILED_EVENT, +#define SCTP_SEND_FAILED_EVENT SCTP_SEND_FAILED_EVENT + SCTP_SN_TYPE_MAX = SCTP_SEND_FAILED_EVENT, #define SCTP_SN_TYPE_MAX SCTP_SN_TYPE_MAX }; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 549a31c29f7d..7eee233e78d2 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -323,4 +323,21 @@ enum __LINUX_MIB_XFRMMAX }; +/* linux TLS mib definitions */ +enum +{ + LINUX_MIB_TLSNUM = 0, + LINUX_MIB_TLSCURRTXSW, /* TlsCurrTxSw */ + LINUX_MIB_TLSCURRRXSW, /* TlsCurrRxSw */ + LINUX_MIB_TLSCURRTXDEVICE, /* TlsCurrTxDevice */ + LINUX_MIB_TLSCURRRXDEVICE, /* TlsCurrRxDevice */ + LINUX_MIB_TLSTXSW, /* TlsTxSw */ + LINUX_MIB_TLSRXSW, /* TlsRxSw */ + LINUX_MIB_TLSTXDEVICE, /* TlsTxDevice */ + LINUX_MIB_TLSRXDEVICE, /* TlsRxDevice */ + LINUX_MIB_TLSDECRYPTERROR, /* TlsDecryptError */ + LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ + __LINUX_MIB_TLSMAX +}; + #endif /* _LINUX_SNMP_H */ diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 4955e1a9f1bc..4dfc05651c98 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -309,7 +309,7 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) tlv_ptr->tlv_len = htons(tlv_len); if (len && data) { memcpy(TLV_DATA(tlv_ptr), data, len); - memset(TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); + memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); } return TLV_SPACE(len); } @@ -409,7 +409,7 @@ static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags, tcm_hdr->tcm_flags = htons(flags); if (data_len && data) { memcpy(TCM_DATA(msg), data, data_len); - memset(TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); + memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); } return TCM_SPACE(data_len); } diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 4c4e24c291a5..559f42e73315 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -169,7 +169,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, { vr->num = num; vr->desc = p; - vr->avail = p + num*sizeof(struct vring_desc); + vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc)); vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + align-1) & ~(align - 1)); } diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 98b30c1613b2..d89969aa9942 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -212,30 +212,7 @@ int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr, bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); -efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc); -efi_status_t xen_efi_set_time(efi_time_t *tm); -efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, - efi_time_t *tm); -efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm); -efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, - u32 *attr, unsigned long *data_size, - void *data); -efi_status_t xen_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, efi_guid_t *vendor); -efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, - void *data); -efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size); -efi_status_t xen_efi_get_next_high_mono_count(u32 *count); -efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, unsigned long sg_list); -efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, u64 *max_size, - int *reset_type); -void xen_efi_reset_system(int reset_type, efi_status_t status, - unsigned long data_size, efi_char16_t *data); +void xen_efi_runtime_setup(void); #ifdef CONFIG_PREEMPT diff --git a/init/do_mounts.c b/init/do_mounts.c index 9634ecf3743d..af9cda887a23 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -212,6 +212,7 @@ static int match_dev_by_label(struct device *dev, const void *data) * a colon. * 9) PARTLABEL=<name> with name being the GPT partition label. * MSDOS partitions do not support labels! + * 10) /dev/cifs represents Root_CIFS (0xfe) * * If name doesn't have fall into the categories above, we return (0,0). * block_class is used to check if something is a disk name. If the disk @@ -268,6 +269,9 @@ dev_t name_to_dev_t(const char *name) res = Root_NFS; if (strcmp(name, "nfs") == 0) goto done; + res = Root_CIFS; + if (strcmp(name, "cifs") == 0) + goto done; res = Root_RAM0; if (strcmp(name, "ram") == 0) goto done; @@ -501,6 +505,42 @@ static int __init mount_nfs_root(void) } #endif +#ifdef CONFIG_CIFS_ROOT + +extern int cifs_root_data(char **dev, char **opts); + +#define CIFSROOT_TIMEOUT_MIN 5 +#define CIFSROOT_TIMEOUT_MAX 30 +#define CIFSROOT_RETRY_MAX 5 + +static int __init mount_cifs_root(void) +{ + char *root_dev, *root_data; + unsigned int timeout; + int try, err; + + err = cifs_root_data(&root_dev, &root_data); + if (err != 0) + return 0; + + timeout = CIFSROOT_TIMEOUT_MIN; + for (try = 1; ; try++) { + err = do_mount_root(root_dev, "cifs", root_mountflags, + root_data); + if (err == 0) + return 1; + if (try > CIFSROOT_RETRY_MAX) + break; + + ssleep(timeout); + timeout <<= 1; + if (timeout > CIFSROOT_TIMEOUT_MAX) + timeout = CIFSROOT_TIMEOUT_MAX; + } + return 0; +} +#endif + #if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD) void __init change_floppy(char *fmt, ...) { @@ -542,6 +582,15 @@ void __init mount_root(void) ROOT_DEV = Root_FD0; } #endif +#ifdef CONFIG_CIFS_ROOT + if (ROOT_DEV == Root_CIFS) { + if (mount_cifs_root()) + return; + + printk(KERN_ERR "VFS: Unable to mount root fs via SMB, trying floppy.\n"); + ROOT_DEV = Root_FD0; + } +#endif #ifdef CONFIG_BLK_DEV_FD if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) { /* rd_doload is 2 for a dual initrd/ramload setup */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 4655adbbae10..3f0cb82e4fbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10586,55 +10586,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, u32 size; int ret; - if (!access_ok(uattr, PERF_ATTR_SIZE_VER0)) - return -EFAULT; - - /* - * zero the full structure, so that a short copy will be nice. - */ + /* Zero the full structure, so that a short copy will be nice. */ memset(attr, 0, sizeof(*attr)); ret = get_user(size, &uattr->size); if (ret) return ret; - if (size > PAGE_SIZE) /* silly large */ - goto err_size; - - if (!size) /* abi compat */ + /* ABI compatibility quirk: */ + if (!size) size = PERF_ATTR_SIZE_VER0; - - if (size < PERF_ATTR_SIZE_VER0) + if (size < PERF_ATTR_SIZE_VER0 || size > PAGE_SIZE) goto err_size; - /* - * If we're handed a bigger struct than we know of, - * ensure all the unknown bits are 0 - i.e. new - * user-space does not rely on any kernel feature - * extensions we dont know about yet. - */ - if (size > sizeof(*attr)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uattr + sizeof(*attr); - end = (void __user *)uattr + size; - - for (; addr < end; addr++) { - ret = get_user(val, addr); - if (ret) - return ret; - if (val) - goto err_size; - } - size = sizeof(*attr); + ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); + if (ret) { + if (ret == -E2BIG) + goto err_size; + return ret; } - ret = copy_from_user(attr, uattr, size); - if (ret) - return -EFAULT; - attr->size = size; if (attr->__reserved_1) diff --git a/kernel/fork.c b/kernel/fork.c index f9572f416126..1f6c45f6a734 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2525,39 +2525,19 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, #ifdef __ARCH_WANT_SYS_CLONE3 noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, struct clone_args __user *uargs, - size_t size) + size_t usize) { + int err; struct clone_args args; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(usize > PAGE_SIZE)) return -E2BIG; - - if (unlikely(size < sizeof(struct clone_args))) + if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) return -EINVAL; - if (unlikely(!access_ok(uargs, size))) - return -EFAULT; - - if (size > sizeof(struct clone_args)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uargs + sizeof(struct clone_args); - end = (void __user *)uargs + size; - - for (; addr < end; addr++) { - if (get_user(val, addr)) - return -EFAULT; - if (val) - return -E2BIG; - } - - size = sizeof(struct clone_args); - } - - if (copy_from_user(&args, uargs, size)) - return -EFAULT; + err = copy_struct_from_user(&args, sizeof(args), uargs, usize); + if (err) + return err; /* * Verify that higher 32bits of exit_signal are unset and that @@ -2604,6 +2584,17 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs) return true; } +/** + * clone3 - create a new process with specific properties + * @uargs: argument structure + * @size: size of @uargs + * + * clone3() is the extensible successor to clone()/clone2(). + * It takes a struct as argument that is versioned by its size. + * + * Return: On success, a positive PID for the child process. + * On error, a negative errno number. + */ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) { int err; diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 9ff449888d9c..aff79e461fc9 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -71,7 +71,10 @@ done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1 find $cpio_dir -type f -print0 | xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' -tar -Jcf $tarfile -C $cpio_dir/ . > /dev/null +# Create archive and try to normalize metadata for reproducibility +tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ + --owner=0 --group=0 --sort=name --numeric-owner \ + -Jcf $tarfile -C $cpio_dir/ . > /dev/null echo "$src_files_md5" > kernel/kheaders.md5 echo "$obj_files_md5" >> kernel/kheaders.md5 diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7880f4f64d0e..dd05a378631a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5106,9 +5106,6 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a u32 size; int ret; - if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0)) - return -EFAULT; - /* Zero the full structure, so that a short copy will be nice: */ memset(attr, 0, sizeof(*attr)); @@ -5116,45 +5113,19 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a if (ret) return ret; - /* Bail out on silly large: */ - if (size > PAGE_SIZE) - goto err_size; - /* ABI compatibility quirk: */ if (!size) size = SCHED_ATTR_SIZE_VER0; - - if (size < SCHED_ATTR_SIZE_VER0) + if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) goto err_size; - /* - * If we're handed a bigger struct than we know of, - * ensure all the unknown bits are 0 - i.e. new - * user-space does not rely on any kernel feature - * extensions we dont know about yet. - */ - if (size > sizeof(*attr)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uattr + sizeof(*attr); - end = (void __user *)uattr + size; - - for (; addr < end; addr++) { - ret = get_user(val, addr); - if (ret) - return ret; - if (val) - goto err_size; - } - size = sizeof(*attr); + ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); + if (ret) { + if (ret == -E2BIG) + goto err_size; + return ret; } - ret = copy_from_user(attr, uattr, size); - if (ret) - return -EFAULT; - if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && size < SCHED_ATTR_SIZE_VER1) return -EINVAL; @@ -5354,7 +5325,7 @@ sched_attr_copy_to_user(struct sched_attr __user *uattr, * sys_sched_getattr - similar to sched_getparam, but with sched_attr * @pid: the pid in question. * @uattr: structure containing the extended parameters. - * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility. + * @usize: sizeof(attr) for fwd/bwd comp. * @flags: for future extension. */ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index a39bed2c784f..168479a7d61b 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -174,7 +174,6 @@ static int membarrier_private_expedited(int flags) */ if (cpu == raw_smp_processor_id()) continue; - rcu_read_lock(); p = rcu_dereference(cpu_rq(cpu)->curr); if (p && p->mm == mm) __cpumask_set_cpu(cpu, tmpmask); diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index c1f5bb590b5e..b5a65e212df2 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -42,39 +42,39 @@ static int bc_shutdown(struct clock_event_device *evt) */ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) { - int bc_moved; /* - * We try to cancel the timer first. If the callback is on - * flight on some other cpu then we let it handle it. If we - * were able to cancel the timer nothing can rearm it as we - * own broadcast_lock. + * This is called either from enter/exit idle code or from the + * broadcast handler. In all cases tick_broadcast_lock is held. * - * However we can also be called from the event handler of - * ce_broadcast_hrtimer itself when it expires. We cannot - * restart the timer because we are in the callback, but we - * can set the expiry time and let the callback return - * HRTIMER_RESTART. + * hrtimer_cancel() cannot be called here neither from the + * broadcast handler nor from the enter/exit idle code. The idle + * code can run into the problem described in bc_shutdown() and the + * broadcast handler cannot wait for itself to complete for obvious + * reasons. * - * Since we are in the idle loop at this point and because - * hrtimer_{start/cancel} functions call into tracing, - * calls to these functions must be bound within RCU_NONIDLE. + * Each caller tries to arm the hrtimer on its own CPU, but if the + * hrtimer callbback function is currently running, then + * hrtimer_start() cannot move it and the timer stays on the CPU on + * which it is assigned at the moment. + * + * As this can be called from idle code, the hrtimer_start() + * invocation has to be wrapped with RCU_NONIDLE() as + * hrtimer_start() can call into tracing. */ - RCU_NONIDLE( - { - bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0; - if (bc_moved) { - hrtimer_start(&bctimer, expires, - HRTIMER_MODE_ABS_PINNED_HARD); - } - } - ); - - if (bc_moved) { - /* Bind the "device" to the cpu */ - bc->bound_on = smp_processor_id(); - } else if (bc->bound_on == smp_processor_id()) { - hrtimer_set_expires(&bctimer, expires); - } + RCU_NONIDLE( { + hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD); + /* + * The core tick broadcast mode expects bc->bound_on to be set + * correctly to prevent a CPU which has the broadcast hrtimer + * armed from going deep idle. + * + * As tick_broadcast_lock is held, nothing can change the cpu + * base which was just established in hrtimer_start() above. So + * the below access is safe even without holding the hrtimer + * base lock. + */ + bc->bound_on = bctimer.base->cpu_base->cpu; + } ); return 0; } @@ -100,10 +100,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t) { ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); - if (clockevent_state_oneshot(&ce_broadcast_hrtimer)) - if (ce_broadcast_hrtimer.next_event != KTIME_MAX) - return HRTIMER_RESTART; - return HRTIMER_NORESTART; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 26b0a08f3c7d..f801d154ff6a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -365,11 +365,11 @@ static inline struct trace_array *top_trace_array(void) __builtin_types_compatible_p(typeof(var), type *) #undef IF_ASSIGN -#define IF_ASSIGN(var, entry, etype, id) \ - if (FTRACE_CMP_TYPE(var, etype)) { \ - var = (typeof(var))(entry); \ - WARN_ON(id && (entry)->type != id); \ - break; \ +#define IF_ASSIGN(var, entry, etype, id) \ + if (FTRACE_CMP_TYPE(var, etype)) { \ + var = (typeof(var))(entry); \ + WARN_ON(id != 0 && (entry)->type != id); \ + break; \ } /* Will cause compile errors if type is not found. */ diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c773b8fb270c..c9a74f82b14a 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -452,8 +452,10 @@ predicate_parse(const char *str, int nr_parens, int nr_preds, switch (*next) { case '(': /* #2 */ - if (top - op_stack > nr_parens) - return ERR_PTR(-EINVAL); + if (top - op_stack > nr_parens) { + ret = -EINVAL; + goto out_free; + } *(++top) = invert; continue; case '!': /* #3 */ diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index baf58a3612c0..905b10af5d5c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -178,6 +178,16 @@ void __trace_probe_log_err(int offset, int err_type) if (!command) return; + if (trace_probe_log.index >= trace_probe_log.argc) { + /** + * Set the error position is next to the last arg + space. + * Note that len includes the terminal null and the cursor + * appaers at pos + 1. + */ + pos = len; + offset = 0; + } + /* And make a command string from argv array */ p = command; for (i = 0; i < trace_probe_log.argc; i++) { @@ -1084,6 +1094,12 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b) { int i; + /* In case of more arguments */ + if (a->nr_args < b->nr_args) + return a->nr_args + 1; + if (a->nr_args > b->nr_args) + return b->nr_args + 1; + for (i = 0; i < a->nr_args; i++) { if ((b->nr_args <= i) || ((a->args[i].type != b->args[i].type) || diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 28ff554a1be8..6c0005d5dd5c 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -3,16 +3,10 @@ #include <linux/export.h> #include <linux/uaccess.h> #include <linux/mm.h> +#include <linux/bitops.h> #include <asm/word-at-a-time.h> -/* Set bits in the first 'n' bytes when loaded from memory */ -#ifdef __LITTLE_ENDIAN -# define aligned_byte_mask(n) ((1ul << 8*(n))-1) -#else -# define aligned_byte_mask(n) (~0xfful << (BITS_PER_LONG - 8 - 8*(n))) -#endif - /* * Do a strnlen, return length of string *with* final '\0'. * 'count' is the user-supplied count, while 'max' is the diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index 67bcd5dfd847..e365ace06538 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -31,14 +31,133 @@ # define TEST_U64 #endif -#define test(condition, msg) \ -({ \ - int cond = (condition); \ - if (cond) \ - pr_warn("%s\n", msg); \ - cond; \ +#define test(condition, msg, ...) \ +({ \ + int cond = (condition); \ + if (cond) \ + pr_warn("[%d] " msg "\n", __LINE__, ##__VA_ARGS__); \ + cond; \ }) +static bool is_zeroed(void *from, size_t size) +{ + return memchr_inv(from, 0x0, size) == NULL; +} + +static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) +{ + int ret = 0; + size_t start, end, i; + size_t zero_start = size / 4; + size_t zero_end = size - zero_start; + + /* + * We conduct a series of check_nonzero_user() tests on a block of memory + * with the following byte-pattern (trying every possible [start,end] + * pair): + * + * [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ] + * + * And we verify that check_nonzero_user() acts identically to memchr_inv(). + */ + + memset(kmem, 0x0, size); + for (i = 1; i < zero_start; i += 2) + kmem[i] = 0xff; + for (i = zero_end; i < size; i += 2) + kmem[i] = 0xff; + + ret |= test(copy_to_user(umem, kmem, size), + "legitimate copy_to_user failed"); + + for (start = 0; start <= size; start++) { + for (end = start; end <= size; end++) { + size_t len = end - start; + int retval = check_zeroed_user(umem + start, len); + int expected = is_zeroed(kmem + start, len); + + ret |= test(retval != expected, + "check_nonzero_user(=%d) != memchr_inv(=%d) mismatch (start=%zu, end=%zu)", + retval, expected, start, end); + } + } + + return ret; +} + +static int test_copy_struct_from_user(char *kmem, char __user *umem, + size_t size) +{ + int ret = 0; + char *umem_src = NULL, *expected = NULL; + size_t ksize, usize; + + umem_src = kmalloc(size, GFP_KERNEL); + if ((ret |= test(umem_src == NULL, "kmalloc failed"))) + goto out_free; + + expected = kmalloc(size, GFP_KERNEL); + if ((ret |= test(expected == NULL, "kmalloc failed"))) + goto out_free; + + /* Fill umem with a fixed byte pattern. */ + memset(umem_src, 0x3e, size); + ret |= test(copy_to_user(umem, umem_src, size), + "legitimate copy_to_user failed"); + + /* Check basic case -- (usize == ksize). */ + ksize = size; + usize = size; + + memcpy(expected, umem_src, ksize); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize == ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize == ksize) gives unexpected copy"); + + /* Old userspace case -- (usize < ksize). */ + ksize = size; + usize = size / 2; + + memcpy(expected, umem_src, usize); + memset(expected + usize, 0x0, ksize - usize); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize < ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize < ksize) gives unexpected copy"); + + /* New userspace (-E2BIG) case -- (usize > ksize). */ + ksize = size / 2; + usize = size; + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize) != -E2BIG, + "copy_struct_from_user(usize > ksize) didn't give E2BIG"); + + /* New userspace (success) case -- (usize > ksize). */ + ksize = size / 2; + usize = size; + + memcpy(expected, umem_src, ksize); + ret |= test(clear_user(umem + ksize, usize - ksize), + "legitimate clear_user failed"); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize > ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize > ksize) gives unexpected copy"); + +out_free: + kfree(expected); + kfree(umem_src); + return ret; +} + static int __init test_user_copy_init(void) { int ret = 0; @@ -106,6 +225,11 @@ static int __init test_user_copy_init(void) #endif #undef test_legit + /* Test usage of check_nonzero_user(). */ + ret |= test_check_nonzero_user(kmem, usermem, 2 * PAGE_SIZE); + /* Test usage of copy_struct_from_user(). */ + ret |= test_copy_struct_from_user(kmem, usermem, 2 * PAGE_SIZE); + /* * Invalid usage: none of these copies should succeed. */ diff --git a/lib/textsearch.c b/lib/textsearch.c index 4f16eec5d554..f68dea8806be 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -89,9 +89,9 @@ * goto errout; * } * - * pos = textsearch_find_continuous(conf, \&state, example, strlen(example)); + * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); * if (pos != UINT_MAX) - * panic("Oh my god, dancing chickens at \%d\n", pos); + * panic("Oh my god, dancing chickens at %d\n", pos); * * textsearch_destroy(conf); */ diff --git a/lib/usercopy.c b/lib/usercopy.c index c2bfbcaeb3dc..cbb4d9ec00f2 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/uaccess.h> +#include <linux/bitops.h> /* out-of-line parts */ @@ -31,3 +32,57 @@ unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) } EXPORT_SYMBOL(_copy_to_user); #endif + +/** + * check_zeroed_user: check if a userspace buffer only contains zero bytes + * @from: Source address, in userspace. + * @size: Size of buffer. + * + * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for + * userspace addresses (and is more efficient because we don't care where the + * first non-zero byte is). + * + * Returns: + * * 0: There were non-zero bytes present in the buffer. + * * 1: The buffer was full of zero bytes. + * * -EFAULT: access to userspace failed. + */ +int check_zeroed_user(const void __user *from, size_t size) +{ + unsigned long val; + uintptr_t align = (uintptr_t) from % sizeof(unsigned long); + + if (unlikely(size == 0)) + return 1; + + from -= align; + size += align; + + if (!user_access_begin(from, size)) + return -EFAULT; + + unsafe_get_user(val, (unsigned long __user *) from, err_fault); + if (align) + val &= ~aligned_byte_mask(align); + + while (size > sizeof(unsigned long)) { + if (unlikely(val)) + goto done; + + from += sizeof(unsigned long); + size -= sizeof(unsigned long); + + unsafe_get_user(val, (unsigned long __user *) from, err_fault); + } + + if (size < sizeof(unsigned long)) + val &= aligned_byte_mask(size); + +done: + user_access_end(); + return (val == 0); +err_fault: + user_access_end(); + return -EFAULT; +} +EXPORT_SYMBOL(check_zeroed_user); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index a1146cb10919..9cbed6f5a85a 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -436,7 +436,7 @@ void batadv_interface_rx(struct net_device *soft_iface, /* clean the netfilter state now that the batman-adv header has been * removed */ - nf_reset(skb); + nf_reset_ct(skb); if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) goto dropped; diff --git a/net/caif/Kconfig b/net/caif/Kconfig index eb83051c8330..b7532a79ca7a 100644 --- a/net/caif/Kconfig +++ b/net/caif/Kconfig @@ -13,11 +13,11 @@ menuconfig CAIF with its modems. It is accessed from user space as sockets (PF_CAIF). Say Y (or M) here if you build for a phone product (e.g. Android or - MeeGo ) that uses CAIF as transport, if unsure say N. + MeeGo) that uses CAIF as transport. If unsure say N. If you select to build it as module then CAIF_NETDEV also needs to be - built as modules. You will also need to say yes to any CAIF physical - devices that your platform requires. + built as a module. You will also need to say Y (or M) to any CAIF + physical devices that your platform requires. See Documentation/networking/caif for a further explanation on how to use and configure CAIF. @@ -37,7 +37,7 @@ config CAIF_NETDEV default CAIF ---help--- Say Y if you will be using a CAIF based GPRS network device. - This can be either built-in or a loadable module, + This can be either built-in or a loadable module. If you select to build it as a built-in then the main CAIF device must also be a built-in. If unsure say Y. @@ -48,7 +48,7 @@ config CAIF_USB default n ---help--- Say Y if you are using CAIF over USB CDC NCM. - This can be either built-in or a loadable module, + This can be either built-in or a loadable module. If you select to build it as a built-in then the main CAIF device must also be a built-in. If unsure say N. diff --git a/net/core/dev.c b/net/core/dev.c index 7a456c6a7ad8..8bc3dce71fc0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1249,8 +1249,8 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -int dev_get_valid_name(struct net *net, struct net_device *dev, - const char *name) +static int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name) { BUG_ON(!net); @@ -1266,7 +1266,6 @@ int dev_get_valid_name(struct net *net, struct net_device *dev, return 0; } -EXPORT_SYMBOL(dev_get_valid_name); /** * dev_change_name - change name of a device @@ -1725,6 +1724,62 @@ static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, return nb->notifier_call(nb, val, &info); } +static int call_netdevice_register_notifiers(struct notifier_block *nb, + struct net_device *dev) +{ + int err; + + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); + err = notifier_to_errno(err); + if (err) + return err; + + if (!(dev->flags & IFF_UP)) + return 0; + + call_netdevice_notifier(nb, NETDEV_UP, dev); + return 0; +} + +static void call_netdevice_unregister_notifiers(struct notifier_block *nb, + struct net_device *dev) +{ + if (dev->flags & IFF_UP) { + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); + } + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); +} + +static int call_netdevice_register_net_notifiers(struct notifier_block *nb, + struct net *net) +{ + struct net_device *dev; + int err; + + for_each_netdev(net, dev) { + err = call_netdevice_register_notifiers(nb, dev); + if (err) + goto rollback; + } + return 0; + +rollback: + for_each_netdev_continue_reverse(net, dev) + call_netdevice_unregister_notifiers(nb, dev); + return err; +} + +static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb, + struct net *net) +{ + struct net_device *dev; + + for_each_netdev(net, dev) + call_netdevice_unregister_notifiers(nb, dev); +} + static int dev_boot_phase = 1; /** @@ -1743,8 +1798,6 @@ static int dev_boot_phase = 1; int register_netdevice_notifier(struct notifier_block *nb) { - struct net_device *dev; - struct net_device *last; struct net *net; int err; @@ -1757,17 +1810,9 @@ int register_netdevice_notifier(struct notifier_block *nb) if (dev_boot_phase) goto unlock; for_each_net(net) { - for_each_netdev(net, dev) { - err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); - err = notifier_to_errno(err); - if (err) - goto rollback; - - if (!(dev->flags & IFF_UP)) - continue; - - call_netdevice_notifier(nb, NETDEV_UP, dev); - } + err = call_netdevice_register_net_notifiers(nb, net); + if (err) + goto rollback; } unlock: @@ -1776,22 +1821,9 @@ unlock: return err; rollback: - last = dev; - for_each_net(net) { - for_each_netdev(net, dev) { - if (dev == last) - goto outroll; + for_each_net_continue_reverse(net) + call_netdevice_unregister_net_notifiers(nb, net); - if (dev->flags & IFF_UP) { - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, - dev); - call_netdevice_notifier(nb, NETDEV_DOWN, dev); - } - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); - } - } - -outroll: raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } @@ -1842,6 +1874,80 @@ unlock: EXPORT_SYMBOL(unregister_netdevice_notifier); /** + * register_netdevice_notifier_net - register a per-netns network notifier block + * @net: network namespace + * @nb: notifier + * + * Register a notifier to be called when network device events occur. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + * + * When registered all registration and up events are replayed + * to the new notifier to allow device to have a race free + * view of the network device list. + */ + +int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb) +{ + int err; + + rtnl_lock(); + err = raw_notifier_chain_register(&net->netdev_chain, nb); + if (err) + goto unlock; + if (dev_boot_phase) + goto unlock; + + err = call_netdevice_register_net_notifiers(nb, net); + if (err) + goto chain_unregister; + +unlock: + rtnl_unlock(); + return err; + +chain_unregister: + raw_notifier_chain_unregister(&netdev_chain, nb); + goto unlock; +} +EXPORT_SYMBOL(register_netdevice_notifier_net); + +/** + * unregister_netdevice_notifier_net - unregister a per-netns + * network notifier block + * @net: network namespace + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_netdevice_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + * + * After unregistering unregister and down device events are synthesized + * for all devices on the device list to the removed notifier to remove + * the need for special case cleanup code. + */ + +int unregister_netdevice_notifier_net(struct net *net, + struct notifier_block *nb) +{ + int err; + + rtnl_lock(); + err = raw_notifier_chain_unregister(&net->netdev_chain, nb); + if (err) + goto unlock; + + call_netdevice_unregister_net_notifiers(nb, net); + +unlock: + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(unregister_netdevice_notifier_net); + +/** * call_netdevice_notifiers_info - call all network notifier blocks * @val: value passed unmodified to notifier function * @info: notifier information data @@ -1853,7 +1959,18 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); static int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info) { + struct net *net = dev_net(info->dev); + int ret; + ASSERT_RTNL(); + + /* Run per-netns notifier block chain first, then run the global one. + * Hopefully, one day, the global one is going to be removed after + * all notifier block registrators get converted to be per-netns. + */ + ret = raw_notifier_call_chain(&net->netdev_chain, val, info); + if (ret & NOTIFY_STOP_MASK) + return ret; return raw_notifier_call_chain(&netdev_chain, val, info); } @@ -3047,12 +3164,9 @@ int skb_checksum_help(struct sk_buff *skb) offset += skb->csum_offset; BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); - if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__sum16))) { - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (ret) - goto out; - } + ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); + if (ret) + goto out; *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: @@ -3087,12 +3201,11 @@ int skb_crc32c_csum_help(struct sk_buff *skb) ret = -EINVAL; goto out; } - if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__le32))) { - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (ret) - goto out; - } + + ret = skb_ensure_writable(skb, offset + sizeof(__le32)); + if (ret) + goto out; + crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, skb->len - start, ~(__u32)0, crc32c_csum_stub)); @@ -8817,6 +8930,7 @@ int register_netdevice(struct net_device *dev) if (ret < 0) goto out; + ret = -ENOMEM; dev->name_node = netdev_name_node_head_alloc(dev); if (!dev->name_node) goto out; @@ -9752,6 +9866,8 @@ static int __net_init netdev_init(struct net *net) if (net->dev_index_head == NULL) goto err_idx; + RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); + return 0; err_idx: diff --git a/net/core/devlink.c b/net/core/devlink.c index e48680efe54a..97e9a2246929 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -95,16 +95,25 @@ static LIST_HEAD(devlink_list); */ static DEFINE_MUTEX(devlink_mutex); -static struct net *devlink_net(const struct devlink *devlink) +struct net *devlink_net(const struct devlink *devlink) { return read_pnet(&devlink->_net); } +EXPORT_SYMBOL_GPL(devlink_net); -static void devlink_net_set(struct devlink *devlink, struct net *net) +static void __devlink_net_set(struct devlink *devlink, struct net *net) { write_pnet(&devlink->_net, net); } +void devlink_net_set(struct devlink *devlink, struct net *net) +{ + if (WARN_ON(devlink->registered)) + return; + __devlink_net_set(devlink, net); +} +EXPORT_SYMBOL_GPL(devlink_net_set); + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { @@ -434,8 +443,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, { struct devlink *devlink; - devlink = devlink_get_from_info(info); - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) + /* When devlink changes netns, it would not be found + * by devlink_get_from_info(). So try if it is stored first. + */ + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { + devlink = info->user_ptr[0]; + } else { + devlink = devlink_get_from_info(info); + WARN_ON(IS_ERR(devlink)); + } + if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); } @@ -1035,7 +1052,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, struct devlink_sb *devlink_sb; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -1058,6 +1075,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -1233,7 +1253,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, struct devlink_sb *devlink_sb; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -1256,6 +1276,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -1460,7 +1483,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, struct devlink_sb *devlink_sb; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -1485,6 +1508,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -2674,6 +2700,72 @@ devlink_resources_validate(struct devlink *devlink, return err; } +static struct net *devlink_netns_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID]; + struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD]; + struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID]; + struct net *net; + + if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) { + NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified"); + return ERR_PTR(-EINVAL); + } + + if (netns_pid_attr) { + net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr)); + } else if (netns_fd_attr) { + net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr)); + } else if (netns_id_attr) { + net = get_net_ns_by_id(sock_net(skb->sk), + nla_get_u32(netns_id_attr)); + if (!net) + net = ERR_PTR(-EINVAL); + } else { + WARN_ON(1); + net = ERR_PTR(-EINVAL); + } + if (IS_ERR(net)) { + NL_SET_ERR_MSG(info->extack, "Unknown network namespace"); + return ERR_PTR(-EINVAL); + } + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return ERR_PTR(-EPERM); + } + return net; +} + +static void devlink_param_notify(struct devlink *devlink, + unsigned int port_index, + struct devlink_param_item *param_item, + enum devlink_command cmd); + +static void devlink_reload_netns_change(struct devlink *devlink, + struct net *dest_net) +{ + struct devlink_param_item *param_item; + + /* Userspace needs to be notified about devlink objects + * removed from original and entering new network namespace. + * The rest of the devlink objects are re-created during + * reload process so the notifications are generated separatelly. + */ + + list_for_each_entry(param_item, &devlink->param_list, list) + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + devlink_notify(devlink, DEVLINK_CMD_DEL); + + __devlink_net_set(devlink, dest_net); + + devlink_notify(devlink, DEVLINK_CMD_NEW); + list_for_each_entry(param_item, &devlink->param_list, list) + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); +} + static bool devlink_reload_supported(struct devlink *devlink) { return devlink->ops->reload_down && devlink->ops->reload_up; @@ -2694,9 +2786,27 @@ bool devlink_is_reload_failed(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_is_reload_failed); +static int devlink_reload(struct devlink *devlink, struct net *dest_net, + struct netlink_ext_ack *extack) +{ + int err; + + err = devlink->ops->reload_down(devlink, !!dest_net, extack); + if (err) + return err; + + if (dest_net && !net_eq(dest_net, devlink_net(devlink))) + devlink_reload_netns_change(devlink, dest_net); + + err = devlink->ops->reload_up(devlink, extack); + devlink_reload_failed_set(devlink, !!err); + return err; +} + static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; + struct net *dest_net = NULL; int err; if (!devlink_reload_supported(devlink)) @@ -2707,11 +2817,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed"); return err; } - err = devlink->ops->reload_down(devlink, info->extack); - if (err) - return err; - err = devlink->ops->reload_up(devlink, info->extack); - devlink_reload_failed_set(devlink, !!err); + + if (info->attrs[DEVLINK_ATTR_NETNS_PID] || + info->attrs[DEVLINK_ATTR_NETNS_FD] || + info->attrs[DEVLINK_ATTR_NETNS_ID]) { + dest_net = devlink_netns_get(skb, info); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + } + + err = devlink_reload(devlink, dest_net, info->extack); + + if (dest_net) + put_net(dest_net); + return err; } @@ -3155,7 +3274,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, struct devlink *devlink; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -3172,7 +3291,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err) { + if (err && err != -EOPNOTSUPP) { mutex_unlock(&devlink->lock); goto out; } @@ -3183,6 +3302,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -3411,7 +3533,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct devlink *devlink; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -3432,7 +3554,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err) { + if (err && err != -EOPNOTSUPP) { mutex_unlock(&devlink->lock); goto out; } @@ -3444,6 +3566,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -3818,29 +3943,19 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + const struct genl_dumpit_info *info = genl_dumpit_info(cb); u64 ret_offset, start_offset, end_offset = 0; + struct nlattr **attrs = info->attrs; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; struct devlink *devlink; - struct nlattr **attrs; bool dump = true; void *hdr; int err; start_offset = *((u64 *)&cb->args[0]); - attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return -ENOMEM; - - err = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, - devlink_nl_family.policy, cb->extack); - if (err) - goto out_free; - mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) { @@ -3917,7 +4032,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, genlmsg_end(skb, hdr); mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); - kfree(attrs); return skb->len; @@ -3927,8 +4041,6 @@ out_unlock: mutex_unlock(&devlink->lock); out_dev: mutex_unlock(&devlink_mutex); -out_free: - kfree(attrs); return err; } @@ -4066,7 +4178,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, struct devlink *devlink; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -4088,12 +4200,15 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); mutex_unlock(&devlink->lock); - if (err) + if (err && err != -EOPNOTSUPP) break; idx++; } mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -4732,14 +4847,17 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); static int devlink_health_reporter_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, struct netlink_ext_ack *extack) { int err; + if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY) + return 0; + if (!reporter->ops->recover) return -EOPNOTSUPP; - err = reporter->ops->recover(reporter, priv_ctx); + err = reporter->ops->recover(reporter, priv_ctx, extack); if (err) return err; @@ -4760,7 +4878,8 @@ devlink_health_dump_clear(struct devlink_health_reporter *reporter) } static int devlink_health_do_dump(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { int err; @@ -4781,7 +4900,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter, goto dump_err; err = reporter->ops->dump(reporter, reporter->dump_fmsg, - priv_ctx); + priv_ctx, extack); if (err) goto dump_err; @@ -4828,11 +4947,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter, mutex_lock(&reporter->dump_lock); /* store current dump of current error, for later analysis */ - devlink_health_do_dump(reporter, priv_ctx); + devlink_health_do_dump(reporter, priv_ctx, NULL); mutex_unlock(&reporter->dump_lock); if (reporter->auto_recover) - return devlink_health_reporter_recover(reporter, priv_ctx); + return devlink_health_reporter_recover(reporter, + priv_ctx, NULL); return 0; } @@ -4867,21 +4987,10 @@ devlink_health_reporter_get_from_info(struct devlink *devlink, static struct devlink_health_reporter * devlink_health_reporter_get_from_cb(struct netlink_callback *cb) { + const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct devlink_health_reporter *reporter; + struct nlattr **attrs = info->attrs; struct devlink *devlink; - struct nlattr **attrs; - int err; - - attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return NULL; - - err = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, - devlink_nl_family.policy, cb->extack); - if (err) - goto free; mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); @@ -4890,12 +4999,9 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); mutex_unlock(&devlink_mutex); - kfree(attrs); return reporter; unlock: mutex_unlock(&devlink_mutex); -free: - kfree(attrs); return NULL; } @@ -5084,7 +5190,7 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, if (!reporter) return -EINVAL; - err = devlink_health_reporter_recover(reporter, NULL); + err = devlink_health_reporter_recover(reporter, NULL, info->extack); devlink_health_reporter_put(reporter); return err; @@ -5117,7 +5223,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, if (err) goto out; - err = reporter->ops->diagnose(reporter, fmsg); + err = reporter->ops->diagnose(reporter, fmsg, info->extack); if (err) goto out; @@ -5152,7 +5258,7 @@ devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, } mutex_lock(&reporter->dump_lock); if (!start) { - err = devlink_health_do_dump(reporter, NULL); + err = devlink_health_do_dump(reporter, NULL, cb->extack); if (err) goto unlock; cb->args[1] = reporter->dump_ts; @@ -5793,6 +5899,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, + [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -6023,7 +6132,8 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_REGION_READ, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_region_read_dumpit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, @@ -6071,7 +6181,8 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -6155,7 +6266,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) if (!devlink) return NULL; devlink->ops = ops; - devlink_net_set(devlink, &init_net); + __devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->sb_list); INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); @@ -6181,6 +6292,7 @@ int devlink_register(struct devlink *devlink, struct device *dev) { mutex_lock(&devlink_mutex); devlink->dev = dev; + devlink->registered = true; list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); mutex_unlock(&devlink_mutex); @@ -8060,9 +8172,43 @@ int devlink_compat_switch_id_get(struct net_device *dev, return 0; } +static void __net_exit devlink_pernet_pre_exit(struct net *net) +{ + struct devlink *devlink; + int err; + + /* In case network namespace is getting destroyed, reload + * all devlink instances from this namespace into init_net. + */ + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (net_eq(devlink_net(devlink), net)) { + if (WARN_ON(!devlink_reload_supported(devlink))) + continue; + err = devlink_reload(devlink, &init_net, NULL); + if (err) + pr_warn("Failed to reload devlink instance into init_net\n"); + } + } + mutex_unlock(&devlink_mutex); +} + +static struct pernet_operations devlink_pernet_ops __net_initdata = { + .pre_exit = devlink_pernet_pre_exit, +}; + static int __init devlink_init(void) { - return genl_register_family(&devlink_nl_family); + int err; + + err = genl_register_family(&devlink_nl_family); + if (err) + goto out; + err = register_pernet_subsys(&devlink_pernet_ops); + +out: + WARN_ON(err); + return err; } subsys_initcall(devlink_init); diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 470a606d5e8d..fc96259807b6 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -12,17 +12,15 @@ static unsigned int fib_notifier_net_id; struct fib_notifier_net { struct list_head fib_notifier_ops; + struct atomic_notifier_head fib_chain; }; -static ATOMIC_NOTIFIER_HEAD(fib_chain); - -int call_fib_notifier(struct notifier_block *nb, struct net *net, +int call_fib_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info) { int err; - info->net = net; err = nb->notifier_call(nb, event_type, info); return notifier_to_errno(err); } @@ -31,106 +29,100 @@ EXPORT_SYMBOL(call_fib_notifier); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info) { + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); int err; - info->net = net; - err = atomic_notifier_call_chain(&fib_chain, event_type, info); + err = atomic_notifier_call_chain(&fn_net->fib_chain, event_type, info); return notifier_to_errno(err); } EXPORT_SYMBOL(call_fib_notifiers); -static unsigned int fib_seq_sum(void) +static unsigned int fib_seq_sum(struct net *net) { - struct fib_notifier_net *fn_net; + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); struct fib_notifier_ops *ops; unsigned int fib_seq = 0; - struct net *net; rtnl_lock(); - down_read(&net_rwsem); - for_each_net(net) { - fn_net = net_generic(net, fib_notifier_net_id); - rcu_read_lock(); - list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { - if (!try_module_get(ops->owner)) - continue; - fib_seq += ops->fib_seq_read(net); - module_put(ops->owner); - } - rcu_read_unlock(); + rcu_read_lock(); + list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { + if (!try_module_get(ops->owner)) + continue; + fib_seq += ops->fib_seq_read(net); + module_put(ops->owner); } - up_read(&net_rwsem); + rcu_read_unlock(); rtnl_unlock(); return fib_seq; } -static int fib_net_dump(struct net *net, struct notifier_block *nb) +static int fib_net_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); struct fib_notifier_ops *ops; + int err = 0; + rcu_read_lock(); list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { - int err; - if (!try_module_get(ops->owner)) continue; - err = ops->fib_dump(net, nb); + err = ops->fib_dump(net, nb, extack); module_put(ops->owner); if (err) - return err; + goto unlock; } - return 0; +unlock: + rcu_read_unlock(); + + return err; } -static bool fib_dump_is_consistent(struct notifier_block *nb, +static bool fib_dump_is_consistent(struct net *net, struct notifier_block *nb, void (*cb)(struct notifier_block *nb), unsigned int fib_seq) { - atomic_notifier_chain_register(&fib_chain, nb); - if (fib_seq == fib_seq_sum()) + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); + + atomic_notifier_chain_register(&fn_net->fib_chain, nb); + if (fib_seq == fib_seq_sum(net)) return true; - atomic_notifier_chain_unregister(&fib_chain, nb); + atomic_notifier_chain_unregister(&fn_net->fib_chain, nb); if (cb) cb(nb); return false; } #define FIB_DUMP_MAX_RETRIES 5 -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)) +int register_fib_notifier(struct net *net, struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + struct netlink_ext_ack *extack) { int retries = 0; int err; do { - unsigned int fib_seq = fib_seq_sum(); - struct net *net; - - rcu_read_lock(); - for_each_net_rcu(net) { - err = fib_net_dump(net, nb); - if (err) - goto err_fib_net_dump; - } - rcu_read_unlock(); - - if (fib_dump_is_consistent(nb, cb, fib_seq)) + unsigned int fib_seq = fib_seq_sum(net); + + err = fib_net_dump(net, nb, extack); + if (err) + return err; + + if (fib_dump_is_consistent(net, nb, cb, fib_seq)) return 0; } while (++retries < FIB_DUMP_MAX_RETRIES); return -EBUSY; - -err_fib_net_dump: - rcu_read_unlock(); - return err; } EXPORT_SYMBOL(register_fib_notifier); -int unregister_fib_notifier(struct notifier_block *nb) +int unregister_fib_notifier(struct net *net, struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&fib_chain, nb); + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); + + return atomic_notifier_chain_unregister(&fn_net->fib_chain, nb); } EXPORT_SYMBOL(unregister_fib_notifier); @@ -181,6 +173,7 @@ static int __net_init fib_notifier_net_init(struct net *net) struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); INIT_LIST_HEAD(&fn_net->fib_notifier_ops); + ATOMIC_INIT_NOTIFIER_HEAD(&fn_net->fib_chain); return 0; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index dd220ce7ca7a..3e7e15278c46 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -321,16 +321,18 @@ out: } EXPORT_SYMBOL_GPL(fib_rules_lookup); -static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, +static int call_fib_rule_notifier(struct notifier_block *nb, enum fib_event_type event_type, - struct fib_rule *rule, int family) + struct fib_rule *rule, int family, + struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = family, + .info.extack = extack, .rule = rule, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib_notifier(nb, event_type, &info.info); } static int call_fib_rule_notifiers(struct net *net, @@ -350,20 +352,25 @@ static int call_fib_rule_notifiers(struct net *net, } /* Called with rcu_read_lock() */ -int fib_rules_dump(struct net *net, struct notifier_block *nb, int family) +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, + struct netlink_ext_ack *extack) { struct fib_rules_ops *ops; struct fib_rule *rule; + int err = 0; ops = lookup_rules_ops(net, family); if (!ops) return -EAFNOSUPPORT; - list_for_each_entry_rcu(rule, &ops->rules_list, list) - call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule, - family); + list_for_each_entry_rcu(rule, &ops->rules_list, list) { + err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD, + rule, family, extack); + if (err) + break; + } rules_ops_put(ops); - return 0; + return err; } EXPORT_SYMBOL_GPL(fib_rules_dump); diff --git a/net/core/filter.c b/net/core/filter.c index ed6563622ce3..46196e212413 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2245,7 +2245,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, * account for the headroom. */ bytes_sg_total = start - offset + bytes; - if (!msg->sg.copy[i] && bytes_sg_total <= len) + if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len) goto out; /* At this point we need to linearize multiple scatterlist @@ -2450,7 +2450,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, /* Place newly allocated data buffer */ sk_mem_charge(msg->sk, len); msg->sg.size += len; - msg->sg.copy[new] = false; + __clear_bit(new, &msg->sg.copy); sg_set_page(&msg->sg.data[new], page, len + copy, 0); if (rsge.length) { get_page(sg_page(&rsge)); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 01d65206f4fb..529133611ea2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5120,7 +5120,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb_ext_reset(skb); - nf_reset(skb); + nf_reset_ct(skb); nf_reset_trace(skb); #ifdef CONFIG_NET_SWITCHDEV diff --git a/net/core/sock.c b/net/core/sock.c index 07863edbe6fc..ceda6b126d84 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -333,7 +333,6 @@ EXPORT_SYMBOL(__sk_backlog_rcv); static int sock_get_timeout(long timeo, void *optval, bool old_timeval) { struct __kernel_sock_timeval tv; - int size; if (timeo == MAX_SCHEDULE_TIMEOUT) { tv.tv_sec = 0; @@ -354,13 +353,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) old_tv.tv_sec = tv.tv_sec; old_tv.tv_usec = tv.tv_usec; *(struct __kernel_old_timeval *)optval = old_tv; - size = sizeof(old_tv); - } else { - *(struct __kernel_sock_timeval *)optval = tv; - size = sizeof(tv); + return sizeof(old_tv); } - return size; + *(struct __kernel_sock_timeval *)optval = tv; + return sizeof(tv); } static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval) @@ -687,7 +684,8 @@ out: return ret; } -static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) +static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit, + int valbool) { if (valbool) sock_set_flag(sk, bit); @@ -1700,8 +1698,6 @@ static void __sk_destruct(struct rcu_head *head) sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); } - if (rcu_access_pointer(sk->sk_reuseport_cb)) - reuseport_detach_sock(sk); sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); @@ -1728,7 +1724,14 @@ static void __sk_destruct(struct rcu_head *head) void sk_destruct(struct sock *sk) { - if (sock_flag(sk, SOCK_RCU_FREE)) + bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); + + if (rcu_access_pointer(sk->sk_reuseport_cb)) { + reuseport_detach_sock(sk); + use_call_rcu = true; + } + + if (use_call_rcu) call_rcu(&sk->sk_rcu, __sk_destruct); else __sk_destruct(&sk->sk_rcu); @@ -3033,7 +3036,7 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp, } EXPORT_SYMBOL(sock_gettstamp); -void sock_enable_timestamp(struct sock *sk, int flag) +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) { if (!sock_flag(sk, flag)) { unsigned long previous_flags = sk->sk_flags; @@ -3492,7 +3495,7 @@ static long sock_prot_memory_allocated(struct proto *proto) return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; } -static char *sock_prot_memory_pressure(struct proto *proto) +static const char *sock_prot_memory_pressure(struct proto *proto) { return proto->memory_pressure != NULL ? proto_memory_pressure(proto) ? "yes" : "no" : "NI"; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b685bc82f8d0..d9b4200ed12d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -871,7 +871,7 @@ lookup: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - nf_reset(skb); + nf_reset_ct(skb); return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 9c9aff3e52cf..63ef2a14c934 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -156,7 +156,11 @@ static struct sk_buff /* Step 1: A timestampable frame was received. * Buffer it until we get its meta frame. */ - if (is_link_local && sp->data->hwts_rx_en) { + if (is_link_local) { + if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) + /* Do normal processing. */ + return skb; + spin_lock(&sp->data->meta_lock); /* Was this a link-local frame instead of the meta * that we were expecting? @@ -187,6 +191,12 @@ static struct sk_buff } else if (is_meta) { struct sk_buff *stampable_skb; + /* Drop the meta frame if we're not in the right state + * to process it. + */ + if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) + return NULL; + spin_lock(&sp->data->meta_lock); stampable_skb = sp->data->stampable_skb; diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index ffcfcef76291..7c5a1aa5adb4 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -236,21 +236,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, struct cfg802154_registered_device **rdev, struct wpan_dev **wpan_dev) { + const struct genl_dumpit_info *info = genl_dumpit_info(cb); int err; rtnl_lock(); if (!cb->args[0]) { - err = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + nl802154_fam.hdrsize, - genl_family_attrbuf(&nl802154_fam), - nl802154_fam.maxattr, - nl802154_policy, NULL); - if (err) - goto out_unlock; - *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk), - genl_family_attrbuf(&nl802154_fam)); + info->attrs); if (IS_ERR(*wpan_dev)) { err = PTR_ERR(*wpan_dev); goto out_unlock; @@ -557,17 +550,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl802154_dump_wpan_phy_state *state) { - struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); - int ret = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + nl802154_fam.hdrsize, - tb, nl802154_fam.maxattr, - nl802154_policy, NULL); - - /* TODO check if we can handle error here, - * we have no backward compatibility - */ - if (ret) - return 0; + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct nlattr **tb = info->attrs; if (tb[NL802154_ATTR_WPAN_PHY]) state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]); @@ -2203,7 +2187,8 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_GET_WPAN_PHY, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .doit = nl802154_get_wpan_phy, .dumpit = nl802154_dump_wpan_phy, .done = nl802154_dump_wpan_phy_done, @@ -2343,7 +2328,8 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_GET_SEC_KEY, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, /* TODO .doit by matching key id? */ .dumpit = nl802154_dump_llsec_key, .flags = GENL_ADMIN_PERM, @@ -2369,7 +2355,8 @@ static const struct genl_ops nl802154_ops[] = { /* TODO unique identifier must short+pan OR extended_addr */ { .cmd = NL802154_CMD_GET_SEC_DEV, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, /* TODO .doit by matching extended_addr? */ .dumpit = nl802154_dump_llsec_dev, .flags = GENL_ADMIN_PERM, @@ -2395,7 +2382,8 @@ static const struct genl_ops nl802154_ops[] = { /* TODO remove complete devkey, put it as nested? */ { .cmd = NL802154_CMD_GET_SEC_DEVKEY, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, /* TODO doit by matching ??? */ .dumpit = nl802154_dump_llsec_devkey, .flags = GENL_ADMIN_PERM, @@ -2420,7 +2408,8 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_GET_SEC_LEVEL, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, /* TODO .doit by matching frame_type? */ .dumpit = nl802154_dump_llsec_seclevel, .flags = GENL_ADMIN_PERM, diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index b804ccbdb241..0c28bd469a68 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -9,12 +9,12 @@ #include <net/netns/ipv4.h> #include <net/ip_fib.h> -int call_fib4_notifier(struct notifier_block *nb, struct net *net, +int call_fib4_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info) { info->family = AF_INET; - return call_fib_notifier(nb, net, event_type, info); + return call_fib_notifier(nb, event_type, info); } int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, @@ -34,17 +34,16 @@ static unsigned int fib4_seq_read(struct net *net) return net->ipv4.fib_seq + fib4_rules_seq_read(net); } -static int fib4_dump(struct net *net, struct notifier_block *nb) +static int fib4_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { int err; - err = fib4_rules_dump(net, nb); + err = fib4_rules_dump(net, nb, extack); if (err) return err; - fib_notify(net, nb); - - return 0; + return fib_notify(net, nb, extack); } static const struct fib_notifier_ops fib4_notifier_ops_template = { diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b43a7ba5c6a4..f99e3bac5cab 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -65,9 +65,10 @@ bool fib4_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL_GPL(fib4_rule_default); -int fib4_rules_dump(struct net *net, struct notifier_block *nb) +int fib4_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return fib_rules_dump(net, nb, AF_INET); + return fib_rules_dump(net, nb, AF_INET, extack); } unsigned int fib4_rules_seq_read(struct net *net) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1ab2fb6bb37d..b9df9c09b84e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -74,11 +74,13 @@ #include <trace/events/fib.h> #include "fib_lookup.h" -static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, +static int call_fib_entry_notifier(struct notifier_block *nb, enum fib_event_type event_type, u32 dst, - int dst_len, struct fib_alias *fa) + int dst_len, struct fib_alias *fa, + struct netlink_ext_ack *extack) { struct fib_entry_notifier_info info = { + .info.extack = extack, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, @@ -86,7 +88,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, .type = fa->fa_type, .tb_id = fa->tb_id, }; - return call_fib4_notifier(nb, net, event_type, &info.info); + return call_fib4_notifier(nb, event_type, &info.info); } static int call_fib_entry_notifiers(struct net *net, @@ -2015,10 +2017,12 @@ void fib_info_notify_update(struct net *net, struct nl_info *info) } } -static void fib_leaf_notify(struct net *net, struct key_vector *l, - struct fib_table *tb, struct notifier_block *nb) +static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb, + struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct fib_alias *fa; + int err; hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { struct fib_info *fi = fa->fa_info; @@ -2032,39 +2036,53 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, if (tb->tb_id != fa->tb_id) continue; - call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, - KEYLENGTH - fa->fa_slen, fa); + err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key, + KEYLENGTH - fa->fa_slen, + fa, extack); + if (err) + return err; } + return 0; } -static void fib_table_notify(struct net *net, struct fib_table *tb, - struct notifier_block *nb) +static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *l, *tp = t->kv; t_key key = 0; + int err; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - fib_leaf_notify(net, l, tb, nb); + err = fib_leaf_notify(l, tb, nb, extack); + if (err) + return err; key = l->key + 1; /* stop in case of wrap around */ if (key < l->key) break; } + return 0; } -void fib_notify(struct net *net, struct notifier_block *nb) +int fib_notify(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { unsigned int h; + int err; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, head, tb_hlist) - fib_table_notify(net, tb, nb); + hlist_for_each_entry_rcu(tb, head, tb_hlist) { + err = fib_table_notify(tb, nb, extack); + if (err) + return err; + } } + return 0; } static void __trie_free_rcu(struct rcu_head *head) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 480d0b22db1a..3b9c7a2725a9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1563,7 +1563,7 @@ static int ip_mc_check_igmp_msg(struct sk_buff *skb) } } -static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb) +static __sum16 ip_mc_validate_checksum(struct sk_buff *skb) { return skb_checksum_simple_validate(skb); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a53a543fe055..52690bb3e40f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1446,6 +1446,7 @@ static void erspan_setup(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &erspan_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1e2392b7c64e..c59a78a267c3 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -199,7 +199,7 @@ resubmit: kfree_skb(skb); return; } - nf_reset(skb); + nf_reset_ct(skb); } ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, skb); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 9bcca08efec9..32e20b758b68 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1483,10 +1483,10 @@ static int __init ip_auto_config(void) * missing values. */ if (ic_myaddr == NONE || -#ifdef CONFIG_ROOT_NFS +#if defined(CONFIG_ROOT_NFS) || defined(CONFIG_CIFS_ROOT) (root_server_addr == NONE && ic_servaddr == NONE && - ROOT_DEV == Root_NFS) || + (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CIFS)) || #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC @@ -1513,6 +1513,12 @@ static int __init ip_auto_config(void) goto try_try_again; } #endif +#ifdef CONFIG_CIFS_ROOT + if (ROOT_DEV == Root_CIFS) { + pr_err("IP-Config: Retrying forever (CIFS root)...\n"); + goto try_try_again; + } +#endif if (--retries) { pr_err("IP-Config: Reopening network devices...\n"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 313470f6bb14..440294bdb752 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -278,9 +278,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) rtnl_unlock(); } -static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); + return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack); } static unsigned int ipmr_rules_seq_read(struct net *net) @@ -336,7 +337,8 @@ static void __net_exit ipmr_rules_exit(struct net *net) rtnl_unlock(); } -static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } @@ -1794,7 +1796,7 @@ static void ip_encap(struct net *net, struct sk_buff *skb, ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - nf_reset(skb); + nf_reset_ct(skb); } static inline int ipmr_forward_finish(struct net *net, struct sock *sk, @@ -2140,7 +2142,7 @@ int ip_mr_input(struct sk_buff *skb) mroute_sk = rcu_dereference(mrt->mroute_sk); if (mroute_sk) { - nf_reset(skb); + nf_reset_ct(skb); raw_rcv(mroute_sk, skb); return 0; } @@ -3040,10 +3042,11 @@ static unsigned int ipmr_seq_read(struct net *net) return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); } -static int ipmr_dump(struct net *net, struct notifier_block *nb) +static int ipmr_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, - ipmr_mr_table_iter, &mrt_lock); + ipmr_mr_table_iter, &mrt_lock, extack); } static const struct fib_notifier_ops ipmr_notifier_ops_template = { diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index ea48bd15a575..aa8738a91210 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -386,15 +386,17 @@ EXPORT_SYMBOL(mr_rtm_dumproute); int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, - struct notifier_block *nb), + struct notifier_block *nb, + struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock) + rwlock_t *mrt_lock, + struct netlink_ext_ack *extack) { struct mr_table *mrt; int err; - err = rules_dump(net, nb); + err = rules_dump(net, nb, extack); if (err) return err; @@ -409,17 +411,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, if (!v->dev) continue; - mr_call_vif_notifier(nb, net, family, - FIB_EVENT_VIF_ADD, - v, vifi, mrt->id); + err = mr_call_vif_notifier(nb, family, + FIB_EVENT_VIF_ADD, + v, vifi, mrt->id, extack); + if (err) + break; } read_unlock(mrt_lock); + if (err) + return err; + /* Notify on table MFC entries */ - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) - mr_call_mfc_notifier(nb, net, family, - FIB_EVENT_ENTRY_ADD, - mfc, mrt->id); + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { + err = mr_call_mfc_notifier(nb, family, + FIB_EVENT_ENTRY_ADD, + mfc, mrt->id, extack); + if (err) + return err; + } } return 0; diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index af3fbf76dbd3..6cc5743c553a 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -65,7 +65,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ - nf_reset(skb); + nf_reset_ct(skb); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif /* diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 80da5a66d5d7..3183413ebc6c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -332,7 +332,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } - nf_reset(skb); + nf_reset_ct(skb); skb_push(skb, skb->data - skb_network_header(skb)); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7dcce724c78b..14654876127e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -916,16 +916,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (peer->rate_tokens == 0 || time_after(jiffies, (peer->rate_last + - (ip_rt_redirect_load << peer->rate_tokens)))) { + (ip_rt_redirect_load << peer->n_redirects)))) { __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; - ++peer->rate_tokens; ++peer->n_redirects; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - peer->rate_tokens == ip_rt_redirect_number) + peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 79c325a07ba5..9f41a76c1c54 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1739,8 +1739,8 @@ static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc) { unsigned long address = (unsigned long)zc->address; + u32 length = 0, seq, offset, zap_len; const skb_frag_t *frags = NULL; - u32 length = 0, seq, offset; struct vm_area_struct *vma; struct sk_buff *skb = NULL; struct tcp_sock *tp; @@ -1767,12 +1767,12 @@ static int tcp_zerocopy_receive(struct sock *sk, seq = tp->copied_seq; inq = tcp_inq(sk); zc->length = min_t(u32, zc->length, inq); - zc->length &= ~(PAGE_SIZE - 1); - if (zc->length) { - zap_page_range(vma, address, zc->length); + zap_len = zc->length & ~(PAGE_SIZE - 1); + if (zap_len) { + zap_page_range(vma, address, zap_len); zc->recv_skip_hint = 0; } else { - zc->recv_skip_hint = inq; + zc->recv_skip_hint = zc->length; } ret = 0; while (length + PAGE_SIZE <= zc->length) { @@ -1798,13 +1798,11 @@ static int tcp_zerocopy_receive(struct sock *sk, } if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { int remaining = zc->recv_skip_hint; - int size = skb_frag_size(frags); - while (remaining && (size != PAGE_SIZE || + while (remaining && (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags))) { - remaining -= size; + remaining -= skb_frag_size(frags); frags++; - size = skb_frag_size(frags); } zc->recv_skip_hint -= remaining; break; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 27dc3c1e9094..5cb0e7f065ea 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1914,7 +1914,7 @@ process: if (tcp_v4_inbound_md5_hash(sk, skb)) goto discard_and_relse; - nf_reset(skb); + nf_reset_ct(skb); if (tcp_filter(sk, skb)) goto discard_and_relse; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 40de2d2364a1..05be564414e9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -198,8 +198,13 @@ static bool retransmits_timed_out(struct sock *sk, return false; start_ts = tcp_sk(sk)->retrans_stamp; - if (likely(timeout == 0)) - timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN); + if (likely(timeout == 0)) { + unsigned int rto_base = TCP_RTO_MIN; + + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) + rto_base = tcp_timeout_init(sk); + timeout = tcp_model_timeout(sk, boundary, rto_base); + } return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf755156a684..14bc654b6842 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -821,6 +821,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, int is_udplite = IS_UDPLITE(sk); int offset = skb_transport_offset(skb); int len = skb->len - offset; + int datalen = len - sizeof(*uh); __wsum csum = 0; /* @@ -854,10 +855,12 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, return -EIO; } - skb_shinfo(skb)->gso_size = cork->gso_size; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(len - sizeof(uh), - cork->gso_size); + if (datalen > cork->gso_size) { + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen, + cork->gso_size); + } goto csum_partial; } @@ -1969,7 +1972,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) */ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; - nf_reset(skb); + nf_reset_ct(skb); if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); @@ -2298,7 +2301,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - nf_reset(skb); + nf_reset_ct(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_lib_checksum_complete(skb)) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 413b00cf9c2b..98d82305d6de 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5963,13 +5963,20 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: /* - * If the address was optimistic - * we inserted the route at the start of - * our DAD process, so we don't need - * to do it again + * If the address was optimistic we inserted the route at the + * start of our DAD process, so we don't need to do it again. + * If the device was taken down in the middle of the DAD + * cycle there is a race where we could get here without a + * host route, so nothing to insert. That will be fixed when + * the device is brought up. */ - if (!rcu_access_pointer(ifp->rt->fib6_node)) + if (ifp->rt && !rcu_access_pointer(ifp->rt->fib6_node)) { ip6_ins_rt(net, ifp->rt); + } else if (!ifp->rt && (ifp->idev->dev->flags & IFF_UP)) { + pr_warn("BUG: Address %pI6c on device %s is missing its host route.\n", + &ifp->addr, ifp->idev->dev->name); + } + if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c index 05f82baaa99e..f87ae33e1d01 100644 --- a/net/ipv6/fib6_notifier.c +++ b/net/ipv6/fib6_notifier.c @@ -7,12 +7,12 @@ #include <net/netns/ipv6.h> #include <net/ip6_fib.h> -int call_fib6_notifier(struct notifier_block *nb, struct net *net, +int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info) { info->family = AF_INET6; - return call_fib_notifier(nb, net, event_type, info); + return call_fib_notifier(nb, event_type, info); } int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, @@ -27,15 +27,16 @@ static unsigned int fib6_seq_read(struct net *net) return fib6_tables_seq_read(net) + fib6_rules_seq_read(net); } -static int fib6_dump(struct net *net, struct notifier_block *nb) +static int fib6_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { int err; - err = fib6_rules_dump(net, nb); + err = fib6_rules_dump(net, nb, extack); if (err) return err; - return fib6_tables_dump(net, nb); + return fib6_tables_dump(net, nb, extack); } static const struct fib_notifier_ops fib6_notifier_ops_template = { diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index f9e8fe3ff0c5..fafe556d21e0 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -47,9 +47,10 @@ bool fib6_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL_GPL(fib6_rule_default); -int fib6_rules_dump(struct net *net, struct notifier_block *nb) +int fib6_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return fib_rules_dump(net, nb, AF_INET6); + return fib_rules_dump(net, nb, AF_INET6, extack); } unsigned int fib6_rules_seq_read(struct net *net) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6e2af411cd9c..f66bc2af4e9d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -357,15 +357,17 @@ unsigned int fib6_tables_seq_read(struct net *net) return fib_seq; } -static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, +static int call_fib6_entry_notifier(struct notifier_block *nb, enum fib_event_type event_type, - struct fib6_info *rt) + struct fib6_info *rt, + struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { + .info.extack = extack, .rt = rt, }; - return call_fib6_notifier(nb, net, event_type, &info.info); + return call_fib6_notifier(nb, event_type, &info.info); } int call_fib6_entry_notifiers(struct net *net, @@ -401,40 +403,51 @@ int call_fib6_multipath_entry_notifiers(struct net *net, struct fib6_dump_arg { struct net *net; struct notifier_block *nb; + struct netlink_ext_ack *extack; }; -static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) +static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { if (rt == arg->net->ipv6.fib6_null_entry) - return; - call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); + return 0; + return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD, + rt, arg->extack); } static int fib6_node_dump(struct fib6_walker *w) { struct fib6_info *rt; + int err = 0; - for_each_fib6_walker_rt(w) - fib6_rt_dump(rt, w->args); + for_each_fib6_walker_rt(w) { + err = fib6_rt_dump(rt, w->args); + if (err) + break; + } w->leaf = NULL; - return 0; + return err; } -static void fib6_table_dump(struct net *net, struct fib6_table *tb, - struct fib6_walker *w) +static int fib6_table_dump(struct net *net, struct fib6_table *tb, + struct fib6_walker *w) { + int err; + w->root = &tb->tb6_root; spin_lock_bh(&tb->tb6_lock); - fib6_walk(net, w); + err = fib6_walk(net, w); spin_unlock_bh(&tb->tb6_lock); + return err; } /* Called with rcu_read_lock() */ -int fib6_tables_dump(struct net *net, struct notifier_block *nb) +int fib6_tables_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct fib6_dump_arg arg; struct fib6_walker *w; unsigned int h; + int err = 0; w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) @@ -443,19 +456,24 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb) w->func = fib6_node_dump; arg.net = net; arg.nb = nb; + arg.extack = extack; w->args = &arg; for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv6.fib_table_hash[h]; struct fib6_table *tb; - hlist_for_each_entry_rcu(tb, head, tb6_hlist) - fib6_table_dump(net, tb, w); + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { + err = fib6_table_dump(net, tb, w); + if (err < 0) + goto out; + } } +out: kfree(w); - return 0; + return err; } static int fib6_dump_node(struct fib6_walker *w) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index d432d0011c16..3d71c7d6102c 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -223,6 +223,16 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (ipv6_addr_is_multicast(&hdr->saddr)) goto err; + /* While RFC4291 is not explicit about v4mapped addresses + * in IPv6 headers, it seems clear linux dual-stack + * model can not deal properly with these. + * Security models could be fooled by ::ffff:127.0.0.1 for example. + * + * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02 + */ + if (ipv6_addr_v4mapped(&hdr->saddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); @@ -371,7 +381,7 @@ resubmit_final: /* Free reference early: we don't need it any more, and it may hold ip_conntrack module loaded indefinitely. */ - nf_reset(skb); + nf_reset_ct(skb); skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 857a89ad4d6c..bfa49ff70531 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -265,9 +265,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net) rtnl_unlock(); } -static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb) +static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR); + return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); } static unsigned int ip6mr_rules_seq_read(struct net *net) @@ -324,7 +325,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net) rtnl_unlock(); } -static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb) +static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } @@ -1256,10 +1258,11 @@ static unsigned int ip6mr_seq_read(struct net *net) return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net); } -static int ip6mr_dump(struct net *net, struct notifier_block *nb) +static int ip6mr_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, - ip6mr_mr_table_iter, &mrt_lock); + ip6mr_mr_table_iter, &mrt_lock, extack); } static struct notifier_block ip6_mr_notifier = { diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index e6c9da9866b1..a0a2de30be3e 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -54,7 +54,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, return; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_reset(skb); + nf_reset_ct(skb); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif if (hooknum == NF_INET_PRE_ROUTING || diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6e1888ee4036..a77f6b7d3a7c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -215,7 +215,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) /* Not releasing hash table! */ if (clone) { - nf_reset(clone); + nf_reset_ct(clone); rawv6_rcv(sk, clone); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index aae4938f3dea..6324d3a8cb53 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1109,6 +1109,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, __wsum csum = 0; int offset = skb_transport_offset(skb); int len = skb->len - offset; + int datalen = len - sizeof(*uh); /* * Create a UDP header @@ -1141,8 +1142,12 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, return -EIO; } - skb_shinfo(skb)->gso_size = cork->gso_size; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; + if (datalen > cork->gso_size) { + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen, + cork->gso_size); + } goto csum_partial; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 105e5a7092e7..f82ea12bac37 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1078,7 +1078,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); - nf_reset(skb); + nf_reset_ct(skb); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index bd3f39349d40..fd5ac2788e45 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -151,7 +151,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); rcu_read_lock(); dev = rcu_dereference(spriv->dev); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 622833317dcb..0d7c887a2b75 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -193,7 +193,7 @@ pass_up: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; - nf_reset(skb); + nf_reset_ct(skb); return sk_receive_skb(sk, skb, 1); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 687e23a8b326..802f19aba7e3 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -206,7 +206,7 @@ pass_up: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; - nf_reset(skb); + nf_reset_ct(skb); return sk_receive_skb(sk, skb, 1); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b11883d26875..33da6f738c99 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -485,7 +485,14 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); - if (ret) { + if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { + /* + * We didn't send the request yet, so don't need to check + * here if we already got a response, just mark as driver + * ready immediately. + */ + set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state); + } else if (ret) { ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", sta->sta.addr, tid); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index b1438fd4d876..64b544ae9966 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -487,9 +487,14 @@ static ssize_t ieee80211_if_fmt_aqm( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { struct ieee80211_local *local = sdata->local; - struct txq_info *txqi = to_txq_info(sdata->vif.txq); + struct txq_info *txqi; int len; + if (!sdata->vif.txq) + return 0; + + txqi = to_txq_info(sdata->vif.txq); + spin_lock_bh(&local->fq.lock); rcu_read_lock(); @@ -658,7 +663,9 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz); DEBUGFS_ADD(hw_queues); - if (sdata->local->ops->wake_tx_queue) + if (sdata->local->ops->wake_tx_queue && + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) DEBUGFS_ADD(aqm); } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0a6ff01c68a9..d40744903fa9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -538,7 +538,6 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct cfg80211_bss *cbss; - int err, changed = 0; sdata_assert_lock(sdata); @@ -560,13 +559,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) ifibss->chandef = sdata->csa_chandef; /* generate the beacon */ - err = ieee80211_ibss_csa_beacon(sdata, NULL); - if (err < 0) - return err; - - changed |= err; - - return changed; + return ieee80211_ibss_csa_beacon(sdata, NULL); } void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ee86c3333999..86bc469a28bc 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -70,7 +70,7 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) } /* return current EMWA throughput */ -int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) +int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg) { int usecs; @@ -79,13 +79,13 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) usecs = 1000000; /* reset thr. below 10% success */ - if (mr->stats.prob_ewma < MINSTREL_FRAC(10, 100)) + if (mr->stats.prob_avg < MINSTREL_FRAC(10, 100)) return 0; - if (prob_ewma > MINSTREL_FRAC(90, 100)) + if (prob_avg > MINSTREL_FRAC(90, 100)) return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs)); else - return MINSTREL_TRUNC(100000 * (prob_ewma / usecs)); + return MINSTREL_TRUNC(100000 * (prob_avg / usecs)); } /* find & sort topmost throughput rates */ @@ -98,8 +98,8 @@ minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) for (j = MAX_THR_RATES; j > 0; --j) { tmp_mrs = &mi->r[tp_list[j - 1]].stats; - if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <= - minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma)) + if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_avg) <= + minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_avg)) break; } @@ -157,20 +157,24 @@ minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi) * Recalculate statistics and counters of a given rate */ void -minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs) +minstrel_calc_rate_stats(struct minstrel_priv *mp, + struct minstrel_rate_stats *mrs) { unsigned int cur_prob; if (unlikely(mrs->attempts > 0)) { mrs->sample_skipped = 0; cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); - if (unlikely(!mrs->att_hist)) { - mrs->prob_ewma = cur_prob; + if (mp->new_avg) { + minstrel_filter_avg_add(&mrs->prob_avg, + &mrs->prob_avg_1, cur_prob); + } else if (unlikely(!mrs->att_hist)) { + mrs->prob_avg = cur_prob; } else { /*update exponential weighted moving avarage */ - mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma, - cur_prob, - EWMA_LEVEL); + mrs->prob_avg = minstrel_ewma(mrs->prob_avg, + cur_prob, + EWMA_LEVEL); } mrs->att_hist += mrs->attempts; mrs->succ_hist += mrs->success; @@ -200,12 +204,12 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats; /* Update statistics of success probability per rate */ - minstrel_calc_rate_stats(mrs); + minstrel_calc_rate_stats(mp, mrs); /* Sample less often below the 10% chance of success. * Sample less often above the 95% chance of success. */ - if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) || - mrs->prob_ewma < MINSTREL_FRAC(10, 100)) { + if (mrs->prob_avg > MINSTREL_FRAC(95, 100) || + mrs->prob_avg < MINSTREL_FRAC(10, 100)) { mr->adjusted_retry_count = mrs->retry_count >> 1; if (mr->adjusted_retry_count > 2) mr->adjusted_retry_count = 2; @@ -225,14 +229,14 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) * choose the maximum throughput rate as max_prob_rate * (2) if all success probabilities < 95%, the rate with * highest success probability is chosen as max_prob_rate */ - if (mrs->prob_ewma >= MINSTREL_FRAC(95, 100)) { - tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_ewma); + if (mrs->prob_avg >= MINSTREL_FRAC(95, 100)) { + tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_avg); tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate], - tmp_mrs->prob_ewma); + tmp_mrs->prob_avg); if (tmp_cur_tp >= tmp_prob_tp) tmp_prob_rate = i; } else { - if (mrs->prob_ewma >= tmp_mrs->prob_ewma) + if (mrs->prob_avg >= tmp_mrs->prob_avg) tmp_prob_rate = i; } } @@ -290,7 +294,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->sample_deferred--; if (time_after(jiffies, mi->last_stats_update + - (mp->update_interval * HZ) / 1000)) + mp->update_interval / (mp->new_avg ? 2 : 1))) minstrel_update_stats(mp, mi); } @@ -422,7 +426,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, * has a probability of >95%, we shouldn't be attempting * to use it, as this only wastes precious airtime */ if (!mrr_capable && - (mi->r[ndx].stats.prob_ewma > MINSTREL_FRAC(95, 100))) + (mi->r[ndx].stats.prob_avg > MINSTREL_FRAC(95, 100))) return; mi->prev_sample = true; @@ -573,7 +577,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta) * computing cur_tp */ tmp_mrs = &mi->r[idx].stats; - tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10; + tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_avg) * 10; tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024; return tmp_cur_tp; diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 51d8b2c846e7..dbb43bcd3c45 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -19,6 +19,21 @@ #define MAX_THR_RATES 4 /* + * Coefficients for moving average with noise filter (period=16), + * scaled by 10 bits + * + * a1 = exp(-pi * sqrt(2) / period) + * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period) + * coeff3 = -sqr(a1) + * coeff1 = 1 - coeff2 - coeff3 + */ +#define MINSTREL_AVG_COEFF1 (MINSTREL_FRAC(1, 1) - \ + MINSTREL_AVG_COEFF2 - \ + MINSTREL_AVG_COEFF3) +#define MINSTREL_AVG_COEFF2 0x00001499 +#define MINSTREL_AVG_COEFF3 -0x0000092e + +/* * Perform EWMA (Exponentially Weighted Moving Average) calculation */ static inline int @@ -32,6 +47,37 @@ minstrel_ewma(int old, int new, int weight) return old + incr; } +static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in) +{ + s32 out_1 = *prev_1; + s32 out_2 = *prev_2; + s32 val; + + if (!in) + in += 1; + + if (!out_1) { + val = out_1 = in; + goto out; + } + + val = MINSTREL_AVG_COEFF1 * in; + val += MINSTREL_AVG_COEFF2 * out_1; + val += MINSTREL_AVG_COEFF3 * out_2; + val >>= MINSTREL_SCALE; + + if (val > 1 << MINSTREL_SCALE) + val = 1 << MINSTREL_SCALE; + if (val < 0) + val = 1; + +out: + *prev_2 = out_1; + *prev_1 = val; + + return val; +} + struct minstrel_rate_stats { /* current / last sampling period attempts/success counters */ u16 attempts, last_attempts; @@ -40,8 +86,9 @@ struct minstrel_rate_stats { /* total attempts/success counters */ u32 att_hist, succ_hist; - /* prob_ewma - exponential weighted moving average of prob */ - u16 prob_ewma; + /* prob_avg - moving average of prob */ + u16 prob_avg; + u16 prob_avg_1; /* maximum retry counts */ u8 retry_count; @@ -95,6 +142,7 @@ struct minstrel_sta_info { struct minstrel_priv { struct ieee80211_hw *hw; bool has_mrr; + bool new_avg; u32 sample_switch; unsigned int cw_min; unsigned int cw_max; @@ -126,8 +174,9 @@ extern const struct rate_control_ops mac80211_minstrel; void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); /* Recalculate success probabilities and counters for a given rate using EWMA */ -void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs); -int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma); +void minstrel_calc_rate_stats(struct minstrel_priv *mp, + struct minstrel_rate_stats *mrs); +int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg); /* debugfs */ int minstrel_stats_open(struct inode *inode, struct file *file); diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index c8afd85b51a0..9b8e0daeb7bb 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -90,8 +90,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) p += sprintf(p, "%6u ", mr->perfect_tx_time); tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); - tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma); - eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg); + eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u" " %3u %3u %-3u " @@ -147,8 +147,8 @@ minstrel_stats_csv_open(struct inode *inode, struct file *file) p += sprintf(p, "%u,",mr->perfect_tx_time); tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); - tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma); - eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg); + eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u," "%llu,%llu,%d,%d\n", diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 0ef2633349b5..694a31978a04 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -346,12 +346,12 @@ minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi) */ int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, - int prob_ewma) + int prob_avg) { unsigned int nsecs = 0; /* do not account throughput if sucess prob is below 10% */ - if (prob_ewma < MINSTREL_FRAC(10, 100)) + if (prob_avg < MINSTREL_FRAC(10, 100)) return 0; if (group != MINSTREL_CCK_GROUP) @@ -365,11 +365,11 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, * account for collision related packet error rate fluctuation * (prob is scaled - see MINSTREL_FRAC above) */ - if (prob_ewma > MINSTREL_FRAC(90, 100)) + if (prob_avg > MINSTREL_FRAC(90, 100)) return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000) / nsecs)); else - return MINSTREL_TRUNC(100000 * ((prob_ewma * 1000) / nsecs)); + return MINSTREL_TRUNC(100000 * ((prob_avg * 1000) / nsecs)); } /* @@ -389,13 +389,13 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index, cur_group = index / MCS_GROUP_RATES; cur_idx = index % MCS_GROUP_RATES; - cur_prob = mi->groups[cur_group].rates[cur_idx].prob_ewma; + cur_prob = mi->groups[cur_group].rates[cur_idx].prob_avg; cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob); do { tmp_group = tp_list[j - 1] / MCS_GROUP_RATES; tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (cur_tp_avg < tmp_tp_avg || @@ -432,7 +432,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) tmp_group = mi->max_prob_rate / MCS_GROUP_RATES; tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from @@ -444,11 +444,11 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES; max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; - max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma; + max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg; - if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) { + if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) { cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, - mrs->prob_ewma); + mrs->prob_avg); if (cur_tp_avg > tmp_tp_avg) mi->max_prob_rate = index; @@ -458,9 +458,9 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) if (cur_tp_avg > max_gpr_tp_avg) mg->max_group_prob_rate = index; } else { - if (mrs->prob_ewma > tmp_prob) + if (mrs->prob_avg > tmp_prob) mi->max_prob_rate = index; - if (mrs->prob_ewma > max_gpr_prob) + if (mrs->prob_avg > max_gpr_prob) mg->max_group_prob_rate = index; } } @@ -482,12 +482,12 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES; tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES; tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) { @@ -518,7 +518,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) continue; tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; - tmp_prob = mi->groups[group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg; if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) && (minstrel_mcs_groups[group].streams < tmp_max_streams)) { @@ -623,7 +623,7 @@ minstrel_ht_rate_sample_switch(struct minstrel_priv *mp, * If that fails, look again for a rate that is at least as fast */ mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); - faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100); + faster_rate = mrs->prob_avg > MINSTREL_FRAC(75, 100); minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate); if (!n_rates && faster_rate) minstrel_ht_find_probe_rates(mi, rates, &n_rates, false); @@ -737,8 +737,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, mrs = &mg->rates[i]; mrs->retry_updated = false; - minstrel_calc_rate_stats(mrs); - cur_prob = mrs->prob_ewma; + minstrel_calc_rate_stats(mp, mrs); + cur_prob = mrs->prob_avg; if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0) continue; @@ -773,6 +773,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, /* try to sample all available rates during each interval */ mi->sample_count *= 8; + if (mp->new_avg) + mi->sample_count /= 2; if (sample) minstrel_ht_rate_sample_switch(mp, mi); @@ -889,6 +891,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_tx_rate *ar = info->status.rates; struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL; struct minstrel_priv *mp = priv; + u32 update_interval = mp->update_interval / 2; bool last, update = false; bool sample_status = false; int i; @@ -943,6 +946,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, switch (mi->sample_mode) { case MINSTREL_SAMPLE_IDLE: + if (mp->new_avg && + (mp->hw->max_rates > 1 || + mi->total_packets_cur < SAMPLE_SWITCH_THR)) + update_interval /= 2; break; case MINSTREL_SAMPLE_ACTIVE: @@ -970,23 +977,20 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, */ rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); if (rate->attempts > 30 && - MINSTREL_FRAC(rate->success, rate->attempts) < - MINSTREL_FRAC(20, 100)) { + rate->success < rate->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); update = true; } rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); if (rate2->attempts > 30 && - MINSTREL_FRAC(rate2->success, rate2->attempts) < - MINSTREL_FRAC(20, 100)) { + rate2->success < rate2->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); update = true; } } - if (time_after(jiffies, mi->last_stats_update + - (mp->update_interval / 2 * HZ) / 1000)) { + if (time_after(jiffies, mi->last_stats_update + update_interval)) { update = true; minstrel_ht_update_stats(mp, mi, true); } @@ -1008,7 +1012,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, unsigned int overhead = 0, overhead_rtscts = 0; mrs = minstrel_get_ratestats(mi, index); - if (mrs->prob_ewma < MINSTREL_FRAC(1, 10)) { + if (mrs->prob_avg < MINSTREL_FRAC(1, 10)) { mrs->retry_count = 1; mrs->retry_count_rtscts = 1; return; @@ -1065,7 +1069,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (!mrs->retry_updated) minstrel_calc_retransmit(mp, mi, index); - if (mrs->prob_ewma < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { + if (mrs->prob_avg < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { ratetbl->rate[offset].count = 2; ratetbl->rate[offset].count_rts = 2; ratetbl->rate[offset].count_cts = 2; @@ -1099,11 +1103,11 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, } static inline int -minstrel_ht_get_prob_ewma(struct minstrel_ht_sta *mi, int rate) +minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate) { int group = rate / MCS_GROUP_RATES; rate %= MCS_GROUP_RATES; - return mi->groups[group].rates[rate].prob_ewma; + return mi->groups[group].rates[rate].prob_avg; } static int @@ -1115,7 +1119,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) unsigned int duration; /* Disable A-MSDU if max_prob_rate is bad */ - if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100)) + if (mi->groups[group].rates[rate].prob_avg < MINSTREL_FRAC(50, 100)) return 1; duration = g->duration[rate]; @@ -1138,7 +1142,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) * data packet size */ if (duration > MCS_DURATION(1, 0, 260) || - (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) < + (minstrel_ht_get_prob_avg(mi, mi->max_tp_rate[0]) < MINSTREL_FRAC(75, 100))) return 3200; @@ -1243,7 +1247,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) * rate, to avoid wasting airtime. */ sample_dur = minstrel_get_duration(sample_idx); - if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) || + if (mrs->prob_avg > MINSTREL_FRAC(95, 100) || minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur) return -1; @@ -1666,7 +1670,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->has_mrr = true; mp->hw = hw; - mp->update_interval = 100; + mp->update_interval = HZ / 10; + mp->new_avg = true; #ifdef CONFIG_MAC80211_DEBUGFS mp->fixed_rate_idx = (u32) -1; @@ -1674,6 +1679,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) &mp->fixed_rate_idx); debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir, &mp->sample_switch); + debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir, + &mp->new_avg); #endif minstrel_ht_init_cck_rates(mp); @@ -1698,7 +1705,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta) i = mi->max_tp_rate[0] / MCS_GROUP_RATES; j = mi->max_tp_rate[0] % MCS_GROUP_RATES; - prob = mi->groups[i].rates[j].prob_ewma; + prob = mi->groups[i].rates[j].prob_avg; /* convert tp_avg from pkt per second in kbps */ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10; diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index f938701e7ab7..53ea3c29debf 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -119,6 +119,6 @@ struct minstrel_ht_sta_priv { void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, - int prob_ewma); + int prob_avg); #endif diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 5a6e9f3edc04..bebb71917742 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -98,8 +98,8 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) p += sprintf(p, "%6u ", tx_time); tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); - tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma); - eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg); + eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u" " %3u %3u %-3u " @@ -243,8 +243,8 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) p += sprintf(p, "%u,", tx_time); tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); - tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma); - eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg); + eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u," "%u,%llu,%llu,", diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1fa422782905..938c10f7955b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1617,7 +1617,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct sta_info *sta, struct sk_buff_head *skbs, bool txpending) { @@ -1679,7 +1679,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); info->control.vif = vif; - control.sta = sta; + control.sta = sta ? &sta->sta : NULL; __skb_unlink(skb, skbs); drv_tx(local, &control, skb); @@ -1698,7 +1698,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local, struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata; struct ieee80211_vif *vif; - struct ieee80211_sta *pubsta; struct sk_buff *skb; bool result = true; __le16 fc; @@ -1713,11 +1712,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local, if (sta && !sta->uploaded) sta = NULL; - if (sta) - pubsta = &sta->sta; - else - pubsta = NULL; - switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { @@ -1744,8 +1738,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, break; } - result = ieee80211_tx_frags(local, vif, pubsta, skbs, - txpending); + result = ieee80211_tx_frags(local, vif, sta, skbs, txpending); ieee80211_tpt_led_trig_tx(local, fc, led_len); @@ -3529,7 +3522,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, struct ieee80211_sub_if_data, u.ap); __skb_queue_tail(&tx.skbs, skb); - ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false); + ieee80211_tx_frags(local, &sdata->vif, sta, &tx.skbs, false); return true; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 051a02ddcb85..32a7a53833c0 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -247,7 +247,8 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) struct sta_info *sta; int i; - spin_lock_bh(&fq->lock); + local_bh_disable(); + spin_lock(&fq->lock); if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; @@ -273,9 +274,9 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) &txqi->flags)) continue; - spin_unlock_bh(&fq->lock); + spin_unlock(&fq->lock); drv_wake_tx_queue(local, txqi); - spin_lock_bh(&fq->lock); + spin_lock(&fq->lock); } } @@ -288,12 +289,14 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac) goto out; - spin_unlock_bh(&fq->lock); + spin_unlock(&fq->lock); drv_wake_tx_queue(local, txqi); + local_bh_enable(); return; out: - spin_unlock_bh(&fq->lock); + spin_unlock(&fq->lock); + local_bh_enable(); } static void diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 9c464d24beec..888d3068a492 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -613,7 +613,7 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) ret = ip_vs_confirm_conntrack(skb); if (ret == NF_ACCEPT) { - nf_reset(skb); + nf_reset_ct(skb); skb_forward_csum(skb); } return ret; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index af1497ab9464..69d6173f91e2 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -218,8 +218,13 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx, static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr) { struct nft_connlimit *priv = nft_expr_priv(expr); + bool ret; - return nf_conncount_gc_list(net, &priv->list); + local_bh_disable(); + ret = nf_conncount_gc_list(net, &priv->list); + local_bh_enable(); + + return ret; } static struct nft_expr_type nft_connlimit_type; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index efccd1ac9a66..0522b2b1fd95 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -458,10 +458,63 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static struct genl_dumpit_info *genl_dumpit_info_alloc(void) +{ + return kmalloc(sizeof(struct genl_dumpit_info), GFP_KERNEL); +} + +static void genl_dumpit_info_free(const struct genl_dumpit_info *info) +{ + kfree(info); +} + +static struct nlattr ** +genl_family_rcv_msg_attrs_parse(const struct genl_family *family, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + const struct genl_ops *ops, + int hdrlen, + enum genl_validate_flags no_strict_flag, + bool parallel) +{ + enum netlink_validation validate = ops->validate & no_strict_flag ? + NL_VALIDATE_LIBERAL : + NL_VALIDATE_STRICT; + struct nlattr **attrbuf; + int err; + + if (!family->maxattr) + return NULL; + + if (parallel) { + attrbuf = kmalloc_array(family->maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) + return ERR_PTR(-ENOMEM); + } else { + attrbuf = family->attrbuf; + } + + err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, + family->policy, validate, extack); + if (err && parallel) { + kfree(attrbuf); + return ERR_PTR(err); + } + return attrbuf; +} + +static void genl_family_rcv_msg_attrs_free(const struct genl_family *family, + struct nlattr **attrbuf, + bool parallel) +{ + if (parallel) + kfree(attrbuf); +} + static int genl_lock_start(struct netlink_callback *cb) { - /* our ops are always const - netlink API doesn't propagate that */ - const struct genl_ops *ops = cb->data; + const struct genl_ops *ops = genl_dumpit_info(cb)->ops; int rc = 0; if (ops->start) { @@ -474,8 +527,7 @@ static int genl_lock_start(struct netlink_callback *cb) static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - /* our ops are always const - netlink API doesn't propagate that */ - const struct genl_ops *ops = cb->data; + const struct genl_ops *ops = genl_dumpit_info(cb)->ops; int rc; genl_lock(); @@ -486,8 +538,8 @@ static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) static int genl_lock_done(struct netlink_callback *cb) { - /* our ops are always const - netlink API doesn't propagate that */ - const struct genl_ops *ops = cb->data; + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + const struct genl_ops *ops = info->ops; int rc = 0; if (ops->done) { @@ -495,120 +547,111 @@ static int genl_lock_done(struct netlink_callback *cb) rc = ops->done(cb); genl_unlock(); } + genl_family_rcv_msg_attrs_free(info->family, info->attrs, true); + genl_dumpit_info_free(info); return rc; } -static int genl_family_rcv_msg(const struct genl_family *family, - struct sk_buff *skb, - struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int genl_parallel_done(struct netlink_callback *cb) { - const struct genl_ops *ops; - struct net *net = sock_net(skb->sk); - struct genl_info info; - struct genlmsghdr *hdr = nlmsg_data(nlh); - struct nlattr **attrbuf; - int hdrlen, err; + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + const struct genl_ops *ops = info->ops; + int rc = 0; - /* this family doesn't exist in this netns */ - if (!family->netnsok && !net_eq(net, &init_net)) - return -ENOENT; + if (ops->done) + rc = ops->done(cb); + genl_family_rcv_msg_attrs_free(info->family, info->attrs, true); + genl_dumpit_info_free(info); + return rc; +} - hdrlen = GENL_HDRLEN + family->hdrsize; - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) - return -EINVAL; +static int genl_family_rcv_msg_dumpit(const struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + const struct genl_ops *ops, + int hdrlen, struct net *net) +{ + struct genl_dumpit_info *info; + struct nlattr **attrs = NULL; + int err; - ops = genl_get_cmd(hdr->cmd, family); - if (ops == NULL) + if (!ops->dumpit) return -EOPNOTSUPP; - if ((ops->flags & GENL_ADMIN_PERM) && - !netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if ((ops->flags & GENL_UNS_ADMIN_PERM) && - !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { - int rc; - - if (ops->dumpit == NULL) - return -EOPNOTSUPP; - - if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) { - int hdrlen = GENL_HDRLEN + family->hdrsize; - - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) - return -EINVAL; + if (ops->validate & GENL_DONT_VALIDATE_DUMP) + goto no_attrs; - if (family->maxattr) { - unsigned int validate = NL_VALIDATE_STRICT; - - if (ops->validate & - GENL_DONT_VALIDATE_DUMP_STRICT) - validate = NL_VALIDATE_LIBERAL; - rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), - family->maxattr, - family->policy, - validate, extack); - if (rc) - return rc; - } - } + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; - if (!family->parallel_ops) { - struct netlink_dump_control c = { - .module = family->module, - /* we have const, but the netlink API doesn't */ - .data = (void *)ops, - .start = genl_lock_start, - .dump = genl_lock_dumpit, - .done = genl_lock_done, - }; + attrs = genl_family_rcv_msg_attrs_parse(family, nlh, extack, + ops, hdrlen, + GENL_DONT_VALIDATE_DUMP_STRICT, + true); + if (IS_ERR(attrs)) + return PTR_ERR(attrs); + +no_attrs: + /* Allocate dumpit info. It is going to be freed by done() callback. */ + info = genl_dumpit_info_alloc(); + if (!info) { + genl_family_rcv_msg_attrs_free(family, attrs, true); + return -ENOMEM; + } - genl_unlock(); - rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); - genl_lock(); + info->family = family; + info->ops = ops; + info->attrs = attrs; - } else { - struct netlink_dump_control c = { - .module = family->module, - .start = ops->start, - .dump = ops->dumpit, - .done = ops->done, - }; + if (!family->parallel_ops) { + struct netlink_dump_control c = { + .module = family->module, + .data = info, + .start = genl_lock_start, + .dump = genl_lock_dumpit, + .done = genl_lock_done, + }; - rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); - } + genl_unlock(); + err = __netlink_dump_start(net->genl_sock, skb, nlh, &c); + genl_lock(); - return rc; + } else { + struct netlink_dump_control c = { + .module = family->module, + .data = info, + .start = ops->start, + .dump = ops->dumpit, + .done = genl_parallel_done, + }; + + err = __netlink_dump_start(net->genl_sock, skb, nlh, &c); } - if (ops->doit == NULL) - return -EOPNOTSUPP; - - if (family->maxattr && family->parallel_ops) { - attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), - GFP_KERNEL); - if (attrbuf == NULL) - return -ENOMEM; - } else - attrbuf = family->attrbuf; + return err; +} - if (attrbuf) { - enum netlink_validation validate = NL_VALIDATE_STRICT; +static int genl_family_rcv_msg_doit(const struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + const struct genl_ops *ops, + int hdrlen, struct net *net) +{ + struct nlattr **attrbuf; + struct genl_info info; + int err; - if (ops->validate & GENL_DONT_VALIDATE_STRICT) - validate = NL_VALIDATE_LIBERAL; + if (!ops->doit) + return -EOPNOTSUPP; - err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - family->policy, validate, extack); - if (err < 0) - goto out; - } + attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack, + ops, hdrlen, + GENL_DONT_VALIDATE_STRICT, + family->parallel_ops); + if (IS_ERR(attrbuf)) + return PTR_ERR(attrbuf); info.snd_seq = nlh->nlmsg_seq; info.snd_portid = NETLINK_CB(skb).portid; @@ -632,12 +675,49 @@ static int genl_family_rcv_msg(const struct genl_family *family, family->post_doit(ops, skb, &info); out: - if (family->parallel_ops) - kfree(attrbuf); + genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops); return err; } +static int genl_family_rcv_msg(const struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + const struct genl_ops *ops; + struct net *net = sock_net(skb->sk); + struct genlmsghdr *hdr = nlmsg_data(nlh); + int hdrlen; + + /* this family doesn't exist in this netns */ + if (!family->netnsok && !net_eq(net, &init_net)) + return -ENOENT; + + hdrlen = GENL_HDRLEN + family->hdrsize; + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + ops = genl_get_cmd(hdr->cmd, family); + if (ops == NULL) + return -EOPNOTSUPP; + + if ((ops->flags & GENL_ADMIN_PERM) && + !netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + + if ((ops->flags & GENL_UNS_ADMIN_PERM) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) + return genl_family_rcv_msg_dumpit(family, skb, nlh, extack, + ops, hdrlen, net); + else + return genl_family_rcv_msg_doit(family, skb, nlh, extack, + ops, hdrlen, net); +} + static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -1088,25 +1168,6 @@ problem: subsys_initcall(genl_init); -/** - * genl_family_attrbuf - return family's attrbuf - * @family: the family - * - * Return the family's attrbuf, while validating that it's - * actually valid to access it. - * - * You cannot use this function with a family that has parallel_ops - * and you can only use it within (pre/post) doit/dumpit callbacks. - */ -struct nlattr **genl_family_attrbuf(const struct genl_family *family) -{ - if (!WARN_ON(family->parallel_ops)) - lockdep_assert_held(&genl_mutex); - - return family->attrbuf; -} -EXPORT_SYMBOL(genl_family_attrbuf); - static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 8dfea26536c9..ccdd790e163a 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -107,9 +107,14 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) llcp_sock->service_name = kmemdup(llcp_addr.service_name, llcp_sock->service_name_len, GFP_KERNEL); - + if (!llcp_sock->service_name) { + ret = -ENOMEM; + goto put_dev; + } llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock); if (llcp_sock->ssap == LLCP_SAP_MAX) { + kfree(llcp_sock->service_name); + llcp_sock->service_name = NULL; ret = -EADDRINUSE; goto put_dev; } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 17e6ca62f1be..fd9ad534dd9b 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -102,22 +102,14 @@ nla_put_failure: static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) { - struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family); + const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct nfc_dev *dev; - int rc; u32 idx; - rc = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + nfc_genl_family.hdrsize, - attrbuf, nfc_genl_family.maxattr, - nfc_genl_policy, NULL); - if (rc < 0) - return ERR_PTR(rc); - - if (!attrbuf[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return ERR_PTR(-EINVAL); - idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]); + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) @@ -1697,7 +1689,8 @@ static const struct genl_ops nfc_genl_ops[] = { }, { .cmd = NFC_CMD_GET_TARGET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, }, diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 05249eb45082..df9c80bf621d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -971,6 +971,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, ct = nf_ct_get(skb, &ctinfo); if (ct) { + bool add_helper = false; + /* Packets starting a new connection must be NATted before the * helper, so that the helper knows about the NAT. We enforce * this by delaying both NAT and helper calls for unconfirmed @@ -988,16 +990,17 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, } /* Userspace may decide to perform a ct lookup without a helper - * specified followed by a (recirculate and) commit with one. - * Therefore, for unconfirmed connections which we will commit, - * we need to attach the helper here. + * specified followed by a (recirculate and) commit with one, + * or attach a helper in a later commit. Therefore, for + * connections which we will commit, we may need to attach + * the helper here. */ - if (!nf_ct_is_confirmed(ct) && info->commit && - info->helper && !nfct_help(ct)) { + if (info->commit && info->helper && !nfct_help(ct)) { int err = __nf_ct_try_assign_helper(ct, info->ct, GFP_ATOMIC); if (err) return err; + add_helper = true; /* helper installed, add seqadj if NAT is required */ if (info->nat && !nfct_seqadj(ct)) { @@ -1007,11 +1010,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, } /* Call the helper only if: - * - nf_conntrack_in() was executed above ("!cached") for a - * confirmed connection, or + * - nf_conntrack_in() was executed above ("!cached") or a + * helper was just attached ("add_helper") for a confirmed + * connection, or * - When committing an unconfirmed connection. */ - if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) && + if ((nf_ct_is_confirmed(ct) ? !cached || add_helper : + info->commit) && ovs_ct_helper(skb, info->family) != NF_ACCEPT) { return -EINVAL; } diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index d2437b5b2f6a..21c90d3a7ebf 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -237,7 +237,7 @@ static netdev_tx_t internal_dev_recv(struct sk_buff *skb) } skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); secpath_reset(skb); skb->pkt_type = PACKET_HOST; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e2742b006d25..82a50e850245 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1821,7 +1821,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); /* drop conntrack reference */ - nf_reset(skb); + nf_reset_ct(skb); spkt = &PACKET_SKB_CB(skb)->sa.pkt; @@ -2121,7 +2121,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); /* drop conntrack reference */ - nf_reset(skb); + nf_reset_ct(skb); spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; diff --git a/net/rds/ib.c b/net/rds/ib.c index 45acab2de0cf..62d4ebeb08c1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,6 +30,7 @@ * SOFTWARE. * */ +#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/if.h> @@ -107,6 +108,8 @@ static void rds_ib_dev_free(struct work_struct *work) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); + if (rds_ibdev->rid_hdrs_pool) + dma_pool_destroy(rds_ibdev->rid_hdrs_pool); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); @@ -143,6 +146,9 @@ static void rds_ib_add_one(struct ib_device *device) refcount_set(&rds_ibdev->refcount, 1); INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); + INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); + INIT_LIST_HEAD(&rds_ibdev->conn_list); + rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); @@ -179,6 +185,12 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->pd = NULL; goto put_dev; } + rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, + device->dma_device, + sizeof(struct rds_header), + L1_CACHE_BYTES, 0); + if (!rds_ibdev->rid_hdrs_pool) + goto put_dev; rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); @@ -203,9 +215,6 @@ static void rds_ib_add_one(struct ib_device *device) device->name, rds_ibdev->use_fastreg ? "FRMR" : "FMR"); - INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); - INIT_LIST_HEAD(&rds_ibdev->conn_list); - down_write(&rds_ib_devices_lock); list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); up_write(&rds_ib_devices_lock); diff --git a/net/rds/ib.h b/net/rds/ib.h index f2b558e8b5ea..6e6f24753998 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -165,8 +165,8 @@ struct rds_ib_connection { /* tx */ struct rds_ib_work_ring i_send_ring; struct rm_data_op *i_data_op; - struct rds_header *i_send_hdrs; - dma_addr_t i_send_hdrs_dma; + struct rds_header **i_send_hdrs; + dma_addr_t *i_send_hdrs_dma; struct rds_ib_send_work *i_sends; atomic_t i_signaled_sends; @@ -175,8 +175,8 @@ struct rds_ib_connection { struct rds_ib_work_ring i_recv_ring; struct rds_ib_incoming *i_ibinc; u32 i_recv_data_rem; - struct rds_header *i_recv_hdrs; - dma_addr_t i_recv_hdrs_dma; + struct rds_header **i_recv_hdrs; + dma_addr_t *i_recv_hdrs_dma; struct rds_ib_recv_work *i_recvs; u64 i_ack_recv; /* last ACK received */ struct rds_ib_refill_cache i_cache_incs; @@ -246,6 +246,7 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; + struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ bool use_fastreg; unsigned int max_mrs; @@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event); - +struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, + struct dma_pool *pool, + dma_addr_t **dma_addrs, u32 num_hdrs); +void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, + dma_addr_t *dma_addrs, u32 num_hdrs); #define rds_ib_conn_error(conn, fmt...) \ __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 233f1368162b..6b345c858dba 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,6 +30,7 @@ * SOFTWARE. * */ +#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/slab.h> @@ -439,6 +440,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) rds_ibdev->vector_load[index]--; } +/* Allocate DMA coherent memory to be used to store struct rds_header for + * sending/receiving packets. The pointers to the DMA memory and the + * associated DMA addresses are stored in two arrays. + * + * @ibdev: the IB device + * @pool: the DMA memory pool + * @dma_addrs: pointer to the array for storing DMA addresses + * @num_hdrs: number of headers to allocate + * + * It returns the pointer to the array storing the DMA memory pointers. On + * error, NULL pointer is returned. + */ +struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, + struct dma_pool *pool, + dma_addr_t **dma_addrs, u32 num_hdrs) +{ + struct rds_header **hdrs; + dma_addr_t *hdr_daddrs; + u32 i; + + hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, + ibdev_to_node(ibdev)); + if (!hdrs) + return NULL; + + hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, + ibdev_to_node(ibdev)); + if (!hdr_daddrs) { + kvfree(hdrs); + return NULL; + } + + for (i = 0; i < num_hdrs; i++) { + hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); + if (!hdrs[i]) { + rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); + return NULL; + } + } + + *dma_addrs = hdr_daddrs; + return hdrs; +} + +/* Free the DMA memory used to store struct rds_header. + * + * @pool: the DMA memory pool + * @hdrs: pointer to the array storing DMA memory pointers + * @dma_addrs: pointer to the array storing DMA addresses + * @num_hdars: number of headers to free. + */ +void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, + dma_addr_t *dma_addrs, u32 num_hdrs) +{ + u32 i; + + for (i = 0; i < num_hdrs; i++) + dma_pool_free(pool, hdrs[i], dma_addrs[i]); + kvfree(hdrs); + kvfree(dma_addrs); +} + /* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. @@ -451,6 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct ib_cq_init_attr cq_attr = {}; struct rds_ib_device *rds_ibdev; int ret, fr_queue_space; + struct dma_pool *pool; /* * It's normal to see a null device if an incoming connection races @@ -541,31 +605,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto recv_cq_out; } - ic->i_send_hdrs = ib_dma_alloc_coherent(dev, - ic->i_send_ring.w_nr * - sizeof(struct rds_header), - &ic->i_send_hdrs_dma, GFP_KERNEL); + pool = rds_ibdev->rid_hdrs_pool; + ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr); if (!ic->i_send_hdrs) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent send failed\n"); + rdsdebug("DMA send hdrs alloc failed\n"); goto qp_out; } - ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, - ic->i_recv_ring.w_nr * - sizeof(struct rds_header), - &ic->i_recv_hdrs_dma, GFP_KERNEL); + ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr); if (!ic->i_recv_hdrs) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent recv failed\n"); + rdsdebug("DMA recv hdrs alloc failed\n"); goto send_hdrs_dma_out; } - ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), - &ic->i_ack_dma, GFP_KERNEL); + ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, + &ic->i_ack_dma); if (!ic->i_ack) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent ack failed\n"); + rdsdebug("DMA ack header alloc failed\n"); goto recv_hdrs_dma_out; } @@ -596,17 +657,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn) sends_out: vfree(ic->i_sends); + ack_dma_out: - ib_dma_free_coherent(dev, sizeof(struct rds_header), - ic->i_ack, ic->i_ack_dma); + dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + ic->i_ack = NULL; + recv_hdrs_dma_out: - ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * - sizeof(struct rds_header), - ic->i_recv_hdrs, ic->i_recv_hdrs_dma); + rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = NULL; + ic->i_recv_hdrs_dma = NULL; + send_hdrs_dma_out: - ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * - sizeof(struct rds_header), - ic->i_send_hdrs, ic->i_send_hdrs_dma); + rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr); + ic->i_send_hdrs = NULL; + ic->i_send_hdrs_dma = NULL; + qp_out: rdma_destroy_qp(ic->i_cm_id); recv_cq_out: @@ -984,8 +1051,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ic->i_cm_id ? ic->i_cm_id->qp : NULL); if (ic->i_cm_id) { - struct ib_device *dev = ic->i_cm_id->device; - rdsdebug("disconnecting cm %p\n", ic->i_cm_id); err = rdma_disconnect(ic->i_cm_id); if (err) { @@ -1035,24 +1100,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ib_destroy_cq(ic->i_recv_cq); } - /* then free the resources that ib callbacks use */ - if (ic->i_send_hdrs) - ib_dma_free_coherent(dev, - ic->i_send_ring.w_nr * - sizeof(struct rds_header), - ic->i_send_hdrs, - ic->i_send_hdrs_dma); - - if (ic->i_recv_hdrs) - ib_dma_free_coherent(dev, - ic->i_recv_ring.w_nr * - sizeof(struct rds_header), - ic->i_recv_hdrs, - ic->i_recv_hdrs_dma); - - if (ic->i_ack) - ib_dma_free_coherent(dev, sizeof(struct rds_header), - ic->i_ack, ic->i_ack_dma); + if (ic->rds_ibdev) { + struct dma_pool *pool; + + pool = ic->rds_ibdev->rid_hdrs_pool; + + /* then free the resources that ib callbacks use */ + if (ic->i_send_hdrs) { + rds_dma_hdrs_free(pool, ic->i_send_hdrs, + ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr); + ic->i_send_hdrs = NULL; + ic->i_send_hdrs_dma = NULL; + } + + if (ic->i_recv_hdrs) { + rds_dma_hdrs_free(pool, ic->i_recv_hdrs, + ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = NULL; + ic->i_recv_hdrs_dma = NULL; + } + + if (ic->i_ack) { + dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + ic->i_ack = NULL; + } + } else { + WARN_ON(ic->i_send_hdrs); + WARN_ON(ic->i_send_hdrs_dma); + WARN_ON(ic->i_recv_hdrs); + WARN_ON(ic->i_recv_hdrs_dma); + WARN_ON(ic->i_ack); + } if (ic->i_sends) rds_ib_send_clear_ring(ic); @@ -1071,9 +1151,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ic->i_pd = NULL; ic->i_send_cq = NULL; ic->i_recv_cq = NULL; - ic->i_send_hdrs = NULL; - ic->i_recv_hdrs = NULL; - ic->i_ack = NULL; } BUG_ON(ic->rds_ibdev); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index a0f99bbf362c..694d411dc72f 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic) recv->r_wr.num_sge = RDS_IB_RECV_SGE; sge = &recv->r_sge[0]; - sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); + sge->addr = ic->i_recv_hdrs_dma[i]; sge->length = sizeof(struct rds_header); sge->lkey = ic->i_pd->local_dma_lkey; @@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, WARN_ON(ret != 1); sge = &recv->r_sge[0]; - sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); + sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; sge->length = sizeof(struct rds_header); sge = &recv->r_sge[1]; @@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, } data_len -= sizeof(struct rds_header); - ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; + ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; /* Validate the checksum. */ if (!rds_message_verify_checksum(ihdr)) { @@ -993,10 +993,11 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, } else { /* We expect errors as the qp is drained during shutdown */ if (rds_conn_up(conn) || rds_conn_connecting(conn)) - rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n", + rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos, wc->status, - ib_wc_status_msg(wc->status)); + ib_wc_status_msg(wc->status), + wc->vendor_err); } /* rds_ib_process_recv() doesn't always consume the frag, and diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index dfe6237dafe2..d1cc1d7778d8 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic) send->s_wr.ex.imm_data = 0; sge = &send->s_sge[0]; - sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); + sge->addr = ic->i_send_hdrs_dma[i]; + sge->length = sizeof(struct rds_header); sge->lkey = ic->i_pd->local_dma_lkey; @@ -300,10 +301,10 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) /* We expect errors as the qp is drained during shutdown */ if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { - rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n", + rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos, wc->status, - ib_wc_status_msg(wc->status)); + ib_wc_status_msg(wc->status), wc->vendor_err); } } @@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send->s_queued = jiffies; send->s_op = NULL; - send->s_sge[0].addr = ic->i_send_hdrs_dma - + (pos * sizeof(struct rds_header)); + send->s_sge[0].addr = ic->i_send_hdrs_dma[pos]; + send->s_sge[0].length = sizeof(struct rds_header); - memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); + memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, + sizeof(struct rds_header)); + /* Set up the data, if present */ if (i < work_alloc @@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, &send->s_wr, send->s_wr.num_sge, send->s_wr.next); if (ic->i_flowctl && adv_credits) { - struct rds_header *hdr = &ic->i_send_hdrs[pos]; + struct rds_header *hdr = ic->i_send_hdrs[pos]; /* add credit and redo the header checksum */ hdr->h_credit = adv_credits; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 9c3ac96f71cb..bf4dd6cf79a0 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -209,6 +209,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, */ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) { + const void *here = __builtin_return_address(0); struct rxrpc_peer *peer; _enter(""); @@ -230,6 +231,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer->cong_cwnd = 3; else peer->cong_cwnd = 4; + trace_rxrpc_peer(peer, rxrpc_peer_new, 1, here); } _leave(" = %p", peer); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 06c7a2da21bc..39b427dc7512 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1127,6 +1127,33 @@ static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, }; +static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], + struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + int err; + + if (!opt) { + NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); + return -EINVAL; + } + + err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, + cbq_policy, extack); + if (err < 0) + return err; + + if (tb[TCA_CBQ_WRROPT]) { + const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]); + + if (wrr->priority > TC_CBQ_MAXPRIO) { + NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO"); + err = -EINVAL; + } + } + return err; +} + static int cbq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1139,13 +1166,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt, hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); q->delay_timer.function = cbq_undelay; - if (!opt) { - NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, - extack); + err = cbq_opt_parse(tb, opt, extack); if (err < 0) return err; @@ -1464,13 +1485,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t struct cbq_class *parent; struct qdisc_rate_table *rtab = NULL; - if (!opt) { - NL_SET_ERR_MSG(extack, "Mandatory qdisc options missing"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, - extack); + err = cbq_opt_parse(tb, opt, extack); if (err < 0) return err; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 1bef152c5721..b2905b03a432 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -306,7 +306,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) if (err < 0) goto skip; - if (ecmd.base.speed != SPEED_UNKNOWN) + if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN) speed = ecmd.base.speed; skip: diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index bad1cbe59a56..05605b30bef3 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -361,6 +361,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, goto errout; err = -EINVAL; + if (!tb[TCA_DSMARK_INDICES]) + goto errout; indices = nla_get_u16(tb[TCA_DSMARK_INDICES]); if (hweight32(indices) != 1) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 17bd8f539bc7..4c75dbabd343 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -382,13 +382,8 @@ void __qdisc_run(struct Qdisc *q) int packets; while (qdisc_restart(q, &packets)) { - /* - * Ordered by possible occurrence: Postpone processing if - * 1. we've exceeded packet quota - * 2. another process needs the CPU; - */ quota -= packets; - if (quota <= 0 || need_resched()) { + if (quota <= 0) { __netif_schedule(q); break; } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 2f7b34205c82..68b543f85a96 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1044,12 +1044,11 @@ static void taprio_set_picos_per_byte(struct net_device *dev, if (err < 0) goto skip; - if (ecmd.base.speed != SPEED_UNKNOWN) + if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN) speed = ecmd.base.speed; skip: - picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, - speed * 1000 * 1000); + picos_per_byte = (USEC_PER_SEC * 8) / speed; atomic64_set(&q->picos_per_byte, picos_per_byte); netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d2ffc9a0ba3a..1ba893b85dad 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -429,6 +429,8 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, changeover = 1 ; asoc->peer.primary_path = transport; + sctp_ulpevent_nofity_peer_addr_change(transport, + SCTP_ADDR_MADE_PRIM, 0); /* Set a default msg_name for events. */ memcpy(&asoc->peer.primary_addr, &transport->ipaddr, @@ -569,6 +571,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, asoc->peer.transport_count--; + sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0); sctp_transport_free(peer); } @@ -707,6 +710,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list); asoc->peer.transport_count++; + sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_ADDED, 0); + /* If we do not yet have a primary path, set one. */ if (!asoc->peer.primary_path) { sctp_assoc_set_primary(asoc, peer); @@ -781,10 +786,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, enum sctp_transport_cmd command, sctp_sn_error_t error) { - struct sctp_ulpevent *event; - struct sockaddr_storage addr; - int spc_state = 0; bool ulp_notify = true; + int spc_state = 0; /* Record the transition on the transport. */ switch (command) { @@ -836,16 +839,9 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, /* Generate and send a SCTP_PEER_ADDR_CHANGE notification * to the user. */ - if (ulp_notify) { - memset(&addr, 0, sizeof(struct sockaddr_storage)); - memcpy(&addr, &transport->ipaddr, - transport->af_specific->sockaddr_len); - - event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, - 0, spc_state, error, GFP_ATOMIC); - if (event) - asoc->stream.si->enqueue_event(&asoc->ulpq, event); - } + if (ulp_notify) + sctp_ulpevent_nofity_peer_addr_change(transport, + spc_state, error); /* Select new active and retran paths. */ sctp_select_active_and_retran_path(asoc); diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index cc0405c79dfc..cc3ce5d80b08 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -75,41 +75,39 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) struct list_head *pos, *temp; struct sctp_chunk *chunk; struct sctp_ulpevent *ev; - int error = 0, notify; - - /* If we failed, we may need to notify. */ - notify = msg->send_failed ? -1 : 0; + int error, sent; /* Release all references. */ list_for_each_safe(pos, temp, &msg->chunks) { list_del_init(pos); chunk = list_entry(pos, struct sctp_chunk, frag_list); - /* Check whether we _really_ need to notify. */ - if (notify < 0) { - asoc = chunk->asoc; - if (msg->send_error) - error = msg->send_error; - else - error = asoc->outqueue.error; - - notify = sctp_ulpevent_type_enabled(asoc->subscribe, - SCTP_SEND_FAILED); + + if (!msg->send_failed) { + sctp_chunk_put(chunk); + continue; } - /* Generate a SEND FAILED event only if enabled. */ - if (notify > 0) { - int sent; - if (chunk->has_tsn) - sent = SCTP_DATA_SENT; - else - sent = SCTP_DATA_UNSENT; + asoc = chunk->asoc; + error = msg->send_error ?: asoc->outqueue.error; + sent = chunk->has_tsn ? SCTP_DATA_SENT : SCTP_DATA_UNSENT; + if (sctp_ulpevent_type_enabled(asoc->subscribe, + SCTP_SEND_FAILED)) { ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent, error, GFP_ATOMIC); if (ev) asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } + if (sctp_ulpevent_type_enabled(asoc->subscribe, + SCTP_SEND_FAILED_EVENT)) { + ev = sctp_ulpevent_make_send_failed_event(asoc, chunk, + sent, error, + GFP_ATOMIC); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + sctp_chunk_put(chunk); } diff --git a/net/sctp/input.c b/net/sctp/input.c index 1008cdc44dd6..5a070fb5b278 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -201,7 +201,7 @@ int sctp_rcv(struct sk_buff *skb) if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; - nf_reset(skb); + nf_reset_ct(skb); if (sk_filter(sk, skb)) goto discard_release; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index e0cc1edf49a0..c82dbdcf13f2 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -238,7 +238,7 @@ fail: * When a destination address on a multi-homed peer encounters a change * an interface details event is sent. */ -struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( +static struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( const struct sctp_association *asoc, const struct sockaddr_storage *aaddr, int flags, int state, int error, gfp_t gfp) @@ -336,6 +336,22 @@ fail: return NULL; } +void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport, + int state, int error) +{ + struct sctp_association *asoc = transport->asoc; + struct sockaddr_storage addr; + struct sctp_ulpevent *event; + + memset(&addr, 0, sizeof(struct sockaddr_storage)); + memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); + + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, state, + error, GFP_ATOMIC); + if (event) + asoc->stream.si->enqueue_event(&asoc->ulpq, event); +} + /* Create and initialize an SCTP_REMOTE_ERROR notification. * * Note: This assumes that the chunk->skb->data already points to the @@ -511,6 +527,45 @@ fail: return NULL; } +struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event( + const struct sctp_association *asoc, struct sctp_chunk *chunk, + __u16 flags, __u32 error, gfp_t gfp) +{ + struct sctp_send_failed_event *ssf; + struct sctp_ulpevent *event; + struct sk_buff *skb; + int len; + + skb = skb_copy_expand(chunk->skb, sizeof(*ssf), 0, gfp); + if (!skb) + return NULL; + + len = ntohs(chunk->chunk_hdr->length); + len -= sctp_datachk_len(&asoc->stream); + + skb_pull(skb, sctp_datachk_len(&asoc->stream)); + event = sctp_skb2event(skb); + sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); + + ssf = skb_push(skb, sizeof(*ssf)); + ssf->ssf_type = SCTP_SEND_FAILED_EVENT; + ssf->ssf_flags = flags; + ssf->ssf_length = sizeof(*ssf) + len; + skb_trim(skb, ssf->ssf_length); + ssf->ssf_error = error; + + ssf->ssfe_info.snd_sid = chunk->sinfo.sinfo_stream; + ssf->ssfe_info.snd_ppid = chunk->sinfo.sinfo_ppid; + ssf->ssfe_info.snd_context = chunk->sinfo.sinfo_context; + ssf->ssfe_info.snd_assoc_id = chunk->sinfo.sinfo_assoc_id; + ssf->ssfe_info.snd_flags = chunk->chunk_hdr->flags; + + sctp_ulpevent_set_owner(event, asoc); + ssf->ssf_assoc_id = sctp_assoc2id(asoc); + + return event; +} + /* Create and initialize a SCTP_SHUTDOWN_EVENT notification. * * Socket Extensions for SCTP - draft-01 diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index fc06720b53c1..1a858e59fc31 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -65,8 +65,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) rc = sk_wait_event(sk, &timeout, !smc_tx_prepared_sends(&smc->conn) || - (sk->sk_err == ECONNABORTED) || - (sk->sk_err == ECONNRESET), + sk->sk_err == ECONNABORTED || + sk->sk_err == ECONNRESET, &wait); if (rc) break; @@ -113,9 +113,6 @@ static void smc_close_active_abort(struct smc_sock *smc) { struct sock *sk = &smc->sk; - struct smc_cdc_conn_state_flags *txflags = - &smc->conn.local_tx_ctrl.conn_state_flags; - if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { sk->sk_err = ECONNABORTED; if (smc->clcsock && smc->clcsock->sk) { @@ -129,35 +126,26 @@ static void smc_close_active_abort(struct smc_sock *smc) release_sock(sk); cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); + sk->sk_state = SMC_CLOSED; sock_put(sk); /* passive closing */ break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - if (!smc_cdc_rxed_any_close(&smc->conn)) - sk->sk_state = SMC_PEERABORTWAIT; - else - sk->sk_state = SMC_CLOSED; release_sock(sk); cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); + sk->sk_state = SMC_CLOSED; break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: - if (!txflags->peer_conn_closed) { - /* just SHUTDOWN_SEND done */ - sk->sk_state = SMC_PEERABORTWAIT; - } else { - sk->sk_state = SMC_CLOSED; - } + case SMC_PEERFINCLOSEWAIT: + sk->sk_state = SMC_CLOSED; sock_put(sk); /* passive closing */ break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: sk->sk_state = SMC_CLOSED; break; - case SMC_PEERFINCLOSEWAIT: - sock_put(sk); /* passive closing */ - break; case SMC_INIT: case SMC_PEERABORTWAIT: case SMC_CLOSED: @@ -215,8 +203,6 @@ again: if (sk->sk_state == SMC_ACTIVE) { /* send close request */ rc = smc_close_final(conn); - if (rc) - break; sk->sk_state = SMC_PEERCLOSEWAIT1; } else { /* peer event has changed the state */ @@ -229,8 +215,6 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); - if (rc) - break; } sk->sk_state = SMC_CLOSED; break; @@ -246,8 +230,6 @@ again: goto again; /* confirm close from peer */ rc = smc_close_final(conn); - if (rc) - break; if (smc_cdc_rxed_any_close(conn)) { /* peer has closed the socket already */ sk->sk_state = SMC_CLOSED; @@ -263,8 +245,6 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); - if (rc) - break; } /* peer sending PeerConnectionClosed will cause transition */ break; @@ -272,10 +252,12 @@ again: /* peer sending PeerConnectionClosed will cause transition */ break; case SMC_PROCESSABORT: - smc_close_abort(conn); + rc = smc_close_abort(conn); sk->sk_state = SMC_CLOSED; break; case SMC_PEERABORTWAIT: + sk->sk_state = SMC_CLOSED; + break; case SMC_CLOSED: /* nothing to do, add tracing in future patch */ break; @@ -451,8 +433,6 @@ again: goto again; /* send close wr request */ rc = smc_close_wr(conn); - if (rc) - break; sk->sk_state = SMC_PEERCLOSEWAIT1; break; case SMC_APPCLOSEWAIT1: @@ -466,8 +446,6 @@ again: goto again; /* confirm close from peer */ rc = smc_close_wr(conn); - if (rc) - break; sk->sk_state = SMC_APPCLOSEWAIT2; break; case SMC_APPCLOSEWAIT2: diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 4ca50ddf8d16..5862784eedd4 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -42,6 +42,19 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc); +/* return head of link group list and its lock for a given link group */ +static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, + spinlock_t **lgr_lock) +{ + if (lgr->is_smcd) { + *lgr_lock = &lgr->smcd->lgr_lock; + return &lgr->smcd->lgr_list; + } + + *lgr_lock = &smc_lgr_list.lock; + return &smc_lgr_list.list; +} + static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) { /* client link group creation always follows the server link group @@ -157,19 +170,21 @@ static void smc_lgr_free_work(struct work_struct *work) struct smc_link_group *lgr = container_of(to_delayed_work(work), struct smc_link_group, free_work); + spinlock_t *lgr_lock; bool conns; - spin_lock_bh(&smc_lgr_list.lock); + smc_lgr_list_head(lgr, &lgr_lock); + spin_lock_bh(lgr_lock); read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); if (!conns) { /* number of lgr connections is no longer zero */ - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(lgr_lock); return; } if (!list_empty(&lgr->list)) list_del_init(&lgr->list); /* remove from smc_lgr_list */ - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(lgr_lock); if (!lgr->is_smcd && !lgr->terminating) { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; @@ -198,7 +213,9 @@ static void smc_lgr_free_work(struct work_struct *work) static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) { struct smc_link_group *lgr; + struct list_head *lgr_list; struct smc_link *lnk; + spinlock_t *lgr_lock; u8 rndvec[3]; int rc = 0; int i; @@ -231,10 +248,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->conns_all = RB_ROOT; if (ini->is_smcd) { /* SMC-D specific settings */ + get_device(&ini->ism_dev->dev); lgr->peer_gid = ini->ism_gid; lgr->smcd = ini->ism_dev; + lgr_list = &ini->ism_dev->lgr_list; + lgr_lock = &lgr->smcd->lgr_lock; } else { /* SMC-R specific settings */ + get_device(&ini->ib_dev->ibdev->dev); lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, SMC_SYSTEMID_LEN); @@ -245,6 +266,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lnk->link_id = SMC_SINGLE_LINK; lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; + lgr_list = &smc_lgr_list.list; + lgr_lock = &smc_lgr_list.lock; lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; if (!ini->ib_dev->initialized) @@ -274,9 +297,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) goto destroy_qp; } smc->conn.lgr = lgr; - spin_lock_bh(&smc_lgr_list.lock); - list_add(&lgr->list, &smc_lgr_list.list); - spin_unlock_bh(&smc_lgr_list.lock); + spin_lock_bh(lgr_lock); + list_add(&lgr->list, lgr_list); + spin_unlock_bh(lgr_lock); return 0; destroy_qp: @@ -430,20 +453,27 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) static void smc_lgr_free(struct smc_link_group *lgr) { smc_lgr_free_bufs(lgr); - if (lgr->is_smcd) + if (lgr->is_smcd) { smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); - else + put_device(&lgr->smcd->dev); + } else { smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); + put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev); + } kfree(lgr); } void smc_lgr_forget(struct smc_link_group *lgr) { - spin_lock_bh(&smc_lgr_list.lock); + struct list_head *lgr_list; + spinlock_t *lgr_lock; + + lgr_list = smc_lgr_list_head(lgr, &lgr_lock); + spin_lock_bh(lgr_lock); /* do not use this link group for new connections */ - if (!list_empty(&lgr->list)) - list_del_init(&lgr->list); - spin_unlock_bh(&smc_lgr_list.lock); + if (!list_empty(lgr_list)) + list_del_init(lgr_list); + spin_unlock_bh(lgr_lock); } /* terminate linkgroup abnormally */ @@ -484,9 +514,12 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) void smc_lgr_terminate(struct smc_link_group *lgr) { - spin_lock_bh(&smc_lgr_list.lock); + spinlock_t *lgr_lock; + + smc_lgr_list_head(lgr, &lgr_lock); + spin_lock_bh(lgr_lock); __smc_lgr_terminate(lgr); - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(lgr_lock); } /* Called when IB port is terminated */ @@ -511,16 +544,15 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) LIST_HEAD(lgr_free_list); /* run common cleanup function and build free list */ - spin_lock_bh(&smc_lgr_list.lock); - list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { - if (lgr->is_smcd && lgr->smcd == dev && - (!peer_gid || lgr->peer_gid == peer_gid) && + spin_lock_bh(&dev->lgr_lock); + list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { + if ((!peer_gid || lgr->peer_gid == peer_gid) && (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { __smc_lgr_terminate(lgr); list_move(&lgr->list, &lgr_free_list); } } - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(&dev->lgr_lock); /* cancel the regular free workers and actually free lgrs */ list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { @@ -604,10 +636,14 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) { struct smc_connection *conn = &smc->conn; + struct list_head *lgr_list; struct smc_link_group *lgr; enum smc_lgr_role role; + spinlock_t *lgr_lock; int rc = 0; + lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list; + lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock; ini->cln_first_contact = SMC_FIRST_CONTACT; role = smc->listen_smc ? SMC_SERV : SMC_CLNT; if (role == SMC_CLNT && ini->srv_first_contact) @@ -615,8 +651,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) goto create; /* determine if an existing link group can be reused */ - spin_lock_bh(&smc_lgr_list.lock); - list_for_each_entry(lgr, &smc_lgr_list.list, list) { + spin_lock_bh(lgr_lock); + list_for_each_entry(lgr, lgr_list, list) { write_lock_bh(&lgr->conns_lock); if ((ini->is_smcd ? smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) : @@ -636,7 +672,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) } write_unlock_bh(&lgr->conns_lock); } - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(lgr_lock); if (role == SMC_CLNT && !ini->srv_first_contact && ini->cln_first_contact == SMC_FIRST_CONTACT) { @@ -1024,16 +1060,45 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, return 0; } +static void smc_core_going_away(void) +{ + struct smc_ib_device *smcibdev; + struct smcd_dev *smcd; + + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { + int i; + + for (i = 0; i < SMC_MAX_PORTS; i++) + set_bit(i, smcibdev->ports_going_away); + } + spin_unlock(&smc_ib_devices.lock); + + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(smcd, &smcd_dev_list.list, list) { + smcd->going_away = 1; + } + spin_unlock(&smcd_dev_list.lock); +} + /* Called (from smc_exit) when module is removed */ void smc_core_exit(void) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_freeing_list); + struct smcd_dev *smcd; + + smc_core_going_away(); spin_lock_bh(&smc_lgr_list.lock); - if (!list_empty(&smc_lgr_list.list)) - list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); + list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); spin_unlock_bh(&smc_lgr_list.lock); + + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(smcd, &smcd_dev_list.list, list) + list_splice_init(&smcd->lgr_list, &lgr_freeing_list); + spin_unlock(&smcd_dev_list.lock); + list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); if (!lgr->is_smcd) { diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index d14ca4af6f94..af05daeb0538 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -242,8 +242,12 @@ static void smc_ib_port_event_work(struct work_struct *work) for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { smc_ib_remember_port_attr(smcibdev, port_idx + 1); clear_bit(port_idx, &smcibdev->port_event_mask); - if (!smc_ib_port_active(smcibdev, port_idx + 1)) + if (!smc_ib_port_active(smcibdev, port_idx + 1)) { + set_bit(port_idx, smcibdev->ports_going_away); smc_port_terminate(smcibdev, port_idx + 1); + } else { + clear_bit(port_idx, smcibdev->ports_going_away); + } } } @@ -259,8 +263,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, switch (ibevent->event) { case IB_EVENT_DEVICE_FATAL: /* terminate all ports on device */ - for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) + for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) { set_bit(port_idx, &smcibdev->port_event_mask); + set_bit(port_idx, smcibdev->ports_going_away); + } schedule_work(&smcibdev->port_event_work); break; case IB_EVENT_PORT_ERR: @@ -269,6 +275,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, port_idx = ibevent->element.port_num - 1; if (port_idx < SMC_MAX_PORTS) { set_bit(port_idx, &smcibdev->port_event_mask); + if (ibevent->event == IB_EVENT_PORT_ERR) + set_bit(port_idx, smcibdev->ports_going_away); + else if (ibevent->event == IB_EVENT_PORT_ACTIVE) + clear_bit(port_idx, smcibdev->ports_going_away); schedule_work(&smcibdev->port_event_work); } break; @@ -307,6 +317,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) port_idx = ibevent->element.qp->port - 1; if (port_idx < SMC_MAX_PORTS) { set_bit(port_idx, &smcibdev->port_event_mask); + set_bit(port_idx, smcibdev->ports_going_away); schedule_work(&smcibdev->port_event_work); } break; diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index da60ab9e8d70..6a0069db6cae 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -47,6 +47,7 @@ struct smc_ib_device { /* ib-device infos for smc */ u8 initialized : 1; /* ib dev CQ, evthdl done */ struct work_struct port_event_work; unsigned long port_event_mask; + DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS); }; struct smc_buf_desc; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index e89e918b88e0..ee7340898cb4 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -286,7 +286,9 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, smc_pnetid_by_dev_port(parent, 0, smcd->pnetid); spin_lock_init(&smcd->lock); + spin_lock_init(&smcd->lgr_lock); INIT_LIST_HEAD(&smcd->vlan); + INIT_LIST_HEAD(&smcd->lgr_list); smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", WQ_MEM_RECLAIM, name); if (!smcd->event_wq) { @@ -313,6 +315,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd) spin_lock(&smcd_dev_list.lock); list_del(&smcd->list); spin_unlock(&smcd_dev_list.lock); + smcd->going_away = 1; flush_workqueue(smcd->event_wq); destroy_workqueue(smcd->event_wq); smc_smcd_terminate(smcd, 0, VLAN_VID_MASK); @@ -342,6 +345,8 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event) { struct smc_ism_event_work *wrk; + if (smcd->going_away) + return; /* copy event to event work queue, and let it be handled there */ wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC); if (!wrk) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index bab2da8cf17a..6b7799b3f5ca 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -781,6 +781,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, dev_put(ndev); if (netdev == ndev && smc_ib_port_active(ibdev, i) && + !test_bit(i - 1, ibdev->ports_going_away) && !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL)) { ini->ib_dev = ibdev; @@ -820,6 +821,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, continue; if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && smc_ib_port_active(ibdev, i) && + !test_bit(i - 1, ibdev->ports_going_away) && !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL)) { ini->ib_dev = ibdev; @@ -846,7 +848,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, spin_lock(&smcd_dev_list.lock); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { - if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) { + if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && + !ismdev->going_away) { ini->ism_dev = ismdev; break; } diff --git a/net/tipc/link.c b/net/tipc/link.c index 6cc75ffd9e2c..999eab592de8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -160,6 +160,7 @@ struct tipc_link { struct { u16 len; u16 limit; + struct sk_buff *target_bskb; } backlog[5]; u16 snd_nxt; u16 window; @@ -880,6 +881,7 @@ static void link_prepare_wakeup(struct tipc_link *l) void tipc_link_reset(struct tipc_link *l) { struct sk_buff_head list; + u32 imp; __skb_queue_head_init(&list); @@ -901,11 +903,10 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_purge(&l->deferdq); __skb_queue_purge(&l->backlogq); __skb_queue_purge(&l->failover_deferdq); - l->backlog[TIPC_LOW_IMPORTANCE].len = 0; - l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; - l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; - l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; - l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; + for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) { + l->backlog[imp].len = 0; + l->backlog[imp].target_bskb = NULL; + } kfree_skb(l->reasm_buf); kfree_skb(l->reasm_tnlmsg); kfree_skb(l->failover_reasm_skb); @@ -947,7 +948,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; struct sk_buff_head *transmq = &l->transmq; struct sk_buff_head *backlogq = &l->backlogq; - struct sk_buff *skb, *_skb, *bskb; + struct sk_buff *skb, *_skb, **tskb; int pkt_cnt = skb_queue_len(list); int rc = 0; @@ -999,19 +1000,21 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, seqno++; continue; } - if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) { + tskb = &l->backlog[imp].target_bskb; + if (tipc_msg_bundle(*tskb, hdr, mtu)) { kfree_skb(__skb_dequeue(list)); l->stats.sent_bundled++; continue; } - if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) { + if (tipc_msg_make_bundle(tskb, hdr, mtu, l->addr)) { kfree_skb(__skb_dequeue(list)); - __skb_queue_tail(backlogq, bskb); - l->backlog[msg_importance(buf_msg(bskb))].len++; + __skb_queue_tail(backlogq, *tskb); + l->backlog[imp].len++; l->stats.sent_bundled++; l->stats.sent_bundles++; continue; } + l->backlog[imp].target_bskb = NULL; l->backlog[imp].len += skb_queue_len(list); skb_queue_splice_tail_init(list, backlogq); } @@ -1027,6 +1030,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l, u16 seqno = l->snd_nxt; u16 ack = l->rcv_nxt - 1; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u32 imp; while (skb_queue_len(&l->transmq) < l->window) { skb = skb_peek(&l->backlogq); @@ -1037,7 +1041,10 @@ static void tipc_link_advance_backlog(struct tipc_link *l, break; __skb_dequeue(&l->backlogq); hdr = buf_msg(skb); - l->backlog[msg_importance(hdr)].len--; + imp = msg_importance(hdr); + l->backlog[imp].len--; + if (unlikely(skb == l->backlog[imp].target_bskb)) + l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); /* next retransmit attempt */ if (link_is_bc_sndlink(l)) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e6d49cdc61b4..922d262e153f 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -543,10 +543,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, bmsg = buf_msg(_skb); tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); - if (msg_isdata(msg)) - msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE); - else - msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE); + msg_set_importance(bmsg, msg_importance(msg)); msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index d6165ad384c0..d32bbd0f5e46 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -176,7 +176,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_PUBL_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_nl_publ_dump, }, { @@ -239,7 +240,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_MON_PEER_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_nl_node_dump_monitor_peer, }, { @@ -250,7 +252,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = { #ifdef CONFIG_TIPC_MEDIA_UDP { .cmd = TIPC_NL_UDP_GET_REMOTEIP, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_udp_nl_dump_remoteip, }, #endif @@ -268,18 +271,6 @@ struct genl_family tipc_genl_family __ro_after_init = { .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), }; -int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) -{ - u32 maxattr = tipc_genl_family.maxattr; - - *attr = genl_family_attrbuf(&tipc_genl_family); - if (!*attr) - return -EOPNOTSUPP; - - return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr, - tipc_nl_policy, NULL); -} - int __init tipc_netlink_start(void) { int res; diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index 4ba0ad422110..7cf777723e3e 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -38,7 +38,6 @@ #include <net/netlink.h> extern struct genl_family tipc_genl_family; -int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); struct tipc_nl_msg { struct sk_buff *skb; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index e135d4e11231..17a529739f8d 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -181,15 +181,18 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg, struct sk_buff *arg) { + struct genl_dumpit_info info; int len = 0; int err; struct sk_buff *buf; struct nlmsghdr *nlmsg; struct netlink_callback cb; + struct nlattr **attrbuf; memset(&cb, 0, sizeof(cb)); cb.nlh = (struct nlmsghdr *)arg->data; cb.skb = arg; + cb.data = &info; buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!buf) @@ -201,19 +204,35 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return -ENOMEM; } + attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto err_out; + } + + info.attrs = attrbuf; + err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf, + tipc_genl_family.maxattr, + tipc_genl_family.policy, NULL); + if (err) + goto err_out; + do { int rem; len = (*cmd->dumpit)(buf, &cb); nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { - struct nlattr **attrs; - - err = tipc_nlmsg_parse(nlmsg, &attrs); + err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN, + attrbuf, + tipc_genl_family.maxattr, + tipc_genl_family.policy, + NULL); if (err) goto err_out; - err = (*cmd->format)(msg, attrs); + err = (*cmd->format)(msg, attrbuf); if (err) goto err_out; @@ -231,6 +250,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, err = 0; err_out: + kfree(attrbuf); tipc_dump_done(&cb); kfree_skb(buf); diff --git a/net/tipc/node.c b/net/tipc/node.c index c8f6177dd5a2..f2e3cf70c922 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2484,13 +2484,9 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, int err; if (!prev_node) { - struct nlattr **attrs; + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_MON]) return -EINVAL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3b9f8cc328f5..d579b64705b1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3588,13 +3588,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) struct tipc_sock *tsk; if (!tsk_portid) { - struct nlattr **attrs; + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 287df68721df..43ca5fd6574d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -448,15 +448,11 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) int i; if (!bid && !skip_cnt) { + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; struct net *net = sock_net(skb->sk); struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; - struct nlattr **attrs; char *bname; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; diff --git a/net/tls/Kconfig b/net/tls/Kconfig index e4328b3b72eb..61ec78521a60 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -26,3 +26,13 @@ config TLS_DEVICE Enable kernel support for HW offload of the TLS protocol. If unsure, say N. + +config TLS_TOE + bool "Transport Layer Security TCP stack bypass" + depends on TLS + default n + help + Enable kernel support for legacy HW offload of the TLS protocol, + which is incompatible with the Linux networking stack semantics. + + If unsure, say N. diff --git a/net/tls/Makefile b/net/tls/Makefile index ef0dc74ce8f9..f1ffbfe8968d 100644 --- a/net/tls/Makefile +++ b/net/tls/Makefile @@ -3,8 +3,11 @@ # Makefile for the TLS subsystem. # +CFLAGS_trace.o := -I$(src) + obj-$(CONFIG_TLS) += tls.o -tls-y := tls_main.o tls_sw.o +tls-y := tls_main.o tls_sw.o tls_proc.o trace.o +tls-$(CONFIG_TLS_TOE) += tls_toe.o tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index f959487c5cd1..33b267b052c0 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -38,6 +38,8 @@ #include <net/tcp.h> #include <net/tls.h> +#include "trace.h" + /* device_offload_lock is used to synchronize tls_dev_add * against NETDEV_DOWN notifications. */ @@ -202,6 +204,15 @@ void tls_device_free_resources_tx(struct sock *sk) tls_free_partial_record(sk, tls_ctx); } +void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + trace_tls_device_tx_resync_req(sk, got_seq, exp_seq); + WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags)); +} +EXPORT_SYMBOL_GPL(tls_offload_tx_resync_request); + static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, u32 seq) { @@ -216,6 +227,7 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, rcd_sn = tls_ctx->tx.rec_seq; + trace_tls_device_tx_resync_send(sk, seq, rcd_sn); down_read(&device_offload_lock); netdev = tls_ctx->netdev; if (netdev) @@ -419,7 +431,7 @@ static int tls_push_data(struct sock *sk, ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST)) return -ENOTSUPP; - if (sk->sk_err) + if (unlikely(sk->sk_err)) return -sk->sk_err; flags |= MSG_SENDPAGE_DECRYPTED; @@ -440,9 +452,8 @@ static int tls_push_data(struct sock *sk, max_open_record_len = TLS_MAX_PAYLOAD_SIZE + prot->prepend_size; do { - rc = tls_do_allocation(sk, ctx, pfrag, - prot->prepend_size); - if (rc) { + rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size); + if (unlikely(rc)) { rc = sk_stream_wait_memory(sk, &timeo); if (!rc) continue; @@ -637,15 +648,19 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) static void tls_device_resync_rx(struct tls_context *tls_ctx, struct sock *sk, u32 seq, u8 *rcd_sn) { + struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); struct net_device *netdev; if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags))) return; + + trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type); netdev = READ_ONCE(tls_ctx->netdev); if (netdev) netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn, TLS_OFFLOAD_CTX_DIR_RX); clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC); } void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) @@ -653,8 +668,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx; u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE]; + u32 sock_data, is_req_pending; struct tls_prot_info *prot; - u32 is_req_pending; s64 resync_req; u32 req_seq; @@ -683,8 +698,12 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) /* head of next rec is already in, note that the sock_inq will * include the currently parsed message when called from parser */ - if (tcp_inq(sk) > rcd_len) + sock_data = tcp_inq(sk); + if (sock_data > rcd_len) { + trace_tls_device_rx_resync_nh_delay(sk, sock_data, + rcd_len); return; + } rx_ctx->resync_nh_do_now = 0; seq += rcd_len; @@ -728,6 +747,7 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx, /* head of next rec is already in, parser will sync for us */ if (tcp_inq(sk) > rxm->full_len) { + trace_tls_device_rx_resync_nh_schedule(sk); ctx->resync_nh_do_now = 1; } else { struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -826,9 +846,9 @@ free_buf: return err; } -int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) +int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm) { - struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx); int is_decrypted = skb->decrypted; int is_encrypted = !is_decrypted; @@ -840,6 +860,10 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) is_encrypted &= !skb_iter->decrypted; } + trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len, + tls_ctx->rx.rec_seq, rxm->full_len, + is_encrypted, is_decrypted); + ctx->sw.decrypted |= is_decrypted; /* Return immediately if the record is either entirely plaintext or @@ -1013,6 +1037,8 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX, &ctx->crypto_send.info, tcp_sk(sk)->write_seq); + trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_TX, + tcp_sk(sk)->write_seq, rec_seq, rc); if (rc) goto release_lock; @@ -1049,6 +1075,7 @@ free_marker_record: int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) { + struct tls12_crypto_info_aes_gcm_128 *info; struct tls_offload_context_rx *context; struct net_device *netdev; int rc = 0; @@ -1096,6 +1123,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, tcp_sk(sk)->copied_seq); + info = (void *)&ctx->crypto_recv.info; + trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_RX, + tcp_sk(sk)->copied_seq, info->rec_seq, rc); if (rc) goto free_sw_resources; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ac88877dcade..f144b965704e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -41,7 +41,9 @@ #include <linux/inetdevice.h> #include <linux/inet_diag.h> +#include <net/snmp.h> #include <net/tls.h> +#include <net/tls_toe.h> MODULE_AUTHOR("Mellanox Technologies"); MODULE_DESCRIPTION("Transport Layer Security Support"); @@ -58,14 +60,12 @@ static struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); static struct proto *saved_tcpv4_prot; static DEFINE_MUTEX(tcpv4_prot_mutex); -static LIST_HEAD(device_list); -static DEFINE_SPINLOCK(device_spinlock); static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; static struct proto_ops tls_sw_proto_ops; static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], struct proto *base); -static void update_sk_prot(struct sock *sk, struct tls_context *ctx) +void update_sk_prot(struct sock *sk, struct tls_context *ctx) { int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; @@ -286,14 +286,19 @@ static void tls_sk_proto_cleanup(struct sock *sk, kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); + TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); + TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); } - if (ctx->rx_conf == TLS_SW) + if (ctx->rx_conf == TLS_SW) { tls_sw_release_resources_rx(sk); - else if (ctx->rx_conf == TLS_HW) + TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + } else if (ctx->rx_conf == TLS_HW) { tls_device_offload_cleanup_rx(sk); + TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); + } } static void tls_sk_proto_close(struct sock *sk, long timeout) @@ -534,19 +539,29 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, if (tx) { rc = tls_set_device_offload(sk, ctx); conf = TLS_HW; - if (rc) { + if (!rc) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); + } else { rc = tls_set_sw_offload(sk, ctx, 1); if (rc) goto err_crypto_info; + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); conf = TLS_SW; } } else { rc = tls_set_device_offload_rx(sk, ctx); conf = TLS_HW; - if (rc) { + if (!rc) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); + } else { rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto err_crypto_info; + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); conf = TLS_SW; } tls_sw_strparser_arm(sk, ctx); @@ -603,7 +618,7 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } -static struct tls_context *create_ctx(struct sock *sk) +struct tls_context *tls_ctx_create(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx; @@ -643,90 +658,6 @@ static void tls_build_proto(struct sock *sk) } } -static void tls_hw_sk_destruct(struct sock *sk) -{ - struct tls_context *ctx = tls_get_ctx(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - - ctx->sk_destruct(sk); - /* Free ctx */ - rcu_assign_pointer(icsk->icsk_ulp_data, NULL); - tls_ctx_free(sk, ctx); -} - -static int tls_hw_prot(struct sock *sk) -{ - struct tls_context *ctx; - struct tls_device *dev; - int rc = 0; - - spin_lock_bh(&device_spinlock); - list_for_each_entry(dev, &device_list, dev_list) { - if (dev->feature && dev->feature(dev)) { - ctx = create_ctx(sk); - if (!ctx) - goto out; - - spin_unlock_bh(&device_spinlock); - tls_build_proto(sk); - ctx->sk_destruct = sk->sk_destruct; - sk->sk_destruct = tls_hw_sk_destruct; - ctx->rx_conf = TLS_HW_RECORD; - ctx->tx_conf = TLS_HW_RECORD; - update_sk_prot(sk, ctx); - spin_lock_bh(&device_spinlock); - rc = 1; - break; - } - } -out: - spin_unlock_bh(&device_spinlock); - return rc; -} - -static void tls_hw_unhash(struct sock *sk) -{ - struct tls_context *ctx = tls_get_ctx(sk); - struct tls_device *dev; - - spin_lock_bh(&device_spinlock); - list_for_each_entry(dev, &device_list, dev_list) { - if (dev->unhash) { - kref_get(&dev->kref); - spin_unlock_bh(&device_spinlock); - dev->unhash(dev, sk); - kref_put(&dev->kref, dev->release); - spin_lock_bh(&device_spinlock); - } - } - spin_unlock_bh(&device_spinlock); - ctx->sk_proto->unhash(sk); -} - -static int tls_hw_hash(struct sock *sk) -{ - struct tls_context *ctx = tls_get_ctx(sk); - struct tls_device *dev; - int err; - - err = ctx->sk_proto->hash(sk); - spin_lock_bh(&device_spinlock); - list_for_each_entry(dev, &device_list, dev_list) { - if (dev->hash) { - kref_get(&dev->kref); - spin_unlock_bh(&device_spinlock); - err |= dev->hash(dev, sk); - kref_put(&dev->kref, dev->release); - spin_lock_bh(&device_spinlock); - } - } - spin_unlock_bh(&device_spinlock); - - if (err) - tls_hw_unhash(sk); - return err; -} - static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], struct proto *base) { @@ -764,10 +695,11 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW]; #endif - +#ifdef CONFIG_TLS_TOE prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base; - prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash; - prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash; + prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_toe_hash; + prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_toe_unhash; +#endif } static int tls_init(struct sock *sk) @@ -775,8 +707,12 @@ static int tls_init(struct sock *sk) struct tls_context *ctx; int rc = 0; - if (tls_hw_prot(sk)) + tls_build_proto(sk); + +#ifdef CONFIG_TLS_TOE + if (tls_toe_bypass(sk)) return 0; +#endif /* The TLS ulp is currently supported only for TCP sockets * in ESTABLISHED state. @@ -787,11 +723,9 @@ static int tls_init(struct sock *sk) if (sk->sk_state != TCP_ESTABLISHED) return -ENOTSUPP; - tls_build_proto(sk); - /* allocate tls context */ write_lock_bh(&sk->sk_callback_lock); - ctx = create_ctx(sk); + ctx = tls_ctx_create(sk); if (!ctx) { rc = -ENOMEM; goto out; @@ -877,21 +811,34 @@ static size_t tls_get_info_size(const struct sock *sk) return size; } -void tls_register_device(struct tls_device *device) +static int __net_init tls_init_net(struct net *net) { - spin_lock_bh(&device_spinlock); - list_add_tail(&device->dev_list, &device_list); - spin_unlock_bh(&device_spinlock); + int err; + + net->mib.tls_statistics = alloc_percpu(struct linux_tls_mib); + if (!net->mib.tls_statistics) + return -ENOMEM; + + err = tls_proc_init(net); + if (err) + goto err_free_stats; + + return 0; +err_free_stats: + free_percpu(net->mib.tls_statistics); + return err; } -EXPORT_SYMBOL(tls_register_device); -void tls_unregister_device(struct tls_device *device) +static void __net_exit tls_exit_net(struct net *net) { - spin_lock_bh(&device_spinlock); - list_del(&device->dev_list); - spin_unlock_bh(&device_spinlock); + tls_proc_fini(net); + free_percpu(net->mib.tls_statistics); } -EXPORT_SYMBOL(tls_unregister_device); + +static struct pernet_operations tls_proc_ops = { + .init = tls_init_net, + .exit = tls_exit_net, +}; static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .name = "tls", @@ -904,6 +851,12 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { static int __init tls_register(void) { + int err; + + err = register_pernet_subsys(&tls_proc_ops); + if (err) + return err; + tls_sw_proto_ops = inet_stream_ops; tls_sw_proto_ops.splice_read = tls_sw_splice_read; @@ -917,6 +870,7 @@ static void __exit tls_unregister(void) { tcp_unregister_ulp(&tcp_tls_ulp_ops); tls_device_cleanup(); + unregister_pernet_subsys(&tls_proc_ops); } module_init(tls_register); diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c new file mode 100644 index 000000000000..83d9c80a684e --- /dev/null +++ b/net/tls/tls_proc.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <net/snmp.h> +#include <net/tls.h> + +static const struct snmp_mib tls_mib_list[] = { + SNMP_MIB_ITEM("TlsCurrTxSw", LINUX_MIB_TLSCURRTXSW), + SNMP_MIB_ITEM("TlsCurrRxSw", LINUX_MIB_TLSCURRRXSW), + SNMP_MIB_ITEM("TlsCurrTxDevice", LINUX_MIB_TLSCURRTXDEVICE), + SNMP_MIB_ITEM("TlsCurrRxDevice", LINUX_MIB_TLSCURRRXDEVICE), + SNMP_MIB_ITEM("TlsTxSw", LINUX_MIB_TLSTXSW), + SNMP_MIB_ITEM("TlsRxSw", LINUX_MIB_TLSRXSW), + SNMP_MIB_ITEM("TlsTxDevice", LINUX_MIB_TLSTXDEVICE), + SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE), + SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR), + SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), + SNMP_MIB_SENTINEL +}; + +static int tls_statistics_seq_show(struct seq_file *seq, void *v) +{ + unsigned long buf[LINUX_MIB_TLSMAX] = {}; + struct net *net = seq->private; + int i; + + snmp_get_cpu_field_batch(buf, tls_mib_list, net->mib.tls_statistics); + for (i = 0; tls_mib_list[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", tls_mib_list[i].name, buf[i]); + + return 0; +} + +int __net_init tls_proc_init(struct net *net) +{ + if (!proc_create_net_single("tls_stat", 0444, net->proc_net, + tls_statistics_seq_show, NULL)) + return -ENOMEM; + return 0; +} + +void __net_exit tls_proc_fini(struct net *net) +{ + remove_proc_entry("tls_stat", net->proc_net); +} diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c2b5e0d2ba1a..de7561d4cfa5 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -168,6 +168,9 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) /* Propagate if there was an err */ if (err) { + if (err == -EBADMSG) + TLS_INC_STATS(sock_net(skb->sk), + LINUX_MIB_TLSDECRYPTERROR); ctx->async_wait.err = err; tls_err_abort(skb->sk, err); } else { @@ -253,6 +256,8 @@ static int tls_do_decryption(struct sock *sk, return ret; ret = crypto_wait_req(ret, &ctx->async_wait); + } else if (ret == -EBADMSG) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); } if (async) @@ -1490,7 +1495,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, if (!ctx->decrypted) { if (tls_ctx->rx_conf == TLS_HW) { - err = tls_device_decrypted(sk, skb); + err = tls_device_decrypted(sk, tls_ctx, skb, rxm); if (err < 0) return err; } @@ -1518,7 +1523,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, rxm->offset += prot->prepend_size; rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - ctx->decrypted = true; + ctx->decrypted = 1; ctx->saved_data_ready(sk); } else { *zc = false; @@ -1928,7 +1933,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, tls_err_abort(sk, EBADMSG); goto splice_read_end; } - ctx->decrypted = true; + ctx->decrypted = 1; } rxm = strp_msg(skb); @@ -2029,7 +2034,7 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb) struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - ctx->decrypted = false; + ctx->decrypted = 0; ctx->recv_pkt = skb; strp_pause(strp); @@ -2386,10 +2391,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); if (crypto_info->version == TLS_1_3_VERSION) - sw_ctx_rx->async_capable = false; + sw_ctx_rx->async_capable = 0; else sw_ctx_rx->async_capable = - tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + !!(tfm->__crt_alg->cra_flags & + CRYPTO_ALG_ASYNC); /* Set up strparser */ memset(&cb, 0, sizeof(cb)); diff --git a/net/tls/tls_toe.c b/net/tls/tls_toe.c new file mode 100644 index 000000000000..7e1330f19165 --- /dev/null +++ b/net/tls/tls_toe.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <[email protected]>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/list.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> +#include <net/inet_connection_sock.h> +#include <net/tls.h> +#include <net/tls_toe.h> + +static LIST_HEAD(device_list); +static DEFINE_SPINLOCK(device_spinlock); + +static void tls_toe_sk_destruct(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tls_context *ctx = tls_get_ctx(sk); + + ctx->sk_destruct(sk); + /* Free ctx */ + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); + tls_ctx_free(sk, ctx); +} + +int tls_toe_bypass(struct sock *sk) +{ + struct tls_toe_device *dev; + struct tls_context *ctx; + int rc = 0; + + spin_lock_bh(&device_spinlock); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->feature && dev->feature(dev)) { + ctx = tls_ctx_create(sk); + if (!ctx) + goto out; + + ctx->sk_destruct = sk->sk_destruct; + sk->sk_destruct = tls_toe_sk_destruct; + ctx->rx_conf = TLS_HW_RECORD; + ctx->tx_conf = TLS_HW_RECORD; + update_sk_prot(sk, ctx); + rc = 1; + break; + } + } +out: + spin_unlock_bh(&device_spinlock); + return rc; +} + +void tls_toe_unhash(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_toe_device *dev; + + spin_lock_bh(&device_spinlock); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->unhash) { + kref_get(&dev->kref); + spin_unlock_bh(&device_spinlock); + dev->unhash(dev, sk); + kref_put(&dev->kref, dev->release); + spin_lock_bh(&device_spinlock); + } + } + spin_unlock_bh(&device_spinlock); + ctx->sk_proto->unhash(sk); +} + +int tls_toe_hash(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_toe_device *dev; + int err; + + err = ctx->sk_proto->hash(sk); + spin_lock_bh(&device_spinlock); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->hash) { + kref_get(&dev->kref); + spin_unlock_bh(&device_spinlock); + err |= dev->hash(dev, sk); + kref_put(&dev->kref, dev->release); + spin_lock_bh(&device_spinlock); + } + } + spin_unlock_bh(&device_spinlock); + + if (err) + tls_toe_unhash(sk); + return err; +} + +void tls_toe_register_device(struct tls_toe_device *device) +{ + spin_lock_bh(&device_spinlock); + list_add_tail(&device->dev_list, &device_list); + spin_unlock_bh(&device_spinlock); +} +EXPORT_SYMBOL(tls_toe_register_device); + +void tls_toe_unregister_device(struct tls_toe_device *device) +{ + spin_lock_bh(&device_spinlock); + list_del(&device->dev_list); + spin_unlock_bh(&device_spinlock); +} +EXPORT_SYMBOL(tls_toe_unregister_device); diff --git a/net/tls/trace.c b/net/tls/trace.c new file mode 100644 index 000000000000..e374913cf9c9 --- /dev/null +++ b/net/tls/trace.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/module.h> + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" + +#endif diff --git a/net/tls/trace.h b/net/tls/trace.h new file mode 100644 index 000000000000..9ba5f600ea43 --- /dev/null +++ b/net/tls/trace.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tls + +#if !defined(_TLS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _TLS_TRACE_H_ + +#include <asm/unaligned.h> +#include <linux/tracepoint.h> + +struct sock; + +TRACE_EVENT(tls_device_offload_set, + + TP_PROTO(struct sock *sk, int dir, u32 tcp_seq, u8 *rec_no, int ret), + + TP_ARGS(sk, dir, tcp_seq, rec_no, ret), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u64, rec_no ) + __field( int, dir ) + __field( u32, tcp_seq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->rec_no = get_unaligned_be64(rec_no); + __entry->dir = dir; + __entry->tcp_seq = tcp_seq; + __entry->ret = ret; + ), + + TP_printk( + "sk=%p direction=%d tcp_seq=%u rec_no=%llu ret=%d", + __entry->sk, __entry->dir, __entry->tcp_seq, __entry->rec_no, + __entry->ret + ) +); + +TRACE_EVENT(tls_device_decrypted, + + TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, u32 rec_len, + bool encrypted, bool decrypted), + + TP_ARGS(sk, tcp_seq, rec_no, rec_len, encrypted, decrypted), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u64, rec_no ) + __field( u32, tcp_seq ) + __field( u32, rec_len ) + __field( bool, encrypted ) + __field( bool, decrypted ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->rec_no = get_unaligned_be64(rec_no); + __entry->tcp_seq = tcp_seq; + __entry->rec_len = rec_len; + __entry->encrypted = encrypted; + __entry->decrypted = decrypted; + ), + + TP_printk( + "sk=%p tcp_seq=%u rec_no=%llu len=%u encrypted=%d decrypted=%d", + __entry->sk, __entry->tcp_seq, + __entry->rec_no, __entry->rec_len, + __entry->encrypted, __entry->decrypted + ) +); + +TRACE_EVENT(tls_device_rx_resync_send, + + TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, int sync_type), + + TP_ARGS(sk, tcp_seq, rec_no, sync_type), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u64, rec_no ) + __field( u32, tcp_seq ) + __field( int, sync_type ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->rec_no = get_unaligned_be64(rec_no); + __entry->tcp_seq = tcp_seq; + __entry->sync_type = sync_type; + ), + + TP_printk( + "sk=%p tcp_seq=%u rec_no=%llu sync_type=%d", + __entry->sk, __entry->tcp_seq, __entry->rec_no, + __entry->sync_type + ) +); + +TRACE_EVENT(tls_device_rx_resync_nh_schedule, + + TP_PROTO(struct sock *sk), + + TP_ARGS(sk), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + ), + + TP_fast_assign( + __entry->sk = sk; + ), + + TP_printk( + "sk=%p", __entry->sk + ) +); + +TRACE_EVENT(tls_device_rx_resync_nh_delay, + + TP_PROTO(struct sock *sk, u32 sock_data, u32 rec_len), + + TP_ARGS(sk, sock_data, rec_len), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u32, sock_data ) + __field( u32, rec_len ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->sock_data = sock_data; + __entry->rec_len = rec_len; + ), + + TP_printk( + "sk=%p sock_data=%u rec_len=%u", + __entry->sk, __entry->sock_data, __entry->rec_len + ) +); + +TRACE_EVENT(tls_device_tx_resync_req, + + TP_PROTO(struct sock *sk, u32 tcp_seq, u32 exp_tcp_seq), + + TP_ARGS(sk, tcp_seq, exp_tcp_seq), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u32, tcp_seq ) + __field( u32, exp_tcp_seq ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->tcp_seq = tcp_seq; + __entry->exp_tcp_seq = exp_tcp_seq; + ), + + TP_printk( + "sk=%p tcp_seq=%u exp_tcp_seq=%u", + __entry->sk, __entry->tcp_seq, __entry->exp_tcp_seq + ) +); + +TRACE_EVENT(tls_device_tx_resync_send, + + TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no), + + TP_ARGS(sk, tcp_seq, rec_no), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u64, rec_no ) + __field( u32, tcp_seq ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->rec_no = get_unaligned_be64(rec_no); + __entry->tcp_seq = tcp_seq; + ), + + TP_printk( + "sk=%p tcp_seq=%u rec_no=%llu", + __entry->sk, __entry->tcp_seq, __entry->rec_no + ) +); + +#endif /* _TLS_TRACE_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +#include <trace/define_trace.h> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 67e87db5877f..c853ad0875f4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -284,11 +284,9 @@ static struct sock *__unix_find_socket_byname(struct net *net, if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) - goto found; + return s; } - s = NULL; -found: - return s; + return NULL; } static inline struct sock *unix_find_socket_byname(struct net *net, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ab47bf3ab66e..2ab43b2bba31 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -638,7 +638,7 @@ struct sock *__vsock_create(struct net *net, } EXPORT_SYMBOL_GPL(__vsock_create); -static void __vsock_release(struct sock *sk) +static void __vsock_release(struct sock *sk, int level) { if (sk) { struct sk_buff *skb; @@ -648,9 +648,17 @@ static void __vsock_release(struct sock *sk) vsk = vsock_sk(sk); pending = NULL; /* Compiler warning. */ + /* The release call is supposed to use lock_sock_nested() + * rather than lock_sock(), if a sock lock should be acquired. + */ transport->release(vsk); - lock_sock(sk); + /* When "level" is SINGLE_DEPTH_NESTING, use the nested + * version to avoid the warning "possible recursive locking + * detected". When "level" is 0, lock_sock_nested(sk, level) + * is the same as lock_sock(sk). + */ + lock_sock_nested(sk, level); sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; @@ -659,7 +667,7 @@ static void __vsock_release(struct sock *sk) /* Clean up any sockets that never were accepted. */ while ((pending = vsock_dequeue_accept(sk)) != NULL) { - __vsock_release(pending); + __vsock_release(pending, SINGLE_DEPTH_NESTING); sock_put(pending); } @@ -708,7 +716,7 @@ EXPORT_SYMBOL_GPL(vsock_stream_has_space); static int vsock_release(struct socket *sock) { - __vsock_release(sock->sk); + __vsock_release(sock->sk, 0); sock->sk = NULL; sock->state = SS_FREE; diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 261521d286d6..c443db7af8d4 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -559,7 +559,7 @@ static void hvs_release(struct vsock_sock *vsk) struct sock *sk = sk_vsock(vsk); bool remove_sock; - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); remove_sock = hvs_close_lock_held(vsk); release_sock(sk); if (remove_sock) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 5bb70c692b1e..111dd8e08203 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -264,6 +264,55 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, } static ssize_t +virtio_transport_stream_do_peek(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt *pkt; + size_t bytes, total = 0, off; + int err = -EFAULT; + + spin_lock_bh(&vvs->rx_lock); + + list_for_each_entry(pkt, &vvs->rx_queue, list) { + off = pkt->off; + + if (total == len) + break; + + while (total < len && off < pkt->len) { + bytes = len - total; + if (bytes > pkt->len - off) + bytes = pkt->len - off; + + /* sk_lock is held by caller so no one else can dequeue. + * Unlock rx_lock since memcpy_to_msg() may sleep. + */ + spin_unlock_bh(&vvs->rx_lock); + + err = memcpy_to_msg(msg, pkt->buf + off, bytes); + if (err) + goto out; + + spin_lock_bh(&vvs->rx_lock); + + total += bytes; + off += bytes; + } + } + + spin_unlock_bh(&vvs->rx_lock); + + return total; + +out: + if (total) + err = total; + return err; +} + +static ssize_t virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len) @@ -335,9 +384,9 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk, size_t len, int flags) { if (flags & MSG_PEEK) - return -EOPNOTSUPP; - - return virtio_transport_stream_do_dequeue(vsk, msg, len); + return virtio_transport_stream_do_peek(vsk, msg, len); + else + return virtio_transport_stream_do_dequeue(vsk, msg, len); } EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); @@ -820,7 +869,7 @@ void virtio_transport_release(struct vsock_sock *vsk) struct sock *sk = &vsk->sk; bool remove_sock = true; - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_type == SOCK_STREAM) remove_sock = virtio_transport_close(vsk); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d21b1581a665..7a7b63550eb6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -201,6 +201,38 @@ cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) return __cfg80211_rdev_from_attrs(netns, info->attrs); } +static int validate_beacon_head(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *data = nla_data(attr); + unsigned int len = nla_len(attr); + const struct element *elem; + const struct ieee80211_mgmt *mgmt = (void *)data; + unsigned int fixedlen = offsetof(struct ieee80211_mgmt, + u.beacon.variable); + + if (len < fixedlen) + goto err; + + if (ieee80211_hdrlen(mgmt->frame_control) != + offsetof(struct ieee80211_mgmt, u.beacon)) + goto err; + + data += fixedlen; + len -= fixedlen; + + for_each_element(elem, data, len) { + /* nothing */ + } + + if (for_each_element_completed(elem, data, len)) + return 0; + +err: + NL_SET_ERR_MSG_ATTR(extack, attr, "malformed beacon head"); + return -EINVAL; +} + static int validate_ie_attr(const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -338,8 +370,9 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 }, - [NL80211_ATTR_BEACON_HEAD] = { .type = NLA_BINARY, - .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_BEACON_HEAD] = + NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_beacon_head, + IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_BEACON_TAIL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), @@ -2636,6 +2669,8 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, control_freq = nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]); + memset(chandef, 0, sizeof(*chandef)); + chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq); chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = control_freq; @@ -3176,7 +3211,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (rdev->ops->get_channel) { int ret; - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = {}; ret = rdev_get_channel(rdev, wdev, &chandef); if (ret == 0) { @@ -6270,6 +6305,9 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_mpath) return -EOPNOTSUPP; + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + return rdev_del_mpath(rdev, dev, dst); } @@ -8227,10 +8265,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, /* leave request id zero for legacy request * or if driver does not support multi-scheduled scan */ - if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) { - while (!sched_scan_req->reqid) - sched_scan_req->reqid = cfg80211_assign_cookie(rdev); - } + if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) + sched_scan_req->reqid = cfg80211_assign_cookie(rdev); err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5311d0ae2454..420c4207ab59 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2108,7 +2108,7 @@ static void reg_call_notifier(struct wiphy *wiphy, static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) { - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = {}; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); enum nl80211_iftype iftype; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d313c9befa23..ff1016607f0b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1723,7 +1723,12 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, return; new_ie_len -= trans_ssid[1]; mbssid = cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen); - if (!mbssid) + /* + * It's not valid to have the MBSSID element before SSID + * ignore if that happens - the code below assumes it is + * after (while copying things inbetween). + */ + if (!mbssid || mbssid < trans_ssid) return; new_ie_len -= mbssid[1]; rcu_read_lock(); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7b6529d81c61..cac9e28d852b 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -798,7 +798,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = {}; int ret; switch (wdev->iftype) { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 6088bc2dc11e..9b599ed66d97 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -706,7 +706,7 @@ resume: if (err) goto drop; - nf_reset(skb); + nf_reset_ct(skb); if (decaps) { sp = skb_sec_path(skb); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 2ab4859df55a..0f5131bc3342 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -185,7 +185,7 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); nf_reset_trace(skb); if (!xnet) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9499b35feb92..b1db55b50ba1 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -502,7 +502,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) struct net *net = xs_net(skb_dst(skb)->xfrm); while (likely((err = xfrm_output_one(skb, err)) == 0)) { - nf_reset(skb); + nf_reset_ct(skb); err = skb_dst(skb)->ops->local_out(net, skb->sk, skb); if (unlikely(err != 1)) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 21e939235b39..f2d1e573ea55 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2808,7 +2808,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) continue; } - nf_reset(skb); + nf_reset_ct(skb); skb_dst_drop(skb); skb_dst_set(skb, dst); diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst index fd39215db508..3f6483e8b2df 100644 --- a/samples/pktgen/README.rst +++ b/samples/pktgen/README.rst @@ -18,7 +18,7 @@ across the sample scripts. Usage example is printed on errors:: Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX -i : ($DEV) output interface/device (required) -s : ($PKT_SIZE) packet size - -d : ($DEST_IP) destination IP + -d : ($DEST_IP) destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed -m : ($DST_MAC) destination MAC-addr -p : ($DST_PORT) destination PORT range (e.g. 433-444) is also allowed -t : ($THREADS) threads to start diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index 4af4046d71be..dae06d5b38fa 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -5,6 +5,8 @@ # Author: Jesper Dangaaard Brouer # License: GPL +set -o errexit + ## -- General shell logging cmds -- function err() { local exitcode=$1 @@ -58,6 +60,7 @@ function pg_set() { function proc_cmd() { local result local proc_file=$1 + local status=0 # after shift, the remaining args are contained in $@ shift local proc_ctrl=${PROC_DIR}/$proc_file @@ -73,13 +76,13 @@ function proc_cmd() { echo "cmd: $@ > $proc_ctrl" fi # Quoting of "$@" is important for space expansion - echo "$@" > "$proc_ctrl" - local status=$? + echo "$@" > "$proc_ctrl" || status=$? - result=$(grep "Result: OK:" $proc_ctrl) - # Due to pgctrl, cannot use exit code $? from grep - if [[ "$result" == "" ]]; then - grep "Result:" $proc_ctrl >&2 + if [[ "$proc_file" != "pgctrl" ]]; then + result=$(grep "Result: OK:" $proc_ctrl) || true + if [[ "$result" == "" ]]; then + grep "Result:" $proc_ctrl >&2 + fi fi if (( $status != 0 )); then err 5 "Write error($status) occurred cmd: \"$@ > $proc_ctrl\"" @@ -105,6 +108,8 @@ function pgset() { fi } +[[ $EUID -eq 0 ]] && trap 'pg_ctrl "reset"' EXIT + ## -- General shell tricks -- function root_check_run_with_sudo() { @@ -163,6 +168,137 @@ function get_node_cpus() echo $node_cpu_list } +# Check $1 is in between $2, $3 ($2 <= $1 <= $3) +function in_between() { [[ ($1 -ge $2) && ($1 -le $3) ]] ; } + +# Extend shrunken IPv6 address. +# fe80::42:bcff:fe84:e10a => fe80:0:0:0:42:bcff:fe84:e10a +function extend_addr6() +{ + local addr=$1 + local sep=: sep2=:: + local sep_cnt=$(tr -cd $sep <<< $1 | wc -c) + local shrink + + # separator count should be (2 <= $sep_cnt <= 7) + if ! (in_between $sep_cnt 2 7); then + err 5 "Invalid IP6 address: $1" + fi + + # if shrink '::' occurs multiple, it's malformed. + shrink=( $(egrep -o "$sep{2,}" <<< $addr) ) + if [[ ${#shrink[@]} -ne 0 ]]; then + if [[ ${#shrink[@]} -gt 1 || ( ${shrink[0]} != $sep2 ) ]]; then + err 5 "Invalid IP6 address: $1" + fi + fi + + # add 0 at begin & end, and extend addr by adding :0 + [[ ${addr:0:1} == $sep ]] && addr=0${addr} + [[ ${addr: -1} == $sep ]] && addr=${addr}0 + echo "${addr/$sep2/$(printf ':0%.s' $(seq $[8-sep_cnt])):}" +} + +# Given a single IP(v4/v6) address, whether it is valid. +function validate_addr() +{ + # check function is called with (funcname)6 + [[ ${FUNCNAME[1]: -1} == 6 ]] && local IP6=6 + local bitlen=$[ IP6 ? 128 : 32 ] + local len=$[ IP6 ? 8 : 4 ] + local max=$[ 2**(len*2)-1 ] + local net prefix + local addr sep + + IFS='/' read net prefix <<< $1 + [[ $IP6 ]] && net=$(extend_addr6 $net) + + # if prefix exists, check (0 <= $prefix <= $bitlen) + if [[ -n $prefix ]]; then + if ! (in_between $prefix 0 $bitlen); then + err 5 "Invalid prefix: /$prefix" + fi + fi + + # set separator for each IP(v4/v6) + [[ $IP6 ]] && sep=: || sep=. + IFS=$sep read -a addr <<< $net + + # array length + if [[ ${#addr[@]} != $len ]]; then + err 5 "Invalid IP$IP6 address: $1" + fi + + # check each digit (0 <= $digit <= $max) + for digit in "${addr[@]}"; do + [[ $IP6 ]] && digit=$[ 16#$digit ] + if ! (in_between $digit 0 $max); then + err 5 "Invalid IP$IP6 address: $1" + fi + done + + return 0 +} + +function validate_addr6() { validate_addr $@ ; } + +# Given a single IP(v4/v6) or CIDR, return minimum and maximum IP addr. +function parse_addr() +{ + # check function is called with (funcname)6 + [[ ${FUNCNAME[1]: -1} == 6 ]] && local IP6=6 + local net prefix + local min_ip max_ip + + IFS='/' read net prefix <<< $1 + [[ $IP6 ]] && net=$(extend_addr6 $net) + + if [[ -z $prefix ]]; then + min_ip=$net + max_ip=$net + else + # defining array for converting Decimal 2 Binary + # 00000000 00000001 00000010 00000011 00000100 ... + local d2b='{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}' + [[ $IP6 ]] && d2b+=$d2b + eval local D2B=($d2b) + + local bitlen=$[ IP6 ? 128 : 32 ] + local remain=$[ bitlen-prefix ] + local octet=$[ IP6 ? 16 : 8 ] + local min_mask max_mask + local min max + local ip_bit + local ip sep + + # set separator for each IP(v4/v6) + [[ $IP6 ]] && sep=: || sep=. + IFS=$sep read -ra ip <<< $net + + min_mask="$(printf '1%.s' $(seq $prefix))$(printf '0%.s' $(seq $remain))" + max_mask="$(printf '0%.s' $(seq $prefix))$(printf '1%.s' $(seq $remain))" + + # calculate min/max ip with &,| operator + for i in "${!ip[@]}"; do + digit=$[ IP6 ? 16#${ip[$i]} : ${ip[$i]} ] + ip_bit=${D2B[$digit]} + + idx=$[ octet*i ] + min[$i]=$[ 2#$ip_bit & 2#${min_mask:$idx:$octet} ] + max[$i]=$[ 2#$ip_bit | 2#${max_mask:$idx:$octet} ] + [[ $IP6 ]] && { min[$i]=$(printf '%X' ${min[$i]}); + max[$i]=$(printf '%X' ${max[$i]}); } + done + + min_ip=$(IFS=$sep; echo "${min[*]}") + max_ip=$(IFS=$sep; echo "${max[*]}") + fi + + echo $min_ip $max_ip +} + +function parse_addr6() { parse_addr $@ ; } + # Given a single or range of port(s), return minimum and maximum port number. function parse_ports() { @@ -185,9 +321,9 @@ function validate_ports() local min_port=$1 local max_port=$2 - # 0 < port < 65536 - if [[ $min_port -gt 0 && $min_port -lt 65536 ]]; then - if [[ $max_port -gt 0 && $max_port -lt 65536 ]]; then + # 1 <= port <= 65535 + if (in_between $min_port 1 65535); then + if (in_between $max_port 1 65535); then if [[ $min_port -le $max_port ]]; then return 0 fi diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh index a06b00a0c7b6..ff0ed474fee9 100644 --- a/samples/pktgen/parameters.sh +++ b/samples/pktgen/parameters.sh @@ -8,7 +8,7 @@ function usage() { echo "Usage: $0 [-vx] -i ethX" echo " -i : (\$DEV) output interface/device (required)" echo " -s : (\$PKT_SIZE) packet size" - echo " -d : (\$DEST_IP) destination IP" + echo " -d : (\$DEST_IP) destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed" echo " -m : (\$DST_MAC) destination MAC-addr" echo " -p : (\$DST_PORT) destination PORT range (e.g. 433-444) is also allowed" echo " -t : (\$THREADS) threads to start" diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh index e14b1a9144d9..1b6204125d2d 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -41,9 +41,13 @@ fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=1024 [ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config @@ -71,13 +75,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Inject packet into RX path of stack diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh index 82c3e504e056..e607cb369b20 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -24,9 +24,13 @@ if [[ -n "$BURST" ]]; then err 1 "Bursting not supported for this mode" fi [ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config @@ -54,13 +58,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Inject packet into TX qdisc egress path of stack diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh index d1702fdde8f3..a4e250b45dce 100755 --- a/samples/pktgen/pktgen_sample01_simple.sh +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -22,17 +22,21 @@ fi # Example enforce param "-m" for dst_mac [ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac" [ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config DELAY="0" # Zero means max speed # Flow variation random source port between min and max -UDP_MIN=9 -UDP_MAX=109 +UDP_SRC_MIN=9 +UDP_SRC_MAX=109 # General cleanup everything since last run # (especially important if other threads were configured by other scripts) @@ -61,19 +65,20 @@ pg_set $DEV "flag NO_TIMESTAMP" # Destination pg_set $DEV "dst_mac $DST_MAC" -pg_set $DEV "dst$IP6 $DEST_IP" +pg_set $DEV "dst${IP6}_min $DST_MIN" +pg_set $DEV "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $DEV "flag UDPDST_RND" - pg_set $DEV "udp_dst_min $DST_MIN" - pg_set $DEV "udp_dst_max $DST_MAX" + pg_set $DEV "udp_dst_min $UDP_DST_MIN" + pg_set $DEV "udp_dst_max $UDP_DST_MAX" fi # Setup random UDP port src range pg_set $DEV "flag UDPSRC_RND" -pg_set $DEV "udp_src_min $UDP_MIN" -pg_set $DEV "udp_src_max $UDP_MAX" +pg_set $DEV "udp_src_min $UDP_SRC_MIN" +pg_set $DEV "udp_src_max $UDP_SRC_MAX" # start_run echo "Running... ctrl^C to stop" >&2 diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh index 7f7a9a27548f..cb2495fcdc60 100755 --- a/samples/pktgen/pktgen_sample02_multiqueue.sh +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -21,17 +21,21 @@ DELAY="0" # Zero means max speed [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Flow variation random source port between min and max -UDP_MIN=9 -UDP_MAX=109 +UDP_SRC_MIN=9 +UDP_SRC_MAX=109 # (example of setting default params in your script) if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # General cleanup everything since last run @@ -62,19 +66,20 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Setup random UDP port src range pg_set $dev "flag UDPSRC_RND" - pg_set $dev "udp_src_min $UDP_MIN" - pg_set $dev "udp_src_max $UDP_MAX" + pg_set $dev "udp_src_min $UDP_SRC_MIN" + pg_set $dev "udp_src_max $UDP_SRC_MAX" done # start_run diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index b520637817ce..fff50765a5aa 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -33,9 +33,13 @@ fi [ -z "$BURST" ] && BURST=32 [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # No need for clones when bursting [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config @@ -62,13 +66,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Setup burst, for easy testing -b 0 disable bursting diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh index 5b6e9d9cb5b5..2cd6b701400d 100755 --- a/samples/pktgen/pktgen_sample04_many_flows.sh +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -17,9 +17,13 @@ source ${basedir}/parameters.sh [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # NOTICE: Script specific settings @@ -37,6 +41,9 @@ if [[ -n "$BURST" ]]; then err 1 "Bursting not supported for this mode" fi +# 198.18.0.0 / 198.19.255.255 +read -r SRC_MIN SRC_MAX <<< $(parse_addr 198.18.0.0/15) + # General cleanup everything since last run pg_ctrl "reset" @@ -58,19 +65,20 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Single destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst $DEST_IP" + pg_set $dev "dst_min $DST_MIN" + pg_set $dev "dst_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Randomize source IP-addresses pg_set $dev "flag IPSRC_RND" - pg_set $dev "src_min 198.18.0.0" - pg_set $dev "src_max 198.19.255.255" + pg_set $dev "src_min $SRC_MIN" + pg_set $dev "src_max $SRC_MAX" # Limit number of flows (max 65535) pg_set $dev "flows $FLOWS" diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh index 0c06e63fbe97..4cb6252ade39 100755 --- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -22,9 +22,13 @@ source ${basedir}/parameters.sh [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$BURST" ] && BURST=32 [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config @@ -51,13 +55,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Single destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst $DEST_IP" + pg_set $dev "dst_min $DST_MIN" + pg_set $dev "dst_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Setup source IP-addresses based on thread number diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh index 97f0266c0356..728106060a02 100755 --- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh +++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh @@ -20,8 +20,8 @@ DELAY="0" # Zero means max speed [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Flow variation random source port between min and max -UDP_MIN=9 -UDP_MAX=109 +UDP_SRC_MIN=9 +UDP_SRC_MAX=109 node=`get_iface_node $DEV` irq_array=(`get_iface_irqs $DEV`) @@ -35,9 +35,13 @@ if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # General cleanup everything since last run @@ -79,19 +83,20 @@ for ((i = 0; i < $THREADS; i++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Setup random UDP port src range pg_set $dev "flag UDPSRC_RND" - pg_set $dev "udp_src_min $UDP_MIN" - pg_set $dev "udp_src_max $UDP_MAX" + pg_set $dev "udp_src_min $UDP_SRC_MIN" + pg_set $dev "udp_src_max $UDP_SRC_MAX" done # start_run diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 4b0432e095ae..10ba926ae292 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -143,11 +143,6 @@ cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || e # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) -# ar-option -# Usage: KBUILD_ARFLAGS := $(call ar-option,D) -# Important: no spaces around options -ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2)) - # ld-version # Note this is mainly for HJ Lu's 3 number binutil versions ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index f72aba64d611..a9e47953ca53 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -389,7 +389,7 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ; ifdef builtin-target quiet_cmd_ar_builtin = AR $@ - cmd_ar_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(real-prereqs) + cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs) $(builtin-target): $(real-obj-y) FORCE $(call if_changed,ar_builtin) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 4a0cdd6f5909..179d55af5852 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -232,7 +232,7 @@ quiet_cmd_ld = LD $@ # --------------------------------------------------------------------------- quiet_cmd_ar = AR $@ - cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(real-prereqs) + cmd_ar = rm -f $@; $(AR) cDPrsT $@ $(real-prereqs) # Objcopy # --------------------------------------------------------------------------- diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 3961941e8e7a..442d5e2ad688 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2652,15 +2652,20 @@ int main(int argc, char **argv) fatal("modpost: Section mismatches detected.\n" "Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.\n"); for (n = 0; n < SYMBOL_HASH_SIZE; n++) { - struct symbol *s = symbolhash[n]; + struct symbol *s; + + for (s = symbolhash[n]; s; s = s->next) { + /* + * Do not check "vmlinux". This avoids the same warnings + * shown twice, and false-positives for ARCH=um. + */ + if (is_vmlinux(s->module->name) && !s->module->is_dot_o) + continue; - while (s) { if (s->is_static) warn("\"%s\" [%s] is a static %s\n", s->name, s->module->name, export_str(s->export)); - - s = s->next; } } diff --git a/scripts/namespace.pl b/scripts/namespace.pl index 6135574a6f39..1da7bca201a4 100755 --- a/scripts/namespace.pl +++ b/scripts/namespace.pl @@ -65,13 +65,14 @@ use warnings; use strict; use File::Find; +use File::Spec; my $nm = ($ENV{'NM'} || "nm") . " -p"; my $objdump = ($ENV{'OBJDUMP'} || "objdump") . " -s -j .comment"; -my $srctree = ""; -my $objtree = ""; -$srctree = "$ENV{'srctree'}/" if (exists($ENV{'srctree'})); -$objtree = "$ENV{'objtree'}/" if (exists($ENV{'objtree'})); +my $srctree = File::Spec->curdir(); +my $objtree = File::Spec->curdir(); +$srctree = File::Spec->rel2abs($ENV{'srctree'}) if (exists($ENV{'srctree'})); +$objtree = File::Spec->rel2abs($ENV{'objtree'}) if (exists($ENV{'objtree'})); if ($#ARGV != -1) { print STDERR "usage: $0 takes no parameters\n"; @@ -231,9 +232,9 @@ sub do_nm } ($source = $basename) =~ s/\.o$//; if (-e "$source.c" || -e "$source.S") { - $source = "$objtree$File::Find::dir/$source"; + $source = File::Spec->catfile($objtree, $File::Find::dir, $source) } else { - $source = "$srctree$File::Find::dir/$source"; + $source = File::Spec->catfile($srctree, $File::Find::dir, $source) } if (! -e "$source.c" && ! -e "$source.S") { # No obvious source, exclude the object if it is conglomerate diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 365b3c2b8f43..220dae0db3f1 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -126,7 +126,7 @@ scm_version() collect_files() { - local file res + local file res= for file; do case "$file" in diff --git a/security/integrity/Makefile b/security/integrity/Makefile index 19faace69644..35e6ca773734 100644 --- a/security/integrity/Makefile +++ b/security/integrity/Makefile @@ -13,9 +13,6 @@ integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyrin integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \ platform_certs/load_uefi.o integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o -$(obj)/load_uefi.o: KBUILD_CFLAGS += -fshort-wchar -subdir-$(CONFIG_IMA) += ima obj-$(CONFIG_IMA) += ima/ -subdir-$(CONFIG_EVM) += evm obj-$(CONFIG_EVM) += evm/ diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 448d686da8b1..0bf5640f1f07 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -4,6 +4,7 @@ */ #ifndef __NFIT_TEST_H__ #define __NFIT_TEST_H__ +#include <linux/acpi.h> #include <linux/list.h> #include <linux/uuid.h> #include <linux/ioport.h> @@ -202,9 +203,6 @@ struct nd_intel_lss { __u32 status; } __packed; -union acpi_object; -typedef void *acpi_handle; - typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 115837355eaf..ee89cd2f5bee 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -3,7 +3,9 @@ lib_dir=$(dirname $0)/../../../net/forwarding -ALL_TESTS="fw_flash_test params_test regions_test" +ALL_TESTS="fw_flash_test params_test regions_test reload_test \ + netns_reload_test resource_test dev_info_test \ + empty_reporter_test dummy_reporter_test" NUM_NETIFS=0 source $lib_dir/lib.sh @@ -142,6 +144,290 @@ regions_test() log_test "regions test" } +reload_test() +{ + RET=0 + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload" + + echo "y"> $DEBUGFS_DIR/fail_reload + check_err $? "Failed to setup devlink reload to fail" + + devlink dev reload $DL_HANDLE + check_fail $? "Unexpected success of devlink reload" + + echo "n"> $DEBUGFS_DIR/fail_reload + check_err $? "Failed to setup devlink reload not to fail" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload after set not to fail" + + echo "y"> $DEBUGFS_DIR/dont_allow_reload + check_err $? "Failed to forbid devlink reload" + + devlink dev reload $DL_HANDLE + check_fail $? "Unexpected success of devlink reload" + + echo "n"> $DEBUGFS_DIR/dont_allow_reload + check_err $? "Failed to re-enable devlink reload" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload after re-enable" + + log_test "reload test" +} + +netns_reload_test() +{ + RET=0 + + ip netns add testns1 + check_err $? "Failed add netns \"testns1\"" + ip netns add testns2 + check_err $? "Failed add netns \"testns2\"" + + devlink dev reload $DL_HANDLE netns testns1 + check_err $? "Failed to reload into netns \"testns1\"" + + devlink -N testns1 dev reload $DL_HANDLE netns testns2 + check_err $? "Failed to reload from netns \"testns1\" into netns \"testns2\"" + + ip netns del testns2 + ip netns del testns1 + + log_test "netns reload test" +} + +DUMMYDEV="dummytest" + +res_val_get() +{ + local netns=$1 + local parentname=$2 + local name=$3 + local type=$4 + + cmd_jq "devlink -N $netns resource show $DL_HANDLE -j" \ + ".[][][] | select(.name == \"$parentname\").resources[] \ + | select(.name == \"$name\").$type" +} + +resource_test() +{ + RET=0 + + ip netns add testns1 + check_err $? "Failed add netns \"testns1\"" + ip netns add testns2 + check_err $? "Failed add netns \"testns2\"" + + devlink dev reload $DL_HANDLE netns testns1 + check_err $? "Failed to reload into netns \"testns1\"" + + # Create dummy dev to add the address and routes on. + + ip -n testns1 link add name $DUMMYDEV type dummy + check_err $? "Failed create dummy device" + ip -n testns1 link set $DUMMYDEV up + check_err $? "Failed bring up dummy device" + ip -n testns1 a a 192.0.1.1/24 dev $DUMMYDEV + check_err $? "Failed add an IP address to dummy device" + + local occ=$(res_val_get testns1 IPv4 fib occ) + local limit=$((occ+1)) + + # Set fib size limit to handle one another route only. + + devlink -N testns1 resource set $DL_HANDLE path IPv4/fib size $limit + check_err $? "Failed to set IPv4/fib resource size" + local size_new=$(res_val_get testns1 IPv4 fib size_new) + [ "$size_new" -eq "$limit" ] + check_err $? "Unexpected \"size_new\" value (got $size_new, expected $limit)" + + devlink -N testns1 dev reload $DL_HANDLE + check_err $? "Failed to reload" + local size=$(res_val_get testns1 IPv4 fib size) + [ "$size" -eq "$limit" ] + check_err $? "Unexpected \"size\" value (got $size, expected $limit)" + + # Insert 2 routes, the first is going to be inserted, + # the second is expected to fail to be inserted. + + ip -n testns1 r a 192.0.2.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + + ip -n testns1 r a 192.0.3.0/24 via 192.0.1.2 + check_fail $? "Unexpected successful route add over limit" + + # Now create another dummy in second network namespace and + # insert two routes. That is over the limit of the netdevsim + # instance in the first namespace. Move the netdevsim instance + # into the second namespace and expect it to fail. + + ip -n testns2 link add name $DUMMYDEV type dummy + check_err $? "Failed create dummy device" + ip -n testns2 link set $DUMMYDEV up + check_err $? "Failed bring up dummy device" + ip -n testns2 a a 192.0.1.1/24 dev $DUMMYDEV + check_err $? "Failed add an IP address to dummy device" + ip -n testns2 r a 192.0.2.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + ip -n testns2 r a 192.0.3.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + + devlink -N testns1 dev reload $DL_HANDLE netns testns2 + check_fail $? "Unexpected successful reload from netns \"testns1\" into netns \"testns2\"" + + ip netns del testns2 + ip netns del testns1 + + log_test "resource test" +} + +info_get() +{ + local name=$1 + + cmd_jq "devlink dev info $DL_HANDLE -j" ".[][][\"$name\"]" "-e" +} + +dev_info_test() +{ + RET=0 + + driver=$(info_get "driver") + check_err $? "Failed to get driver name" + [ "$driver" == "netdevsim" ] + check_err $? "Unexpected driver name $driver" + + log_test "dev_info test" +} + +empty_reporter_test() +{ + RET=0 + + devlink health show $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed show empty reporter" + + devlink health dump show $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed show dump of empty reporter" + + devlink health diagnose $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed diagnose empty reporter" + + devlink health recover $DL_HANDLE reporter empty + check_err $? "Failed recover empty reporter" + + log_test "empty reporter test" +} + +check_reporter_info() +{ + local name=$1 + local expected_state=$2 + local expected_error=$3 + local expected_recover=$4 + local expected_grace_period=$5 + local expected_auto_recover=$6 + + local show=$(devlink health show $DL_HANDLE reporter $name -j | jq -e -r ".[][][]") + check_err $? "Failed show $name reporter" + + local state=$(echo $show | jq -r ".state") + [ "$state" == "$expected_state" ] + check_err $? "Unexpected \"state\" value (got $state, expected $expected_state)" + + local error=$(echo $show | jq -r ".error") + [ "$error" == "$expected_error" ] + check_err $? "Unexpected \"error\" value (got $error, expected $expected_error)" + + local recover=`echo $show | jq -r ".recover"` + [ "$recover" == "$expected_recover" ] + check_err $? "Unexpected \"recover\" value (got $recover, expected $expected_recover)" + + local grace_period=$(echo $show | jq -r ".grace_period") + check_err $? "Failed get $name reporter grace_period" + [ "$grace_period" == "$expected_grace_period" ] + check_err $? "Unexpected \"grace_period\" value (got $grace_period, expected $expected_grace_period)" + + local auto_recover=$(echo $show | jq -r ".auto_recover") + [ "$auto_recover" == "$expected_auto_recover" ] + check_err $? "Unexpected \"auto_recover\" value (got $auto_recover, expected $expected_auto_recover)" +} + +dummy_reporter_test() +{ + RET=0 + + check_reporter_info dummy healthy 0 0 0 false + + local BREAK_MSG="foo bar" + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_err $? "Failed to break dummy reporter" + + check_reporter_info dummy error 1 0 0 false + + local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j) + check_err $? "Failed show dump of dummy reporter" + + local dump_break_msg=$(echo $dump | jq -r ".break_message") + [ "$dump_break_msg" == "$BREAK_MSG" ] + check_err $? "Unexpected dump break message value (got $dump_break_msg, expected $BREAK_MSG)" + + devlink health dump clear $DL_HANDLE reporter dummy + check_err $? "Failed clear dump of dummy reporter" + + devlink health recover $DL_HANDLE reporter dummy + check_err $? "Failed recover dummy reporter" + + check_reporter_info dummy healthy 1 1 0 false + + devlink health set $DL_HANDLE reporter dummy auto_recover true + check_err $? "Failed to dummy reporter auto_recover option" + + check_reporter_info dummy healthy 1 1 0 true + + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_err $? "Failed to break dummy reporter" + + check_reporter_info dummy healthy 2 2 0 true + + local diagnose=$(devlink health diagnose $DL_HANDLE reporter dummy -j -p) + check_err $? "Failed show diagnose of dummy reporter" + + local rcvrd_break_msg=$(echo $diagnose | jq -r ".recovered_break_message") + [ "$rcvrd_break_msg" == "$BREAK_MSG" ] + check_err $? "Unexpected recovered break message value (got $rcvrd_break_msg, expected $BREAK_MSG)" + + devlink health set $DL_HANDLE reporter dummy grace_period 10 + check_err $? "Failed to dummy reporter grace_period option" + + check_reporter_info dummy healthy 2 2 10 true + + echo "Y"> $DEBUGFS_DIR/health/fail_recover + check_err $? "Failed set dummy reporter recovery to fail" + + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_fail $? "Unexpected success of dummy reporter break" + + check_reporter_info dummy error 3 2 10 true + + devlink health recover $DL_HANDLE reporter dummy + check_fail $? "Unexpected success of dummy reporter recover" + + echo "N"> $DEBUGFS_DIR/health/fail_recover + check_err $? "Failed set dummy reporter recovery to be successful" + + devlink health recover $DL_HANDLE reporter dummy + check_err $? "Failed recover dummy reporter" + + check_reporter_info dummy healthy 3 3 10 true + + log_test "dummy reporter test" +} + setup_prepare() { modprobe netdevsim diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh new file mode 100755 index 000000000000..7effd35369e1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="check_devlink_test check_ports_test" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +BUS_ADDR=10 +PORT_COUNT=4 +DEV_NAME=netdevsim$BUS_ADDR +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ +DL_HANDLE=netdevsim/$DEV_NAME +NETNS_NAME=testns1 + +port_netdev_get() +{ + local port_index=$1 + + cmd_jq "devlink -N $NETNS_NAME port show -j" \ + ".[][\"$DL_HANDLE/$port_index\"].netdev" "-e" +} + +check_ports_test() +{ + RET=0 + + for i in $(seq 0 $(expr $PORT_COUNT - 1)); do + netdev_name=$(port_netdev_get $i) + check_err $? "Failed to get netdev name for port $DL_HANDLE/$i" + ip -n $NETNS_NAME link show $netdev_name &> /dev/null + check_err $? "Failed to find netdev $netdev_name" + done + + log_test "check ports test" +} + +check_devlink_test() +{ + RET=0 + + devlink -N $NETNS_NAME dev show $DL_HANDLE &> /dev/null + check_err $? "Failed to show devlink instance" + + log_test "check devlink test" +} + +setup_prepare() +{ + modprobe netdevsim + ip netns add $NETNS_NAME + ip netns exec $NETNS_NAME \ + echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done +} + +cleanup() +{ + pre_cleanup + echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device + ip netns del $NETNS_NAME + modprobe -r netdevsim +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 8a4025e912cb..ef1e9bafb098 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -95,7 +95,7 @@ echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent _do_fork' # SAME_PROBE +check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE fi exit 0 diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 62c591f87dab..c5ec868fa1e5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -22,6 +22,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test @@ -48,7 +49,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie) # On s390, build the testcases KVM-enabled pgste-option = $(call try-run, echo 'int main() { return 0; }' | \ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0c17f2ee685e..ff234018219c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1083,6 +1083,9 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); #define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU +/* VMX_EPT_VPID_CAP bits */ +#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) + /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 69b17055f63d..6ae5a47fe067 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -569,6 +569,10 @@ struct vmx_pages { void *enlightened_vmcs_hva; uint64_t enlightened_vmcs_gpa; void *enlightened_vmcs; + + void *eptp_hva; + uint64_t eptp_gpa; + void *eptp; }; struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); @@ -576,4 +580,14 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot); +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot); +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); + #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 80a338b5403c..41cf45416060 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -705,7 +705,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * on error (e.g. currently no memory region using memslot as a KVM * memory slot ID). */ -static struct userspace_mem_region * +struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index f36262e0f655..ac50c42750cf 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -68,4 +68,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + #endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c53dbc6bc568..6698cb741e10 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1085,7 +1085,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) for (i = 0; i < nmsrs; i++) state->msrs.entries[i].index = list->indices[i]; r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)", + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", r, r == nmsrs ? -1 : list->indices[r]); r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 9cef0455b819..fab8f6b0bf52 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -7,11 +7,39 @@ #include "test_util.h" #include "kvm_util.h" +#include "../kvm_util_internal.h" #include "processor.h" #include "vmx.h" +#define PAGE_SHIFT_4K 12 + +#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 + bool enable_evmcs; +struct eptPageTableEntry { + uint64_t readable:1; + uint64_t writable:1; + uint64_t executable:1; + uint64_t memory_type:3; + uint64_t ignore_pat:1; + uint64_t page_size:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t ignored_11_10:2; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t suppress_ve:1; +}; + +struct eptPageTablePointer { + uint64_t memory_type:3; + uint64_t page_walk_length:3; + uint64_t ad_enabled:1; + uint64_t reserved_11_07:5; + uint64_t address:40; + uint64_t reserved_63_52:12; +}; int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) { uint16_t evmcs_ver; @@ -174,15 +202,35 @@ bool load_vmcs(struct vmx_pages *vmx) */ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { + uint32_t sec_exec_ctl = 0; + vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); - if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0)) + + if (vmx->eptp_gpa) { + uint64_t ept_paddr; + struct eptPageTablePointer eptp = { + .memory_type = VMX_BASIC_MEM_TYPE_WB, + .page_walk_length = 3, /* + 1 */ + .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS), + .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, + }; + + memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); + vmwrite(EPT_POINTER, ept_paddr); + sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; + } + + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl)) vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); - else + else { vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); + GUEST_ASSERT(!sec_exec_ctl); + } + vmwrite(EXCEPTION_BITMAP, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ @@ -327,3 +375,152 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_host_state(); init_vmcs_guest_state(guest_rip, guest_rsp); } + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot) +{ + uint16_t index[4]; + struct eptPageTableEntry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((nested_paddr % vm->page_size) == 0, + "Nested physical address not on page boundary,\n" + " nested_paddr: 0x%lx vm->page_size: 0x%x", + nested_paddr, vm->page_size); + TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (nested_paddr >> 12) & 0x1ffu; + index[1] = (nested_paddr >> 21) & 0x1ffu; + index[2] = (nested_paddr >> 30) & 0x1ffu; + index[3] = (nested_paddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = vmx->eptp_hva; + if (!pml4e[index[3]].readable) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].readable = true; + pml4e[index[3]].executable = true; + } + + /* Allocate page directory table if not present. */ + struct eptPageTableEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].readable) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].readable = true; + pdpe[index[2]].executable = true; + } + + /* Allocate page table if not present. */ + struct eptPageTableEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].readable) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].readable = true; + pde[index[1]].executable = true; + } + + /* Fill in page table entry. */ + struct eptPageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].readable = true; + pte[index[0]].executable = true; + + /* + * For now mark these as accessed and dirty because the only + * testcase we have needs that. Can be reconsidered later. + */ + pte[index[0]].accessed = true; + pte[index[0]].dirty = true; +} + +/* + * Map a range of EPT guest physical addresses to the VM's physical address + * + * Input Args: + * vm - Virtual Machine + * nested_paddr - Nested guest physical address to map + * paddr - VM Physical Address + * size - The size of the range to map + * eptp_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a nested guest translation for the + * page range starting at nested_paddr to the page range starting at paddr. + */ +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot) +{ + size_t page_size = vm->page_size; + size_t npages = size / page_size; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot); + nested_paddr += page_size; + paddr += page_size; + } +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot) +{ + sparsebit_idx_t i, last; + struct userspace_mem_region *region = + memslot2region(vm, memslot); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + nested_map(vmx, vm, + (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, + 1 << vm->page_shift, + eptp_memslot); + } +} + +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); + vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c new file mode 100644 index 000000000000..0bca1cfe2c1e --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define VCPU_ID 1 + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_MEM_SIZE 3 + +/* L1 guest test virtual memory offset */ +#define GUEST_TEST_MEM 0xc0000000 + +/* L2 guest test virtual memory offset */ +#define NESTED_TEST_MEM1 0xc0001000 +#define NESTED_TEST_MEM2 0xc0002000 + +static void l2_guest_code(void) +{ + *(volatile uint64_t *)NESTED_TEST_MEM1; + *(volatile uint64_t *)NESTED_TEST_MEM1 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + /* Exit to L1 and never come back. */ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(false); + GUEST_ASSERT(!vmlaunch()); + GUEST_SYNC(false); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct vmx_pages *vmx; + unsigned long *bmap; + uint64_t *host_test_mem; + + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + bool done = false; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + run = vcpu_state(vm, VCPU_ID); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + GUEST_TEST_MEM, + TEST_MEM_SLOT_INDEX, + TEST_MEM_SIZE, + KVM_MEM_LOG_DIRTY_PAGES); + + /* + * Add an identity map for GVA range [0xc0000000, 0xc0002000). This + * affects both L1 and L2. However... + */ + virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, + TEST_MEM_SIZE * 4096, 0); + + /* + * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to + * 0xc0000000. + * + * Note that prepare_eptp should be called only L1's GPA map is done, + * meaning after the last call to virt_map. + */ + prepare_eptp(vmx, vm, 0); + nested_map_memslot(vmx, vm, 0, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0); + + bmap = bitmap_alloc(TEST_MEM_SIZE); + host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); + + while (!done) { + memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096); + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + /* + * The nested guest wrote at offset 0x1000 in the memslot, but the + * dirty bitmap must be filled in according to L1 GPA, not L2. + */ + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + if (uc.args[1]) { + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + } else { + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + } + + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + } +} diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c7cced739c34..8aefd81fbc86 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -21,3 +21,4 @@ ipv6_flowlabel ipv6_flowlabel_mgr so_txtime tcp_fastopen_backup_key +nettest diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 85c587a03c8a..8b48ec54d058 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -254,6 +254,7 @@ cmd_jq() { local cmd=$1 local jq_exp=$2 + local jq_opts=$3 local ret local output @@ -263,7 +264,11 @@ cmd_jq() if [[ $ret -ne 0 ]]; then return $ret fi - output=$(echo $output | jq -r "$jq_exp") + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi echo $output # return success only in case of non-empty output [ ! -z "$output" ] diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index b8265ee9923f..614b31aad168 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -89,12 +89,9 @@ struct testcase testcases_v4[] = { .tfail = true, }, { - /* send a single MSS: will fail with GSO, because the segment - * logic in udp4_ufo_fragment demands a gso skb to be > MTU - */ + /* send a single MSS: will fall back to no GSO */ .tlen = CONST_MSS_V4, .gso_len = CONST_MSS_V4, - .tfail = true, .r_num_mss = 1, }, { @@ -139,10 +136,9 @@ struct testcase testcases_v4[] = { .tfail = true, }, { - /* send a single 1B MSS: will fail, see single MSS above */ + /* send a single 1B MSS: will fall back to no GSO */ .tlen = 1, .gso_len = 1, - .tfail = true, .r_num_mss = 1, }, { @@ -196,12 +192,9 @@ struct testcase testcases_v6[] = { .tfail = true, }, { - /* send a single MSS: will fail with GSO, because the segment - * logic in udp4_ufo_fragment demands a gso skb to be > MTU - */ + /* send a single MSS: will fall back to no GSO */ .tlen = CONST_MSS_V6, .gso_len = CONST_MSS_V6, - .tfail = true, .r_num_mss = 1, }, { @@ -246,10 +239,9 @@ struct testcase testcases_v6[] = { .tfail = true, }, { - /* send a single 1B MSS: will fail, see single MSS above */ + /* send a single 1B MSS: will fall back to no GSO */ .tlen = 1, .gso_len = 1, - .tfail = true, .r_num_mss = 1, }, { diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 464c9b76148f..7550f08822a3 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ -lpthread +CFLAGS += -g -I../../../../usr/include/ -pthread TEST_GEN_PROGS := pidfd_test pidfd_open_test pidfd_poll_test pidfd_wait diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json index 0d319f1d01db..c30d37a0b9bc 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json @@ -424,6 +424,56 @@ ] }, { + "id": "7588", + "name": "Add pedit action with LAYERED_OP ip set src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip src set 1.1.1.1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at 12: val 01010101 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "0fa7", + "name": "Add pedit action with LAYERED_OP ip set dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip dst set 2.2.2.2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at 16: val 02020202 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "5810", "name": "Add pedit action with LAYERED_OP ip set src & dst", "category": [ @@ -674,6 +724,56 @@ ] }, { + "id": "815c", + "name": "Add pedit action with LAYERED_OP ip6 set src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 src set 2001:0db8:0:f101::1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 4.*key #0 at ipv6\\+8: val 20010db8 mask 00000000.*key #1 at ipv6\\+12: val 0000f101 mask 00000000.*key #2 at ipv6\\+16: val 00000000 mask 00000000.*key #3 at ipv6\\+20: val 00000001 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "4dae", + "name": "Add pedit action with LAYERED_OP ip6 set dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 dst set 2001:0db8:0:f101::1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 4.*key #0 at ipv6\\+24: val 20010db8 mask 00000000.*key #1 at ipv6\\+28: val 0000f101 mask 00000000.*key #2 at ipv6\\+32: val 00000000 mask 00000000.*key #3 at ipv6\\+36: val 00000001 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "fc1f", "name": "Add pedit action with LAYERED_OP ip6 set src & dst", "category": [ @@ -950,5 +1050,4 @@ "$TC actions flush action pedit" ] } - ] diff --git a/usr/include/Makefile b/usr/include/Makefile index c9449aaf438d..57b20f7b6729 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -29,13 +29,11 @@ header-test- += linux/android/binderfs.h header-test-$(CONFIG_CPU_BIG_ENDIAN) += linux/byteorder/big_endian.h header-test-$(CONFIG_CPU_LITTLE_ENDIAN) += linux/byteorder/little_endian.h header-test- += linux/coda.h -header-test- += linux/coda_psdev.h header-test- += linux/elfcore.h header-test- += linux/errqueue.h header-test- += linux/fsmap.h header-test- += linux/hdlc/ioctl.h header-test- += linux/ivtv.h -header-test- += linux/jffs2.h header-test- += linux/kexec.h header-test- += linux/matroxfb.h header-test- += linux/netfilter_ipv4/ipt_LOG.h @@ -55,20 +53,12 @@ header-test- += linux/v4l2-mediabus.h header-test- += linux/v4l2-subdev.h header-test- += linux/videodev2.h header-test- += linux/vm_sockets.h -header-test- += scsi/scsi_bsg_fc.h -header-test- += scsi/scsi_netlink.h -header-test- += scsi/scsi_netlink_fc.h header-test- += sound/asequencer.h header-test- += sound/asoc.h header-test- += sound/asound.h header-test- += sound/compress_offload.h header-test- += sound/emu10k1.h header-test- += sound/sfnt_info.h -header-test- += sound/sof/eq.h -header-test- += sound/sof/fw.h -header-test- += sound/sof/header.h -header-test- += sound/sof/manifest.h -header-test- += sound/sof/trace.h header-test- += xen/evtchn.h header-test- += xen/gntdev.h header-test- += xen/privcmd.h diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h index 55fed77a9f73..4fd4f6db181b 100644 --- a/virt/kvm/arm/vgic/trace.h +++ b/virt/kvm/arm/vgic/trace.h @@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending, #endif /* _TRACE_VGIC_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic +#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e6de3159e682..fd68fbe0a75d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -617,8 +617,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) stat_data->kvm = kvm; stat_data->offset = p->offset; + stat_data->mode = p->mode ? p->mode : 0644; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - debugfs_create_file(p->name, 0644, kvm->debugfs_dentry, + debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, stat_data, stat_fops_per_vm[p->kind]); } return 0; @@ -3929,7 +3930,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; - if (simple_attr_open(inode, file, get, set, fmt)) { + if (simple_attr_open(inode, file, get, + stat_data->mode & S_IWUGO ? set : NULL, + fmt)) { kvm_put_kvm(stat_data->kvm); return -ENOMEM; } @@ -4177,7 +4180,8 @@ static void kvm_init_debug(void) kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - debugfs_create_file(p->name, 0644, kvm_debugfs_dir, + int mode = p->mode ? p->mode : 0644; + debugfs_create_file(p->name, mode, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); } |