550 files changed, 5847 insertions, 2880 deletions
@@ -29,6 +29,7 @@ Alexandre Belloni <[email protected]> <alexandre.belloni@free-electr Alexei Starovoitov <[email protected]> <[email protected]> Alexei Starovoitov <[email protected]> <[email protected]> Alexei Starovoitov <[email protected]> <[email protected]> +Alex Hung <[email protected]> <[email protected]> Alex Shi <[email protected]> <[email protected]> Alex Shi <[email protected]> <[email protected]> Alex Shi <[email protected]> <[email protected]> @@ -382,6 +383,7 @@ Santosh Shilimkar <[email protected]> Santosh Shilimkar <[email protected]> Sarangdhar Joshi <[email protected]> Sascha Hauer <[email protected]> +Satya Priya <[email protected]> <[email protected]> S.Çağlar Onur <[email protected]> Sean Christopherson <[email protected]> <[email protected]> Sean Nyekjaer <[email protected]> <[email protected]> @@ -389,6 +391,7 @@ Sebastian Reichel <[email protected]> <[email protected]> Sebastian Reichel <[email protected]> <[email protected]> Sedat Dilek <[email protected]> <[email protected]> Seth Forshee <[email protected]> <[email protected]> +Shannon Nelson <[email protected]> <[email protected]> Shiraz Hashim <[email protected]> <[email protected]> Shuah Khan <[email protected]> <[email protected]> Shuah Khan <[email protected]> <[email protected]> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a465d5242774..42af9ca0127e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6959,3 +6959,14 @@ memory, and other data can't be written using xmon commands. off xmon is disabled. + + amd_pstate= [X86] + disable + Do not enable amd_pstate as the default + scaling driver for the supported processors + passive + Use amd_pstate as a scaling driver, driver requests a + desired performance on this abstract scale and the power + management firmware translates the requests into actual + hardware states (core frequency, data fabric and memory + clocks etc.) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 8f3d30c5a0d8..06e23538f79c 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -283,23 +283,19 @@ efficiency frequency management method on AMD processors. Kernel Module Options for ``amd-pstate`` ========================================= -.. _shared_mem: - -``shared_mem`` -Use a module param (shared_mem) to enable related processors manually with -**amd_pstate.shared_mem=1**. -Due to the performance issue on the processors with `Shared Memory Support -<perf_cap_>`_, we disable it presently and will re-enable this by default -once we address performance issue with this solution. 
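In-kernel, the `Full MSR Support <perf_cap_>`_ / `Shared Memory Support <perf_cap_>`_ split this removed text describes is detected via the ``cppc`` CPU feature bit; a minimal C sketch of that dispatch follows, where X86_FEATURE_CPPC is the real feature flag but the two backend init helpers are hypothetical stand-ins, not the driver's actual functions::

    #include <asm/cpufeature.h>

    /* Sketch only: pick the CPPC access method the way amd-pstate does.
     * The two init helpers are hypothetical stand-ins for the MSR and
     * ACPI shared-memory (mailbox) backends.
     */
    static int amd_pstate_pick_backend(void)
    {
            if (boot_cpu_has(X86_FEATURE_CPPC))
                    return msr_backend_init();      /* Full MSR Support */

            return shmem_backend_init();            /* Shared Memory Support */
    }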
- -To check whether the current processor is using `Full MSR Support <perf_cap_>`_ -or `Shared Memory Support <perf_cap_>`_ : :: - - ray@hr-test1:~$ lscpu | grep cppc - Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm - -If the CPU flags have ``cppc``, then this processor supports `Full MSR Support -<perf_cap_>`_. Otherwise, it supports `Shared Memory Support <perf_cap_>`_. +Passive Mode +------------ + +``amd_pstate=passive`` + +It will be enabled if the ``amd_pstate=passive`` is passed to the kernel in the command line. +In this mode, ``amd_pstate`` driver software specifies a desired QoS target in the CPPC +performance scale as a relative number. This can be expressed as percentage of nominal +performance (infrastructure max). Below the nominal sustained performance level, +desired performance expresses the average performance level of the processor subject +to the Performance Reduction Tolerance register. Above the nominal performance level, +processor must provide at least nominal performance requested and go higher if current +operating conditions allow. ``cpupower`` tool support for ``amd-pstate`` diff --git a/Documentation/cpu-freq/index.rst b/Documentation/cpu-freq/index.rst index aba7831ab1cb..2fe32dad562a 100644 --- a/Documentation/cpu-freq/index.rst +++ b/Documentation/cpu-freq/index.rst @@ -20,18 +20,15 @@ Author: Dominik Brodowski <[email protected]> Mailing List ------------ -There is a CPU frequency changing CVS commit and general list where -you can report bugs, problems or submit patches. To post a message, -send an email to [email protected]. +There is a CPU frequency general list where you can report bugs, +problems or submit patches. 
To post a message, send an email to Links ----- the FTP archives: * ftp://ftp.linux.org.uk/pub/linux/cpufreq/ -how to access the CVS repository: -* http://cvs.arm.linux.org.uk/ - the CPUFreq Mailing list: * http://vger.kernel.org/vger-lists.html#linux-pm diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml index 2ab4642679c0..55c4f94a14d1 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml @@ -148,7 +148,7 @@ allOf: items: - const: oscclk - const: dout_clkcmu_fsys1_bus - - const: dout_clkcmu_fsys1_mmc_card + - const: gout_clkcmu_fsys1_mmc_card - const: dout_clkcmu_fsys1_usbdrd - if: diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 24fa3d87a40b..903b31129f01 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -25,6 +25,7 @@ properties: - description: v2 of CPUFREQ HW (EPSS) items: - enum: + - qcom,qdu1000-cpufreq-epss - qcom,sm6375-cpufreq-epss - qcom,sm8250-cpufreq-epss - const: qcom,cpufreq-epss @@ -56,6 +57,9 @@ properties: '#freq-domain-cells': const: 1 + '#clock-cells': + const: 1 + required: - compatible - reg @@ -83,11 +87,16 @@ examples: enable-method = "psci"; next-level-cache = <&L2_0>; qcom,freq-domain = <&cpufreq_hw 0>; + clocks = <&cpufreq_hw 0>; L2_0: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; + cache-unified; + cache-level = <3>; }; }; }; @@ -99,8 +108,11 @@ examples: enable-method = "psci"; next-level-cache = <&L2_100>; qcom,freq-domain = <&cpufreq_hw 0>; + clocks = <&cpufreq_hw 0>; L2_100: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -112,8 +124,11 @@ examples: enable-method = "psci"; next-level-cache = <&L2_200>; qcom,freq-domain = <&cpufreq_hw 0>; + clocks = <&cpufreq_hw 0>; L2_200: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -125,8 +140,11 @@ examples: enable-method = "psci"; next-level-cache = <&L2_300>; qcom,freq-domain = <&cpufreq_hw 0>; + clocks = <&cpufreq_hw 0>; L2_300: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -138,8 +156,11 @@ examples: enable-method = "psci"; next-level-cache = <&L2_400>; qcom,freq-domain = <&cpufreq_hw 1>; + clocks = <&cpufreq_hw 1>; L2_400: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -151,8 +172,11 @@ examples: enable-method = "psci"; next-level-cache = <&L2_500>; qcom,freq-domain = <&cpufreq_hw 1>; + clocks = <&cpufreq_hw 1>; L2_500: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -164,8 +188,11 @@ examples: enable-method = "psci"; next-level-cache = <&L2_600>; qcom,freq-domain = <&cpufreq_hw 1>; + clocks = <&cpufreq_hw 1>; L2_600: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -177,8 +204,11 @@ examples: enable-method = "psci"; next-level-cache = <&L2_700>; qcom,freq-domain = <&cpufreq_hw 1>; + clocks = <&cpufreq_hw 1>; L2_700: l2-cache { compatible = "cache"; + 
cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -197,6 +227,7 @@ examples: clock-names = "xo", "alternate"; #freq-domain-cells = <1>; + #clock-cells = <1>; }; }; ... diff --git a/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml b/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml index b283c8ca2bbf..5c08d8b6e995 100644 --- a/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml @@ -62,13 +62,6 @@ properties: description: Inform the driver that last channel will be used to sensor battery. - aspeed,trim-data-valid: - type: boolean - description: | - The ADC reference voltage can be calibrated to obtain the trimming - data which will be stored in otp. This property informs the driver that - the data store in the otp is valid. - required: - compatible - reg diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml index 2684562df4d9..be29e0b80995 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml @@ -24,7 +24,7 @@ properties: oneOf: - items: - enum: - - qcom,sc7280-bwmon + - qcom,sc7280-cpu-bwmon - qcom,sdm845-bwmon - const: qcom,msm8998-bwmon - const: qcom,msm8998-bwmon # BWMON v4 diff --git a/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml b/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml index 24d7bf21499e..9d44236f2deb 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml @@ -36,6 +36,9 @@ properties: resets: maxItems: 1 + iommus: + maxItems: 1 + required: - compatible - reg @@ -43,6 +46,7 @@ required: - clocks - clock-names - resets + - iommus additionalProperties: false @@ -59,6 +63,7 @@ examples: clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; ... diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml index 66d0ec763f0b..cf9c2f7bddc2 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml @@ -108,7 +108,7 @@ patternProperties: The power for the OPP in micro-Watts. Entries for multiple regulators shall be provided in the same field - separated by angular brackets <>. If current values aren't required + separated by angular brackets <>. If power values aren't required for a regulator, then it shall be filled with 0. If power values aren't required for any of the regulators, then this field is not required. The OPP binding doesn't provide any provisions to relate the @@ -230,9 +230,9 @@ patternProperties: minItems: 1 maxItems: 8 # Should be enough regulators - '^opp-microwatt': + '^opp-microwatt-': description: - Named opp-microwatt property. Similar to opp-microamp property, + Named opp-microwatt property. Similar to opp-microamp-<name> property, but for microwatt instead. 
$ref: /schemas/types.yaml#/definitions/uint32-array minItems: 1 diff --git a/Documentation/devicetree/bindings/opp/opp-v2.yaml b/Documentation/devicetree/bindings/opp/opp-v2.yaml index eaf8fba2c691..2f05920fce79 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2.yaml @@ -155,7 +155,7 @@ examples: opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000>; - lock-latency-ns = <290000>; + clock-latency-ns = <290000>; turbo-mode; }; }; diff --git a/Documentation/networking/generic_netlink.rst b/Documentation/networking/generic_netlink.rst index 59e04ccf80c1..d960dbd7e80e 100644 --- a/Documentation/networking/generic_netlink.rst +++ b/Documentation/networking/generic_netlink.rst @@ -6,4 +6,4 @@ Generic Netlink A wiki document on how to use Generic Netlink can be found here: - * http://www.linuxfoundation.org/collaborate/workgroups/networking/generic_netlink_howto + * https://wiki.linuxfoundation.org/networking/generic_netlink_howto diff --git a/Documentation/translations/zh_CN/loongarch/introduction.rst b/Documentation/translations/zh_CN/loongarch/introduction.rst index 128878f5bb70..f3ec25b163d7 100644 --- a/Documentation/translations/zh_CN/loongarch/introduction.rst +++ b/Documentation/translations/zh_CN/loongarch/introduction.rst @@ -70,8 +70,8 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其� ================= ================== =================== ========== .. note:: - 注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 - ``$a0`` 和 ``$a1`` 的别名,属于已经废弃的用法。 + 注意:在一些遗留代码中有时可能见到 ``$fv0`` 和 ``$fv1`` ,它们是 + ``$fa0`` 和 ``$fa1`` 的别名,属于已经废弃的用法。 向量寄存器 diff --git a/MAINTAINERS b/MAINTAINERS index 2585e7edc335..19c7bdfa26fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5585,8 +5585,6 @@ F: drivers/scsi/cxgbi/cxgb3i CXGB4 CRYPTO DRIVER (chcr) M: Ayush Sawal <[email protected]> -M: Vinay Kumar Yadav <[email protected]> -M: Rohit Maheshwari <[email protected]> S: Supported W: http://www.chelsio.com @@ -5594,8 +5592,6 @@ F: drivers/crypto/chelsio CXGB4 INLINE CRYPTO DRIVER M: Ayush Sawal <[email protected]> -M: Vinay Kumar Yadav <[email protected]> -M: Rohit Maheshwari <[email protected]> S: Supported W: http://www.chelsio.com @@ -10287,7 +10283,7 @@ T: git https://github.com/intel/gvt-linux.git F: drivers/gpu/drm/i915/gvt/ INTEL HID EVENT DRIVER -M: Alex Hung <[email protected]> +M: Alex Hung <[email protected]> S: Maintained F: drivers/platform/x86/intel/hid.c @@ -15952,6 +15948,7 @@ Q: https://patchwork.kernel.org/project/linux-pci/list/ B: https://bugzilla.kernel.org C: irc://irc.oftc.net/linux-pci T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git +F: Documentation/devicetree/bindings/pci/ F: drivers/pci/controller/ F: drivers/pci/pci-bridge-emul.c F: drivers/pci/pci-bridge-emul.h @@ -16058,7 +16055,7 @@ F: Documentation/devicetree/bindings/pci/microchip* F: drivers/pci/controller/*microchip* PCIE DRIVER FOR QUALCOMM MSM -M: Stanimir Varbanov <[email protected]> +M: Manivannan Sadhasivam <[email protected]> S: Maintained @@ -16148,7 +16145,8 @@ F: include/linux/peci-cpu.h F: include/linux/peci.h PENSANDO ETHERNET DRIVERS -M: Shannon Nelson <[email protected]> +M: Shannon Nelson <[email protected]> +M: Brett Creeley <[email protected]> S: Supported @@ -17484,10 +17482,8 @@ S: Maintained F: drivers/net/wireless/realtek/rtw89/ REDPINE WIRELESS DRIVER -M: Amitkumar Karwar <[email protected]> -M: Siva Rebbagondla <[email protected]> -S: Maintained +S: Orphan F: drivers/net/wireless/rsi/ REGISTER MAP ABSTRACTION @@ 
-18010,7 +18006,7 @@ L: [email protected] S: Maintained F: drivers/video/fbdev/savage/ -S390 +S390 ARCHITECTURE M: Heiko Carstens <[email protected]> M: Vasily Gorbik <[email protected]> M: Alexander Gordeev <[email protected]> @@ -18065,6 +18061,15 @@ L: [email protected] S: Supported F: drivers/s390/net/ +S390 MM +M: Alexander Gordeev <[email protected]> +M: Gerald Schaefer <[email protected]> +S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git +F: arch/s390/include/asm/pgtable.h +F: arch/s390/mm + S390 PCI SUBSYSTEM M: Niklas Schnelle <[email protected]> M: Gerald Schaefer <[email protected]> @@ -19999,6 +20004,7 @@ F: drivers/clk/clk-sc[mp]i.c F: drivers/cpufreq/sc[mp]i-cpufreq.c F: drivers/firmware/arm_scmi/ F: drivers/firmware/arm_scpi.c +F: drivers/powercap/arm_scmi_powercap.c F: drivers/regulator/scmi-regulator.c F: drivers/reset/reset-scmi.c F: include/linux/sc[mp]i_protocol.h @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc8 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi index dae448040a97..947497413977 100644 --- a/arch/arm/boot/dts/am335x-pcm-953.dtsi +++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi @@ -12,22 +12,20 @@ compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"; /* Power */ - regulators { - vcc3v3: fixedregulator@1 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-boot-on; - }; + vcc3v3: fixedregulator1 { + compatible = "regulator-fixed"; + regulator-name = "vcc3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + }; - vcc1v8: fixedregulator@2 { - compatible = "regulator-fixed"; - regulator-name = "vcc1v8"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-boot-on; - }; + vcc1v8: fixedregulator2 { + compatible = "regulator-fixed"; + regulator-name = "vcc1v8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; }; /* User IO */ diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index 7a113325abb9..6f9004ebf424 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -666,7 +666,7 @@ compatible = "atmel,at91rm9200-udc"; reg = <0xfffb0000 0x4000>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>; clock-names = "pclk", "hclk"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 60d61291f344..024af2db638e 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -39,6 +39,13 @@ }; + usb1 { + pinctrl_usb1_vbus_gpio: usb1_vbus_gpio { + atmel,pins = + <AT91_PIOC 5 AT91_PERIPH_GPIO AT91_PINCTRL_DEGLITCH>; /* PC5 GPIO */ + }; + }; + mmc0_slot1 { pinctrl_board_mmc0_slot1: mmc0_slot1-board { atmel,pins = @@ -84,6 +91,8 @@ }; usb1: gadget@fffa4000 { + pinctrl-0 = <&pinctrl_usb1_vbus_gpio>; + pinctrl-names = "default"; atmel,vbus-gpio = <&pioC 5 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index b4605edfd2ab..d8fa83effd63 100644 --- 
a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -364,8 +364,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_wifi>; interrupts-extended = <&gpio1 30 IRQ_TYPE_LEVEL_HIGH>; - ref-clock-frequency = "38400000"; - tcxo-clock-frequency = "19200000"; + ref-clock-frequency = <38400000>; + tcxo-clock-frequency = <19200000>; }; }; diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts index 9fd4d9db9f8f..becdc0b664bf 100644 --- a/arch/arm/boot/dts/rk3036-evb.dts +++ b/arch/arm/boot/dts/rk3036-evb.dts @@ -35,11 +35,10 @@ &i2c1 { status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; }; diff --git a/arch/arm/boot/dts/rk3066a-mk808.dts b/arch/arm/boot/dts/rk3066a-mk808.dts index cfa318a506eb..2db5ba706208 100644 --- a/arch/arm/boot/dts/rk3066a-mk808.dts +++ b/arch/arm/boot/dts/rk3066a-mk808.dts @@ -32,7 +32,7 @@ keyup-threshold-microvolt = <2500000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <0>; diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index e7cf18823558..118deacd38c4 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -71,7 +71,7 @@ #sound-dai-cells = <0>; }; - ir_recv: gpio-ir-receiver { + ir_recv: ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index cdd4a0bd5133..44b54af0bbf9 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -379,7 +379,7 @@ rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>; }; - lcdc1_rgb24: ldcd1-rgb24 { + lcdc1_rgb24: lcdc1-rgb24 { rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, <2 RK_PA1 1 &pcfg_pull_none>, <2 RK_PA2 1 &pcfg_pull_none>, @@ -607,7 +607,6 @@ &global_timer { interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; - status = "disabled"; }; &local_timer { diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts index be695b8c1f67..8a635c243127 100644 --- a/arch/arm/boot/dts/rk3288-evb-act8846.dts +++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts @@ -54,7 +54,7 @@ vin-supply = <&vcc_sys>; }; - hym8563@51 { + rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi index 399d6b9c5fd4..382d2839cf47 100644 --- a/arch/arm/boot/dts/rk3288-evb.dtsi +++ b/arch/arm/boot/dts/rk3288-evb.dtsi @@ -28,19 +28,19 @@ press-threshold-microvolt = <300000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <640000>; }; - esc { + button-esc { label = "Esc"; linux,code = <KEY_ESC>; press-threshold-microvolt = <1000000>; }; - home { + button-home { label = "Home"; linux,code = <KEY_HOME>; press-threshold-microvolt = <1300000>; diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi index 052afe5543e2..3836c61cfb76 100644 --- a/arch/arm/boot/dts/rk3288-firefly.dtsi +++ b/arch/arm/boot/dts/rk3288-firefly.dtsi @@ -233,11 +233,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; 
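		/* Note (assumption from the binding, not part of the patch):
		 * clock-frequency is removed from the hym8563 nodes throughout
		 * this series because the haoyu,hym8563 binding defines no such
		 * property; the 32.768 kHz clkout rate is the hardware default
		 * rather than something set from DT. */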
interrupt-parent = <&gpio7>; interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts index 713f55e143c6..db1eb648e0e1 100644 --- a/arch/arm/boot/dts/rk3288-miqi.dts +++ b/arch/arm/boot/dts/rk3288-miqi.dts @@ -162,11 +162,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts index 80e0f07c8e87..13cfdaa95cc7 100644 --- a/arch/arm/boot/dts/rk3288-rock2-square.dts +++ b/arch/arm/boot/dts/rk3288-rock2-square.dts @@ -165,11 +165,10 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio0>; interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; diff --git a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi index 0ae2bd150e37..793951655b73 100644 --- a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi +++ b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi @@ -241,7 +241,6 @@ interrupt-parent = <&gpio5>; interrupts = <RK_PC3 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index bf285091a9eb..cb4e42ede56a 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -76,6 +76,13 @@ reg = <0x1013c200 0x20>; interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>; clocks = <&cru CORE_PERI>; + status = "disabled"; + /* The clock source and the sched_clock provided by the arm_global_timer + * on Rockchip rk3066a/rk3188 are quite unstable because their rates + * depend on the CPU frequency. + * Keep the arm_global_timer disabled in order to have the + * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. + */ }; local_timer: local-timer@1013c600 { diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index fe87397c3d8c..bdbc1e590891 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ARM_pc = (__ip); \ - (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ + frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ (regs)->ARM_sp = current_stack_pointer; \ (regs)->ARM_cpsr = SVC_MODE; \ } diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index d16aba48fa0a..090011394477 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -45,12 +45,6 @@ typedef pte_t *pte_addr_t; /* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) - -/* * Mark the prot value as uncacheable and unbufferable. 
*/ #define pgprot_noncached(prot) (prot) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 78a532068fec..ef48a55e9af8 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -10,6 +10,15 @@ #include <linux/const.h> #include <asm/proc-fns.h> +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) +#endif + #ifndef CONFIG_MMU #include <asm-generic/pgtable-nopud.h> @@ -139,13 +148,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, */ #ifndef __ASSEMBLY__ -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) - extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 25c9d184fa4c..1c57ac401649 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -393,8 +393,10 @@ static void __init mxs_machine_init(void) root = of_find_node_by_path("/"); ret = of_property_read_string(root, "model", &soc_dev_attr->machine); - if (ret) + if (ret) { + kfree(soc_dev_attr); return; + } soc_dev_attr->family = "Freescale MXS Family"; soc_dev_attr->soc_id = mxs_get_soc_id(); diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index c42debaded95..c1494a4dee25 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -26,6 +26,13 @@ unsigned long vectors_base; +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; #endif @@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void) */ void __init paging_init(const struct machine_desc *mdesc) { + void *zero_page; + early_trap_init((void *)vectors_base); mpu_setup(); + + /* allocate the zero page. 
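+	 * Even without an MMU, generic code expects ZERO_PAGE() to return
+	 * a real zeroed page; the virt_to_page(0) define removed above
+	 * pointed at whatever happens to live at virtual address zero.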
*/ + zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!zero_page) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); } /* diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 53f6660656ac..ca1d287a0a01 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -161,6 +161,7 @@ clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; video-codec@1c0e000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 9f1469db554d..b4c1ef2559f2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -544,14 +544,14 @@ pinctrl_pcie0: pcie0grp { fsl,pins = < - MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x61 /* open drain, pull up */ - MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x41 + MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x60 /* open drain, pull up */ + MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x40 >; }; pinctrl_pcie0_reg: pcie0reggrp { fsl,pins = < - MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x41 + MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x40 >; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-evb.dts b/arch/arm64/boot/dts/rockchip/px30-evb.dts index 07008d84434c..c1bbd555f5f5 100644 --- a/arch/arm64/boot/dts/rockchip/px30-evb.dts +++ b/arch/arm64/boot/dts/rockchip/px30-evb.dts @@ -30,31 +30,31 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - esc-key { + button-esc { label = "esc"; linux,code = <KEY_ESC>; press-threshold-microvolt = <1310000>; }; - home-key { + button-home { label = "home"; linux,code = <KEY_HOME>; press-threshold-microvolt = <624000>; }; - menu-key { + button-menu { label = "menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <987000>; }; - vol-down-key { + button-down { label = "volume down"; linux,code = <KEY_VOLUMEDOWN>; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { label = "volume up"; linux,code = <KEY_VOLUMEUP>; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts index 9fe9b0d11003..184b84fdde07 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts @@ -23,7 +23,7 @@ poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - func-key { + button-func { linux,code = <KEY_FN>; label = "function"; press-threshold-microvolt = <18000>; @@ -37,31 +37,31 @@ poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - esc-key { + button-esc { linux,code = <KEY_MICMUTE>; label = "micmute"; press-threshold-microvolt = <1130000>; }; - home-key { + button-home { linux,code = <KEY_MODE>; label = "mode"; press-threshold-microvolt = <901000>; }; - menu-key { + button-menu { linux,code = <KEY_PLAY>; label = "play"; press-threshold-microvolt = <624000>; }; - vol-down-key { + button-down { linux,code = <KEY_VOLUMEDOWN>; label = "volume down"; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { linux,code = <KEY_VOLUMEUP>; label = "volume up"; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index ea6820902ede..7ea48167747c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts 
+++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -19,7 +19,7 @@ stdout-path = "serial2:1500000n8"; }; - ir_rx { + ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts index 43c928ac98f0..1deef53a4c94 100644 --- a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts @@ -25,7 +25,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 7f5bba0c6001..81d1064fdb21 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -208,11 +208,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts index 38d757c00548..5589f3db6b36 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts @@ -192,11 +192,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index ed3348b558f8..a47d9f758611 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -734,10 +734,6 @@ camera: &i2c7 { }; /* PINCTRL OVERRIDES */ -&ec_ap_int_l { - rockchip,pins = <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; -}; - &ap_fw_wp { rockchip,pins = <0 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index 2a332763c35c..9d9297bc5f04 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -123,7 +123,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts index 452728b82e42..3bf8f959e42c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts @@ -39,7 +39,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts index 72182c58cc46..65cb21837b0c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts @@ -19,7 +19,7 @@ keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = 
"Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi index 278123b4f911..b6e082f1f6d9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi @@ -167,6 +167,7 @@ }; &emmc_phy { + rockchip,enable-strobe-pulldown; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts index 9e2e246e0bab..dba4d03bfc2b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts @@ -52,13 +52,13 @@ press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = <KEY_BACK>; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 04c752f49be9..115c14c0a3c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -207,7 +207,7 @@ cap-sd-highspeed; cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>; disable-wp; - max-frequency = <150000000>; + max-frequency = <40000000>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; vmmc-supply = <&vcc3v3_baseboard>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts index 5a2661ae0131..7ba1c28f70a9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -98,13 +98,12 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; interrupt-parent = <&gpio0>; interrupts = <RK_PA5 IRQ_TYPE_EDGE_FALLING>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi index 2f4b1b2e3ac7..bbf1e3f24585 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi @@ -41,7 +41,7 @@ keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 645ced6617a6..1f76d3501bda 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -509,7 +509,6 @@ &i2s1 { rockchip,playback-channels = <2>; rockchip,capture-channels = <2>; - status = "okay"; }; &i2s2 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index 13927e7d0724..dbec2b7173a0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -33,13 +33,13 @@ press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = <KEY_BACK>; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <1314000>; diff 
--git a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi index 935b8c68a71d..bf9eb0405b62 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi @@ -297,11 +297,10 @@ clock-frequency = <400000>; status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index 0d45868132b9..8d61f824c12d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -23,7 +23,7 @@ io-channel-names = "buttons"; keyup-threshold-microvolt = <1750000>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index a05460b92415..25a8c781f4e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -740,7 +740,7 @@ &uart1 { pinctrl-names = "default"; - pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn>; + pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn &uart1m0_rtsn>; status = "okay"; uart-has-rtscts; @@ -748,13 +748,14 @@ compatible = "brcm,bcm43438-bt"; clocks = <&rk817 1>; clock-names = "lpo"; - device-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; - host-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; + device-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio2 RK_PB7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_l &bt_wake_l &bt_enable_h>; vbat-supply = <&vcc_sys>; vddio-supply = <&vcca1v8_pmu>; + max-speed = <3000000>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 77b179cd20e7..b276eb0810c7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -246,7 +246,7 @@ compatible = "rockchip,rk809"; reg = <0x20>; interrupt-parent = <&gpio0>; - interrupts = <RK_PA7 IRQ_TYPE_LEVEL_LOW>; + interrupts = <RK_PA3 IRQ_TYPE_LEVEL_LOW>; assigned-clocks = <&cru I2S1_MCLKOUT_TX>; assigned-clock-parents = <&cru CLK_I2S1_8CH_TX>; clock-names = "mclk"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts index dba648c2f57e..9fd262334d77 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts @@ -142,7 +142,7 @@ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>; assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>; clock_in_out = "input"; - phy-mode = "rgmii-id"; + phy-mode = "rgmii"; phy-supply = <&vcc_3v3>; pinctrl-names = "default"; pinctrl-0 = <&gmac1m0_miim @@ -432,11 +432,7 @@ &i2c3 { pinctrl-names = "default"; - pinctrl-0 = <&i2c3m1_xfer>; - status = "okay"; -}; - -&i2c5 { + pinctrl-0 = <&i2c3m0_xfer>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index c282f6e79960..26d7fda275ed 100644 --- 
a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -500,7 +500,6 @@ interrupt-parent = <&gpio0>; interrupts = <RK_PD3 IRQ_TYPE_EDGE_FALLING>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index fb87a168fe96..539ef8cc7792 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -509,7 +509,6 @@ interrupt-parent = <&gpio0>; interrupts = <RK_PD3 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/ti/k3-am625-sk.dts b/arch/arm64/boot/dts/ti/k3-am625-sk.dts index 93a5f0817efc..4620ef5e19bb 100644 --- a/arch/arm64/boot/dts/ti/k3-am625-sk.dts +++ b/arch/arm64/boot/dts/ti/k3-am625-sk.dts @@ -31,6 +31,15 @@ bootargs = "console=ttyS2,115200n8 earlycon=ns16550a,mmio32,0x02800000"; }; + opp-table { + /* Add 1.4GHz OPP for am625-sk board. Requires VDD_CORE to be at 0.85V */ + opp-1400000000 { + opp-hz = /bits/ 64 <1400000000>; + opp-supported-hw = <0x01 0x0004>; + clock-latency-ns = <6000000>; + }; + }; + memory@80000000 { device_type = "memory"; /* 2G RAM */ diff --git a/arch/arm64/boot/dts/ti/k3-am625.dtsi b/arch/arm64/boot/dts/ti/k3-am625.dtsi index 887f31c23fef..cea2cc7de5dd 100644 --- a/arch/arm64/boot/dts/ti/k3-am625.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am625.dtsi @@ -48,6 +48,8 @@ d-cache-line-size = <64>; d-cache-sets = <128>; next-level-cache = <&L2_0>; + operating-points-v2 = <&a53_opp_table>; + clocks = <&k3_clks 135 0>; }; cpu1: cpu@1 { @@ -62,6 +64,8 @@ d-cache-line-size = <64>; d-cache-sets = <128>; next-level-cache = <&L2_0>; + operating-points-v2 = <&a53_opp_table>; + clocks = <&k3_clks 136 0>; }; cpu2: cpu@2 { @@ -76,6 +80,8 @@ d-cache-line-size = <64>; d-cache-sets = <128>; next-level-cache = <&L2_0>; + operating-points-v2 = <&a53_opp_table>; + clocks = <&k3_clks 137 0>; }; cpu3: cpu@3 { @@ -90,6 +96,51 @@ d-cache-line-size = <64>; d-cache-sets = <128>; next-level-cache = <&L2_0>; + operating-points-v2 = <&a53_opp_table>; + clocks = <&k3_clks 138 0>; + }; + }; + + a53_opp_table: opp-table { + compatible = "operating-points-v2-ti-cpu"; + opp-shared; + syscon = <&wkup_conf>; + + opp-200000000 { + opp-hz = /bits/ 64 <200000000>; + opp-supported-hw = <0x01 0x0007>; + clock-latency-ns = <6000000>; + }; + + opp-400000000 { + opp-hz = /bits/ 64 <400000000>; + opp-supported-hw = <0x01 0x0007>; + clock-latency-ns = <6000000>; + }; + + opp-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x01 0x0007>; + clock-latency-ns = <6000000>; + }; + + opp-800000000 { + opp-hz = /bits/ 64 <800000000>; + opp-supported-hw = <0x01 0x0007>; + clock-latency-ns = <6000000>; + }; + + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x01 0x0006>; + clock-latency-ns = <6000000>; + }; + + opp-1250000000 { + opp-hz = /bits/ 64 <1250000000>; + opp-supported-hw = <0x01 0x0004>; + clock-latency-ns = <6000000>; + opp-suspend; }; }; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d6cf535d8352..439e2bc5d5d8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,16 +14,8 @@ #ifdef CONFIG_EFI extern void efi_init(void); - -bool 
efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() - -static inline -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - return false; -} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 67babd5f04c2..75691a2641c1 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include <linux/linkage.h> SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-112]! + stp x29, x30, [sp, #-32]! mov x29, sp /* @@ -17,20 +17,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) stp x1, x18, [sp, #16] /* - * Preserve all callee saved registers and record the stack pointer - * value in a per-CPU variable so we can recover from synchronous - * exceptions occurring while running the firmware routines. - */ - stp x19, x20, [sp, #32] - stp x21, x22, [sp, #48] - stp x23, x24, [sp, #64] - stp x25, x26, [sp, #80] - stp x27, x28, [sp, #96] - - adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - str x29, [x8] - - /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the * stack. @@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #112 + ldp x29, x30, [sp], #32 b.ne 0f ret 0: @@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) - -SYM_FUNC_START(__efi_rt_asm_recover) - ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - mov sp, x8 - - ldp x0, x18, [sp, #16] - ldp x19, x20, [sp, #32] - ldp x21, x22, [sp, #48] - ldp x23, x24, [sp, #64] - ldp x25, x26, [sp, #80] - ldp x27, x28, [sp, #96] - ldp x29, x30, [sp], #112 - - b efi_handle_runtime_exception -SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ee53f2a0aa03..a908a37f0367 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,7 +9,6 @@ #include <linux/efi.h> #include <linux/init.h> -#include <linux/percpu.h> #include <asm/efi.h> @@ -145,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } - -asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); - -asmlinkage efi_status_t __efi_rt_asm_recover(void); - -asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) -{ - pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return EFI_ABORTED; -} - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - /* Check whether the exception occurred while running the firmware */ - if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) - return false; - - pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - dump_stack(); - - regs->pc = (u64)__efi_rt_asm_recover; - return true; -} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3e9cf9826417..5b391490e045 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,7 +30,6 @@ #include <asm/bug.h> #include <asm/cmpxchg.h> #include <asm/cpufeature.h> -#include <asm/efi.h> #include <asm/exception.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> @@ -392,9 +391,6 @@ static void 
__do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } - if (efi_runtime_fixup_exception(regs, msg)) - return; - die_kernel_fault(msg, addr, esr, regs); } diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index f4cb54d5afd6..01b57b726322 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -97,7 +97,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_LOONGARCH CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ + grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index d06d4542b634..5332b1433f38 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -117,7 +117,7 @@ extern struct fwnode_handle *liointc_handle; extern struct fwnode_handle *pch_lpc_handle; extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -extern irqreturn_t loongson3_ipi_interrupt(int irq, void *dev); +extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev); #include <asm-generic/irq.h> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 946704bee599..79d5bfd913e0 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -349,13 +349,17 @@ static inline pte_t pte_mkclean(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pte_val(pte) |= _PAGE_MODIFIED; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY); + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_MODIFIED) + pte_val(pte) |= _PAGE_DIRTY; return pte; } @@ -455,7 +459,9 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY); + pmd_val(pmd) |= _PAGE_WRITE; + if (pmd_val(pmd) & _PAGE_MODIFIED) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } @@ -478,10 +484,13 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pmd_val(pmd) |= _PAGE_MODIFIED; + if (pmd_val(pmd) & _PAGE_WRITE) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 71189b28bfb2..3dd172d9ffea 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -19,21 +19,21 @@ extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_foreign_map[]; -void loongson3_smp_setup(void); -void loongson3_prepare_cpus(unsigned int max_cpus); -void loongson3_boot_secondary(int cpu, struct task_struct *idle); -void loongson3_init_secondary(void); -void loongson3_smp_finish(void); -void loongson3_send_ipi_single(int cpu, unsigned int action); -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action); +void loongson_smp_setup(void); +void loongson_prepare_cpus(unsigned int max_cpus); +void loongson_boot_secondary(int cpu, struct task_struct *idle); +void loongson_init_secondary(void); +void loongson_smp_finish(void); +void loongson_send_ipi_single(int cpu, unsigned int action); +void loongson_send_ipi_mask(const struct 
cpumask *mask, unsigned int action); #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void); -void loongson3_cpu_die(unsigned int cpu); +int loongson_cpu_disable(void); +void loongson_cpu_die(unsigned int cpu); #endif static inline void plat_smp_setup(void) { - loongson3_smp_setup(); + loongson_smp_setup(); } static inline int raw_smp_processor_id(void) @@ -85,28 +85,28 @@ extern void show_ipi_list(struct seq_file *p, int prec); */ static inline void smp_send_reschedule(int cpu) { - loongson3_send_ipi_single(cpu, SMP_RESCHEDULE); + loongson_send_ipi_single(cpu, SMP_RESCHEDULE); } static inline void arch_send_call_function_single_ipi(int cpu) { - loongson3_send_ipi_single(cpu, SMP_CALL_FUNCTION); + loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - loongson3_send_ipi_mask(mask, SMP_CALL_FUNCTION); + loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION); } #ifdef CONFIG_HOTPLUG_CPU static inline int __cpu_disable(void) { - return loongson3_cpu_disable(); + return loongson_cpu_disable(); } static inline void __cpu_die(unsigned int cpu) { - loongson3_cpu_die(cpu); + loongson_cpu_die(cpu); } extern void play_dead(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 335398482038..8319cc409009 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -56,23 +56,6 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) return ioremap_cache(phys, size); } -void __init acpi_boot_table_init(void) -{ - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return; - - /* - * Initialize the ACPI boot-time table parser. - */ - if (acpi_table_init()) { - disable_acpi(); - return; - } -} - #ifdef CONFIG_SMP static int set_processor_mask(u32 id, u32 flags) { @@ -156,13 +139,21 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } -int __init acpi_boot_init(void) +void __init acpi_boot_table_init(void) { /* * If acpi_disabled, bail out */ if (acpi_disabled) - return -1; + return; + + /* + * Initialize the ACPI boot-time table parser. 
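+	 * (Folded in from the old acpi_boot_table_init(), so table
+	 * parsing and the MADT/SPCR processing below share one init
+	 * path and setup.c needs only a single call.)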
+ */ + if (acpi_table_init()) { + disable_acpi(); + return; + } loongson_sysconf.boot_cpu_id = read_csr_cpuid(); @@ -173,8 +164,6 @@ int __init acpi_boot_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); - - return 0; } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 1ba19c76563e..0524bf1169b7 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -117,7 +117,7 @@ void __init init_IRQ(void) if (ipi_irq < 0) panic("IPI IRQ mapping failed\n"); irq_set_percpu_devid(ipi_irq); - r = request_percpu_irq(ipi_irq, loongson3_ipi_interrupt, "IPI", &ipi_dummy_dev); + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev); if (r < 0) panic("IPI IRQ request failed\n"); #endif diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2526b68f1c0f..ddb8ba4eb399 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -152,7 +152,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->csr_crmd = p->thread.csr_crmd; childregs->csr_prmd = p->thread.csr_prmd; childregs->csr_ecfg = p->thread.csr_ecfg; - return 0; + goto out; } /* user thread */ @@ -171,14 +171,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ childregs->csr_euen = 0; + if (clone_flags & CLONE_SETTLS) + childregs->regs[2] = tls; + +out: clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); - if (clone_flags & CLONE_SETTLS) - childregs->regs[2] = tls; - return 0; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 1eb63fa9bc81..ae436def7ee9 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -257,7 +257,6 @@ void __init platform_init(void) #ifdef CONFIG_ACPI acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); - acpi_boot_init(); #endif #ifdef CONFIG_NUMA diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 781a4d4bdddc..6ed72f7ff278 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -136,12 +136,12 @@ static void ipi_write_action(int cpu, u32 action) } } -void loongson3_send_ipi_single(int cpu, unsigned int action) +void loongson_send_ipi_single(int cpu, unsigned int action) { ipi_write_action(cpu_logical_map(cpu), (u32)action); } -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; @@ -149,7 +149,7 @@ void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } -irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) +irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; unsigned int cpu = smp_processor_id(); @@ -169,7 +169,7 @@ irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } -void __init loongson3_smp_setup(void) +void __init loongson_smp_setup(void) { cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; @@ -178,7 +178,7 @@ void __init loongson3_smp_setup(void) pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); } -void __init loongson3_prepare_cpus(unsigned int 
max_cpus) +void __init loongson_prepare_cpus(unsigned int max_cpus) { int i = 0; @@ -193,7 +193,7 @@ void __init loongson3_prepare_cpus(unsigned int max_cpus) /* * Setup the PC, SP, and TP of a secondary processor and start it running! */ -void loongson3_boot_secondary(int cpu, struct task_struct *idle) +void loongson_boot_secondary(int cpu, struct task_struct *idle) { unsigned long entry; @@ -205,13 +205,13 @@ void loongson3_boot_secondary(int cpu, struct task_struct *idle) csr_mail_send(entry, cpu_logical_map(cpu), 0); - loongson3_send_ipi_single(cpu, SMP_BOOT_CPU); + loongson_send_ipi_single(cpu, SMP_BOOT_CPU); } /* * SMP init and finish on secondary CPUs */ -void loongson3_init_secondary(void) +void loongson_init_secondary(void) { unsigned int cpu = smp_processor_id(); unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | @@ -231,7 +231,7 @@ void loongson3_init_secondary(void) cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; } -void loongson3_smp_finish(void) +void loongson_smp_finish(void) { local_irq_enable(); iocsr_write64(0, LOONGARCH_IOCSR_MBUF0); @@ -240,7 +240,7 @@ void loongson3_smp_finish(void) #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void) +int loongson_cpu_disable(void) { unsigned long flags; unsigned int cpu = smp_processor_id(); @@ -262,7 +262,7 @@ int loongson3_cpu_disable(void) return 0; } -void loongson3_cpu_die(unsigned int cpu) +void loongson_cpu_die(unsigned int cpu) { while (per_cpu(cpu_state, cpu) != CPU_DEAD) cpu_relax(); @@ -300,19 +300,19 @@ void play_dead(void) */ #ifdef CONFIG_PM -static int loongson3_ipi_suspend(void) +static int loongson_ipi_suspend(void) { return 0; } -static void loongson3_ipi_resume(void) +static void loongson_ipi_resume(void) { iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); } -static struct syscore_ops loongson3_ipi_syscore_ops = { - .resume = loongson3_ipi_resume, - .suspend = loongson3_ipi_suspend, +static struct syscore_ops loongson_ipi_syscore_ops = { + .resume = loongson_ipi_resume, + .suspend = loongson_ipi_suspend, }; /* @@ -321,7 +321,7 @@ static struct syscore_ops loongson3_ipi_syscore_ops = { */ static int __init ipi_pm_init(void) { - register_syscore_ops(&loongson3_ipi_syscore_ops); + register_syscore_ops(&loongson_ipi_syscore_ops); return 0; } @@ -425,7 +425,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { init_new_context(current, &init_mm); current_thread_info()->cpu = 0; - loongson3_prepare_cpus(max_cpus); + loongson_prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); calculate_cpu_foreign_map(); @@ -436,7 +436,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - loongson3_boot_secondary(cpu, tidle); + loongson_boot_secondary(cpu, tidle); /* Wait for CPU to start and be ready to sync counters */ if (!wait_for_completion_timeout(&cpu_starting, @@ -465,7 +465,7 @@ asmlinkage void start_secondary(void) cpu_probe(); constant_clockevent_init(); - loongson3_init_secondary(); + loongson_init_secondary(); set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); @@ -487,11 +487,11 @@ asmlinkage void start_secondary(void) complete(&cpu_running); /* - * irq will be enabled in loongson3_smp_finish(), enabling it too + * irq will be enabled in loongson_smp_finish(), enabling it too * early is dangerous. 
*/ WARN_ON_ONCE(!irqs_disabled()); - loongson3_smp_finish(); + loongson_smp_finish(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index b206d9159205..4571c3c87cd4 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -43,7 +43,8 @@ static bool unwind_by_prologue(struct unwind_state *state) { struct stack_info *info = &state->stack_info; union loongarch_instruction *ip, *ip_end; - unsigned long frame_size = 0, frame_ra = -1; + long frame_ra = -1; + unsigned long frame_size = 0; unsigned long size, offset, pc = state->pc; if (state->sp >= info->end || state->sp < info->begin) diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 3f8a86c4336a..02e6be9c5b0d 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -67,12 +67,12 @@ linux.bin.ub linux.bin.gz: linux.bin linux.bin: vmlinux linux.bin linux.bin.gz linux.bin.ub: $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' PHONY += simpleImage.$(DTB) simpleImage.$(DTB): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(addprefix $(boot)/$@., ub unstrip strip) - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' define archhelp echo '* linux.bin - Create raw binary' diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 6caec386ad2f..4678627673df 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile index 8c3ad76602f3..29c11a06b750 100644 --- a/arch/nios2/boot/Makefile +++ b/arch/nios2/boot/Makefile @@ -20,7 +20,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) -$(obj)/vmImage: $(obj)/vmlinux.gz +$(obj)/vmImage: $(obj)/vmlinux.gz FORCE $(call if_changed,uimage) @$(kecho) 'Kernel: $@ is ready' diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 4745bb9998bd..6d8492b6e2b8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs) /* kernel/traps.c */ DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); #ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot); DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async); #endif DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 43f1c76d48ce..a379b0ce19ff 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; - /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); + /* Initialize tail_call_cnt, to be skipped if we do tail calls. 
*/ + EMIT(PPC_RAW_LI(_R4, 0)); + +#define BPF_TAILCALL_PROLOGUE_SIZE 4 + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); - /* - * Initialize tail_call_cnt in stack frame if we do tail calls. - * Otherwise, put in NOPs so that it can be skipped when we are - * invoked through a tail call. - */ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, - bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - else - EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); -#define BPF_TAILCALL_PROLOGUE_SIZE 16 + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); /* * We need a stack frame, but we don't necessarily need to @@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i))); -} - -void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) -{ - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); - - bpf_jit_emit_common_epilogue(image, ctx); - - /* Tear down our stack frame */ if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + /* Tear down our stack frame */ EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_MTLR(_R0)); +} + +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); + + bpf_jit_emit_common_epilogue(image, ctx); + EMIT(PPC_RAW_BLR()); } @@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array)); EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs))); - EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); /* * if (prog == NULL) @@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE)); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_MTCTR(_R3)); EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); + /* Put tail_call_cnt in r4 */ + EMIT(PPC_RAW_MR(_R4, _R0)); + /* tear restore NVRs, ... 
*/ bpf_jit_emit_common_epilogue(image, ctx); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fa78595a6089..593cf09264d8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -317,9 +317,9 @@ config SMP config NR_CPUS int "Maximum number of CPUs (2-512)" depends on SMP - range 2 512 if !SBI_V01 - range 2 32 if SBI_V01 && 32BIT - range 2 64 if SBI_V01 && 64BIT + range 2 512 if !RISCV_SBI_V01 + range 2 32 if RISCV_SBI_V01 && 32BIT + range 2 64 if RISCV_SBI_V01 && 64BIT default "32" if 32BIT default "64" if 64BIT diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 1b471ff73178..816e753de636 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -23,6 +23,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) #define REG_SC __REG_SEL(sc.d, sc.w) +#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index f74879a8f1ea..e229d7be4b66 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -10,6 +10,7 @@ #include <asm/mmu_context.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> +#include <asm/pgalloc.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -20,7 +21,10 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); -#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_setup() ({ \ + sync_kernel_mappings(efi_mm.pgd); \ + efi_virtmap_load(); \ + }) #define arch_efi_call_virt_teardown() efi_virtmap_unload() #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 947f23d7b6af..59dc12b5b7e8 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) #define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ +static inline void sync_kernel_mappings(pgd_t *pgd) +{ + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); +} + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* Copy kernel mappings */ - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + sync_kernel_mappings(pgd); } return pgd; } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..92ec2d9d7273 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd) return pte_dirty(pmd_pte(pmd)); } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pte_young(pmd_pte(pmd)); diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index d3443be7eedc..3831b638ecab 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); /* Clear IPI for current CPU */ void riscv_clear_ipi(void); +/* Check other CPUs stop or not */ +bool 
smp_crash_stop_failed(void); + /* Secondary hart entry */ asmlinkage void smp_callin(void); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..186abd146eaf 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -404,6 +404,19 @@ handle_syscall_trace_exit: #ifdef CONFIG_VMAP_STACK handle_kernel_stack_overflow: + /* + * Takes the pseudo-spinlock for the shadow stack, in case multiple + * harts are concurrently overflowing their kernel stacks. We could + * store any value here, but since we're overflowing the kernel stack + * already we only have SP to use as a scratch register. So we just + * swap in the address of the spinlock, as that's definitely non-zero. + * + * Pairs with a store_release in handle_bad_stack(). + */ +1: la sp, spin_shadow_stack + REG_AMOSWAP_AQ sp, sp, (sp) + bnez sp, 1b + la sp, shadow_stack addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ee79e6839b86..2d139b724bc8 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -15,6 +15,8 @@ #include <linux/compiler.h> /* For unreachable() */ #include <linux/cpu.h> /* For cpu_down() */ #include <linux/reboot.h> +#include <linux/interrupt.h> +#include <linux/irq.h> /* * kexec_image_info - Print received image details @@ -138,20 +140,35 @@ void machine_shutdown(void) #endif } -/* Override the weak function in kernel/panic.c */ -void crash_smp_send_stop(void) +static void machine_kexec_mask_interrupts(void) { - static int cpus_stopped; + unsigned int i; + struct irq_desc *desc; - /* - * This function can be called twice in panic path, but obviously - * we execute this only once. - */ - if (cpus_stopped) - return; + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); - smp_send_stop(); - cpus_stopped = 1; + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } } /* @@ -169,6 +186,8 @@ machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + pr_info("Starting crashdump kernel...\n"); } @@ -195,6 +214,11 @@ machine_kexec(struct kimage *image) void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; +#ifdef CONFIG_SMP + WARN(smp_crash_stop_failed(), + "Some CPUs may be stale, kdump will be unreliable.\n"); +#endif + if (image->type != KEXEC_TYPE_CRASH) kexec_method = control_code_buffer; else diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 67ec1fadcfe2..86acd690d529 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -322,10 +322,11 @@ subsys_initcall(topology_init); void free_initmem(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) - set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), - IS_ENABLED(CONFIG_64BIT) ? 
- set_memory_rw : set_memory_rw_nx); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); + if (IS_ENABLED(CONFIG_64BIT)) + set_kernel_memory(__init_begin, __init_end, set_memory_nx); + } free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 760a64518c58..8c3b59f1f9b8 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -12,6 +12,7 @@ #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/kexec.h> #include <linux/profile.h> #include <linux/smp.h> #include <linux/sched.h> @@ -22,11 +23,13 @@ #include <asm/sbi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/cpu_ops.h> enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, IPI_MAX @@ -71,6 +74,32 @@ static void ipi_stop(void) wait_for_interrupt(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); + +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_has_hotplug(cpu)) + cpu_ops[cpu]->cpu_stop(); +#endif + + for(;;) + wait_for_interrupt(); +} +#else +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + unreachable(); +} +#endif + static const struct riscv_ipi_ops *ipi_ops __ro_after_init; void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) @@ -124,8 +153,9 @@ void arch_irq_work_raise(void) void handle_IPI(struct pt_regs *regs) { - unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; - unsigned long *stats = ipi_data[smp_processor_id()].stats; + unsigned int cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[cpu].bits; + unsigned long *stats = ipi_data[cpu].stats; riscv_clear_ipi(); @@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs) ipi_stop(); } + if (ops & (1 << IPI_CPU_CRASH_STOP)) { + ipi_cpu_crash_stop(cpu, get_irq_regs()); + } + if (ops & (1 << IPI_IRQ_WORK)) { stats[IPI_IRQ_WORK]++; irq_work_run(); @@ -176,6 +210,7 @@ static const char * const ipi_names[] = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", }; @@ -235,6 +270,64 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + cpumask_t mask; + unsigned long timeout; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. 
+ */ + if (num_other_online_cpus() == 0) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); + + pr_crit("SMP: stopping secondary CPUs\n"); + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + void smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f3e96d60a2ff..7abd8e4c4df6 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -221,11 +221,29 @@ asmlinkage unsigned long get_overflow_stack(void) OVERFLOW_STACK_SIZE; } +/* + * A pseudo spinlock to protect the shadow stack from being used by multiple + * harts concurrently. This isn't a real spinlock because the lock side must + * be taken without a valid stack and only a single register, it's only taken + * while in the process of panicking anyway so the performance and error + * checking a proper spinlock gives us doesn't matter. + */ +unsigned long spin_shadow_stack; + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + /* + * We're done with the shadow stack by this point, as we're on the + * overflow stack. Tell any other concurrent overflowing harts that + * they can proceed with panicking by releasing the pseudo-spinlock. + * + * This pairs with an amoswap.aq in handle_kernel_stack_overflow. 
+ */ + smp_store_release(&spin_shadow_stack, 0); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index db6548509bb3..06e6b27f3bcc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,6 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f1cb9391190d..11e901286414 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd74fe664ed1..e4ef67e4da0a 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,7 +46,7 @@ struct save_area { u64 fprs[16]; u32 fpc; u32 prefix; - u64 todpreg; + u32 todpreg; u64 timer; u64 todcmp; u64 vxrs_low[16]; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index a779418ceba9..3bc9736bddb1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd) return pte_dirty(pte); } +#define pmd_young pmd_young static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9860ca5979f8..9e38ffaadb5d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -83,7 +83,7 @@ cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE $(call if_changed,image) - @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')' + @$(kecho) 'Kernel: $@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index f49bc3ec76e6..a269049a43ce 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; int ret; ret = hv_common_cpu_init(cpu); @@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; - if (!*hvp) { - if (hv_root_partition) { - /* - * For root partition we get the hypervisor provided VP assist - * page, instead of allocating a new page. - */ - rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - *hvp = memremap(msr.pfn << - HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, - PAGE_SIZE, MEMREMAP_WB); - } else { - /* - * The VP assist page is an "overlay" page (see Hyper-V TLFS's - * Section 5.2.1 "GPA Overlay Pages"). 
Here it must be zeroed - * out to make sure we always write the EOI MSR in - * hv_apic_eoi_write() *after* the EOI optimization is disabled - * in hv_cpu_die(), otherwise a CPU may not be stopped in the - * case of CPU offlining and the VM will hang. - */ + if (hv_root_partition) { + /* + * For root partition we get the hypervisor provided VP assist + * page, instead of allocating a new page. + */ + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, + PAGE_SIZE, MEMREMAP_WB); + } else { + /* + * The VP assist page is an "overlay" page (see Hyper-V TLFS's + * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed + * out to make sure we always write the EOI MSR in + * hv_apic_eoi_write() *after* the EOI optimization is disabled + * in hv_cpu_die(), otherwise a CPU may not be stopped in the + * case of CPU offlining and the VM will hang. + */ + if (!*hvp) *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); - if (*hvp) - msr.pfn = vmalloc_to_pfn(*hvp); - } - WARN_ON(!(*hvp)); - if (*hvp) { - msr.enable = 1; - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - } + if (*hvp) + msr.pfn = vmalloc_to_pfn(*hvp); + + } + if (!WARN_ON(!(*hvp))) { + msr.enable = 1; + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); } return hyperv_init_ghcb(); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b71f4f2ecdd5..b2da7cb64b31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -305,6 +305,9 @@ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ + +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c936ce9f0c47..dfdb103ae4f6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); -extern void write_spec_ctrl_current(u64 val, bool force); +extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5059799bebe3..286a71810f9e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd) return pmd_flags(pmd) & _PAGE_DIRTY; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; @@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void) return true; } +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3e3230cccaa7..6daf84229548 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ 
b/arch/x86/kernel/cpu/bugs.c @@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); static DEFINE_MUTEX(spec_ctrl_mutex); +/* Update SPEC_CTRL MSR and its cached copy unconditionally */ +static void update_spec_ctrl(u64 val) +{ + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -void write_spec_ctrl_current(u64 val, bool force) +void update_spec_ctrl_cond(u64 val) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; @@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force) * When KERNEL_IBRS this MSR is written on return-to-user, unless * forced the update can be delayed until that time. */ - if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) wrmsrl(MSR_IA32_SPEC_CTRL, val); } @@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) if (ia32_cap & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } switch (mode) { @@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); - write_spec_ctrl_current(val, true); + update_spec_ctrl(val); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index ec7bbac3a9f2..8009c8346d8f 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -58,24 +58,6 @@ static void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool tsx_ctrl_is_supported(void) -{ - u64 ia32_cap = x86_read_arch_cap_msr(); - - /* - * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this - * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. - * - * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a - * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES - * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get - * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, - * tsx= cmdline requests will do nothing on CPUs without - * MSR_IA32_TSX_CTRL support. 
- */ - return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); -} - static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) { if (boot_cpu_has_bug(X86_BUG_TAA)) @@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void) rdmsrl(MSR_TSX_FORCE_ABORT, msr); msr |= MSR_TFA_TSX_CPUID_CLEAR; wrmsrl(MSR_TSX_FORCE_ABORT, msr); - } else if (tsx_ctrl_is_supported()) { + } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) { rdmsrl(MSR_IA32_TSX_CTRL, msr); msr |= TSX_CTRL_CPUID_CLEAR; wrmsrl(MSR_IA32_TSX_CTRL, msr); @@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void) u64 mcu_opt_ctrl; /* Check if RTM_ALLOW exists */ - if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || + if (!boot_cpu_has_bug(X86_BUG_TAA) || + !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) || !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) return; @@ -191,7 +174,20 @@ void __init tsx_init(void) return; } - if (!tsx_ctrl_is_supported()) { + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) { + setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL); + } else { tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; return; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c21b7347a26d..e436c9c1ef3b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr, false); + update_spec_ctrl_cond(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1ccb769f62af..b6f96d47e596 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2443,6 +2443,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, { bool list_unstable, zapped_root = false; + lockdep_assert_held_write(&kvm->mmu_lock); trace_kvm_mmu_prepare_zap_page(sp); ++kvm->stat.mmu_shadow_zapped; *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); @@ -4262,14 +4263,14 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; - r = make_mmu_pages_available(vcpu); - if (r) - goto out_unlock; - - if (is_tdp_mmu_fault) + if (is_tdp_mmu_fault) { r = kvm_tdp_mmu_map(vcpu, fault); - else + } else { + r = make_mmu_pages_available(vcpu); + if (r) + goto out_unlock; r = __direct_map(vcpu, fault); + } out_unlock: if (is_tdp_mmu_fault) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 4c620999d230..995bc0f90759 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1091,6 +1091,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) static void nested_svm_triple_fault(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + + if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN)) + return; + + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN); } @@ -1125,6 +1131,9 @@ void svm_free_nested(struct vcpu_svm *svm) if 
(!svm->nested.initialized) return; + if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr)) + svm_switch_vmcb(svm, &svm->vmcb01); + svm_vcpu_free_msrpm(svm->nested.msrpm); svm->nested.msrpm = NULL; @@ -1143,9 +1152,6 @@ void svm_free_nested(struct vcpu_svm *svm) svm->nested.initialized = false; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. - */ void svm_leave_nested(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4b6d2b050e57..ce362e88a567 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -346,12 +346,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return 0; } -static int is_external_interrupt(u32 info) -{ - info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; - return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); -} - static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1438,6 +1432,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) */ svm_clear_current_vmcb(svm->vmcb); + svm_leave_nested(vcpu); svm_free_nested(svm); sev_free_vcpu(vcpu); @@ -3425,15 +3420,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } - if (is_external_interrupt(svm->vmcb->control.exit_int_info) && - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && - exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && - exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " - "exit_code 0x%x\n", - __func__, svm->vmcb->control.exit_int_info, - exit_code); - if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c62352dda6a..5b0d4859e4b7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4854,6 +4854,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu) { + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); } @@ -6440,9 +6441,6 @@ out: return kvm_state.size; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
- */ void vmx_leave_nested(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 490ec23c8450..2835bd796639 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -628,6 +628,12 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto ex->payload = payload; } +/* Forcibly leave the nested mode in cases like a vCPU reset */ +static void kvm_leave_nested(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops.nested_ops->leave_nested(vcpu); +} + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool has_payload, unsigned long payload, bool reinject) @@ -5195,7 +5201,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & KVM_VCPUEVENT_VALID_SMM) { if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { - kvm_x86_ops.nested_ops->leave_nested(vcpu); + kvm_leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); } @@ -9805,7 +9811,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) int kvm_check_nested_events(struct kvm_vcpu *vcpu) { - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { kvm_x86_ops.nested_ops->triple_fault(vcpu); return 1; } @@ -10560,15 +10566,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { - if (is_guest_mode(vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (is_guest_mode(vcpu)) kvm_x86_ops.nested_ops->triple_fault(vcpu); - } else { + + if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; vcpu->mmio_needed = 0; r = 0; - goto out; } + goto out; } if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { /* Page is swapped out. Do synthetic halt */ @@ -11997,8 +12004,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) WARN_ON_ONCE(!init_event && (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu))); + /* + * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's + * possible to INIT the vCPU while L2 is active. Force the vCPU back + * into L1 as EFER.SVME is cleared on INIT (along with all other EFER + * bits), i.e. virtualization is disabled. 
+ */ + if (is_guest_mode(vcpu)) + kvm_leave_nested(vcpu); + kvm_lapic_reset(vcpu, init_event); + WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu)); vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2dae413bd62a..f3098c0e386a 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -954,6 +954,14 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); } +static inline int max_evtchn_port(struct kvm *kvm) +{ + if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) + return EVTCHN_2L_NR_CHANNELS; + else + return COMPAT_EVTCHN_2L_NR_CHANNELS; +} + static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, evtchn_port_t *ports) { @@ -1042,6 +1050,10 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, *r = -EFAULT; goto out; } + if (ports[i] >= max_evtchn_port(vcpu->kvm)) { + *r = -EINVAL; + goto out; + } } if (sched_poll.nr_ports == 1) @@ -1215,6 +1227,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) bool longmode; u64 input, params[6], r = -ENOSYS; bool handled = false; + u8 cpl; input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX); @@ -1242,9 +1255,17 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) params[5] = (u64)kvm_r9_read(vcpu); } #endif + cpl = static_call(kvm_x86_get_cpl)(vcpu); trace_kvm_xen_hypercall(input, params[0], params[1], params[2], params[3], params[4], params[5]); + /* + * Only allow hypercall acceleration for CPL0. The rare hypercalls that + * are permitted in guest userspace can be handled by the VMM. + */ + if (unlikely(cpl > 0)) + goto handle_in_userspace; + switch (input) { case __HYPERVISOR_xen_version: if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) { @@ -1279,10 +1300,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) if (handled) return kvm_xen_hypercall_set_result(vcpu, r); +handle_in_userspace: vcpu->run->exit_reason = KVM_EXIT_XEN; vcpu->run->xen.type = KVM_EXIT_XEN_HCALL; vcpu->run->xen.u.hcall.longmode = longmode; - vcpu->run->xen.u.hcall.cpl = static_call(kvm_x86_get_cpl)(vcpu); + vcpu->run->xen.u.hcall.cpl = cpl; vcpu->run->xen.u.hcall.input = input; vcpu->run->xen.u.hcall.params[0] = params[0]; vcpu->run->xen.u.hcall.params[1] = params[1]; @@ -1297,14 +1319,6 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) return 0; } -static inline int max_evtchn_port(struct kvm *kvm) -{ - if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) - return EVTCHN_2L_NR_CHANNELS; - else - return COMPAT_EVTCHN_2L_NR_CHANNELS; -} - static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) { int poll_evtchn = vcpu->arch.xen.poll_evtchn; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 78c5bc654cff..6453fbaedb08 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -217,9 +217,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; + /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. 
+ */ + phys_addr &= PHYSICAL_PAGE_MASK; + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 4cd39f304e20..93ae33248f42 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -513,16 +513,23 @@ static int pm_cpu_check(const struct x86_cpu_id *c) static void pm_save_spec_msr(void) { - u32 spec_msr_id[] = { - MSR_IA32_SPEC_CTRL, - MSR_IA32_TSX_CTRL, - MSR_TSX_FORCE_ABORT, - MSR_IA32_MCU_OPT_CTRL, - MSR_AMD64_LS_CFG, - MSR_AMD64_DE_CFG, + struct msr_enumeration { + u32 msr_no; + u32 feature; + } msr_enum[] = { + { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL }, + { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL }, + { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT }, + { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL }, + { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD }, + { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC }, }; + int i; - msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); + for (i = 0; i < ARRAY_SIZE(msr_enum); i++) { + if (boot_cpu_has(msr_enum[i].feature)) + msr_build_context(&msr_enum[i].msr_no, 1); + } } static int pm_check_save_msr(void) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6a789cda68a5..228a6696d835 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4045,9 +4045,14 @@ EXPORT_SYMBOL(__blk_mq_alloc_disk); struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass) { + struct gendisk *disk; + if (!blk_get_queue(q)) return NULL; - return __alloc_disk_node(q, NUMA_NO_NODE, lkclass); + disk = __alloc_disk_node(q, NUMA_NO_NODE, lkclass); + if (!disk) + blk_put_queue(q); + return disk; } EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 23f49a2f4d14..6cceca64a6bc 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -562,17 +562,26 @@ static int initiator_cmp(void *priv, const struct list_head *a, { struct memory_initiator *ia; struct memory_initiator *ib; - unsigned long *p_nodes = priv; ia = list_entry(a, struct memory_initiator, node); ib = list_entry(b, struct memory_initiator, node); - set_bit(ia->processor_pxm, p_nodes); - set_bit(ib->processor_pxm, p_nodes); - return ia->processor_pxm - ib->processor_pxm; } +static int initiators_to_nodemask(unsigned long *p_nodes) +{ + struct memory_initiator *initiator; + + if (list_empty(&initiators)) + return -ENXIO; + + list_for_each_entry(initiator, &initiators, node) + set_bit(initiator->processor_pxm, p_nodes); + + return 0; +} + static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); @@ -609,7 +618,10 @@ static void hmat_register_target_initiators(struct memory_target *target) * initiators. 
*/ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + list_sort(NULL, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + if (!access0done) { for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; @@ -643,8 +655,9 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); - best = 0; + if (initiators_to_nodemask(p_nodes) < 0) + return; + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 6471b559230e..b46aa490b4cd 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -939,8 +939,8 @@ static int genpd_runtime_suspend(struct device *dev) return 0; genpd_lock(genpd); - gpd_data->rpm_pstate = genpd_drop_performance_state(dev); genpd_power_off(genpd, true, 0); + gpd_data->rpm_pstate = genpd_drop_performance_state(dev); genpd_unlock(genpd); return 0; @@ -978,9 +978,8 @@ static int genpd_runtime_resume(struct device *dev) goto out; genpd_lock(genpd); + genpd_restore_performance_state(dev, gpd_data->rpm_pstate); ret = genpd_power_on(genpd, 0); - if (!ret) - genpd_restore_performance_state(dev, gpd_data->rpm_pstate); genpd_unlock(genpd); if (ret) @@ -1018,8 +1017,8 @@ err_stop: err_poweroff: if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) { genpd_lock(genpd); - gpd_data->rpm_pstate = genpd_drop_performance_state(dev); genpd_power_off(genpd, true, 0); + gpd_data->rpm_pstate = genpd_drop_performance_state(dev); genpd_unlock(genpd); } @@ -1189,12 +1188,15 @@ static int genpd_prepare(struct device *dev) * genpd_finish_suspend - Completion of suspend or hibernation of device in an * I/O pm domain. * @dev: Device to suspend. - * @poweroff: Specifies if this is a poweroff_noirq or suspend_noirq callback. + * @suspend_noirq: Generic suspend_noirq callback. + * @resume_noirq: Generic resume_noirq callback. * * Stop the device and remove power from the domain if all devices in it have * been stopped. */ -static int genpd_finish_suspend(struct device *dev, bool poweroff) +static int genpd_finish_suspend(struct device *dev, + int (*suspend_noirq)(struct device *dev), + int (*resume_noirq)(struct device *dev)) { struct generic_pm_domain *genpd; int ret = 0; @@ -1203,10 +1205,7 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff) if (IS_ERR(genpd)) return -EINVAL; - if (poweroff) - ret = pm_generic_poweroff_noirq(dev); - else - ret = pm_generic_suspend_noirq(dev); + ret = suspend_noirq(dev); if (ret) return ret; @@ -1217,10 +1216,7 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff) !pm_runtime_status_suspended(dev)) { ret = genpd_stop_dev(genpd, dev); if (ret) { - if (poweroff) - pm_generic_restore_noirq(dev); - else - pm_generic_resume_noirq(dev); + resume_noirq(dev); return ret; } } @@ -1244,16 +1240,20 @@ static int genpd_suspend_noirq(struct device *dev) { dev_dbg(dev, "%s()\n", __func__); - return genpd_finish_suspend(dev, false); + return genpd_finish_suspend(dev, + pm_generic_suspend_noirq, + pm_generic_resume_noirq); } /** - * genpd_resume_noirq - Start of resume of device in an I/O PM domain. + * genpd_finish_resume - Completion of resume of device in an I/O PM domain. * @dev: Device to resume. + * @resume_noirq: Generic resume_noirq callback. 
* * Restore power to the device's PM domain, if necessary, and start the device. */ -static int genpd_resume_noirq(struct device *dev) +static int genpd_finish_resume(struct device *dev, + int (*resume_noirq)(struct device *dev)) { struct generic_pm_domain *genpd; int ret; @@ -1265,7 +1265,7 @@ static int genpd_resume_noirq(struct device *dev) return -EINVAL; if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd)) - return pm_generic_resume_noirq(dev); + return resume_noirq(dev); genpd_lock(genpd); genpd_sync_power_on(genpd, true, 0); @@ -1283,6 +1283,19 @@ static int genpd_resume_noirq(struct device *dev) } /** + * genpd_resume_noirq - Start of resume of device in an I/O PM domain. + * @dev: Device to resume. + * + * Restore power to the device's PM domain, if necessary, and start the device. + */ +static int genpd_resume_noirq(struct device *dev) +{ + dev_dbg(dev, "%s()\n", __func__); + + return genpd_finish_resume(dev, pm_generic_resume_noirq); +} + +/** * genpd_freeze_noirq - Completion of freezing a device in an I/O PM domain. * @dev: Device to freeze. * @@ -1293,24 +1306,11 @@ static int genpd_resume_noirq(struct device *dev) */ static int genpd_freeze_noirq(struct device *dev) { - const struct generic_pm_domain *genpd; - int ret = 0; - dev_dbg(dev, "%s()\n", __func__); - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - ret = pm_generic_freeze_noirq(dev); - if (ret) - return ret; - - if (genpd->dev_ops.stop && genpd->dev_ops.start && - !pm_runtime_status_suspended(dev)) - ret = genpd_stop_dev(genpd, dev); - - return ret; + return genpd_finish_suspend(dev, + pm_generic_freeze_noirq, + pm_generic_thaw_noirq); } /** @@ -1322,23 +1322,9 @@ static int genpd_freeze_noirq(struct device *dev) */ static int genpd_thaw_noirq(struct device *dev) { - const struct generic_pm_domain *genpd; - int ret = 0; - dev_dbg(dev, "%s()\n", __func__); - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - if (genpd->dev_ops.stop && genpd->dev_ops.start && - !pm_runtime_status_suspended(dev)) { - ret = genpd_start_dev(genpd, dev); - if (ret) - return ret; - } - - return pm_generic_thaw_noirq(dev); + return genpd_finish_resume(dev, pm_generic_thaw_noirq); } /** @@ -1353,7 +1339,9 @@ static int genpd_poweroff_noirq(struct device *dev) { dev_dbg(dev, "%s()\n", __func__); - return genpd_finish_suspend(dev, true); + return genpd_finish_suspend(dev, + pm_generic_poweroff_noirq, + pm_generic_restore_noirq); } /** @@ -1365,40 +1353,9 @@ static int genpd_poweroff_noirq(struct device *dev) */ static int genpd_restore_noirq(struct device *dev) { - struct generic_pm_domain *genpd; - int ret = 0; - dev_dbg(dev, "%s()\n", __func__); - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - /* - * At this point suspended_count == 0 means we are being run for the - * first time for the given domain in the present cycle. - */ - genpd_lock(genpd); - if (genpd->suspended_count++ == 0) { - /* - * The boot kernel might put the domain into arbitrary state, - * so make it appear as powered off to genpd_sync_power_on(), - * so that it tries to power it on in case it was really off. 
- */ - genpd->status = GENPD_STATE_OFF; - } - - genpd_sync_power_on(genpd, true, 0); - genpd_unlock(genpd); - - if (genpd->dev_ops.stop && genpd->dev_ops.start && - !pm_runtime_status_suspended(dev)) { - ret = genpd_start_dev(genpd, dev); - if (ret) - return ret; - } - - return pm_generic_restore_noirq(dev); + return genpd_finish_resume(dev, pm_generic_restore_noirq); } /** @@ -2749,17 +2706,6 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, dev->pm_domain->detach = genpd_dev_pm_detach; dev->pm_domain->sync = genpd_dev_pm_sync; - if (power_on) { - genpd_lock(pd); - ret = genpd_power_on(pd, 0); - genpd_unlock(pd); - } - - if (ret) { - genpd_remove_device(pd, dev); - return -EPROBE_DEFER; - } - /* Set the default performance state */ pstate = of_get_required_opp_performance_state(dev->of_node, index); if (pstate < 0 && pstate != -ENODEV && pstate != -EOPNOTSUPP) { @@ -2771,6 +2717,24 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, goto err; dev_gpd_data(dev)->default_pstate = pstate; } + + if (power_on) { + genpd_lock(pd); + ret = genpd_power_on(pd, 0); + genpd_unlock(pd); + } + + if (ret) { + /* Drop the default performance state */ + if (dev_gpd_data(dev)->default_pstate) { + dev_pm_genpd_set_performance_state(dev, 0); + dev_gpd_data(dev)->default_pstate = 0; + } + + genpd_remove_device(pd, dev); + return -EPROBE_DEFER; + } + return 1; err: diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index b52049098d4e..50e726b6c2cf 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -243,8 +243,7 @@ void pm_runtime_set_memalloc_noio(struct device *dev, bool enable) * flag was set by any one of the descendants. */ if (!dev || (!enable && - device_for_each_child(dev, NULL, - dev_memalloc_noio))) + device_for_each_child(dev, NULL, dev_memalloc_noio))) break; } mutex_unlock(&dev_hotplug_mutex); @@ -265,15 +264,13 @@ static int rpm_check_suspend_allowed(struct device *dev) retval = -EACCES; else if (atomic_read(&dev->power.usage_count)) retval = -EAGAIN; - else if (!dev->power.ignore_children && - atomic_read(&dev->power.child_count)) + else if (!dev->power.ignore_children && atomic_read(&dev->power.child_count)) retval = -EBUSY; /* Pending resume requests take precedence over suspends. */ - else if ((dev->power.deferred_resume - && dev->power.runtime_status == RPM_SUSPENDING) - || (dev->power.request_pending - && dev->power.request == RPM_REQ_RESUME)) + else if ((dev->power.deferred_resume && + dev->power.runtime_status == RPM_SUSPENDING) || + (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME)) retval = -EAGAIN; else if (__dev_pm_qos_resume_latency(dev) == 0) retval = -EPERM; @@ -404,9 +401,9 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) * * Do that if resume fails too. */ - if (use_links - && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) - || (dev->power.runtime_status == RPM_RESUMING && retval))) { + if (use_links && + ((dev->power.runtime_status == RPM_SUSPENDING && !retval) || + (dev->power.runtime_status == RPM_RESUMING && retval))) { idx = device_links_read_lock(); __rpm_put_suppliers(dev, false); @@ -422,6 +419,38 @@ fail: } /** + * rpm_callback - Run a given runtime PM callback for a given device. + * @cb: Runtime PM callback to run. + * @dev: Device to run the callback for. 
+ */ +static int rpm_callback(int (*cb)(struct device *), struct device *dev) +{ + int retval; + + if (dev->power.memalloc_noio) { + unsigned int noio_flag; + + /* + * Deadlock might be caused if memory allocation with + * GFP_KERNEL happens inside runtime_suspend and + * runtime_resume callbacks of one block device's + * ancestor or the block device itself. Network + * device might be thought as part of iSCSI block + * device, so network device and its ancestor should + * be marked as memalloc_noio too. + */ + noio_flag = memalloc_noio_save(); + retval = __rpm_callback(cb, dev); + memalloc_noio_restore(noio_flag); + } else { + retval = __rpm_callback(cb, dev); + } + + dev->power.runtime_error = retval; + return retval != -EACCES ? retval : -EIO; +} + +/** * rpm_idle - Notify device bus type if the device can be suspended. * @dev: Device to notify the bus type about. * @rpmflags: Flag bits. @@ -459,6 +488,7 @@ static int rpm_idle(struct device *dev, int rpmflags) /* Act as though RPM_NOWAIT is always set. */ else if (dev->power.idle_notification) retval = -EINPROGRESS; + if (retval) goto out; @@ -484,7 +514,17 @@ static int rpm_idle(struct device *dev, int rpmflags) dev->power.idle_notification = true; - retval = __rpm_callback(callback, dev); + if (dev->power.irq_safe) + spin_unlock(&dev->power.lock); + else + spin_unlock_irq(&dev->power.lock); + + retval = callback(dev); + + if (dev->power.irq_safe) + spin_lock(&dev->power.lock); + else + spin_lock_irq(&dev->power.lock); dev->power.idle_notification = false; wake_up_all(&dev->power.wait_queue); @@ -495,38 +535,6 @@ static int rpm_idle(struct device *dev, int rpmflags) } /** - * rpm_callback - Run a given runtime PM callback for a given device. - * @cb: Runtime PM callback to run. - * @dev: Device to run the callback for. - */ -static int rpm_callback(int (*cb)(struct device *), struct device *dev) -{ - int retval; - - if (dev->power.memalloc_noio) { - unsigned int noio_flag; - - /* - * Deadlock might be caused if memory allocation with - * GFP_KERNEL happens inside runtime_suspend and - * runtime_resume callbacks of one block device's - * ancestor or the block device itself. Network - * device might be thought as part of iSCSI block - * device, so network device and its ancestor should - * be marked as memalloc_noio too. - */ - noio_flag = memalloc_noio_save(); - retval = __rpm_callback(cb, dev); - memalloc_noio_restore(noio_flag); - } else { - retval = __rpm_callback(cb, dev); - } - - dev->power.runtime_error = retval; - return retval != -EACCES ? retval : -EIO; -} - -/** * rpm_suspend - Carry out runtime suspend of given device. * @dev: Device to suspend. * @rpmflags: Flag bits. @@ -564,12 +572,12 @@ static int rpm_suspend(struct device *dev, int rpmflags) /* Synchronous suspends are not allowed in the RPM_RESUMING state. */ if (dev->power.runtime_status == RPM_RESUMING && !(rpmflags & RPM_ASYNC)) retval = -EAGAIN; + if (retval) goto out; /* If the autosuspend_delay time hasn't expired yet, reschedule. */ - if ((rpmflags & RPM_AUTO) - && dev->power.runtime_status != RPM_SUSPENDING) { + if ((rpmflags & RPM_AUTO) && dev->power.runtime_status != RPM_SUSPENDING) { u64 expires = pm_runtime_autosuspend_expiration(dev); if (expires != 0) { @@ -584,7 +592,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) * rest. */ if (!(dev->power.timer_expires && - dev->power.timer_expires <= expires)) { + dev->power.timer_expires <= expires)) { /* * We add a slack of 25% to gather wakeups * without sacrificing the granularity. 
@@ -594,9 +602,9 @@ static int rpm_suspend(struct device *dev, int rpmflags) dev->power.timer_expires = expires; hrtimer_start_range_ns(&dev->power.suspend_timer, - ns_to_ktime(expires), - slack, - HRTIMER_MODE_ABS); + ns_to_ktime(expires), + slack, + HRTIMER_MODE_ABS); } dev->power.timer_autosuspends = 1; goto out; @@ -787,8 +795,8 @@ static int rpm_resume(struct device *dev, int rpmflags) goto out; } - if (dev->power.runtime_status == RPM_RESUMING - || dev->power.runtime_status == RPM_SUSPENDING) { + if (dev->power.runtime_status == RPM_RESUMING || + dev->power.runtime_status == RPM_SUSPENDING) { DEFINE_WAIT(wait); if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) { @@ -815,8 +823,8 @@ static int rpm_resume(struct device *dev, int rpmflags) for (;;) { prepare_to_wait(&dev->power.wait_queue, &wait, TASK_UNINTERRUPTIBLE); - if (dev->power.runtime_status != RPM_RESUMING - && dev->power.runtime_status != RPM_SUSPENDING) + if (dev->power.runtime_status != RPM_RESUMING && + dev->power.runtime_status != RPM_SUSPENDING) break; spin_unlock_irq(&dev->power.lock); @@ -836,9 +844,9 @@ static int rpm_resume(struct device *dev, int rpmflags) */ if (dev->power.no_callbacks && !parent && dev->parent) { spin_lock_nested(&dev->parent->power.lock, SINGLE_DEPTH_NESTING); - if (dev->parent->power.disable_depth > 0 - || dev->parent->power.ignore_children - || dev->parent->power.runtime_status == RPM_ACTIVE) { + if (dev->parent->power.disable_depth > 0 || + dev->parent->power.ignore_children || + dev->parent->power.runtime_status == RPM_ACTIVE) { atomic_inc(&dev->parent->power.child_count); spin_unlock(&dev->parent->power.lock); retval = 1; @@ -867,6 +875,7 @@ static int rpm_resume(struct device *dev, int rpmflags) parent = dev->parent; if (dev->power.irq_safe) goto skip_parent; + spin_unlock(&dev->power.lock); pm_runtime_get_noresume(parent); @@ -876,8 +885,8 @@ static int rpm_resume(struct device *dev, int rpmflags) * Resume the parent if it has runtime PM enabled and not been * set to ignore its children. */ - if (!parent->power.disable_depth - && !parent->power.ignore_children) { + if (!parent->power.disable_depth && + !parent->power.ignore_children) { rpm_resume(parent, 0); if (parent->power.runtime_status != RPM_ACTIVE) retval = -EBUSY; @@ -887,6 +896,7 @@ static int rpm_resume(struct device *dev, int rpmflags) spin_lock(&dev->power.lock); if (retval) goto out; + goto repeat; } skip_parent: @@ -1291,9 +1301,9 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) * not active, has runtime PM enabled and the * 'power.ignore_children' flag unset. */ - if (!parent->power.disable_depth - && !parent->power.ignore_children - && parent->power.runtime_status != RPM_ACTIVE) { + if (!parent->power.disable_depth && + !parent->power.ignore_children && + parent->power.runtime_status != RPM_ACTIVE) { dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n", dev_name(dev), dev_name(parent)); @@ -1358,9 +1368,9 @@ static void __pm_runtime_barrier(struct device *dev) dev->power.request_pending = false; } - if (dev->power.runtime_status == RPM_SUSPENDING - || dev->power.runtime_status == RPM_RESUMING - || dev->power.idle_notification) { + if (dev->power.runtime_status == RPM_SUSPENDING || + dev->power.runtime_status == RPM_RESUMING || + dev->power.idle_notification) { DEFINE_WAIT(wait); /* Suspend, wake-up or idle notification in progress. 
*/ @@ -1445,8 +1455,8 @@ void __pm_runtime_disable(struct device *dev, bool check_resume) * means there probably is some I/O to process and disabling runtime PM * shouldn't prevent the device from processing the I/O. */ - if (check_resume && dev->power.request_pending - && dev->power.request == RPM_REQ_RESUME) { + if (check_resume && dev->power.request_pending && + dev->power.request == RPM_REQ_RESUME) { /* * Prevent suspends and idle notifications from being carried * out after we have woken up the device. @@ -1606,6 +1616,7 @@ void pm_runtime_irq_safe(struct device *dev) { if (dev->parent) pm_runtime_get_sync(dev->parent); + spin_lock_irq(&dev->power.lock); dev->power.irq_safe = 1; spin_unlock_irq(&dev->power.lock); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f96cb01e9604..e9de9d846b73 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -57,10 +57,8 @@ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD) struct ublk_rq_data { - union { - struct callback_head work; - struct llist_node node; - }; + struct llist_node node; + struct callback_head work; }; struct ublk_uring_cmd_pdu { @@ -766,15 +764,31 @@ static inline void __ublk_rq_task_work(struct request *req) ubq_complete_io_cmd(io, UBLK_IO_RES_OK); } +static inline void ublk_forward_io_cmds(struct ublk_queue *ubq) +{ + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data, *tmp; + + io_cmds = llist_reverse_order(io_cmds); + llist_for_each_entry_safe(data, tmp, io_cmds, node) + __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); +} + +static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) +{ + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data, *tmp; + + llist_for_each_entry_safe(data, tmp, io_cmds, node) + __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); +} + static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data; - llist_for_each_entry(data, io_cmds, node) - __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); + ublk_forward_io_cmds(ubq); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -782,14 +796,20 @@ static void ublk_rq_task_work_fn(struct callback_head *work) struct ublk_rq_data *data = container_of(work, struct ublk_rq_data, work); struct request *req = blk_mq_rq_from_pdu(data); + struct ublk_queue *ubq = req->mq_hctx->driver_data; - __ublk_rq_task_work(req); + ublk_forward_io_cmds(ubq); } -static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) +static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { - struct ublk_io *io = &ubq->ios[rq->tag]; + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); + struct ublk_io *io; + if (!llist_add(&data->node, &ubq->io_cmds)) + return; + + io = &ubq->ios[rq->tag]; /* * If the check pass, we know that this is a re-issued request aborted * previously in monitor_work because the ubq_daemon(cmd's task) is @@ -803,11 +823,11 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) * guarantees that here is a re-issued request aborted previously. 
*/ if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) { - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data; - - llist_for_each_entry(data, io_cmds, node) - __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); + ublk_abort_io_cmds(ubq); + } else if (ublk_can_use_task_work(ubq)) { + if (task_work_add(ubq->ubq_daemon, &data->work, + TWA_SIGNAL_NO_IPI)) + ublk_abort_io_cmds(ubq); } else { struct io_uring_cmd *cmd = io->cmd; struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); @@ -817,23 +837,6 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) } } -static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq, - bool last) -{ - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); - - if (ublk_can_use_task_work(ubq)) { - enum task_work_notify_mode notify_mode = last ? - TWA_SIGNAL_NO_IPI : TWA_NONE; - - if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) - __ublk_abort_rq(ubq, rq); - } else { - if (llist_add(&data->node, &ubq->io_cmds)) - ublk_submit_cmd(ubq, rq); - } -} - static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -865,19 +868,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } - ublk_queue_cmd(ubq, rq, bd->last); + ublk_queue_cmd(ubq, rq); return BLK_STS_OK; } -static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx) -{ - struct ublk_queue *ubq = hctx->driver_data; - - if (ublk_can_use_task_work(ubq)) - __set_notify_signal(ubq->ubq_daemon); -} - static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, unsigned int hctx_idx) { @@ -899,7 +894,6 @@ static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req, static const struct blk_mq_ops ublk_mq_ops = { .queue_rq = ublk_queue_rq, - .commit_rqs = ublk_commit_rqs, .init_hctx = ublk_init_hctx, .init_request = ublk_init_rq, }; @@ -1197,7 +1191,7 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - ublk_queue_cmd(ubq, req, true); + ublk_queue_cmd(ubq, req); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) diff --git a/drivers/bus/intel-ixp4xx-eb.c b/drivers/bus/intel-ixp4xx-eb.c index a4388440aca7..91db001eb69a 100644 --- a/drivers/bus/intel-ixp4xx-eb.c +++ b/drivers/bus/intel-ixp4xx-eb.c @@ -49,7 +49,7 @@ #define IXP4XX_EXP_SIZE_SHIFT 10 #define IXP4XX_EXP_CNFG_0 BIT(9) /* Always zero */ #define IXP43X_EXP_SYNC_INTEL BIT(8) /* Only on IXP43x */ -#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x */ +#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x, dangerous to touch on IXP42x */ #define IXP4XX_EXP_BYTE_RD16 BIT(6) #define IXP4XX_EXP_HRDY_POL BIT(5) /* Only on IXP42x */ #define IXP4XX_EXP_MUX_EN BIT(4) @@ -57,8 +57,6 @@ #define IXP4XX_EXP_WORD BIT(2) /* Always zero */ #define IXP4XX_EXP_WR_EN BIT(1) #define IXP4XX_EXP_BYTE_EN BIT(0) -#define IXP42X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(8)|BIT(7)|IXP4XX_EXP_WORD) -#define IXP43X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(5)|IXP4XX_EXP_WORD) #define IXP4XX_EXP_CNFG0 0x20 #define IXP4XX_EXP_CNFG0_MEM_MAP BIT(31) @@ -252,10 +250,9 @@ static void ixp4xx_exp_setup_chipselect(struct ixp4xx_eb *eb, cs_cfg |= val << IXP4XX_EXP_CYC_TYPE_SHIFT; } - if (eb->is_42x) - cs_cfg &= ~IXP42X_RESERVED; if (eb->is_43x) { - cs_cfg &= ~IXP43X_RESERVED; + /* Should always be zero */ + cs_cfg &= ~IXP4XX_EXP_WORD; /* * This bit for Intel 
strata flash is currently unused, but let's * report it if we find one. diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 4cd2e127946e..3aa91aed3bf7 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -267,6 +267,9 @@ EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register); /* common code that starts a transfer */ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) { + u32 int_mask, status; + bool timeout; + if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) { dev_dbg(rsb->dev, "RSB transfer still in progress\n"); return -EBUSY; @@ -274,13 +277,23 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) reinit_completion(&rsb->complete); - writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER, - rsb->regs + RSB_INTE); + int_mask = RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER; + writel(int_mask, rsb->regs + RSB_INTE); writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB, rsb->regs + RSB_CTRL); - if (!wait_for_completion_io_timeout(&rsb->complete, - msecs_to_jiffies(100))) { + if (irqs_disabled()) { + timeout = readl_poll_timeout_atomic(rsb->regs + RSB_INTS, + status, (status & int_mask), + 10, 100000); + writel(status, rsb->regs + RSB_INTS); + } else { + timeout = !wait_for_completion_io_timeout(&rsb->complete, + msecs_to_jiffies(100)); + status = rsb->status; + } + + if (timeout) { dev_dbg(rsb->dev, "RSB timeout\n"); /* abort the transfer */ @@ -292,18 +305,18 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) return -ETIMEDOUT; } - if (rsb->status & RSB_INTS_LOAD_BSY) { + if (status & RSB_INTS_LOAD_BSY) { dev_dbg(rsb->dev, "RSB busy\n"); return -EBUSY; } - if (rsb->status & RSB_INTS_TRANS_ERR) { - if (rsb->status & RSB_INTS_TRANS_ERR_ACK) { + if (status & RSB_INTS_TRANS_ERR) { + if (status & RSB_INTS_TRANS_ERR_ACK) { dev_dbg(rsb->dev, "RSB slave nack\n"); return -EINVAL; } - if (rsb->status & RSB_INTS_TRANS_ERR_DATA) { + if (status & RSB_INTS_TRANS_ERR_DATA) { dev_dbg(rsb->dev, "RSB transfer data error\n"); return -EIO; } @@ -812,14 +825,6 @@ static int sunxi_rsb_remove(struct platform_device *pdev) return 0; } -static void sunxi_rsb_shutdown(struct platform_device *pdev) -{ - struct sunxi_rsb *rsb = platform_get_drvdata(pdev); - - pm_runtime_disable(&pdev->dev); - sunxi_rsb_hw_exit(rsb); -} - static const struct dev_pm_ops sunxi_rsb_dev_pm_ops = { SET_RUNTIME_PM_OPS(sunxi_rsb_runtime_suspend, sunxi_rsb_runtime_resume, NULL) @@ -835,7 +840,6 @@ MODULE_DEVICE_TABLE(of, sunxi_rsb_of_match_table); static struct platform_driver sunxi_rsb_driver = { .probe = sunxi_rsb_probe, .remove = sunxi_rsb_remove, - .shutdown = sunxi_rsb_shutdown, .driver = { .name = RSB_CTRL_NAME, .of_match_table = sunxi_rsb_of_match_table, diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 1621ce818705..d69905233aff 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev) !pm_suspend_via_firmware()) goto suspended; - if (!tpm_chip_start(chip)) { + rc = tpm_try_get_ops(chip); + if (!rc) { if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_STATE); else rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); - tpm_chip_stop(chip); + tpm_put_ops(chip); } suspended: diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index b174f727a8ef..16870943a13e 100644 --- a/drivers/clk/at91/at91rm9200.c +++ b/drivers/clk/at91/at91rm9200.c @@ -40,7 +40,7 @@ static const struct clk_pll_characteristics 
rm9200_pll_characteristics = { }; static const struct sck at91rm9200_systemck[] = { - { .n = "udpck", .p = "usbck", .id = 2 }, + { .n = "udpck", .p = "usbck", .id = 1 }, { .n = "uhpck", .p = "usbck", .id = 4 }, { .n = "pck0", .p = "prog0", .id = 8 }, { .n = "pck1", .p = "prog1", .id = 9 }, diff --git a/drivers/clk/qcom/gcc-sc8280xp.c b/drivers/clk/qcom/gcc-sc8280xp.c index a18ed88f3b82..b3198784e1c3 100644 --- a/drivers/clk/qcom/gcc-sc8280xp.c +++ b/drivers/clk/qcom/gcc-sc8280xp.c @@ -5364,6 +5364,8 @@ static struct clk_branch gcc_ufs_1_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_1_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5432,6 +5434,8 @@ static struct clk_branch gcc_ufs_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5848,6 +5852,8 @@ static struct clk_branch gcc_ufs_ref_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_ref_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 7cf5e130e92f..0f21a8a767ac 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/pm_domain.h> -#include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/reset-controller.h> @@ -56,22 +55,6 @@ enum gdsc_status { GDSC_ON }; -static int gdsc_pm_runtime_get(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_resume_and_get(sc->dev); -} - -static int gdsc_pm_runtime_put(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_put_sync(sc->dev); -} - /* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */ static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status) { @@ -271,8 +254,9 @@ static void gdsc_retain_ff_on(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); } -static int _gdsc_enable(struct gdsc *sc) +static int gdsc_enable(struct generic_pm_domain *domain) { + struct gdsc *sc = domain_to_gdsc(domain); int ret; if (sc->pwrsts == PWRSTS_ON) @@ -328,22 +312,11 @@ static int _gdsc_enable(struct gdsc *sc) return 0; } -static int gdsc_enable(struct generic_pm_domain *domain) +static int gdsc_disable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); int ret; - ret = gdsc_pm_runtime_get(sc); - if (ret) - return ret; - - return _gdsc_enable(sc); -} - -static int _gdsc_disable(struct gdsc *sc) -{ - int ret; - if (sc->pwrsts == PWRSTS_ON) return gdsc_assert_reset(sc); @@ -388,18 +361,6 @@ static int _gdsc_disable(struct gdsc *sc) return 0; } -static int gdsc_disable(struct generic_pm_domain *domain) -{ - struct gdsc *sc = domain_to_gdsc(domain); - int ret; - - ret = _gdsc_disable(sc); - - gdsc_pm_runtime_put(sc); - - return ret; -} - static int gdsc_init(struct gdsc *sc) { u32 mask, val; @@ -447,11 +408,6 @@ static int gdsc_init(struct gdsc *sc) return ret; } - /* ...and the power-domain */ - ret = gdsc_pm_runtime_get(sc); - if (ret) - goto err_disable_supply; - /* * Votable GDSCs can be ON due to Vote from other masters. * If a Votable GDSC is ON, make sure we have a Vote. 
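
Reviewer's note, not part of the patch: the "Vote" in the comment above is taken in the next hunk via gdsc_update_collapse_bit(sc, false), whose body is outside the quoted context. A rough sketch of what that helper is assumed to do, clearing or setting the software-collapse bit that constitutes this master's vote (the register and bit names are assumptions based on the rest of this driver, not shown in the diff):

	/* Illustrative only: a vote is expressed by (un)setting SW_COLLAPSE. */
	static int gdsc_update_collapse_bit(struct gdsc *sc, bool collapse)
	{
		u32 mask = SW_COLLAPSE_MASK;	/* assumed bit name */

		/* Setting the bit votes the GDSC off; clearing it votes it on. */
		return regmap_update_bits(sc->regmap, sc->gdscr, mask,
					  collapse ? mask : 0);
	}

Under that reading, gdsc_init() calling it with false ensures that when another master already has the domain powered, this master's collapse vote is not the one that would let it drop.
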
@@ -459,14 +415,14 @@ static int gdsc_init(struct gdsc *sc) if (sc->flags & VOTABLE) { ret = gdsc_update_collapse_bit(sc, false); if (ret) - goto err_put_rpm; + goto err_disable_supply; } /* Turn on HW trigger mode if supported */ if (sc->flags & HW_CTRL) { ret = gdsc_hwctrl(sc, true); if (ret < 0) - goto err_put_rpm; + goto err_disable_supply; } /* @@ -496,13 +452,10 @@ static int gdsc_init(struct gdsc *sc) ret = pm_genpd_init(&sc->pd, NULL, !on); if (ret) - goto err_put_rpm; + goto err_disable_supply; return 0; -err_put_rpm: - if (on) - gdsc_pm_runtime_put(sc); err_disable_supply: if (on && sc->rsupply) regulator_disable(sc->rsupply); @@ -541,8 +494,6 @@ int gdsc_register(struct gdsc_desc *desc, for (i = 0; i < num; i++) { if (!scs[i]) continue; - if (pm_runtime_enabled(dev)) - scs[i]->dev = dev; scs[i]->regmap = regmap; scs[i]->rcdev = rcdev; ret = gdsc_init(scs[i]); diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 981a12c8502d..803512688336 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -30,7 +30,6 @@ struct reset_controller_dev; * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller - * @dev: the device holding the GDSC, used for pm_runtime calls */ struct gdsc { struct generic_pm_domain pd; @@ -74,7 +73,6 @@ struct gdsc { const char *supply; struct regulator *rsupply; - struct device *dev; }; struct gdsc_desc { diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c index 273f77d54dab..e6d6cbf8c4e6 100644 --- a/drivers/clk/samsung/clk-exynos-clkout.c +++ b/drivers/clk/samsung/clk-exynos-clkout.c @@ -81,17 +81,19 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids); static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask) { const struct exynos_clkout_variant *variant; + const struct of_device_id *match; if (!dev->parent) { dev_err(dev, "not instantiated from MFD\n"); return -EINVAL; } - variant = of_device_get_match_data(dev->parent); - if (!variant) { + match = of_match_device(exynos_clkout_ids, dev->parent); + if (!match) { dev_err(dev, "cannot match parent device\n"); return -EINVAL; } + variant = match->data; *mux_mask = variant->mux_mask; diff --git a/drivers/clk/samsung/clk-exynos7885.c b/drivers/clk/samsung/clk-exynos7885.c index 62ce6814f141..0d2a950ed184 100644 --- a/drivers/clk/samsung/clk-exynos7885.c +++ b/drivers/clk/samsung/clk-exynos7885.c @@ -231,7 +231,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED0_DIV2, 0, 1), DIV(CLK_DOUT_SHARED0_DIV3, "dout_shared0_div3", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "fout_shared0_pll", + DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "dout_shared0_div2", CLK_CON_DIV_PLL_SHARED0_DIV4, 0, 1), DIV(CLK_DOUT_SHARED0_DIV5, "dout_shared0_div5", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV5, 0, 3), @@ -239,7 +239,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED1_DIV2, 0, 1), DIV(CLK_DOUT_SHARED1_DIV3, "dout_shared1_div3", "fout_shared1_pll", CLK_CON_DIV_PLL_SHARED1_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "fout_shared1_pll", + DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "dout_shared1_div2", CLK_CON_DIV_PLL_SHARED1_DIV4, 0, 1), /* CORE */ diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a7ff77550e17..933bb960490d 100644 --- 
a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -806,6 +806,9 @@ static u64 __arch_timer_check_delta(void) /* * XGene-1 implements CVAL in terms of TVAL, meaning * that the maximum timer range is 32bit. Shame on them. + * + * Note that TVAL is signed, thus has only 31 of its + * 32 bits to express magnitude. */ MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM, APM_CPU_PART_POTENZA)), @@ -813,8 +816,8 @@ static u64 __arch_timer_check_delta(void) }; if (is_midr_in_range_list(read_cpuid_id(), broken_cval_midrs)) { - pr_warn_once("Broken CNTx_CVAL_EL1, limiting width to 32bits"); - return CLOCKSOURCE_MASK(32); + pr_warn_once("Broken CNTx_CVAL_EL1, using 31 bit TVAL instead.\n"); + return CLOCKSOURCE_MASK(31); } #endif return CLOCKSOURCE_MASK(arch_counter_get_width()); diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 969a552da8d2..a0d66fabf073 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -51,7 +51,7 @@ static int riscv_clock_next_event(unsigned long delta, static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, + .features = CLOCK_EVT_FEAT_ONESHOT, .rating = 100, .set_next_event = riscv_clock_next_event, }; diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 82e5de1f6f8c..0a0352d8fa45 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -41,6 +41,15 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM To compile this driver as a module, choose M here: the module will be called sun50i-cpufreq-nvmem. +config ARM_APPLE_SOC_CPUFREQ + tristate "Apple Silicon SoC CPUFreq support" + depends on ARCH_APPLE || (COMPILE_TEST && 64BIT) + select PM_OPP + default ARCH_APPLE + help + This adds the CPUFreq driver for Apple Silicon machines + (e.g. Apple M1). + config ARM_ARMADA_37XX_CPUFREQ tristate "Armada 37xx CPUFreq support" depends on ARCH_MVEBU && CPUFREQ_DT @@ -340,8 +349,8 @@ config ARM_TEGRA194_CPUFREQ config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" - depends on ARCH_OMAP2PLUS - default ARCH_OMAP2PLUS + depends on ARCH_OMAP2PLUS || ARCH_K3 + default y help This driver enables valid OPPs on the running platform based on values contained within the SoC in use. Enable this in order to diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 310779b07daf..00476e94db90 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -35,7 +35,7 @@ config X86_PCC_CPUFREQ If in doubt, say N. 
config X86_AMD_PSTATE - tristate "AMD Processor P-State driver" + bool "AMD Processor P-State driver" depends on X86 && ACPI select ACPI_PROCESSOR select ACPI_CPPC_LIB if X86_64 diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 49b98c62c5af..32a7029e25ed 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o ################################################################################## # ARM SoC drivers +obj-$(CONFIG_ARM_APPLE_SOC_CPUFREQ) += apple-soc-cpufreq.o obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o obj-$(CONFIG_ARM_ARMADA_8K_CPUFREQ) += armada-8k-cpufreq.o obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 1bb2b90ebb21..78adfb2ffff6 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -19,6 +19,7 @@ #include <linux/compiler.h> #include <linux/dmi.h> #include <linux/slab.h> +#include <linux/string_helpers.h> #include <linux/acpi.h> #include <linux/io.h> @@ -135,8 +136,8 @@ static int set_boost(struct cpufreq_policy *policy, int val) { on_each_cpu_mask(policy->cpus, boost_set_msr_each, (void *)(long)val, 1); - pr_debug("CPU %*pbl: Core Boosting %sabled.\n", - cpumask_pr_args(policy->cpus), val ? "en" : "dis"); + pr_debug("CPU %*pbl: Core Boosting %s.\n", + cpumask_pr_args(policy->cpus), str_enabled_disabled(val)); return 0; } @@ -535,15 +536,6 @@ static void free_acpi_perf_data(void) free_percpu(acpi_perf_data); } -static int cpufreq_boost_online(unsigned int cpu) -{ - /* - * On the CPU_UP path we simply keep the boost-disable flag - * in sync with the current global state. - */ - return boost_set_msr(acpi_cpufreq_driver.boost_enabled); -} - static int cpufreq_boost_down_prep(unsigned int cpu) { /* @@ -897,6 +889,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); + if (acpi_cpufreq_driver.set_boost) + set_boost(policy, acpi_cpufreq_driver.boost_enabled); + return result; err_unreg: @@ -916,6 +911,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) pr_debug("%s\n", __func__); + cpufreq_boost_down_prep(policy->cpu); policy->fast_switch_possible = false; policy->driver_data = NULL; acpi_processor_unregister_performance(data->acpi_perf_cpu); @@ -958,12 +954,8 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .attr = acpi_cpufreq_attr, }; -static enum cpuhp_state acpi_cpufreq_online; - static void __init acpi_cpufreq_boost_init(void) { - int ret; - if (!(boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA))) { pr_debug("Boost capabilities not present in the processor\n"); return; @@ -971,24 +963,6 @@ static void __init acpi_cpufreq_boost_init(void) acpi_cpufreq_driver.set_boost = set_boost; acpi_cpufreq_driver.boost_enabled = boost_state(0); - - /* - * This calls the online callback on all online cpu and forces all - * MSRs to the same value. 
- */ - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpufreq/acpi:online", - cpufreq_boost_online, cpufreq_boost_down_prep); - if (ret < 0) { - pr_err("acpi_cpufreq: failed to register hotplug callbacks\n"); - return; - } - acpi_cpufreq_online = ret; -} - -static void acpi_cpufreq_boost_exit(void) -{ - if (acpi_cpufreq_online > 0) - cpuhp_remove_state_nocalls(acpi_cpufreq_online); } static int __init acpi_cpufreq_init(void) @@ -1032,7 +1006,6 @@ static int __init acpi_cpufreq_init(void) ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) { free_acpi_perf_data(); - acpi_cpufreq_boost_exit(); } return ret; } @@ -1041,8 +1014,6 @@ static void __exit acpi_cpufreq_exit(void) { pr_debug("%s\n", __func__); - acpi_cpufreq_boost_exit(); - cpufreq_unregister_driver(&acpi_cpufreq_driver); free_acpi_perf_data(); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ace7d50cf2ac..204e39006dda 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -59,12 +59,8 @@ * we disable it by default to go acpi-cpufreq on these processors and add a * module parameter to be able to enable it manually for debugging. */ -static bool shared_mem = false; -module_param(shared_mem, bool, 0444); -MODULE_PARM_DESC(shared_mem, - "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)"); - static struct cpufreq_driver amd_pstate_driver; +static int cppc_load __initdata; static inline int pstate_enable(bool enable) { @@ -424,12 +420,22 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) amd_pstate_driver.boost_enabled = true; } +static void amd_perf_ctl_reset(unsigned int cpu) +{ + wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; struct device *dev; struct amd_cpudata *cpudata; + /* + * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, + * which is ideal for initialization process. 
+ */ + amd_perf_ctl_reset(policy->cpu); dev = get_cpu_device(policy->cpu); if (!dev) return -ENODEV; @@ -616,6 +622,15 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; + /* + * by default the pstate driver is disabled to load + * enable the amd_pstate passive mode driver explicitly + * with amd_pstate=passive in kernel command line + */ + if (!cppc_load) { + pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n"); + return -ENODEV; + } if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); @@ -630,13 +645,11 @@ static int __init amd_pstate_init(void) if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; - } else if (shared_mem) { + } else { + pr_debug("AMD CPPC shared memory based functionality is supported\n"); static_call_update(amd_pstate_enable, cppc_enable); static_call_update(amd_pstate_init_perf, cppc_init_perf); static_call_update(amd_pstate_update_perf, cppc_update_perf); - } else { - pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n"); - return -ENODEV; } /* enable amd pstate feature */ @@ -653,16 +666,22 @@ static int __init amd_pstate_init(void) return ret; } +device_initcall(amd_pstate_init); -static void __exit amd_pstate_exit(void) +static int __init amd_pstate_param(char *str) { - cpufreq_unregister_driver(&amd_pstate_driver); + if (!str) + return -EINVAL; - amd_pstate_enable(false); -} + if (!strcmp(str, "disable")) { + cppc_load = 0; + pr_info("driver is explicitly disabled\n"); + } else if (!strcmp(str, "passive")) + cppc_load = 1; -module_init(amd_pstate_init); -module_exit(amd_pstate_exit); + return 0; +} +early_param("amd_pstate", amd_pstate_param); MODULE_AUTHOR("Huang Rui <[email protected]>"); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 6448e03bcf48..59b19b9975e8 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -125,6 +125,8 @@ static int __init amd_freq_sensitivity_init(void) if (!pcidev) { if (!boot_cpu_has(X86_FEATURE_PROC_FEEDBACK)) return -ENODEV; + } else { + pci_dev_put(pcidev); } if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val)) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c new file mode 100644 index 000000000000..d1801281cdd9 --- /dev/null +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Apple SoC CPU cluster performance state driver + * + * Copyright The Asahi Linux Contributors + * + * Based on scpi-cpufreq.c + */ + +#include <linux/bitfield.h> +#include <linux/bitops.h> +#include <linux/cpu.h> +#include <linux/cpufreq.h> +#include <linux/cpumask.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/pm_opp.h> +#include <linux/slab.h> + +#define APPLE_DVFS_CMD 0x20 +#define APPLE_DVFS_CMD_BUSY BIT(31) +#define APPLE_DVFS_CMD_SET BIT(25) +#define APPLE_DVFS_CMD_PS2 GENMASK(16, 12) +#define APPLE_DVFS_CMD_PS1 GENMASK(4, 0) + +/* Same timebase as CPU counter (24MHz) */ +#define APPLE_DVFS_LAST_CHG_TIME 0x38 + +/* + * Apple ran out of bits and had to shift 
this in T8112... + */ +#define APPLE_DVFS_STATUS 0x50 +#define APPLE_DVFS_STATUS_CUR_PS_T8103 GENMASK(7, 4) +#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103 4 +#define APPLE_DVFS_STATUS_TGT_PS_T8103 GENMASK(3, 0) +#define APPLE_DVFS_STATUS_CUR_PS_T8112 GENMASK(9, 5) +#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112 5 +#define APPLE_DVFS_STATUS_TGT_PS_T8112 GENMASK(4, 0) + +/* + * Div is +1, base clock is 12MHz on existing SoCs. + * For documentation purposes. We use the OPP table to + * get the frequency. + */ +#define APPLE_DVFS_PLL_STATUS 0xc0 +#define APPLE_DVFS_PLL_FACTOR 0xc8 +#define APPLE_DVFS_PLL_FACTOR_MULT GENMASK(31, 16) +#define APPLE_DVFS_PLL_FACTOR_DIV GENMASK(15, 0) + +#define APPLE_DVFS_TRANSITION_TIMEOUT 100 + +struct apple_soc_cpufreq_info { + u64 max_pstate; + u64 cur_pstate_mask; + u64 cur_pstate_shift; +}; + +struct apple_cpu_priv { + struct device *cpu_dev; + void __iomem *reg_base; + const struct apple_soc_cpufreq_info *info; +}; + +static struct cpufreq_driver apple_soc_cpufreq_driver; + +static const struct apple_soc_cpufreq_info soc_t8103_info = { + .max_pstate = 15, + .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8103, + .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103, +}; + +static const struct apple_soc_cpufreq_info soc_t8112_info = { + .max_pstate = 31, + .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8112, + .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112, +}; + +static const struct apple_soc_cpufreq_info soc_default_info = { + .max_pstate = 15, + .cur_pstate_mask = 0, /* fallback */ +}; + +static const struct of_device_id apple_soc_cpufreq_of_match[] = { + { + .compatible = "apple,t8103-cluster-cpufreq", + .data = &soc_t8103_info, + }, + { + .compatible = "apple,t8112-cluster-cpufreq", + .data = &soc_t8112_info, + }, + { + .compatible = "apple,cluster-cpufreq", + .data = &soc_default_info, + }, + {} +}; + +static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); + struct apple_cpu_priv *priv = policy->driver_data; + struct cpufreq_frequency_table *p; + unsigned int pstate; + + if (priv->info->cur_pstate_mask) { + u64 reg = readq_relaxed(priv->reg_base + APPLE_DVFS_STATUS); + + pstate = (reg & priv->info->cur_pstate_mask) >> priv->info->cur_pstate_shift; + } else { + /* + * For the fallback case we might not know the layout of DVFS_STATUS, + * so just use the command register value (which ignores boost limitations). 
+ */ + u64 reg = readq_relaxed(priv->reg_base + APPLE_DVFS_CMD); + + pstate = FIELD_GET(APPLE_DVFS_CMD_PS1, reg); + } + + cpufreq_for_each_valid_entry(p, policy->freq_table) + if (p->driver_data == pstate) + return p->frequency; + + dev_err(priv->cpu_dev, "could not find frequency for pstate %d\n", + pstate); + return 0; +} + +static int apple_soc_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int index) +{ + struct apple_cpu_priv *priv = policy->driver_data; + unsigned int pstate = policy->freq_table[index].driver_data; + u64 reg; + + /* Fallback for newer SoCs */ + if (index > priv->info->max_pstate) + index = priv->info->max_pstate; + + if (readq_poll_timeout_atomic(priv->reg_base + APPLE_DVFS_CMD, reg, + !(reg & APPLE_DVFS_CMD_BUSY), 2, + APPLE_DVFS_TRANSITION_TIMEOUT)) { + return -EIO; + } + + reg &= ~(APPLE_DVFS_CMD_PS1 | APPLE_DVFS_CMD_PS2); + reg |= FIELD_PREP(APPLE_DVFS_CMD_PS1, pstate); + reg |= FIELD_PREP(APPLE_DVFS_CMD_PS2, pstate); + reg |= APPLE_DVFS_CMD_SET; + + writeq_relaxed(reg, priv->reg_base + APPLE_DVFS_CMD); + + return 0; +} + +static unsigned int apple_soc_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + if (apple_soc_cpufreq_set_target(policy, policy->cached_resolved_idx) < 0) + return 0; + + return policy->freq_table[policy->cached_resolved_idx].frequency; +} + +static int apple_soc_cpufreq_find_cluster(struct cpufreq_policy *policy, + void __iomem **reg_base, + const struct apple_soc_cpufreq_info **info) +{ + struct of_phandle_args args; + const struct of_device_id *match; + int ret = 0; + + ret = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", + "#performance-domain-cells", + policy->cpus, &args); + if (ret < 0) + return ret; + + match = of_match_node(apple_soc_cpufreq_of_match, args.np); + of_node_put(args.np); + if (!match) + return -ENODEV; + + *info = match->data; + + *reg_base = of_iomap(args.np, 0); + if (IS_ERR(*reg_base)) + return PTR_ERR(*reg_base); + + return 0; +} + +static struct freq_attr *apple_soc_cpufreq_hw_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, /* Filled in below if boost is enabled */ + NULL, +}; + +static int apple_soc_cpufreq_init(struct cpufreq_policy *policy) +{ + int ret, i; + unsigned int transition_latency; + void __iomem *reg_base; + struct device *cpu_dev; + struct apple_cpu_priv *priv; + const struct apple_soc_cpufreq_info *info; + struct cpufreq_frequency_table *freq_table; + + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", policy->cpu); + return -ENODEV; + } + + ret = dev_pm_opp_of_add_table(cpu_dev); + if (ret < 0) { + dev_err(cpu_dev, "%s: failed to add OPP table: %d\n", __func__, ret); + return ret; + } + + ret = apple_soc_cpufreq_find_cluster(policy, ®_base, &info); + if (ret) { + dev_err(cpu_dev, "%s: failed to get cluster info: %d\n", __func__, ret); + return ret; + } + + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); + if (ret) { + dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); + goto out_iounmap; + } + + ret = dev_pm_opp_get_opp_count(cpu_dev); + if (ret <= 0) { + dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); + ret = -EPROBE_DEFER; + goto out_free_opp; + } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto out_free_opp; + } + + ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); + if (ret) { + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); + goto out_free_priv; + } + + 
/* Get OPP levels (p-state indexes) and stash them in driver_data */ + for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + unsigned long rate = freq_table[i].frequency * 1000 + 999; + struct dev_pm_opp *opp = dev_pm_opp_find_freq_floor(cpu_dev, &rate); + + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + goto out_free_cpufreq_table; + } + freq_table[i].driver_data = dev_pm_opp_get_level(opp); + dev_pm_opp_put(opp); + } + + priv->cpu_dev = cpu_dev; + priv->reg_base = reg_base; + priv->info = info; + policy->driver_data = priv; + policy->freq_table = freq_table; + + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); + if (!transition_latency) + transition_latency = CPUFREQ_ETERNAL; + + policy->cpuinfo.transition_latency = transition_latency; + policy->dvfs_possible_from_any_cpu = true; + policy->fast_switch_possible = true; + + if (policy_has_boost_freq(policy)) { + ret = cpufreq_enable_boost_support(); + if (ret) { + dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); + } else { + apple_soc_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; + apple_soc_cpufreq_driver.boost_enabled = true; + } + } + + return 0; + +out_free_cpufreq_table: + dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); +out_free_priv: + kfree(priv); +out_free_opp: + dev_pm_opp_remove_all_dynamic(cpu_dev); +out_iounmap: + iounmap(reg_base); + return ret; +} + +static int apple_soc_cpufreq_exit(struct cpufreq_policy *policy) +{ + struct apple_cpu_priv *priv = policy->driver_data; + + dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); + dev_pm_opp_remove_all_dynamic(priv->cpu_dev); + iounmap(priv->reg_base); + kfree(priv); + + return 0; +} + +static struct cpufreq_driver apple_soc_cpufreq_driver = { + .name = "apple-cpufreq", + .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, + .verify = cpufreq_generic_frequency_table_verify, + .attr = cpufreq_generic_attr, + .get = apple_soc_cpufreq_get_rate, + .init = apple_soc_cpufreq_init, + .exit = apple_soc_cpufreq_exit, + .target_index = apple_soc_cpufreq_set_target, + .fast_switch = apple_soc_cpufreq_fast_switch, + .register_em = cpufreq_register_em_with_opp, + .attr = apple_soc_cpufreq_hw_attr, +}; + +static int __init apple_soc_cpufreq_module_init(void) +{ + if (!of_machine_is_compatible("apple,arm-platform")) + return -ENODEV; + + return cpufreq_register_driver(&apple_soc_cpufreq_driver); +} +module_init(apple_soc_cpufreq_module_init); + +static void __exit apple_soc_cpufreq_module_exit(void) +{ + cpufreq_unregister_driver(&apple_soc_cpufreq_driver); +} +module_exit(apple_soc_cpufreq_module_exit); + +MODULE_DEVICE_TABLE(of, apple_soc_cpufreq_of_match); +MODULE_AUTHOR("Hector Martin <[email protected]>"); +MODULE_DESCRIPTION("Apple SoC CPU cluster DVFS driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 6ac3800db450..8ab672883043 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -103,6 +103,8 @@ static const struct of_device_id allowlist[] __initconst = { static const struct of_device_id blocklist[] __initconst = { { .compatible = "allwinner,sun50i-h6", }, + { .compatible = "apple,arm-platform", }, + { .compatible = "arm,vexpress", }, { .compatible = "calxeda,highbank", }, @@ -160,6 +162,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "ti,am43", }, { .compatible = "ti,dra7", }, { .compatible = "ti,omap3", }, + { 
.compatible = "ti,am625", }, { .compatible = "qcom,ipq8064", }, { .compatible = "qcom,apq8064", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 69b3d61852ac..7e56a42750ea 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1207,6 +1207,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) goto err_free_rcpumask; + init_completion(&policy->kobj_unregister); ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, cpufreq_global_kobject, "policy%u", cpu); if (ret) { @@ -1245,7 +1246,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); init_waitqueue_head(&policy->transition_wait); - init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); policy->cpu = cpu; diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 1570d6f3e75d..55c7ffd37d1c 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -128,25 +128,23 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i, j, count; - len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); - len += scnprintf(buf + len, PAGE_SIZE - len, " : "); + len += sysfs_emit_at(buf, len, " From : To\n"); + len += sysfs_emit_at(buf, len, " : "); for (i = 0; i < stats->state_num; i++) { if (len >= PAGE_SIZE) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ", - stats->freq_table[i]); + len += sysfs_emit_at(buf, len, "%9u ", stats->freq_table[i]); } if (len >= PAGE_SIZE) return PAGE_SIZE; - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); for (i = 0; i < stats->state_num; i++) { if (len >= PAGE_SIZE) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "%9u: ", - stats->freq_table[i]); + len += sysfs_emit_at(buf, len, "%9u: ", stats->freq_table[i]); for (j = 0; j < stats->state_num; j++) { if (len >= PAGE_SIZE) @@ -157,11 +155,11 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) else count = stats->trans_table[i * stats->max_state + j]; - len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ", count); + len += sysfs_emit_at(buf, len, "%9u ", count); } if (len >= PAGE_SIZE) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); } if (len >= PAGE_SIZE) { diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6ff73c30769f..fd73d6d2b808 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -298,6 +298,7 @@ static int hwp_active __read_mostly; static int hwp_mode_bdw __read_mostly; static bool per_cpu_limits __read_mostly; static bool hwp_boost __read_mostly; +static bool hwp_forced __read_mostly; static struct cpufreq_driver *intel_pstate_driver __read_mostly; @@ -1679,12 +1680,12 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) return; /* - * If powerup EPP is something other than chipset default 0x80 and - * - is more performance oriented than 0x80 (default balance_perf EPP) + * If the EPP is set by firmware, which means that firmware enabled HWP + * - Is equal or less than 0x80 (default balance_perf EPP) * - But less performance oriented than performance EPP * then use this as new balance_perf EPP. 
*/ - if (cpudata->epp_default < HWP_EPP_BALANCE_PERFORMANCE && + if (hwp_forced && cpudata->epp_default <= HWP_EPP_BALANCE_PERFORMANCE && cpudata->epp_default > HWP_EPP_PERFORMANCE) { epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default; return; @@ -2378,6 +2379,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(COMETLAKE, core_funcs), X86_MATCH(ICELAKE_X, core_funcs), X86_MATCH(TIGERLAKE, core_funcs), + X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); @@ -3384,7 +3386,7 @@ static int __init intel_pstate_init(void) id = x86_match_cpu(hwp_support_ids); if (id) { - bool hwp_forced = intel_pstate_hwp_is_enabled(); + hwp_forced = intel_pstate_hwp_is_enabled(); if (hwp_forced) pr_info("HWP enabled by BIOS\n"); diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 3e000e1a75c6..4c57c6725c13 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -407,10 +407,10 @@ static int guess_fsb(int mult) { int speed = cpu_khz / 1000; int i; - int speeds[] = { 666, 1000, 1333, 2000 }; + static const int speeds[] = { 666, 1000, 1333, 2000 }; int f_max, f_min; - for (i = 0; i < 4; i++) { + for (i = 0; i < ARRAY_SIZE(speeds); i++) { f_max = ((speeds[i] * mult) + 50) / 100; f_max += (ROUNDING / 2); f_min = f_max - ROUNDING; diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index f0e0a35c7f21..f80339779084 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -160,6 +160,7 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, struct mtk_cpufreq_data *data; struct device *dev = &pdev->dev; struct resource *res; + struct of_phandle_args args; void __iomem *base; int ret, i; int index; @@ -168,11 +169,14 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, if (!data) return -ENOMEM; - index = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", - "#performance-domain-cells", - policy->cpus); - if (index < 0) - return index; + ret = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", + "#performance-domain-cells", + policy->cpus, &args); + if (ret < 0) + return ret; + + index = args.args[0]; + of_node_put(args.np); res = platform_get_resource(pdev, IORESOURCE_MEM, index); if (!res) { diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 833589bc95e4..340fed35e45d 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -4,6 +4,7 @@ */ #include <linux/bitfield.h> +#include <linux/clk-provider.h> #include <linux/cpufreq.h> #include <linux/init.h> #include <linux/interconnect.h> @@ -43,7 +44,6 @@ struct qcom_cpufreq_soc_data { struct qcom_cpufreq_data { void __iomem *base; struct resource *res; - const struct qcom_cpufreq_soc_data *soc_data; /* * Mutex to synchronize between de-init sequence and re-starting LMh @@ -55,12 +55,18 @@ struct qcom_cpufreq_data { bool cancel_throttle; struct delayed_work throttle_work; struct cpufreq_policy *policy; + struct clk_hw cpu_clk; bool per_core_dcvs; struct freq_qos_request throttle_freq_req; }; +static struct { + struct qcom_cpufreq_data *data; + const struct qcom_cpufreq_soc_data *soc_data; +} qcom_cpufreq; + static unsigned long cpu_hw_rate, xo_rate; static bool icc_scaling_enabled; @@ -109,7 +115,7 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, unsigned int index) { struct qcom_cpufreq_data *data = 
policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; unsigned long freq = policy->freq_table[index].frequency; unsigned int i; @@ -125,9 +131,37 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, return 0; } +static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) +{ + unsigned int lval; + + if (qcom_cpufreq.soc_data->reg_current_vote) + lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_current_vote) & 0x3ff; + else + lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_domain_state) & 0xff; + + return lval * xo_rate; +} + +/* Get the current frequency of the CPU (after throttling) */ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) { struct qcom_cpufreq_data *data; + struct cpufreq_policy *policy; + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) + return 0; + + data = policy->driver_data; + + return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ; +} + +/* Get the frequency requested by the cpufreq core for the CPU */ +static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) +{ + struct qcom_cpufreq_data *data; const struct qcom_cpufreq_soc_data *soc_data; struct cpufreq_policy *policy; unsigned int index; @@ -137,7 +171,7 @@ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) return 0; data = policy->driver_data; - soc_data = data->soc_data; + soc_data = qcom_cpufreq.soc_data; index = readl_relaxed(data->base + soc_data->reg_perf_state); index = min(index, LUT_MAX_ENTRIES - 1); @@ -149,7 +183,7 @@ static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { struct qcom_cpufreq_data *data = policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; unsigned int index; unsigned int i; @@ -173,7 +207,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, unsigned long rate; int ret; struct qcom_cpufreq_data *drv_data = policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = drv_data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL); if (!table) @@ -193,6 +227,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, } } else if (ret != -ENODEV) { dev_err(cpu_dev, "Invalid opp table in device tree\n"); + kfree(table); return ret; } else { policy->fast_switch_possible = true; @@ -286,18 +321,6 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } -static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) -{ - unsigned int lval; - - if (data->soc_data->reg_current_vote) - lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff; - else - lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff; - - return lval * xo_rate; -} - static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) { struct cpufreq_policy *policy = data->policy; @@ -341,7 +364,7 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) * If h/w throttled frequency is higher than what cpufreq has requested * for, then stop polling and switch back to interrupt mechanism. 
*/ - if (throttled_freq >= qcom_cpufreq_hw_get(cpu)) + if (throttled_freq >= qcom_cpufreq_get_freq(cpu)) enable_irq(data->throttle_irq); else mod_delayed_work(system_highpri_wq, &data->throttle_work, @@ -367,9 +390,9 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) disable_irq_nosync(c_data->throttle_irq); schedule_delayed_work(&c_data->throttle_work, 0); - if (c_data->soc_data->reg_intr_clr) + if (qcom_cpufreq.soc_data->reg_intr_clr) writel_relaxed(GT_IRQ_STATUS, - c_data->base + c_data->soc_data->reg_intr_clr); + c_data->base + qcom_cpufreq.soc_data->reg_intr_clr); return IRQ_HANDLED; } @@ -503,8 +526,6 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) struct of_phandle_args args; struct device_node *cpu_np; struct device *cpu_dev; - struct resource *res; - void __iomem *base; struct qcom_cpufreq_data *data; int ret, index; @@ -526,51 +547,18 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) return ret; index = args.args[0]; - - res = platform_get_resource(pdev, IORESOURCE_MEM, index); - if (!res) { - dev_err(dev, "failed to get mem resource %d\n", index); - return -ENODEV; - } - - if (!request_mem_region(res->start, resource_size(res), res->name)) { - dev_err(dev, "failed to request resource %pR\n", res); - return -EBUSY; - } - - base = ioremap(res->start, resource_size(res)); - if (!base) { - dev_err(dev, "failed to map resource %pR\n", res); - ret = -ENOMEM; - goto release_region; - } - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) { - ret = -ENOMEM; - goto unmap_base; - } - - data->soc_data = of_device_get_match_data(&pdev->dev); - data->base = base; - data->res = res; + data = &qcom_cpufreq.data[index]; /* HW should be in enabled state to proceed */ - if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) { + if (!(readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_enable) & 0x1)) { dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index); - ret = -ENODEV; - goto error; + return -ENODEV; } - if (readl_relaxed(base + data->soc_data->reg_dcvs_ctrl) & 0x1) + if (readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_dcvs_ctrl) & 0x1) data->per_core_dcvs = true; qcom_get_related_cpus(index, policy->cpus); - if (cpumask_empty(policy->cpus)) { - dev_err(dev, "Domain-%d failed to get related CPUs\n", index); - ret = -ENOENT; - goto error; - } policy->driver_data = data; policy->dvfs_possible_from_any_cpu = true; @@ -578,14 +566,13 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy); if (ret) { dev_err(dev, "Domain-%d failed to read LUT\n", index); - goto error; + return ret; } ret = dev_pm_opp_get_opp_count(cpu_dev); if (ret <= 0) { dev_err(cpu_dev, "Failed to add OPPs\n"); - ret = -ENODEV; - goto error; + return -ENODEV; } if (policy_has_boost_freq(policy)) { @@ -594,18 +581,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); } - ret = qcom_cpufreq_hw_lmh_init(policy, index); - if (ret) - goto error; - - return 0; -error: - kfree(data); -unmap_base: - iounmap(base); -release_region: - release_mem_region(res->start, resource_size(res)); - return ret; + return qcom_cpufreq_hw_lmh_init(policy, index); } static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) @@ -658,20 +634,33 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .ready = qcom_cpufreq_ready, }; +static unsigned long qcom_cpufreq_hw_recalc_rate(struct clk_hw *hw, unsigned long 
parent_rate) +{ + struct qcom_cpufreq_data *data = container_of(hw, struct qcom_cpufreq_data, cpu_clk); + + return qcom_lmh_get_throttle_freq(data); +} + +static const struct clk_ops qcom_cpufreq_hw_clk_ops = { + .recalc_rate = qcom_cpufreq_hw_recalc_rate, +}; + static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { + struct clk_hw_onecell_data *clk_data; + struct device *dev = &pdev->dev; struct device *cpu_dev; struct clk *clk; - int ret; + int ret, i, num_domains; - clk = clk_get(&pdev->dev, "xo"); + clk = clk_get(dev, "xo"); if (IS_ERR(clk)) return PTR_ERR(clk); xo_rate = clk_get_rate(clk); clk_put(clk); - clk = clk_get(&pdev->dev, "alternate"); + clk = clk_get(dev, "alternate"); if (IS_ERR(clk)) return PTR_ERR(clk); @@ -689,11 +678,70 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) if (ret) return ret; + /* Allocate qcom_cpufreq_data based on the available frequency domains in DT */ + num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * 4); + if (num_domains <= 0) + return num_domains; + + qcom_cpufreq.data = devm_kzalloc(dev, sizeof(struct qcom_cpufreq_data) * num_domains, + GFP_KERNEL); + if (!qcom_cpufreq.data) + return -ENOMEM; + + qcom_cpufreq.soc_data = of_device_get_match_data(dev); + + clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_domains), GFP_KERNEL); + if (!clk_data) + return -ENOMEM; + + clk_data->num = num_domains; + + for (i = 0; i < num_domains; i++) { + struct qcom_cpufreq_data *data = &qcom_cpufreq.data[i]; + struct clk_init_data clk_init = {}; + struct resource *res; + void __iomem *base; + + base = devm_platform_get_and_ioremap_resource(pdev, i, &res); + if (IS_ERR(base)) { + dev_err(dev, "Failed to map resource %pR\n", res); + return PTR_ERR(base); + } + + data->base = base; + data->res = res; + + /* Register CPU clock for each frequency domain */ + clk_init.name = kasprintf(GFP_KERNEL, "qcom_cpufreq%d", i); + if (!clk_init.name) + return -ENOMEM; + + clk_init.flags = CLK_GET_RATE_NOCACHE; + clk_init.ops = &qcom_cpufreq_hw_clk_ops; + data->cpu_clk.init = &clk_init; + + ret = devm_clk_hw_register(dev, &data->cpu_clk); + if (ret < 0) { + dev_err(dev, "Failed to register clock %d: %d\n", i, ret); + kfree(clk_init.name); + return ret; + } + + clk_data->hws[i] = &data->cpu_clk; + kfree(clk_init.name); + } + + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, clk_data); + if (ret < 0) { + dev_err(dev, "Failed to add clock provider\n"); + return ret; + } + ret = cpufreq_register_driver(&cpufreq_qcom_hw_driver); if (ret) - dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n"); + dev_err(dev, "CPUFreq HW driver failed to register\n"); else - dev_dbg(&pdev->dev, "QCOM CPUFreq HW driver initialized\n"); + dev_dbg(dev, "QCOM CPUFreq HW driver initialized\n"); return ret; } diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 7d0d62a06bf3..c6fdf019dbde 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -39,7 +39,7 @@ static struct clk *spear1340_cpu_get_possible_parent(unsigned long newfreq) * In SPEAr1340, cpu clk's parent sys clk can take input from * following sources */ - const char *sys_clk_src[] = { + static const char * const sys_clk_src[] = { "sys_syn_clk", "pll1_clk", "pll2_clk", diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 6c88827f4e62..f98f53bf1011 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -65,8 +65,8 @@ 
struct tegra186_cpufreq_cluster { struct tegra186_cpufreq_data { void __iomem *regs; - struct tegra186_cpufreq_cluster *clusters; const struct tegra186_cpufreq_cpu *cpus; + struct tegra186_cpufreq_cluster clusters[]; }; static int tegra186_cpufreq_init(struct cpufreq_policy *policy) @@ -221,15 +221,12 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev) struct tegra_bpmp *bpmp; unsigned int i = 0, err; - data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(&pdev->dev, + struct_size(data, clusters, TEGRA186_NUM_CLUSTERS), + GFP_KERNEL); if (!data) return -ENOMEM; - data->clusters = devm_kcalloc(&pdev->dev, TEGRA186_NUM_CLUSTERS, - sizeof(*data->clusters), GFP_KERNEL); - if (!data->clusters) - return -ENOMEM; - data->cpus = tegra186_cpus; bpmp = tegra_bpmp_get(&pdev->dev); diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index f64180dd2005..be4209d97cb3 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -39,6 +39,14 @@ #define OMAP34xx_ProdID_SKUID 0x4830A20C #define OMAP3_SYSCON_BASE (0x48000000 + 0x2000 + 0x270) +#define AM625_EFUSE_K_MPU_OPP 11 +#define AM625_EFUSE_S_MPU_OPP 19 +#define AM625_EFUSE_T_MPU_OPP 20 + +#define AM625_SUPPORT_K_MPU_OPP BIT(0) +#define AM625_SUPPORT_S_MPU_OPP BIT(1) +#define AM625_SUPPORT_T_MPU_OPP BIT(2) + #define VERSION_COUNT 2 struct ti_cpufreq_data; @@ -104,6 +112,25 @@ static unsigned long omap3_efuse_xlate(struct ti_cpufreq_data *opp_data, return BIT(efuse); } +static unsigned long am625_efuse_xlate(struct ti_cpufreq_data *opp_data, + unsigned long efuse) +{ + unsigned long calculated_efuse = AM625_SUPPORT_K_MPU_OPP; + + switch (efuse) { + case AM625_EFUSE_T_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_T_MPU_OPP; + fallthrough; + case AM625_EFUSE_S_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_S_MPU_OPP; + fallthrough; + case AM625_EFUSE_K_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_K_MPU_OPP; + } + + return calculated_efuse; +} + static struct ti_cpufreq_soc_data am3x_soc_data = { .efuse_xlate = amx3_efuse_xlate, .efuse_fallback = AM33XX_800M_ARM_MPU_MAX_FREQ, @@ -198,6 +225,14 @@ static struct ti_cpufreq_soc_data am3517_soc_data = { .multi_regulator = false, }; +static struct ti_cpufreq_soc_data am625_soc_data = { + .efuse_xlate = am625_efuse_xlate, + .efuse_offset = 0x0018, + .efuse_mask = 0x07c0, + .efuse_shift = 0x6, + .rev_offset = 0x0014, + .multi_regulator = false, +}; /** * ti_cpufreq_get_efuse() - Parse and return efuse value present on SoC @@ -301,6 +336,7 @@ static const struct of_device_id ti_cpufreq_of_match[] = { { .compatible = "ti,dra7", .data = &dra7_soc_data }, { .compatible = "ti,omap34xx", .data = &omap34xx_soc_data, }, { .compatible = "ti,omap36xx", .data = &omap36xx_soc_data, }, + { .compatible = "ti,am625", .data = &am625_soc_data, }, /* legacy */ { .compatible = "ti,omap3430", .data = &omap34xx_soc_data, }, { .compatible = "ti,omap3630", .data = &omap36xx_soc_data, }, diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 821984947ed9..c80cf9ddabd8 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -181,7 +181,8 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) if (ret) goto remove_pd; - pr_info("Initialized CPU PM domain topology\n"); + pr_info("Initialized CPU PM domain topology using %s mode\n", + use_osi ? 
"OSI" : "PC"); return 0; put_node: diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c index 252f2a9686a6..7ca3d7d9b5ea 100644 --- a/drivers/cpuidle/dt_idle_states.c +++ b/drivers/cpuidle/dt_idle_states.c @@ -211,18 +211,15 @@ int dt_init_idle_driver(struct cpuidle_driver *drv, of_node_put(cpu_node); if (err) return err; - /* - * Update the driver state count only if some valid DT idle states - * were detected - */ - if (i) - drv->state_count = state_idx; + + /* Set the number of total supported idle states. */ + drv->state_count = state_idx; /* * Return the number of present and valid DT idle states, which can * also be 0 on platforms with missing DT idle states or legacy DT * configuration predating the DT idle states bindings. */ - return i; + return state_idx - start_idx; } EXPORT_SYMBOL_GPL(dt_init_idle_driver); diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 97086fab698e..903325aac991 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,13 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +static struct resource hmem_active = { + .name = "HMEM devices", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; + void hmem_register_device(int target_nid, struct resource *r) { /* define a clean / non-busy resource for the platform device */ @@ -41,6 +48,12 @@ void hmem_register_device(int target_nid, struct resource *r) goto out_pdev; } + if (!__request_region(&hmem_active, res.start, resource_size(&res), + dev_name(&pdev->dev), 0)) { + dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res); + goto out_active; + } + pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, @@ -66,6 +79,8 @@ void hmem_register_device(int target_nid, struct resource *r) return; out_resource: + __release_region(&hmem_active, res.start, resource_size(&res)); +out_active: platform_device_put(pdev); out_pdev: memregion_free(id); @@ -73,15 +88,6 @@ out_pdev: static __init int hmem_register_one(struct resource *res, void *data) { - /* - * If the resource is not a top-level resource it was already - * assigned to a device by the HMAT parsing. - */ - if (res->parent != &iomem_resource) { - pr_info("HMEM: skip %pr, already claimed\n", res); - return 0; - } - hmem_register_device(phys_to_target_node(res->start), res); return 0; diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index dd0f83ee505b..e6f36c014c4c 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/dma-buf.h> #include <linux/dma-fence.h> +#include <linux/dma-fence-unwrap.h> #include <linux/anon_inodes.h> #include <linux/export.h> #include <linux/debugfs.h> @@ -391,8 +392,10 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf, const void __user *user_data) { struct dma_buf_import_sync_file arg; - struct dma_fence *fence; + struct dma_fence *fence, *f; enum dma_resv_usage usage; + struct dma_fence_unwrap iter; + unsigned int num_fences; int ret = 0; if (copy_from_user(&arg, user_data, sizeof(arg))) @@ -411,13 +414,21 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf, usage = (arg.flags & DMA_BUF_SYNC_WRITE) ? 
DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ; - dma_resv_lock(dmabuf->resv, NULL); + num_fences = 0; + dma_fence_unwrap_for_each(f, &iter, fence) + ++num_fences; - ret = dma_resv_reserve_fences(dmabuf->resv, 1); - if (!ret) - dma_resv_add_fence(dmabuf->resv, fence, usage); + if (num_fences > 0) { + dma_resv_lock(dmabuf->resv, NULL); - dma_resv_unlock(dmabuf->resv); + ret = dma_resv_reserve_fences(dmabuf->resv, num_fences); + if (!ret) { + dma_fence_unwrap_for_each(f, &iter, fence) + dma_resv_add_fence(dmabuf->resv, f, usage); + } + + dma_resv_unlock(dmabuf->resv); + } dma_fence_put(fence); diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 8f5848aa144f..59d158873f4c 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -233,18 +233,6 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) return ERR_PTR(-EINVAL); } - /* check the name is unique */ - mutex_lock(&heap_list_lock); - list_for_each_entry(h, &heap_list, list) { - if (!strcmp(h->name, exp_info->name)) { - mutex_unlock(&heap_list_lock); - pr_err("dma_heap: Already registered heap named %s\n", - exp_info->name); - return ERR_PTR(-EINVAL); - } - } - mutex_unlock(&heap_list_lock); - heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (!heap) return ERR_PTR(-ENOMEM); @@ -283,13 +271,27 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) err_ret = ERR_CAST(dev_ret); goto err2; } - /* Add heap to the list */ + mutex_lock(&heap_list_lock); + /* check the name is unique */ + list_for_each_entry(h, &heap_list, list) { + if (!strcmp(h->name, exp_info->name)) { + mutex_unlock(&heap_list_lock); + pr_err("dma_heap: Already registered heap named %s\n", + exp_info->name); + err_ret = ERR_PTR(-EINVAL); + goto err3; + } + } + + /* Add heap to the list */ list_add(&heap->list, &heap_list); mutex_unlock(&heap_list_lock); return heap; +err3: + device_destroy(dma_heap_class, heap->heap_devt); err2: cdev_del(&heap->heap_cdev); err1: diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index 6c416955da53..bbe0a7cabb75 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -246,7 +246,9 @@ config FPGA_MGR_VERSAL_FPGA config FPGA_M10_BMC_SEC_UPDATE tristate "Intel MAX10 BMC Secure Update driver" - depends on MFD_INTEL_M10_BMC && FW_UPLOAD + depends on MFD_INTEL_M10_BMC + select FW_LOADER + select FW_UPLOAD help Secure update support for the Intel MAX10 board management controller. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index c8935d718207..4485bb29bec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -41,5 +41,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6d1ff7b9258d..1f76e27f1a35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -986,6 +986,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; struct ttm_operation_ctx ctx = { true, false }; + struct hmm_range *range; int ret = 0; mutex_lock(&process_info->lock); @@ -1015,7 +1016,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); goto unregister_out; @@ -1033,7 +1034,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, amdgpu_bo_unreserve(bo); release_out: - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -2370,6 +2371,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, /* Go through userptr_inval_list and update any invalid user_pages */ list_for_each_entry(mem, &process_info->userptr_inval_list, validate_list.head) { + struct hmm_range *range; + invalid = atomic_read(&mem->invalid); if (!invalid) /* BO hasn't been invalidated since the last @@ -2380,7 +2383,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, + &range); if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2399,7 +2403,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * FIXME: Cannot ignore the return code, must hold * notifier_lock */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); } /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 2168163aad2d..252a876b0725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -209,6 +209,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, list_add_tail(&e->tv.head, &bucket[priority]); e->user_pages = NULL; + e->range = NULL; } /* Connect the sorted buckets in the output list. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 9caea1688fc3..e4d78491bcc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -26,6 +26,8 @@ #include <drm/ttm/ttm_execbuf_util.h> #include <drm/amdgpu_drm.h> +struct hmm_range; + struct amdgpu_device; struct amdgpu_bo; struct amdgpu_bo_va; @@ -36,6 +38,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; + struct hmm_range *range; bool user_invalidated; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 491d4846fc02..cfb262911bfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -328,7 +328,6 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; - drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d038b258cc92..365e3fb6a9e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -913,7 +913,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; } - r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages); + r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range); if (r) { kvfree(e->user_pages); e->user_pages = NULL; @@ -991,9 +991,10 @@ out_free_user_pages: if (!e->user_pages) continue; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); kvfree(e->user_pages); e->user_pages = NULL; + e->range = NULL; } mutex_unlock(&p->bo_list->bo_list_mutex); return r; @@ -1273,7 +1274,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); + e->range = NULL; } if (r) { r = -EAGAIN; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 8ef31d687ef3..91571b1324f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -378,6 +378,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, struct amdgpu_device *adev = drm_to_adev(dev); struct drm_amdgpu_gem_userptr *args = data; struct drm_gem_object *gobj; + struct hmm_range *range; struct amdgpu_bo *bo; uint32_t handle; int r; @@ -413,14 +414,13 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; - if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) { - r = amdgpu_mn_register(bo, args->addr); - if (r) - goto release_object; - } + r = amdgpu_mn_register(bo, args->addr); + if (r) + goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, + &range); if (r) goto release_object; @@ -443,7 +443,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, user_pages_done: if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + 
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); release_object: drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b06cb0bb166c..28612e56d0d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -479,6 +479,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) unsigned i; unsigned vmhub, inv_eng; + if (adev->enable_mes) { + /* reserve engine 5 for firmware */ + for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) + vm_inv_engs[vmhub] &= ~(1 << 5); + } + for (i = 0; i < adev->num_rings; ++i) { ring = adev->rings[i]; vmhub = ring->funcs->vmhub; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index cd968e781077..adac650cf544 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -169,7 +169,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - dma_fence_put(&job->hw_fence); + /* only put the hw fence if has embedded fence */ + if (!job->hw_fence.ops) + kfree(job); + else + dma_fence_put(&job->hw_fence); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -254,6 +258,9 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, DRM_ERROR("Error adding fence (%d)\n", r); } + if (!fence && job->gang_submit) + fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + while (fence == NULL && vm && !job->vmid) { r = amdgpu_vmid_grab(vm, ring, &job->sync, &job->base.s_fence->finished, @@ -264,9 +271,6 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, fence = amdgpu_sync_get_fence(&job->sync); } - if (!fence && job->gang_submit) - fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); - return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index effa7df3ddbf..7978307e1d6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -172,6 +172,7 @@ void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) { amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); + mem_ctx->shared_bo = NULL; } static void psp_free_shared_bufs(struct psp_context *psp) @@ -182,6 +183,7 @@ static void psp_free_shared_bufs(struct psp_context *psp) /* free TMR memory buffer */ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + psp->tmr_bo = NULL; /* free xgmi shared memory */ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context); @@ -743,7 +745,7 @@ static int psp_load_toc(struct psp_context *psp, /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { - int ret; + int ret = 0; int tmr_size; void *tmr_buf; void **pptr; @@ -770,10 +772,12 @@ static int psp_tmr_init(struct psp_context *psp) } } - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + if (!psp->tmr_bo) { + pptr = amdgpu_sriov_vf(psp->adev) ? 
&tmr_buf : NULL; + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + } return ret; } @@ -2732,8 +2736,6 @@ static int psp_suspend(void *handle) } out: - psp_free_shared_bufs(psp); - return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 57277b1cf183..b64938ed8cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -643,9 +643,6 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; bool bound; -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - struct hmm_range *range; -#endif }; #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) @@ -658,7 +655,8 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range) { struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); @@ -668,16 +666,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) bool readonly; int r = 0; + /* Make sure get_user_pages_done() can cleanup gracefully */ + *range = NULL; + mm = bo->notifier.mm; if (unlikely(!mm)) { DRM_DEBUG_DRIVER("BO is not registered?\n"); return -EFAULT; } - /* Another get_user_pages is running at the same time?? */ - if (WARN_ON(gtt->range)) - return -EFAULT; - if (!mmget_not_zero(mm)) /* Happens during process shutdown */ return -ESRCH; @@ -695,7 +692,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, - ttm->num_pages, &gtt->range, readonly, + ttm->num_pages, range, readonly, true, NULL); out_unlock: mmap_read_unlock(mm); @@ -713,30 +710,24 @@ out_unlock: * * Returns: true if pages are still valid */ -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range) { struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); - bool r = false; - if (!gtt || !gtt->userptr) + if (!gtt || !gtt->userptr || !range) return false; DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n", gtt->userptr, ttm->num_pages); - WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns, - "No user pages to check\n"); + WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); - if (gtt->range) { - /* - * FIXME: Must always hold notifier_lock for this, and must - * not ignore the return code. - */ - r = amdgpu_hmm_range_get_pages_done(gtt->range); - gtt->range = NULL; - } - - return !r; + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. 
+ */ + return !amdgpu_hmm_range_get_pages_done(range); } #endif @@ -813,20 +804,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, /* unmap the pages mapped to the device */ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); - -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range) { - unsigned long i; - - for (i = 0; i < ttm->num_pages; i++) { - if (ttm->pages[i] != - hmm_pfn_to_page(gtt->range->hmm_pfns[i])) - break; - } - - WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); - } -#endif } static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6a70818039dd..a37207011a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -39,6 +39,8 @@ #define AMDGPU_POISON 0xd0bed0be +struct hmm_range; + struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; @@ -149,15 +151,19 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages); -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct page **pages) + struct page **pages, + struct hmm_range **range) { return -EPERM; } -static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range) { return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 0b52af415b28..ce64ca1c6e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 2): fw_name = FIRMWARE_VANGOGH; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case IP_VERSION(3, 0, 16): fw_name = FIRMWARE_DIMGREY_CAVEFISH; diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h index f772bb499f3e..0312c71c3af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ -32,7 +32,6 @@ #define RB_ENABLED (1 << 0) #define RB4_ENABLED (1 << 1) -#define MMSCH_DOORBELL_OFFSET 0x8 #define MMSCH_VF_ENGINE_STATUS__PASS 0x1 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 897a5ce9c9da..dcc49b01bd59 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -100,7 +100,6 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - int vcn_doorbell_index = 0; r = amdgpu_vcn_sw_init(adev); if (r) @@ -112,12 +111,6 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; - if (amdgpu_sriov_vf(adev)) { - vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET; - /* get DWORD offset */ - vcn_doorbell_index = vcn_doorbell_index << 1; - } 
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -135,7 +128,7 @@ static int vcn_v4_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) - ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 6925e0280dbe..f4f3d2665a6b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) help @@ -12,6 +13,12 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. + config DRM_AMD_DC_DCN def_bool n help diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3e1ecca72430..512c32327eb1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1372,7 +1372,44 @@ static const struct dmi_system_id hpd_disconnect_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), + }, + }, {} + /* TODO: refactor this from a fixed table to a dynamic option */ }; static void retrieve_dmi_info(struct amdgpu_display_manager *dm) @@ -6475,7 +6512,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, struct drm_connector_state *new_con_state; struct amdgpu_dm_connector *aconnector; struct dm_connector_state *dm_conn_state; - int i, j; + int i, j, ret; int vcpi, pbn_div, pbn, slot_num = 0; for_each_new_connector_in_state(state, connector, new_con_state, i) { @@ -6522,8 +6559,11 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, dm_conn_state->pbn = pbn; dm_conn_state->vcpi_slots = slot_num; - drm_dp_mst_atomic_enable_dsc(state, aconnector->port, dm_conn_state->pbn, - false); + ret = drm_dp_mst_atomic_enable_dsc(state, aconnector->port, + 
dm_conn_state->pbn, false); + if (ret < 0) + return ret; + continue; } @@ -9537,10 +9577,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, #if defined(CONFIG_DRM_AMD_DC_DCN) if (dc_resource_is_dsc_encoding_supported(dc)) { - if (!pre_validate_dsc(state, &dm_state, vars)) { - ret = -EINVAL; + ret = pre_validate_dsc(state, &dm_state, vars); + if (ret != 0) goto fail; - } } #endif @@ -9635,9 +9674,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } #if defined(CONFIG_DRM_AMD_DC_DCN) - if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; goto fail; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 6ff96b4bdda5..6483ba266893 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -703,13 +703,13 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn) return dsc_config.bits_per_pixel; } -static bool increase_dsc_bpp(struct drm_atomic_state *state, - struct drm_dp_mst_topology_state *mst_state, - struct dc_link *dc_link, - struct dsc_mst_fairness_params *params, - struct dsc_mst_fairness_vars *vars, - int count, - int k) +static int increase_dsc_bpp(struct drm_atomic_state *state, + struct drm_dp_mst_topology_state *mst_state, + struct dc_link *dc_link, + struct dsc_mst_fairness_params *params, + struct dsc_mst_fairness_vars *vars, + int count, + int k) { int i; bool bpp_increased[MAX_PIPES]; @@ -719,6 +719,7 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state, int remaining_to_increase = 0; int link_timeslots_used; int fair_pbn_alloc; + int ret = 0; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { @@ -757,52 +758,60 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state, if (initial_slack[next_index] > fair_pbn_alloc) { vars[next_index].pbn += fair_pbn_alloc; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; + + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn); } else { vars[next_index].pbn -= fair_pbn_alloc; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } } else { vars[next_index].pbn += initial_slack[next_index]; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; + + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16; } else { vars[next_index].pbn -= 
initial_slack[next_index]; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } } bpp_increased[next_index] = true; remaining_to_increase--; } - return true; + return 0; } -static bool try_disable_dsc(struct drm_atomic_state *state, - struct dc_link *dc_link, - struct dsc_mst_fairness_params *params, - struct dsc_mst_fairness_vars *vars, - int count, - int k) +static int try_disable_dsc(struct drm_atomic_state *state, + struct dc_link *dc_link, + struct dsc_mst_fairness_params *params, + struct dsc_mst_fairness_vars *vars, + int count, + int k) { int i; bool tried[MAX_PIPES]; @@ -810,6 +819,7 @@ static bool try_disable_dsc(struct drm_atomic_state *state, int max_kbps_increase; int next_index; int remaining_to_try = 0; + int ret; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -840,49 +850,52 @@ static bool try_disable_dsc(struct drm_atomic_state *state, break; vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps); - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } tried[next_index] = true; remaining_to_try--; } - return true; + return 0; } -static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dc_link *dc_link, - struct dsc_mst_fairness_vars *vars, - struct drm_dp_mst_topology_mgr *mgr, - int *link_vars_start_index) +static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dc_link *dc_link, + struct dsc_mst_fairness_vars *vars, + struct drm_dp_mst_topology_mgr *mgr, + int *link_vars_start_index) { struct dc_stream_state *stream; struct dsc_mst_fairness_params params[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_state *mst_state = drm_atomic_get_mst_topology_state(state, mgr); int count = 0; - int i, k; + int i, k, ret; bool debugfs_overwrite = false; memset(params, 0, sizeof(params)); if (IS_ERR(mst_state)) - return false; + return PTR_ERR(mst_state); mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link); #if defined(CONFIG_DRM_AMD_DC_DCN) @@ -933,7 +946,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (count == 0) { ASSERT(0); - return true; + return 0; } /* k is start index of vars for current phy link used by mst hub */ @@ -947,13 +960,17 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].dsc_enabled = false; 
vars[i + k].bpp_x16 = 0; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, - vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, + vars[i + k].pbn); + if (ret < 0) + return ret; } - if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) { + ret = drm_dp_mst_atomic_check(state); + if (ret == 0 && !debugfs_overwrite) { set_dsc_configs_from_fairness_vars(params, vars, count, k); - return true; + return 0; + } else if (ret != -ENOSPC) { + return ret; } /* Try max compression */ @@ -962,31 +979,36 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, + params[i].port, vars[i + k].pbn); + if (ret < 0) + return ret; } else { vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, + params[i].port, vars[i + k].pbn); + if (ret < 0) + return ret; } } - if (drm_dp_mst_atomic_check(state)) - return false; + ret = drm_dp_mst_atomic_check(state); + if (ret != 0) + return ret; /* Optimize degree of compression */ - if (!increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k)) - return false; + ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k); + if (ret < 0) + return ret; - if (!try_disable_dsc(state, dc_link, params, vars, count, k)) - return false; + ret = try_disable_dsc(state, dc_link, params, vars, count, k); + if (ret < 0) + return ret; set_dsc_configs_from_fairness_vars(params, vars, count, k); - return true; + return 0; } static bool is_dsc_need_re_compute( @@ -1087,15 +1109,17 @@ static bool is_dsc_need_re_compute( return is_dsc_need_re_compute; } -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars) +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; @@ -1108,7 +1132,7 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (!aconnector || !aconnector->dc_sink) + if (!aconnector || !aconnector->dc_sink || !aconnector->port) continue; if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported) @@ -1118,19 +1142,16 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, continue; if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK) - return false; + return -EINVAL; if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; - mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - 
&aconnector->mst_mgr, - &link_vars_start_index)) { - mutex_unlock(&aconnector->mst_mgr.lock); - return false; - } - mutex_unlock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, + &link_vars_start_index); + if (ret != 0) + return ret; for (j = 0; j < dc_state->stream_count; j++) { if (dc_state->streams[j]->link == stream->link) @@ -1143,22 +1164,23 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (stream->timing.flags.DSC == 1) if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) - return false; + return -EINVAL; } - return true; + return ret; } -static bool - pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars) +static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; @@ -1171,7 +1193,7 @@ static bool aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (!aconnector || !aconnector->dc_sink) + if (!aconnector || !aconnector->dc_sink || !aconnector->port) continue; if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported) @@ -1183,14 +1205,11 @@ static bool if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; - mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, - &link_vars_start_index)) { - mutex_unlock(&aconnector->mst_mgr.lock); - return false; - } - mutex_unlock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, + &link_vars_start_index); + if (ret != 0) + return ret; for (j = 0; j < dc_state->stream_count; j++) { if (dc_state->streams[j]->link == stream->link) @@ -1198,7 +1217,7 @@ static bool } } - return true; + return ret; } static int find_crtc_index_in_state_by_stream(struct drm_atomic_state *state, @@ -1253,9 +1272,9 @@ static bool is_dsc_precompute_needed(struct drm_atomic_state *state) return ret; } -bool pre_validate_dsc(struct drm_atomic_state *state, - struct dm_atomic_state **dm_state_ptr, - struct dsc_mst_fairness_vars *vars) +int pre_validate_dsc(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state_ptr, + struct dsc_mst_fairness_vars *vars) { int i; struct dm_atomic_state *dm_state; @@ -1264,11 +1283,12 @@ bool pre_validate_dsc(struct drm_atomic_state *state, if (!is_dsc_precompute_needed(state)) { DRM_INFO_ONCE("DSC precompute is not needed.\n"); - return true; + return 0; } - if (dm_atomic_get_state(state, dm_state_ptr)) { + ret = dm_atomic_get_state(state, dm_state_ptr); + if (ret != 0) { DRM_INFO_ONCE("dm_atomic_get_state() failed\n"); - return false; + return ret; } dm_state = *dm_state_ptr; @@ -1280,7 +1300,7 @@ bool pre_validate_dsc(struct drm_atomic_state *state, local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL); if (!local_dc_state) - return false; + return -ENOMEM; for (i = 0; i < local_dc_state->stream_count; i++) { struct dc_stream_state *stream = dm_state->context->streams[i]; @@ 
-1316,9 +1336,9 @@ bool pre_validate_dsc(struct drm_atomic_state *state, if (ret != 0) goto clean_exit; - if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) { + ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars); + if (ret != 0) { DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; goto clean_exit; } @@ -1349,7 +1369,7 @@ clean_exit: kfree(local_dc_state); - return (ret == 0); + return ret; } static unsigned int kbps_from_pbn(unsigned int pbn) @@ -1392,6 +1412,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; + struct drm_dp_mst_topology_mgr *mst_mgr; /* * check if the mode could be supported if DSC pass-through is supported @@ -1400,7 +1421,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( */ if (is_dsc_common_config_possible(stream, &bw_range) && aconnector->port->passthrough_aux) { - mutex_lock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + mutex_lock(&mst_mgr->lock); cur_link_settings = stream->link->verified_link_cap; @@ -1413,7 +1435,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps); - mutex_unlock(&aconnector->mst_mgr.lock); + mutex_unlock(&mst_mgr->lock); /* * use the maximum dsc compression bandwidth as the required diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index b92a7c5671aa..97fd70df531b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -53,15 +53,15 @@ struct dsc_mst_fairness_vars { struct amdgpu_dm_connector *aconnector; }; -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars); +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars); bool needs_dsc_aux_workaround(struct dc_link *link); -bool pre_validate_dsc(struct drm_atomic_state *state, - struct dm_atomic_state **dm_state_ptr, - struct dsc_mst_fairness_vars *vars); +int pre_validate_dsc(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state_ptr, + struct dsc_mst_fairness_vars *vars); enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c index ef0795b14a1f..2db595672a46 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c @@ -123,9 +123,10 @@ static int dcn314_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t result; result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000); - ASSERT(result == VBIOSSMC_Result_OK); - smu_print("SMU response after wait: %d\n", result); + if (result != VBIOSSMC_Result_OK) + smu_print("SMU Response was not OK. 
SMU response after wait received is: %d\n", + result); if (result == VBIOSSMC_Status_BUSY) return -1; @@ -216,6 +217,12 @@ int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request VBIOSSMC_MSG_SetHardMinDcfclkByFreq, khz_to_mhz_ceil(requested_dcfclk_khz)); +#ifdef DBG + smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", + actual_dcfclk_set_mhz, + actual_dcfclk_set_mhz * 1000); +#endif + return actual_dcfclk_set_mhz * 1000; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 1b70b78e2fa1..af631085e88c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -359,7 +359,8 @@ static const struct dce_audio_registers audio_regs[] = { audio_regs(2), audio_regs(3), audio_regs(4), - audio_regs(5) + audio_regs(5), + audio_regs(6), }; #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index b9765b3899e1..ef52e6b6eccf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -436,34 +436,48 @@ void dpp1_set_cursor_position( uint32_t height) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; + int x_hotspot = pos->x_hotspot; + int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; + int cursor_height = (int)height; + int cursor_width = (int)width; uint32_t cur_en = pos->enable ? 
1 : 0; - // Cursor width/height and hotspots need to be rotated for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { - swap(width, height); + swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } if (src_x_offset >= (int)param->viewport.width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)width <= 0) + if (src_x_offset + cursor_width <= 0) cur_en = 0; /* not visible beyond left edge*/ if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset + (int)height <= 0) + if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ REG_UPDATE(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 52e201e9b091..a142a00bc432 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1179,10 +1179,12 @@ void hubp1_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -1200,18 +1202,26 @@ void hubp1_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + 
src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0; @@ -1248,8 +1258,8 @@ void hubp1_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 938dba5249d4..4566bc7abf17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -973,10 +973,12 @@ void hubp2_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -994,18 +996,26 @@ void hubp2_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? 
src_x_offset : 0; @@ -1042,8 +1052,8 @@ void hubp2_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); @@ -1052,8 +1062,8 @@ void hubp2_cursor_set_position( hubp->pos.cur_ctl.bits.cur_enable = cur_en; hubp->pos.position.bits.x_pos = pos->x; hubp->pos.position.bits.y_pos = pos->y; - hubp->pos.hot_spot.bits.x_hot = x_hotspot; - hubp->pos.hot_spot.bits.y_hot = y_hotspot; + hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot; + hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot; hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset; /* Cursor Rectangle Cache * Cursor bitmaps have different hotspot values diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 1bd7e0f327d8..389a8938ee45 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -96,6 +96,13 @@ static void dccg314_set_pixel_rate_div( struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA; + // Don't program 0xF into the register field. Not valid since + // K1 / K2 field is only 1 / 2 bits wide + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); + return; + } + dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA || (k1 == cur_k1 && k2 == cur_k2)) return; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 588c1c71241f..a0741794db62 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -348,10 +348,8 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; @@ -359,7 +357,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig *k2_div = PIXEL_RATE_DIV_BY_2; else *k2_div = PIXEL_RATE_DIV_BY_4; - } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + } else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index e4daed44ef5f..df4f25119142 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -96,8 +96,10 @@ static void dccg32_set_pixel_rate_div( // Don't program 0xF into the register field. 
Not valid since // K1 / K2 field is only 1 / 2 bits wide - if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); return; + } dccg32_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == cur_k1 && k2 == cur_k2) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index ac41a763bb1d..d0b46a3e0155 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1171,10 +1171,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index b03a7814e96d..fa3778849db1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -111,7 +111,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ - mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mall_alloc_height_blk_aligned = (pipe->plane_res.scl_data.viewport.height - 1 + mblk_height - 1) / mblk_height * mblk_height + mblk_height; /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 12e17bcfca3f..2abe3967f7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -157,7 +157,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) @@ -211,7 +211,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = 
sr_enter_plus_exit_time_us; @@ -221,7 +221,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; @@ -1910,7 +1910,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == dm_dram_clock_change_unsupported) { - int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1; + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1; min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 432b4ecd01a7..f4b176599be7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -126,9 +126,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 9.35, - .urgent_latency_pixel_mixed_with_vm_data_us = 9.35, - .urgent_latency_vm_data_only_us = 9.35, + .urgent_latency_pixel_data_only_us = 4, + .urgent_latency_pixel_mixed_with_vm_data_us = 4, + .urgent_latency_vm_data_only_us = 4, .fclk_change_latency_us = 20, .usr_retraining_latency_us = 2, .smn_latency_us = 2, @@ -156,7 +156,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 25c08f963f49..d6b13933a98f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -25,10 +25,10 @@ // *** IMPORTANT *** // PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x2C +#define SMU13_DRIVER_IF_VERSION 0x35 //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x20 +#define PPTABLE_VERSION 0x27 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -96,7 +96,7 @@ #define FEATURE_MEM_TEMP_READ_BIT 47 #define FEATURE_ATHUB_MMHUB_PG_BIT 48 #define FEATURE_SOC_PCC_BIT 49 -#define FEATURE_SPARE_50_BIT 50 +#define 
FEATURE_EDC_PWRBRK_BIT 50 #define FEATURE_SPARE_51_BIT 51 #define FEATURE_SPARE_52_BIT 52 #define FEATURE_SPARE_53_BIT 53 @@ -282,15 +282,15 @@ typedef enum { } I2cControllerPort_e; typedef enum { - I2C_CONTROLLER_NAME_VR_GFX = 0, - I2C_CONTROLLER_NAME_VR_SOC, - I2C_CONTROLLER_NAME_VR_VMEMP, - I2C_CONTROLLER_NAME_VR_VDDIO, - I2C_CONTROLLER_NAME_LIQUID0, - I2C_CONTROLLER_NAME_LIQUID1, - I2C_CONTROLLER_NAME_PLX, - I2C_CONTROLLER_NAME_OTHER, - I2C_CONTROLLER_NAME_COUNT, + I2C_CONTROLLER_NAME_VR_GFX = 0, + I2C_CONTROLLER_NAME_VR_SOC, + I2C_CONTROLLER_NAME_VR_VMEMP, + I2C_CONTROLLER_NAME_VR_VDDIO, + I2C_CONTROLLER_NAME_LIQUID0, + I2C_CONTROLLER_NAME_LIQUID1, + I2C_CONTROLLER_NAME_PLX, + I2C_CONTROLLER_NAME_FAN_INTAKE, + I2C_CONTROLLER_NAME_COUNT, } I2cControllerName_e; typedef enum { @@ -302,6 +302,7 @@ typedef enum { I2C_CONTROLLER_THROTTLER_LIQUID0, I2C_CONTROLLER_THROTTLER_LIQUID1, I2C_CONTROLLER_THROTTLER_PLX, + I2C_CONTROLLER_THROTTLER_FAN_INTAKE, I2C_CONTROLLER_THROTTLER_INA3221, I2C_CONTROLLER_THROTTLER_COUNT, } I2cControllerThrottler_e; @@ -309,8 +310,9 @@ typedef enum { typedef enum { I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5, I2C_CONTROLLER_PROTOCOL_VR_IR35217, - I2C_CONTROLLER_PROTOCOL_TMP_TMP102A, + I2C_CONTROLLER_PROTOCOL_TMP_MAX31875, I2C_CONTROLLER_PROTOCOL_INA3221, + I2C_CONTROLLER_PROTOCOL_TMP_MAX6604, I2C_CONTROLLER_PROTOCOL_COUNT, } I2cControllerProtocol_e; @@ -690,6 +692,9 @@ typedef struct { #define PP_OD_FEATURE_UCLK_BIT 8 #define PP_OD_FEATURE_ZERO_FAN_BIT 9 #define PP_OD_FEATURE_TEMPERATURE_BIT 10 +#define PP_OD_FEATURE_POWER_FEATURE_CTRL_BIT 11 +#define PP_OD_FEATURE_ASIC_TDC_BIT 12 +#define PP_OD_FEATURE_COUNT 13 typedef enum { PP_OD_POWER_FEATURE_ALWAYS_ENABLED, @@ -697,6 +702,11 @@ typedef enum { PP_OD_POWER_FEATURE_ALWAYS_DISABLED, } PP_OD_POWER_FEATURE_e; +typedef enum { + FAN_MODE_AUTO = 0, + FAN_MODE_MANUAL_LINEAR, +} FanMode_e; + typedef struct { uint32_t FeatureCtrlMask; @@ -708,8 +718,8 @@ typedef struct { uint8_t RuntimePwrSavingFeaturesCtrl; //Frequency changes - int16_t GfxclkFmin; // MHz - int16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -730,7 +740,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; uint32_t MmHubPadding[8]; // SMU internal use. 
Adding here instead of external as a workaround } OverDriveTable_t; @@ -748,8 +763,8 @@ typedef struct { uint8_t IdlePwrSavingFeaturesCtrl; uint8_t RuntimePwrSavingFeaturesCtrl; - uint16_t GfxclkFmin; // MHz - uint16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -769,7 +784,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; } OverDriveLimits_t; @@ -903,7 +923,8 @@ typedef struct { uint16_t FanStartTempMin; uint16_t FanStartTempMax; - uint32_t Spare[12]; + uint16_t PowerMinPpt0[POWER_SOURCE_COUNT]; + uint32_t Spare[11]; } MsgLimits_t; @@ -1086,11 +1107,13 @@ typedef struct { uint32_t GfxoffSpare[15]; // GFX GPO - float DfllBtcMasterScalerM; + uint32_t DfllBtcMasterScalerM; int32_t DfllBtcMasterScalerB; - float DfllBtcSlaveScalerM; + uint32_t DfllBtcSlaveScalerM; int32_t DfllBtcSlaveScalerB; - uint32_t GfxGpoSpare[12]; + uint32_t DfllPccAsWaitCtrl; //GDFLL_AS_WAIT_CTRL_PCC register value to be passed to RLC msg + uint32_t DfllPccAsStepCtrl; //GDFLL_AS_STEP_CTRL_PCC register value to be passed to RLC msg + uint32_t GfxGpoSpare[10]; // GFX DCS @@ -1106,7 +1129,10 @@ typedef struct { uint16_t DcsTimeout; //This is the amount of time SMU FW waits for RLC to put GFX into GFXOFF before reverting to the fallback mechanism of throttling GFXCLK to Fmin. - uint32_t DcsSpare[16]; + uint32_t DcsSpare[14]; + + // UCLK section + uint16_t ShadowFreqTableUclk[NUM_UCLK_DPM_LEVELS]; // In MHz // UCLK section uint8_t UseStrobeModeOptimizations; //Set to indicate that FW should use strobe mode optimizations @@ -1163,13 +1189,14 @@ typedef struct { uint16_t IntakeTempHighIntakeAcousticLimit; uint16_t IntakeTempAcouticLimitReleaseRate; - uint16_t FanStalledTempLimitOffset; + int16_t FanAbnormalTempLimitOffset; uint16_t FanStalledTriggerRpm; - uint16_t FanAbnormalTriggerRpm; - uint16_t FanPadding; - - uint32_t FanSpare[14]; + uint16_t FanAbnormalTriggerRpmCoeff; + uint16_t FanAbnormalDetectionEnable; + uint8_t FanIntakeSensorSupport; + uint8_t FanIntakePadding[3]; + uint32_t FanSpare[13]; // SECTION: VDD_GFX AVFS uint8_t OverrideGfxAvfsFuses; @@ -1193,7 +1220,6 @@ typedef struct { uint32_t dGbV_dT_vmin; uint32_t dGbV_dT_vmax; - //Unused: PMFW-9370 uint32_t V2F_vmin_range_low; uint32_t V2F_vmin_range_high; uint32_t V2F_vmax_range_low; @@ -1238,8 +1264,21 @@ typedef struct { // SECTION: Advanced Options uint32_t DebugOverrides; + // Section: Total Board Power idle vs active coefficients + uint8_t TotalBoardPowerSupport; + uint8_t TotalBoardPowerPadding[3]; + + int16_t TotalIdleBoardPowerM; + int16_t TotalIdleBoardPowerB; + int16_t TotalBoardPowerM; + int16_t TotalBoardPowerB; + + QuadraticInt_t qFeffCoeffGameClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBaseClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBoostClock[POWER_SOURCE_COUNT]; + // SECTION: Sku Reserved - uint32_t Spare[64]; + uint32_t Spare[43]; // Padding for MMHUB - do not modify this uint32_t MmHubPadding[8]; @@ -1304,7 +1343,8 @@ typedef struct { // SECTION: Clock Spread Spectrum // UCLK Spread Spectrum - uint16_t UclkSpreadPadding; + uint8_t UclkTrainingModeSpreadPercent; // Q4.4 + uint8_t UclkSpreadPadding; uint16_t UclkSpreadFreq; // kHz // UCLK Spread Spectrum @@ -1317,11 +1357,7 @@ typedef struct { // Section: Memory Config uint8_t DramWidth; // Width of interface to the channel 
for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e - uint8_t PaddingMem1[3]; - - // Section: Total Board Power - uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power - uint16_t BoardPowerPadding; + uint8_t PaddingMem1[7]; // SECTION: UMC feature flags uint8_t HsrEnabled; @@ -1423,8 +1459,11 @@ typedef struct { uint16_t Vcn1ActivityPercentage ; uint32_t EnergyAccumulator; - uint16_t AverageSocketPower ; + uint16_t AverageSocketPower; + uint16_t AverageTotalBoardPower; + uint16_t AvgTemperature[TEMP_COUNT]; + uint16_t AvgTemperatureFanIntake; uint8_t PcieRate ; uint8_t PcieWidth ; @@ -1592,5 +1631,7 @@ typedef struct { #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D0 0x5 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D3 0x6 #define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING 0x7 +#define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 +#define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index b7f4569aff2a..865d6358918d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -31,7 +31,7 @@ #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index ecd22c038c8c..51a46689cda7 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5186,7 +5186,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm mst_state = drm_atomic_get_mst_topology_state(state, mgr); if (IS_ERR(mst_state)) - return -EINVAL; + return PTR_ERR(mst_state); list_for_each_entry(pos, &mst_state->payloads, next) { diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 461c62c88413..de77054195c6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3723,12 +3723,16 @@ out: static u8 bigjoiner_pipes(struct drm_i915_private *i915) { + u8 pipes; + if (DISPLAY_VER(i915) >= 12) - return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); + pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); else if (DISPLAY_VER(i915) >= 11) - return BIT(PIPE_B) | BIT(PIPE_C); + pipes = BIT(PIPE_B) | BIT(PIPE_C); else - return 0; + pipes = 0; + + return pipes & RUNTIME_INFO(i915)->pipe_mask; } static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 1e608b9e5055..1a63da28f330 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2434,7 +2434,7 @@ intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port) { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_io == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_io == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_IO_A; return domains->ddi_io + (int)(port - 
domains->port_start); @@ -2445,7 +2445,7 @@ intel_display_power_ddi_lanes_domain(struct drm_i915_private *i915, enum port po { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_lanes == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_lanes == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_LANES_A; return domains->ddi_lanes + (int)(port - domains->port_start); @@ -2471,7 +2471,7 @@ intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_A; return domains->aux_legacy_usbc + (int)(aux_ch - domains->aux_ch_start); @@ -2482,7 +2482,7 @@ intel_display_power_tbt_aux_domain(struct drm_i915_private *i915, enum aux_ch au { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_tbt == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_tbt == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_TBT1; return domains->aux_tbt + (int)(aux_ch - domains->aux_ch_start); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 3d4305eea1aa..0d6d640225fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -612,6 +612,10 @@ static int i915_ttm_truncate(struct drm_i915_gem_object *obj) WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED); + err = ttm_bo_wait(bo, true, false); + if (err) + return err; + err = i915_ttm_move_notify(bo); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d0b03a928b9a..7caa3412a244 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -625,8 +625,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) @@ -1017,6 +1022,11 @@ static void mmio_invalidate_full(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; + if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS)) + rb.bit = _MASKED_BIT_ENABLE(rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); awake |= engine->mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? 
timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7a45e5360caf..714221f9a131 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -664,8 +664,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) return -ESRCH; } - kvm_get_kvm(vgpu->vfio_device.kvm); - if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; @@ -676,6 +674,7 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; + kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); @@ -715,15 +714,14 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + kvm_put_kvm(vgpu->vfio_device.kvm); + kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; - - if (vgpu->vfio_device.kvm) - kvm_put_kvm(vgpu->vfio_device.kvm); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 2403ccd52c74..bba8cb6e8ae4 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915) u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL); struct dram_info *dram_info = &i915->dram_info; - val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val); - switch (val) { + switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) { case 0: dram_info->type = INTEL_DRAM_DDR4; break; diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 5b120402d405..cc23b90cae02 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work) * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. + * + * If vmbus_device_register() fails, the 'device_obj' is freed in + * vmbus_device_release() as called by device_unregister() in the + * error path of vmbus_device_register(). In the outside error + * path, there's no need to free it. 
*/ ret = vmbus_device_register(newchannel->device_obj); if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); goto err_deq_chan; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8b2e413bf19c..e592c481f7ae 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) ret = device_register(&child_device_obj->device); if (ret) { pr_err("Unable to register child device\n"); + put_device(&child_device_obj->device); return ret; } diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 81e688975c6a..a901e4e33d81 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -938,6 +938,8 @@ static int asus_ec_probe(struct platform_device *pdev) ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors); ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, sizeof(struct ec_sensor), GFP_KERNEL); + if (!ec_data->sensors) + return -ENOMEM; status = setup_lock_data(dev); if (status) { diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 8bf32c6c85d9..9bee4d33fbdf 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) */ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) { for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) { - if (host_bridge->device == tjmax_pci_table[i].device) + if (host_bridge->device == tjmax_pci_table[i].device) { + pci_dev_put(host_bridge); return tjmax_pci_table[i].tjmax; + } } } + pci_dev_put(host_bridge); for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) { if (strstr(c->x86_model_id, tjmax_table[i].id)) @@ -533,6 +536,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) { struct temp_data *tdata = pdata->core_data[indx]; + /* if we errored on add then this is already gone */ + if (!tdata) + return; + /* Remove the sysfs attributes */ sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c index 05f68e9c9477..23b9f94fe0a9 100644 --- a/drivers/hwmon/i5500_temp.c +++ b/drivers/hwmon/i5500_temp.c @@ -117,7 +117,7 @@ static int i5500_temp_probe(struct pci_dev *pdev, u32 tstimer; s8 tsfsc; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Failed to enable device\n"); return err; diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index f6ec165c0fa8..1837cccd993c 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev) return; out_register: + list_del(&data->list); hwmon_device_unregister(data->hwmon_dev); out_user: ipmi_destroy_user(data->user); diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index 2a57f4b60c29..e06186986444 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, * Shunt Voltage Sum register has 14-bit value with 1-bit shift * Other Shunt Voltage registers have 12 bits with 3-bit shift */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) *val = sign_extend32(regval >> 1, 14); else *val = sign_extend32(regval >> 3, 12); @@ -465,7 +465,7 @@ static int ina3221_write_curr(struct 
device *dev, u32 attr, * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe; else regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c index 7404e974762f..2dbbbac9de09 100644 --- a/drivers/hwmon/ltc2947-core.c +++ b/drivers/hwmon/ltc2947-core.c @@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val, return ret; /* in milidegrees celcius, temp is given by: */ - *val = (__val * 204) + 550; + *val = (__val * 204) + 5500; return 0; } diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index fe0cd205502d..f58943cb1341 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -852,7 +852,8 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, CDNS_I2C_POLL_US, CDNS_I2C_TIMEOUT_US); if (ret) { ret = -EAGAIN; - i2c_recover_bus(adap); + if (id->adap.bus_recovery_info) + i2c_recover_bus(adap); goto out; } @@ -1263,8 +1264,13 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->rinfo.pinctrl = devm_pinctrl_get(&pdev->dev); if (IS_ERR(id->rinfo.pinctrl)) { + int err = PTR_ERR(id->rinfo.pinctrl); + dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n"); - return PTR_ERR(id->rinfo.pinctrl); + if (err != -ENODEV) + return err; + } else { + id->adap.bus_recovery_info = &id->rinfo; } id->membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r_mem); @@ -1283,7 +1289,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->adap.retries = 3; /* Default retry value. 
*/ id->adap.algo_data = id; id->adap.dev.parent = &pdev->dev; - id->adap.bus_recovery_info = &id->rinfo; init_completion(&id->xfer_done); snprintf(id->adap.name, sizeof(id->adap.name), "Cadence I2C at %08lx", (unsigned long)r_mem->start); diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 3082183bd66a..fc70920c4dda 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1132,7 +1132,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, int i, result; unsigned int temp; int block_data = msgs->flags & I2C_M_RECV_LEN; - int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; + int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE && + msgs->len >= DMA_THRESHOLD && !block_data; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -1298,7 +1299,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter, result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic); } else { if (!atomic && - i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD) + i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD && + msgs[i].flags & I2C_M_DMA_SAFE) result = i2c_imx_dma_write(i2c_imx, &msgs[i]); else result = i2c_imx_write(i2c_imx, &msgs[i], atomic); diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 0c365b57d957..83457359ec45 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2393,8 +2393,17 @@ static struct platform_driver npcm_i2c_bus_driver = { static int __init npcm_i2c_init(void) { + int ret; + npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - return platform_driver_register(&npcm_i2c_bus_driver); + + ret = platform_driver_register(&npcm_i2c_bus_driver); + if (ret) { + debugfs_remove_recursive(npcm_i2c_debugfs_dir); + return ret; + } + + return 0; } module_init(npcm_i2c_init); diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 84a77512614d..8fce98bb77ff 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -626,7 +626,6 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i dev_err(gi2c->se.dev, "I2C timeout gpi flags:%d addr:0x%x\n", gi2c->cur->flags, gi2c->cur->addr); gi2c->err = -ETIMEDOUT; - goto err; } if (gi2c->err) { diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index b4edf10e8fd0..7539b0740351 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -467,6 +467,7 @@ static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_driver *driver; + bool do_power_on; int status; if (!client) @@ -545,8 +546,8 @@ static int i2c_device_probe(struct device *dev) if (status < 0) goto err_clear_wakeup_irq; - status = dev_pm_domain_attach(&client->dev, - !i2c_acpi_waive_d0_probe(dev)); + do_power_on = !i2c_acpi_waive_d0_probe(dev); + status = dev_pm_domain_attach(&client->dev, do_power_on); if (status) goto err_clear_wakeup_irq; @@ -585,7 +586,7 @@ static int i2c_device_probe(struct device *dev) err_release_driver_resources: devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, do_power_on); err_clear_wakeup_irq: dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -610,7 +611,7 @@ static void i2c_device_remove(struct device *dev) 
devres_release_group(&client->dev, client->devres_group_id); - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, true); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index 490c342ef72a..6e4d10a7cd32 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -805,8 +805,10 @@ static int bma400_get_steps_reg(struct bma400_data *data, int *val) ret = regmap_bulk_read(data->regmap, BMA400_STEP_CNT0_REG, steps_raw, BMA400_STEP_RAW_LEN); - if (ret) + if (ret) { + kfree(steps_raw); return ret; + } *val = get_unaligned_le24(steps_raw); kfree(steps_raw); return IIO_VAL_INT; diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 9341e0e0eb55..998e8bcc06e1 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -202,6 +202,8 @@ static int aspeed_adc_set_trim_data(struct iio_dev *indio_dev) ((scu_otp) & (data->model_data->trim_locate->field)) >> __ffs(data->model_data->trim_locate->field); + if (!trimming_val) + trimming_val = 0x8; } dev_dbg(data->dev, "trimming val = %d, offset = %08x, fields = %08x\n", @@ -563,12 +565,9 @@ static int aspeed_adc_probe(struct platform_device *pdev) if (ret) return ret; - if (of_find_property(data->dev->of_node, "aspeed,trim-data-valid", - NULL)) { - ret = aspeed_adc_set_trim_data(indio_dev); - if (ret) - return ret; - } + ret = aspeed_adc_set_trim_data(indio_dev); + if (ret) + return ret; if (of_find_property(data->dev->of_node, "aspeed,battery-sensing", NULL)) { diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 3bb4028c5d74..df3bc5c3d378 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -245,14 +245,14 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4403_data *afe = iio_priv(indio_dev); - unsigned int reg = afe4403_channel_values[chan->address]; - unsigned int field = afe4403_channel_leds[chan->address]; + unsigned int reg, field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + reg = afe4403_channel_values[chan->address]; ret = afe4403_read(afe, reg, val); if (ret) return ret; @@ -262,6 +262,7 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + field = afe4403_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[field], val); if (ret) return ret; diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 8fca787b2524..836da31b7e30 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -250,20 +250,20 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int value_reg = afe4404_channel_values[chan->address]; - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int value_reg, led_field, offdac_field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + value_reg = afe4404_channel_values[chan->address]; ret = regmap_read(afe->regmap, value_reg, val); if (ret) return ret; return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; ret = regmap_field_read(afe->fields[offdac_field], val); if 
(ret) return ret; @@ -273,6 +273,7 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[led_field], val); if (ret) return ret; @@ -295,19 +296,20 @@ static int afe4404_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int led_field, offdac_field; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; return regmap_field_write(afe->fields[offdac_field], val); } break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; return regmap_field_write(afe->fields[led_field], val); } break; diff --git a/drivers/iio/industrialio-sw-trigger.c b/drivers/iio/industrialio-sw-trigger.c index 994f03a71520..d86a3305d9e8 100644 --- a/drivers/iio/industrialio-sw-trigger.c +++ b/drivers/iio/industrialio-sw-trigger.c @@ -58,8 +58,12 @@ int iio_register_sw_trigger_type(struct iio_sw_trigger_type *t) t->group = configfs_register_default_group(iio_triggers_group, t->name, &iio_trigger_type_group_type); - if (IS_ERR(t->group)) + if (IS_ERR(t->group)) { + mutex_lock(&iio_trigger_types_lock); + list_del(&t->list); + mutex_unlock(&iio_trigger_types_lock); ret = PTR_ERR(t->group); + } return ret; } diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 7cf6e8490123..0d4447df7200 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -293,6 +293,8 @@ config RPR0521 tristate "ROHM RPR0521 ALS and proximity sensor driver" depends on I2C select REGMAP_I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here if you want to build support for ROHM's RPR0521 ambient light and proximity sensor device. 
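The afe4403/afe4404 hunks above move the lookup-table indexing inside the matched switch cases, so chan->address is only used to index the LED/offdac field tables on channel types that actually have such an entry. Below is a minimal, stand-alone sketch of that pattern; the table, channel types, and values are illustrative stand-ins, not the driver's real registers or API.

#include <stdio.h>

enum chan_type { CH_INTENSITY, CH_CURRENT, CH_OTHER };

/* hypothetical per-channel field table, indexed by channel address */
static const unsigned int led_field[] = { 3, 4, 5 };

static int read_raw(enum chan_type type, unsigned int address, int *val)
{
	unsigned int field;  /* assigned only on the paths that need it */

	switch (type) {
	case CH_CURRENT:
		/* validate the address before touching the table */
		if (address >= sizeof(led_field) / sizeof(led_field[0]))
			return -1;
		field = led_field[address];
		*val = (int)field;  /* stand-in for a register/regmap read */
		return 0;
	default:
		return -1;  /* other channel types never index the table */
	}
}

int main(void)
{
	int v;

	if (read_raw(CH_CURRENT, 1, &v) == 0)
		printf("field = %d\n", v);
	return 0;
}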
diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index b62c139baf41..38d4c7644bef 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -54,9 +54,6 @@ #define APDS9960_REG_CONTROL_PGAIN_MASK_SHIFT 2 #define APDS9960_REG_CONFIG_2 0x90 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK 0x60 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT 5 - #define APDS9960_REG_ID 0x92 #define APDS9960_REG_STATUS 0x93 @@ -77,6 +74,9 @@ #define APDS9960_REG_GCONF_1_GFIFO_THRES_MASK_SHIFT 6 #define APDS9960_REG_GCONF_2 0xa3 +#define APDS9960_REG_GCONF_2_GGAIN_MASK 0x60 +#define APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT 5 + #define APDS9960_REG_GOFFSET_U 0xa4 #define APDS9960_REG_GOFFSET_D 0xa5 #define APDS9960_REG_GPULSE 0xa6 @@ -396,9 +396,9 @@ static int apds9960_set_pxs_gain(struct apds9960_data *data, int val) } ret = regmap_update_bits(data->regmap, - APDS9960_REG_CONFIG_2, - APDS9960_REG_CONFIG_2_GGAIN_MASK, - idx << APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT); + APDS9960_REG_GCONF_2, + APDS9960_REG_GCONF_2_GGAIN_MASK, + idx << APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT); if (!ret) data->pxs_gain = idx; mutex_unlock(&data->lock); diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 3a4952935366..3d9c5758d8a4 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -211,12 +211,14 @@ static int raydium_i2c_send(struct i2c_client *client, error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer)); if (likely(!error)) - return 0; + goto out; msleep(RM_RETRY_DELAY_MS); } while (++tries < RM_MAX_RETRIES); dev_err(&client->dev, "%s failed: %d\n", __func__, error); +out: + kfree(tx_buf); return error; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 5a8f780e7ffd..bc94059a5b87 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void) info = dmar_alloc_pci_notify_info(dev, BUS_NOTIFY_ADD_DEVICE); if (!info) { + pci_dev_put(dev); return dmar_dev_scope_status; } else { dmar_pci_bus_add_dev(info); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 996a8b5ee5ee..5287efe247b1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1396,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain) spin_unlock_irqrestore(&domain->lock, flags); } +/* + * The extra devTLB flush quirk impacts those QAT devices with PCI device + * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() + * check because it applies only to the built-in QAT devices and it doesn't + * grant additional privileges. 
+ */ +#define BUGGY_QAT_DEVID_MASK 0x494c +static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) +{ + if (pdev->vendor != PCI_VENDOR_ID_INTEL) + return false; + + if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) + return false; + + return true; +} + static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -1478,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); + quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -3854,8 +3873,10 @@ static inline bool has_external_pci(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->external_facing) + if (pdev->external_facing) { + pci_dev_put(pdev); return true; + } return false; } @@ -4490,9 +4511,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (dev_is_pci(dev)) { if (ecap_dev_iotlb_support(iommu->ecap) && pci_ats_supported(pdev) && - dmar_ats_supported(pdev, iommu)) + dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; - + info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); @@ -4931,3 +4953,48 @@ static void __init check_tylersburg_isoch(void) pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", vtisochctrl); } + +/* + * Here we deal with a device TLB defect where device may inadvertently issue ATS + * invalidation completion before posted writes initiated with translated address + * that utilized translations matching the invalidation address range, violating + * the invalidation completion ordering. + * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is + * vulnerable to this defect. In other words, any dTLB invalidation initiated not + * under the control of the trusted/privileged host device driver must use this + * quirk. + * Device TLBs are invalidated under the following six conditions: + * 1. Device driver does DMA API unmap IOVA + * 2. Device driver unbind a PASID from a process, sva_unbind_device() + * 3. PASID is torn down, after PASID cache is flushed. e.g. process + * exit_mmap() due to crash + * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where + * VM has to free pages that were unmapped + * 5. Userspace driver unmaps a DMA buffer + * 6. Cache invalidation in vSVA usage (upcoming) + * + * For #1 and #2, device drivers are responsible for stopping DMA traffic + * before unmap/unbind. For #3, iommu driver gets mmu_notifier to + * invalidate TLB the same way as normal user unmap which will use this quirk. + * The dTLB invalidation after PASID cache flush does not need this quirk. + * + * As a reminder, #6 will *NEED* this quirk as we enable nested translation. 
+ */ +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long mask, + u32 pasid, u16 qdep) +{ + u16 sid; + + if (likely(!info->dtlb_extra_inval)) + return; + + sid = PCI_DEVID(info->bus, info->devfn); + if (pasid == PASID_RID2PASID) { + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, address, mask); + } else { + qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid, + pasid, qdep, address, mask); + } +} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513..db9df7c3790c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -623,6 +623,7 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; + u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ @@ -728,6 +729,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order); +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long pages, + u32 pasid, u16 qdep); void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, u32 pasid); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7d08eb034f2d..03b25358946c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm, return; qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); - if (info->ats_enabled) + if (info->ats_enabled) { qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, svm->pasid, sdev->qdep, address, order_base_2(pages)); + quirk_extra_dev_tlb_flush(info, address, order_base_2(pages), + svm->pasid, sdev->qdep); + } } static void intel_flush_svm_range_dev(struct intel_svm *svm, @@ -745,12 +748,16 @@ bad_req: * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. 
*/ - if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req)) - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + if (!pdev) + goto bad_req; - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], - iommu->prq_seq_number++); + if (intel_svm_prq_report(iommu, &pdev->dev, req)) + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + else + trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + iommu->prq_seq_number++); + pci_dev_put(pdev); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 542dde9d2609..144027035892 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -35,11 +35,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, struct frame_vector *vec) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int ret_pin_user_pages_fast = 0; - int ret = 0; - int err; + int ret; if (nr_frames == 0) return 0; @@ -52,57 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = pin_user_pages_fast(start, nr_frames, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, (struct page **)(vec->ptrs)); - if (ret > 0) { - vec->got_ref = true; - vec->is_pfns = false; - goto out_unlocked; - } - ret_pin_user_pages_fast = ret; - - mmap_read_lock(mm); - vec->got_ref = false; - vec->is_pfns = true; - ret = 0; - do { - unsigned long *nums = frame_vector_pfns(vec); - - vma = vma_lookup(mm, start); - if (!vma) - break; - - while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { - err = follow_pfn(vma, start, &nums[ret]); - if (err) { - if (ret) - goto out; - // If follow_pfn() returns -EINVAL, then this - // is not an IO mapping or a raw PFN mapping. - // In that case, return the original error from - // pin_user_pages_fast(). Otherwise this - // function would return -EINVAL when - // pin_user_pages_fast() returned -ENOMEM, - // which makes debugging hard. - if (err == -EINVAL && ret_pin_user_pages_fast) - ret = ret_pin_user_pages_fast; - else - ret = err; - goto out; - } - start += PAGE_SIZE; - ret++; - } - /* Bail out if VMA doesn't completely cover the tail page. */ - if (start < vma->vm_end) - break; - } while (ret < nr_frames); -out: - mmap_read_unlock(mm); -out_unlocked: - if (!ret) - ret = -EFAULT; - if (ret > 0) - vec->nr_frames = ret; - return ret; + vec->got_ref = true; + vec->is_pfns = false; + vec->nr_frames = ret; + + if (likely(ret > 0)) + return ret; + + /* This used to (racily) return non-refcounted pfns. Let people know */ + WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); + vec->nr_frames = 0; + return ret ? 
ret : -EFAULT; } EXPORT_SYMBOL(get_vaddr_frames); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index c5de202f530a..de1cc9e1ae57 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1484,6 +1484,11 @@ void mmc_init_erase(struct mmc_card *card) card->pref_erase = 0; } +static bool is_trim_arg(unsigned int arg) +{ + return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; +} + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, unsigned int arg, unsigned int qty) { @@ -1766,7 +1771,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) return -EOPNOTSUPP; - if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && + if (mmc_card_mmc(card) && is_trim_arg(arg) && !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) return -EOPNOTSUPP; @@ -1796,7 +1801,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, * identified by the card->eg_boundary flag. */ rem = card->erase_size - (from % card->erase_size); - if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { + if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { err = mmc_do_erase(card, from, from + rem - 1, arg); from += rem; if ((err) || (to <= from)) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8d9bceeff986..155ce2bdfe62 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3179,7 +3179,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card, struct mmc_test_dbgfs_file *df; if (card->debugfs_root) - debugfs_create_file(name, mode, card->debugfs_root, card, fops); + file = debugfs_create_file(name, mode, card->debugfs_root, + card, fops); df = kmalloc(sizeof(*df), GFP_KERNEL); if (!df) { diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index df941438aef5..26bc59b5a7cc 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2588,13 +2588,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev, return PTR_ERR(host->src_clk_cg); } - host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg"); + /* If present, always enable for this clock gate */ + host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg"); if (IS_ERR(host->sys_clk_cg)) host->sys_clk_cg = NULL; - /* If present, always enable for this clock gate */ - clk_prepare_enable(host->sys_clk_cg); - host->bulk_clks[0].id = "pclk_cg"; host->bulk_clks[1].id = "axi_cg"; host->bulk_clks[2].id = "ahb_cg"; diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 31ea0a2fce35..ffeb5759830f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1512,7 +1512,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc) * system resume back. 
*/ cqhci_writel(cq_host, 0, CQHCI_CTL); - if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) dev_err(mmc_dev(host->mmc), "failed to exit halt state when enable CQE\n"); diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index b92a408f138d..bec3f9e3cd3f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -470,7 +470,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) } if (IS_ERR(sprd_host->pinctrl)) - return 0; + goto reset; switch (ios->signal_voltage) { case MMC_SIGNAL_VOLTAGE_180: @@ -498,6 +498,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) /* Wait for 300 ~ 500 us for pin state stable */ usleep_range(300, 500); + +reset: sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index fef03de85b99..c7ad32a75b57 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -373,6 +373,7 @@ static void sdhci_init(struct sdhci_host *host, int soft) if (soft) { /* force clock reconfiguration */ host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } } @@ -2293,11 +2294,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) } EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); +static bool sdhci_timing_has_preset(unsigned char timing) +{ + switch (timing) { + case MMC_TIMING_UHS_SDR12: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + return true; + }; + return false; +} + +static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) +{ + return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + sdhci_timing_has_preset(timing); +} + +static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) +{ + /* + * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK + * Frequency. Check if preset values need to be enabled, or the Driver + * Strength needs updating. Note, clock changes are handled separately. + */ + return !host->preset_enabled && + (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); +} + void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); + bool reinit_uhs = host->reinit_uhs; + bool turning_on_clk = false; u8 ctrl; + host->reinit_uhs = false; + if (ios->power_mode == MMC_POWER_UNDEFINED) return; @@ -2323,6 +2359,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { + turning_on_clk = ios->clock && !host->clock; + host->ops->set_clock(host, ios->clock); host->clock = ios->clock; @@ -2349,6 +2387,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_bus_width(host, ios->bus_width); + /* + * Special case to avoid multiple clock changes during voltage + * switching. 
+ */ + if (!reinit_uhs && + turning_on_clk && + host->timing == ios->timing && + host->version >= SDHCI_SPEC_300 && + !sdhci_presetable_values_change(host, ios)) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { @@ -2392,6 +2441,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + host->drv_type = ios->drv_type; } else { /* * According to SDHC Spec v3.00, if the Preset Value @@ -2419,19 +2469,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_uhs_signaling(host, ios->timing); host->timing = ios->timing; - if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && - ((ios->timing == MMC_TIMING_UHS_SDR12) || - (ios->timing == MMC_TIMING_UHS_SDR25) || - (ios->timing == MMC_TIMING_UHS_SDR50) || - (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50) || - (ios->timing == MMC_TIMING_MMC_DDR52))) { + if (sdhci_preset_needed(host, ios->timing)) { u16 preset; sdhci_enable_preset_value(host, true); preset = sdhci_get_preset_value(host); ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, preset); + host->drv_type = ios->drv_type; } /* Re-enable SD Clock */ @@ -3768,6 +3813,7 @@ int sdhci_resume_host(struct sdhci_host *host) sdhci_init(host, 0); host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER)); @@ -3830,6 +3876,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) /* Force clock and power re-program */ host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d750c464bd1e..87a3aaa07438 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -524,6 +524,8 @@ struct sdhci_host { unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ + u8 drv_type; /* Current UHS-I driver type */ + bool reinit_uhs; /* Force UHS-related re-initialization */ bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index 24150c933fcb..dc3253b318da 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -113,6 +113,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) struct com20020_dev *info; struct net_device *dev; struct arcnet_local *lp; + int ret = -ENOMEM; dev_dbg(&p_dev->dev, "com20020_attach()\n"); @@ -142,12 +143,18 @@ static int com20020_probe(struct pcmcia_device *p_dev) info->dev = dev; p_dev->priv = info; - return com20020_config(p_dev); + ret = com20020_config(p_dev); + if (ret) + goto fail_config; + + return 0; +fail_config: + free_arcdev(dev); fail_alloc_dev: kfree(info); fail_alloc_info: - return -ENOMEM; + return ret; } /* com20020_attach */ static void com20020_detach(struct pcmcia_device *link) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e84c49bf4d0c..f298b9b3eb77 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3231,16 +3231,23 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct slave *curr_active_slave, *curr_arp_slave; - struct icmp6hdr *hdr = icmp6_hdr(skb); struct in6_addr *saddr, *daddr; + struct { 
+ struct ipv6hdr ip6; + struct icmp6hdr icmp6; + } *combined, _combined; if (skb->pkt_type == PACKET_OTHERHOST || - skb->pkt_type == PACKET_LOOPBACK || - hdr->icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) + skb->pkt_type == PACKET_LOOPBACK) + goto out; + + combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined); + if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP || + combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) goto out; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; + saddr = &combined->ip6.saddr; + daddr = &combined->ip6.daddr; slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n", __func__, slave->dev->name, bond_slave_state(slave), diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index 094197780776..ed3d0b8989a0 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -263,8 +263,10 @@ static void can327_feed_frame_to_netdev(struct can327 *elm, struct sk_buff *skb) { lockdep_assert_held(&elm->lock); - if (!netif_running(elm->dev)) + if (!netif_running(elm->dev)) { + kfree_skb(skb); return; + } /* Queue for NAPI pickup. * rx-offload will update stats and LEDs for us. diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c index 194c86e0f340..8f6dccd5a587 100644 --- a/drivers/net/can/cc770/cc770_isa.c +++ b/drivers/net/can/cc770/cc770_isa.c @@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "couldn't register device (err=%d)\n", err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n", priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_cc770dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 00d11e95fd98..e5575d2755e4 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1909,7 +1909,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev) cdev->hclk = devm_clk_get(cdev->dev, "hclk"); cdev->cclk = devm_clk_get(cdev->dev, "cclk"); - if (IS_ERR(cdev->cclk)) { + if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) { dev_err(cdev->dev, "no clock found\n"); ret = -ENODEV; } diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 8f184a852a0a..f2219aa2824b 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -120,7 +120,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) - return ret; + goto err_free_dev; mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); @@ -132,7 +132,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = m_can_class_register(mcan_class); if (ret) - goto err; + goto err_free_irq; /* Enable interrupt control at CAN wrapper IP */ writel(0x1, base + CTL_CSR_INT_CTL_OFFSET); @@ -144,8 +144,10 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) return 0; -err: +err_free_irq: pci_free_irq_vectors(pci); +err_free_dev: + m_can_class_free_dev(mcan_class->net); return ret; } @@ -161,6 +163,7 @@ static void m_can_pci_remove(struct pci_dev *pci) writel(0x0, priv->base + CTL_CSR_INT_CTL_OFFSET); 
m_can_class_unregister(mcan_class); + m_can_class_free_dev(mcan_class->net); pci_free_irq_vectors(pci); } diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c index d513fac50718..db3e767d5320 100644 --- a/drivers/net/can/sja1000/sja1000_isa.c +++ b/drivers/net/can/sja1000/sja1000_isa.c @@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", DRV_NAME, err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n", DRV_NAME, priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_sja1000dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 25f863b4f5f0..ddb7c5735c9a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -2091,8 +2091,11 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) netdev->dev_port = channel_idx; ret = register_candev(netdev); - if (ret) + if (ret) { + es58x_dev->netdev[channel_idx] = NULL; + free_candev(netdev); return ret; + } netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0), es58x_dev->param->dql_min_limit); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 218b098b261d..47619e9cb005 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -47,6 +47,10 @@ #define MCBA_VER_REQ_USB 1 #define MCBA_VER_REQ_CAN 2 +/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */ +#define MCBA_VER_TERMINATION_ON 0 +#define MCBA_VER_TERMINATION_OFF 1 + #define MCBA_SIDL_EXID_MASK 0x8 #define MCBA_DLC_MASK 0xf #define MCBA_DLC_RTR_MASK 0x40 @@ -463,7 +467,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, priv->usb_ka_first_pass = false; } - if (msg->termination_state) + if (msg->termination_state == MCBA_VER_TERMINATION_ON) priv->can.termination = MCBA_TERMINATION_ENABLED; else priv->can.termination = MCBA_TERMINATION_DISABLED; @@ -785,9 +789,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term) }; if (term == MCBA_TERMINATION_ENABLED) - usb_msg.termination = 1; + usb_msg.termination = MCBA_VER_TERMINATION_ON; else - usb_msg.termination = 0; + usb_msg.termination = MCBA_VER_TERMINATION_OFF; mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg); diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 438e46af03e9..80f07bd20593 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -961,7 +961,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = { { .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", }, { .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", }, { .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", }, - { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", }, + { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", }, { .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", }, { .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", }, { .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", }, diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 215dd17ca790..4059fcc8c832 100644 --- 
a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -256,6 +256,9 @@ static int sja1105_base_tx_mdio_read(struct mii_bus *bus, int phy, int reg) u32 tmp; int rc; + if (reg & MII_ADDR_C45) + return -EOPNOTSUPP; + rc = sja1105_xfer_u32(priv, SPI_READ, regs->mdio_100base_tx + reg, &tmp, NULL); if (rc < 0) @@ -272,6 +275,9 @@ static int sja1105_base_tx_mdio_write(struct mii_bus *bus, int phy, int reg, const struct sja1105_regs *regs = priv->info->regs; u32 tmp = val; + if (reg & MII_ADDR_C45) + return -EOPNOTSUPP; + return sja1105_xfer_u32(priv, SPI_WRITE, regs->mdio_100base_tx + reg, &tmp, NULL); } diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 7633b227b2ca..711d5b5a4c49 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -990,6 +990,7 @@ static int tse_shutdown(struct net_device *dev) int ret; phylink_stop(priv->phylink); + phylink_disconnect_phy(priv->phylink); netif_stop_queue(dev); napi_disable(&priv->napi); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a08f221e30d4..ac4ea93bd8dd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -13,6 +13,7 @@ #include "aq_ptp.h" #include "aq_filters.h" #include "aq_macsec.h" +#include "aq_main.h" #include <linux/ptp_clock_kernel.h> @@ -858,7 +859,7 @@ static int aq_set_ringparam(struct net_device *ndev, if (netif_running(ndev)) { ndev_running = true; - dev_close(ndev); + aq_ndev_close(ndev); } cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min); @@ -874,7 +875,7 @@ static int aq_set_ringparam(struct net_device *ndev, goto err_exit; if (ndev_running) - err = dev_open(ndev, NULL); + err = aq_ndev_open(ndev); err_exit: return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 8a0af371e7dc..77609dc0a08d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -58,7 +58,7 @@ struct net_device *aq_ndev_alloc(void) return ndev; } -static int aq_ndev_open(struct net_device *ndev) +int aq_ndev_open(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; @@ -88,7 +88,7 @@ err_exit: return err; } -static int aq_ndev_close(struct net_device *ndev) +int aq_ndev_close(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h index 99870865f66d..a78c1a168d8e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h @@ -16,5 +16,7 @@ DECLARE_STATIC_KEY_FALSE(aq_xdp_locking_key); void aq_ndev_schedule_work(struct work_struct *work); struct net_device *aq_ndev_alloc(void); +int aq_ndev_open(struct net_device *ndev); +int aq_ndev_close(struct net_device *ndev); #endif /* AQ_MAIN_H */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 11d15cd03600..77d4cb4ad782 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -795,16 +795,20 @@ static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf) static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 
abs_vfid) { - struct pci_dev *dev; struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid); + struct pci_dev *dev; + bool pending; if (!vf) return false; dev = pci_get_domain_bus_and_slot(vf->domain, vf->bus, vf->devfn); - if (dev) - return bnx2x_is_pcie_pending(dev); - return false; + if (!dev) + return false; + pending = bnx2x_is_pcie_pending(dev); + pci_dev_put(dev); + + return pending; } int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 75771825c3f9..98793b2ac2c7 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1794,7 +1794,7 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) { + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) { ret = setup_tx_poll_fn(netdev); if (ret) goto err_poll; @@ -1824,7 +1824,7 @@ static int liquidio_open(struct net_device *netdev) return 0; err_rx_ctrl: - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) cleanup_tx_poll_fn(netdev); err_poll: if (lio->ptp_clock) { diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 2f6484dc186a..7eb2ddbe9bad 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1436,8 +1436,10 @@ static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl, return AE_OK; } - if (strncmp(string.pointer, bgx_sel, 4)) + if (strncmp(string.pointer, bgx_sel, 4)) { + kfree(string.pointer); return AE_OK; + } acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, bgx_acpi_register_phy, NULL, bgx, NULL); diff --git a/drivers/net/ethernet/davicom/dm9051.c b/drivers/net/ethernet/davicom/dm9051.c index a523ddda7609..de7105a84747 100644 --- a/drivers/net/ethernet/davicom/dm9051.c +++ b/drivers/net/ethernet/davicom/dm9051.c @@ -798,8 +798,10 @@ static int dm9051_loop_rx(struct board_info *db) } ret = dm9051_stop_mrcmd(db); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } skb->protocol = eth_type_trans(skb, db->ndev); if (db->ndev->features & NETIF_F_RXCSUM) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 48fb391951dd..13d5ff4e0e02 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -542,6 +542,27 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) return (budget != 0); } +static bool tsnep_tx_pending(struct tsnep_tx *tx) +{ + unsigned long flags; + struct tsnep_tx_entry *entry; + bool pending = false; + + spin_lock_irqsave(&tx->lock, flags); + + if (tx->read != tx->write) { + entry = &tx->entry[tx->read]; + if ((__le32_to_cpu(entry->desc_wb->properties) & + TSNEP_TX_DESC_OWNER_MASK) == + (entry->properties & TSNEP_TX_DESC_OWNER_MASK)) + pending = true; + } + + spin_unlock_irqrestore(&tx->lock, flags); + + return pending; +} + static int tsnep_tx_open(struct tsnep_adapter *adapter, void __iomem *addr, int queue_index, struct tsnep_tx *tx) { @@ -821,6 +842,19 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi, return done; } +static bool tsnep_rx_pending(struct tsnep_rx *rx) +{ + struct tsnep_rx_entry *entry; + + entry = &rx->entry[rx->read]; + if 
((__le32_to_cpu(entry->desc_wb->properties) & + TSNEP_DESC_OWNER_COUNTER_MASK) == + (entry->properties & TSNEP_DESC_OWNER_COUNTER_MASK)) + return true; + + return false; +} + static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr, int queue_index, struct tsnep_rx *rx) { @@ -866,6 +900,17 @@ static void tsnep_rx_close(struct tsnep_rx *rx) tsnep_rx_ring_cleanup(rx); } +static bool tsnep_pending(struct tsnep_queue *queue) +{ + if (queue->tx && tsnep_tx_pending(queue->tx)) + return true; + + if (queue->rx && tsnep_rx_pending(queue->rx)) + return true; + + return false; +} + static int tsnep_poll(struct napi_struct *napi, int budget) { struct tsnep_queue *queue = container_of(napi, struct tsnep_queue, @@ -886,9 +931,19 @@ static int tsnep_poll(struct napi_struct *napi, int budget) if (!complete) return budget; - if (likely(napi_complete_done(napi, done))) + if (likely(napi_complete_done(napi, done))) { tsnep_enable_irq(queue->adapter, queue->irq_mask); + /* reschedule if work is already pending, prevent rotten packets + * which are transmitted or received after polling but before + * interrupt enable + */ + if (tsnep_pending(queue)) { + tsnep_disable_irq(queue->adapter, queue->irq_mask); + napi_schedule(napi); + } + } + return min(done, budget - 1); } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index f8c06c3f9464..8671591cb750 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2058,7 +2058,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) /* enable Tx ints by setting pkt thr to 1 */ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); - tbmr = ENETC_TBMR_EN; + tbmr = ENETC_TBMR_EN | ENETC_TBMR_SET_PRIO(tx_ring->prio); if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX) tbmr |= ENETC_TBMR_VIH; @@ -2461,7 +2461,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) /* Reset all ring priorities to 0 */ for (i = 0; i < priv->num_tx_rings; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, 0); + tx_ring->prio = 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } return 0; @@ -2480,7 +2481,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) */ for (i = 0; i < num_tc; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, i); + tx_ring->prio = i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } /* Reset the number of netdev queues based on the TC count */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 161930a65f61..c6d8cc15c270 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -95,6 +95,7 @@ struct enetc_bdr { void __iomem *rcir; }; u16 index; + u16 prio; int bd_count; /* # of BDs */ int next_to_use; int next_to_clean; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index a842e1999122..fcebb54224c0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -137,6 +137,7 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) struct tc_taprio_qopt_offload *taprio = type_data; struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_bdr *tx_ring; int err; int i; @@ -145,16 +146,20 @@ int 
enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) if (priv->tx_ring[i]->tsd_enable) return -EBUSY; - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? i : 0); + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? i : 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } err = enetc_setup_taprio(ndev, taprio); - - if (err) - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? 0 : i); + if (err) { + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? 0 : i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } + } return err; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f623c12eaf95..2ca2b61b451f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -74,7 +74,7 @@ #include "fec.h" static void set_multicast_list(struct net_device *ndev); -static void fec_enet_itr_coal_init(struct net_device *ndev); +static void fec_enet_itr_coal_set(struct net_device *ndev); #define DRIVER_NAME "fec" @@ -1220,8 +1220,7 @@ fec_restart(struct net_device *ndev) writel(0, fep->hwp + FEC_IMASK); /* Init the interrupt coalescing */ - fec_enet_itr_coal_init(ndev); - + fec_enet_itr_coal_set(ndev); } static int fec_enet_ipc_handle_init(struct fec_enet_private *fep) @@ -2856,19 +2855,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev, return 0; } -static void fec_enet_itr_coal_init(struct net_device *ndev) -{ - struct ethtool_coalesce ec; - - ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - fec_enet_set_coalesce(ndev, &ec, NULL, NULL); -} - static int fec_enet_get_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, void *data) @@ -3623,6 +3609,10 @@ static int fec_enet_init(struct net_device *ndev) fep->rx_align = 0x3; fep->tx_align = 0x3; #endif + fep->rx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->tx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->rx_time_itr = FEC_ITR_ICTT_DEFAULT; + fep->tx_time_itr = FEC_ITR_ICTT_DEFAULT; /* Check mask of the streaming and coherent API */ ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32)); diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 560d1d442232..d3fdc290937f 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1741,11 +1741,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); /* If we can't map the skb, have the upper layer try later */ - if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { - dev_kfree_skb_any(skb); - skb = NULL; + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) return -ENOMEM; - } /* * Use the last 4 bytes of the SKB payload packet as the CRC, used for diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 4a6630586ec9..fc373472e4e1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue; **/ static int __init fm10k_init_module(void) { + int ret; + pr_info("%s\n", 
fm10k_driver_string); pr_info("%s\n", fm10k_copyright); @@ -43,7 +45,13 @@ static int __init fm10k_init_module(void) fm10k_dbg_init(); - return fm10k_register_pci_driver(); + ret = fm10k_register_pci_driver(); + if (ret) { + fm10k_dbg_exit(); + destroy_workqueue(fm10k_workqueue); + } + + return ret; } module_init(fm10k_init_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b5dcd15ced36..b3cb587a2032 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16644,6 +16644,8 @@ static struct pci_driver i40e_driver = { **/ static int __init i40e_init_module(void) { + int err; + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); @@ -16661,7 +16663,14 @@ static int __init i40e_init_module(void) } i40e_dbg_init(); - return pci_register_driver(&i40e_driver); + err = pci_register_driver(&i40e_driver); + if (err) { + destroy_workqueue(i40e_wq); + i40e_dbg_exit(); + return err; + } + + return 0; } module_init(i40e_init_module); diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 3f6187c16424..0d1bab4ac1b0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -298,7 +298,6 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) -#define IAVF_FLAG_INITIAL_MAC_SET BIT(23) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 3fc572341781..f71e132ede09 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1087,12 +1087,6 @@ static int iavf_set_mac(struct net_device *netdev, void *p) if (ret) return ret; - /* If this is an initial set MAC during VF spawn do not wait */ - if (adapter->flags & IAVF_FLAG_INITIAL_MAC_SET) { - adapter->flags &= ~IAVF_FLAG_INITIAL_MAC_SET; - return 0; - } - ret = wait_event_interruptible_timeout(adapter->vc_waitqueue, iavf_is_mac_set_handled(netdev, addr->sa_data), msecs_to_jiffies(2500)); @@ -2605,8 +2599,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } - adapter->flags |= IAVF_FLAG_INITIAL_MAC_SET; - adapter->tx_desc_count = IAVF_DEFAULT_TXD; adapter->rx_desc_count = IAVF_DEFAULT_RXD; err = iavf_init_interrupt_scheme(adapter); @@ -2921,7 +2913,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_queues(adapter); memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); - adapter->netdev->flags &= ~IFF_UP; adapter->flags &= ~IAVF_FLAG_RESET_PENDING; iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); @@ -3021,6 +3012,11 @@ static void iavf_reset_task(struct work_struct *work) iavf_disable_vf(adapter); mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + if (netif_running(netdev)) { + rtnl_lock(); + dev_close(netdev); + rtnl_unlock(); + } return; /* Do not attempt to reinit. It's dead, Jim. 
*/ } @@ -3033,6 +3029,7 @@ continue_reset: if (running) { netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); adapter->link_up = false; iavf_napi_disable_all(adapter); } @@ -3172,6 +3169,16 @@ reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + + if (netif_running(netdev)) { + /* Close device to ensure that Tx queues will not be started + * during netif_device_attach() at the end of the reset task. + */ + rtnl_lock(); + dev_close(netdev); + rtnl_unlock(); + } + dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); reset_finish: rtnl_lock(); @@ -5035,23 +5042,21 @@ static int __maybe_unused iavf_resume(struct device *dev_d) static void iavf_remove(struct pci_dev *pdev) { struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); - struct net_device *netdev = adapter->netdev; struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; + struct iavf_cloud_filter *cf, *cftmp; struct iavf_adv_rss *rss, *rsstmp; struct iavf_mac_filter *f, *ftmp; - struct iavf_cloud_filter *cf, *cftmp; - struct iavf_hw *hw = &adapter->hw; + struct net_device *netdev; + struct iavf_hw *hw; int err; - /* When reboot/shutdown is in progress no need to do anything - * as the adapter is already REMOVE state that was set during - * iavf_shutdown() callback. - */ - if (adapter->state == __IAVF_REMOVE) + netdev = adapter->netdev; + hw = &adapter->hw; + + if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return; - set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); /* Wait until port initialization is complete. * There are flows where register/unregister netdev may race. */ @@ -5191,6 +5196,8 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { + int ret; + pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -5201,7 +5208,12 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - return pci_register_driver(&iavf_driver); + + ret = pci_register_driver(&iavf_driver); + if (ret) + destroy_workqueue(iavf_wq); + + return ret; } module_init(iavf_init_module); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0f6718719453..ca2898467dcb 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3145,15 +3145,15 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) */ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) { - irqreturn_t ret = IRQ_HANDLED; struct ice_pf *pf = data; - bool irq_handled; - irq_handled = ice_ptp_process_ts(pf); - if (!irq_handled) - ret = IRQ_WAKE_THREAD; + if (ice_is_reset_in_progress(pf->state)) + return IRQ_HANDLED; - return ret; + while (!ice_ptp_process_ts(pf)) + usleep_range(50, 100); + + return IRQ_HANDLED; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 011b727ab190..0f668468d141 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -614,11 +614,14 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * 2) extend the 40b timestamp value to get a 64bit timestamp * 3) send that timestamp to the stack * - * After looping, if we still have waiting SKBs, return true. This may cause us - * effectively poll even when not strictly necessary. 
We do this because it's - * possible a new timestamp was requested around the same time as the interrupt. - * In some cases hardware might not interrupt us again when the timestamp is - * captured. + * Returns true if all timestamps were handled, and false if any slots remain + * without a timestamp. + * + * After looping, if we still have waiting SKBs, return false. This may cause + * us effectively poll even when not strictly necessary. We do this because + * it's possible a new timestamp was requested around the same time as the + * interrupt. In some cases hardware might not interrupt us again when the + * timestamp is captured. * * Note that we only take the tracking lock when clearing the bit and when * checking if we need to re-queue this task. The only place where bits can be @@ -641,7 +644,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) u8 idx; if (!tx->init) - return false; + return true; ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); @@ -2381,10 +2384,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) */ bool ice_ptp_process_ts(struct ice_pf *pf) { - if (pf->ptp.port.tx.init) - return ice_ptp_tx_tstamp(&pf->ptp.port.tx); - - return false; + return ice_ptp_tx_tstamp(&pf->ptp.port.tx); } static void ice_ptp_periodic_work(struct kthread_work *work) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 99933e89717a..e338fa572793 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4869,6 +4869,8 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { + int err; + pr_info("%s\n", ixgbevf_driver_string); pr_info("%s\n", ixgbevf_copyright); ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); @@ -4877,7 +4879,13 @@ static int __init ixgbevf_init_module(void) return -ENOMEM; } - return pci_register_driver(&ixgbevf_driver); + err = pci_register_driver(&ixgbevf_driver); + if (err) { + destroy_workqueue(ixgbevf_wq); + return err; + } + + return 0; } module_init(ixgbevf_init_module); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index eb0fb8128096..b399bdb1ca36 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7350,6 +7350,7 @@ static int mvpp2_get_sram(struct platform_device *pdev, struct mvpp2 *priv) { struct resource *res; + void __iomem *base; res = platform_get_resource(pdev, IORESOURCE_MEM, 2); if (!res) { @@ -7360,9 +7361,12 @@ static int mvpp2_get_sram(struct platform_device *pdev, return 0; } - priv->cm3_base = devm_ioremap_resource(&pdev->dev, res); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); - return PTR_ERR_OR_ZERO(priv->cm3_base); + priv->cm3_base = base; + return 0; } static int mvpp2_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 6b4f640163f7..993ac180a5db 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -32,7 +32,6 @@ config OCTEONTX2_PF tristate "Marvell OcteonTX2 NIC Physical Function driver" select OCTEONTX2_MBOX select NET_DEVLINK - depends on MACSEC || !MACSEC depends on (64BIT && COMPILE_TEST) || ARM64 select DIMLIB depends on PCI diff --git 
a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index 4a343f853b28..c0bedf402da9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -951,7 +951,7 @@ static void mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction d else event.intr_mask = (dir == MCS_RX) ? MCS_BBE_RX_PLFIFO_OVERFLOW_INT : - MCS_BBE_RX_PLFIFO_OVERFLOW_INT; + MCS_BBE_TX_PLFIFO_OVERFLOW_INT; /* Notify the lmac_id info which ran into BBE fatal error */ event.lmac_id = i & 0x3ULL; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a1970ebedf95..f66dde2b0f92 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -880,6 +880,8 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) sprintf(lmac, "LMAC%d", lmac_id); seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n", dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac); + + pci_dev_put(pdev); } return 0; } @@ -2566,6 +2568,7 @@ static int cgx_print_dmac_flt(struct seq_file *s, int lmac_id) } } + pci_dev_put(pdev); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 7646bb2ec89b..a62c1b322012 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4985,6 +4985,8 @@ static int nix_setup_ipolicers(struct rvu *rvu, ipolicer->ref_count = devm_kcalloc(rvu->dev, ipolicer->band_prof.max, sizeof(u16), GFP_KERNEL); + if (!ipolicer->ref_count) + return -ENOMEM; } /* Set policer timeunit to 2us ie (19 + 1) * 100 nsec = 2us */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c index b04fb226f708..ae50d56258ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c @@ -62,15 +62,18 @@ int rvu_sdp_init(struct rvu *rvu) pfvf->sdp_info = devm_kzalloc(rvu->dev, sizeof(struct sdp_node_info), GFP_KERNEL); - if (!pfvf->sdp_info) + if (!pfvf->sdp_info) { + pci_dev_put(pdev); return -ENOMEM; + } dev_info(rvu->dev, "SDP PF number:%d\n", sdp_pf_num[i]); - put_device(&pdev->dev); i++; } + pci_dev_put(pdev); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 282db6fe3b08..67aa02bb2b85 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -884,7 +884,7 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx) { #ifdef CONFIG_DCB - if (pfvf->pfc_alloc_status[qidx]) + if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx]) return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx]; #endif diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 24f9d6024745..47796e4d900c 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -746,6 +746,7 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) return 0; err_sfp_bind: + unregister_netdev(dev); err_register_netdev: prestera_port_list_del(port); err_port_init: 
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 4046be0e86ff..a9a1028cb17b 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -457,7 +457,7 @@ prestera_kern_neigh_cache_find(struct prestera_switch *sw, n_cache = rhashtable_lookup_fast(&sw->router->kern_neigh_cache_ht, key, __prestera_kern_neigh_cache_ht_params); - return IS_ERR(n_cache) ? NULL : n_cache; + return n_cache; } static void diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c index aa080dc57ff0..02faaea2aefa 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c @@ -330,7 +330,7 @@ prestera_nh_neigh_find(struct prestera_switch *sw, nh_neigh = rhashtable_lookup_fast(&sw->router->nh_neigh_ht, key, __prestera_nh_neigh_ht_params); - return IS_ERR(nh_neigh) ? NULL : nh_neigh; + return nh_neigh; } struct prestera_nh_neigh * @@ -484,7 +484,7 @@ __prestera_nexthop_group_find(struct prestera_switch *sw, nh_grp = rhashtable_lookup_fast(&sw->router->nexthop_group_ht, key, __prestera_nexthop_group_ht_params); - return IS_ERR(nh_grp) ? NULL : nh_grp; + return nh_grp; } static struct prestera_nexthop_group * diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7cd381530aa4..1d36619c5ec9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2378,8 +2378,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) data + NET_SKB_PAD + eth->ip_align, ring->buf_size, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(eth->dma_dev, - dma_addr))) + dma_addr))) { + skb_free_frag(data); return -ENOMEM; + } } rxd->rxd1 = (unsigned int)dma_addr; ring->data[i] = data; @@ -2996,8 +2998,10 @@ static int mtk_open(struct net_device *dev) int i; err = mtk_start_dma(eth); - if (err) + if (err) { + phylink_disconnect_phy(mac->phylink); return err; + } for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) mtk_ppe_start(eth->ppe[i]); @@ -4143,13 +4147,13 @@ static int mtk_probe(struct platform_device *pdev) eth->soc->offload_version, i); if (!eth->ppe[i]) { err = -ENOMEM; - goto err_free_dev; + goto err_deinit_ppe; } } err = mtk_eth_offload_init(eth); if (err) - goto err_free_dev; + goto err_deinit_ppe; } for (i = 0; i < MTK_MAX_DEVS; i++) { @@ -4159,7 +4163,7 @@ static int mtk_probe(struct platform_device *pdev) err = register_netdev(eth->netdev[i]); if (err) { dev_err(eth->dev, "error bringing up device\n"); - goto err_deinit_mdio; + goto err_deinit_ppe; } else netif_info(eth, probe, eth->netdev[i], "mediatek frame engine at 0x%08lx, irq %d\n", @@ -4177,7 +4181,8 @@ static int mtk_probe(struct platform_device *pdev) return 0; -err_deinit_mdio: +err_deinit_ppe: + mtk_ppe_deinit(eth); mtk_mdio_cleanup(eth); err_free_dev: mtk_free_dev(eth); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 2d8ca99f2467..784ecb2dc9fb 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -737,7 +737,7 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, MTK_PPE_ENTRIES * soc->foe_entry_size, &ppe->foe_phys, GFP_KERNEL); if (!foe) - return NULL; + goto err_free_l2_flows; ppe->foe_table = foe; @@ -745,11 +745,26 @@ struct 
mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, sizeof(*ppe->foe_flow); ppe->foe_flow = devm_kzalloc(dev, foe_flow_size, GFP_KERNEL); if (!ppe->foe_flow) - return NULL; + goto err_free_l2_flows; mtk_ppe_debugfs_init(ppe, index); return ppe; + +err_free_l2_flows: + rhashtable_destroy(&ppe->l2_flows); + return NULL; +} + +void mtk_ppe_deinit(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) { + if (!eth->ppe[i]) + return; + rhashtable_destroy(&eth->ppe[i]->l2_flows); + } } static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index 0b7a67a958e4..a09c32539bcc 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -304,6 +304,7 @@ struct mtk_ppe { struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version, int index); +void mtk_ppe_deinit(struct mtk_eth *eth); void mtk_ppe_start(struct mtk_ppe *ppe); int mtk_ppe_stop(struct mtk_ppe *ppe); diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index b149e601f673..48cfaa7eaf50 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -697,7 +697,8 @@ static int mlx4_create_zones(struct mlx4_dev *dev, err = mlx4_bitmap_init(*bitmap + k, 1, MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0, 0); - mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); + if (!err) + mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2e0d59ca62b5..e7a894ba5c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -45,6 +45,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/tout.h" +#define CREATE_TRACE_POINTS +#include "diag/cmd_tracepoint.h" enum { CMD_IF_REV = 5, @@ -785,27 +787,14 @@ EXPORT_SYMBOL(mlx5_cmd_out_err); static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { u16 opcode, op_mod; - u32 syndrome; - u8 status; u16 uid; - int err; - - syndrome = MLX5_GET(mbox_out, out, syndrome); - status = MLX5_GET(mbox_out, out, status); opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); - err = cmd_status_to_err(status); - if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) mlx5_cmd_out_err(dev, opcode, op_mod, out); - else - mlx5_core_dbg(dev, - "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", - mlx5_command_str(opcode), opcode, op_mod, uid, - cmd_status_str(status), status, syndrome, err); } int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) @@ -1016,6 +1005,7 @@ static void cmd_work_handler(struct work_struct *work) cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { ent->ret = -ENXIO; @@ -1023,7 +1013,6 @@ static void cmd_work_handler(struct work_struct *work) return; } - cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -1508,8 +1497,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, return -EFAULT; err = 
sscanf(outlen_str, "%d", &outlen); - if (err < 0) - return err; + if (err != 1) + return -EINVAL; ptr = kzalloc(outlen, GFP_KERNEL); if (!ptr) @@ -1672,8 +1661,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force cmd_ent_put(ent); /* timeout work was canceled */ if (!forced || /* Real FW completion */ - pci_channel_offline(dev->pdev) || /* FW is inaccessible */ - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ + !opcode_allowed(cmd, ent->op)) cmd_ent_put(ent); ent->ts2 = ktime_get_ns(); @@ -1892,6 +1881,16 @@ out_in: return err; } +static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, + cmd_status_to_err(status)); +} + static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, u32 syndrome, int err) { @@ -1914,7 +1913,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, } /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ -static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out) +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out) { u32 syndrome = MLX5_GET(mbox_out, out, syndrome); u8 status = MLX5_GET(mbox_out, out, status); @@ -1922,8 +1921,10 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void * if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ err = -EIO; - if (!err && status != MLX5_CMD_STAT_OK) + if (!err && status != MLX5_CMD_STAT_OK) { err = -EREMOTEIO; + mlx5_cmd_err_trace(dev, opcode, op_mod, out); + } cmd_status_log(dev, opcode, status, syndrome, err); return err; @@ -1951,9 +1952,9 @@ int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); - return err; + return cmd_status_err(dev, err, opcode, op_mod, out); } EXPORT_SYMBOL(mlx5_cmd_do); @@ -1997,8 +1998,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); + err = cmd_status_err(dev, err, opcode, op_mod, out); return mlx5_cmd_check(dev, err, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec_polling); @@ -2034,7 +2036,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) struct mlx5_async_ctx *ctx; ctx = work->ctx; - status = cmd_status_err(ctx->dev, status, work->opcode, work->out); + status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); @@ -2049,6 +2051,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; work->opcode = MLX5_GET(mbox_in, in, opcode); + work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h new file mode 100644 index 000000000000..406ebe17405f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_CMD_TP_H_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +TRACE_EVENT(mlx5_cmd, + TP_PROTO(const char *command_str, u16 opcode, u16 op_mod, + const char *status_str, u8 status, u32 syndrome, int err), + TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err), + TP_STRUCT__entry(__string(command_str, command_str) + __field(u16, opcode) + __field(u16, op_mod) + __string(status_str, status_str) + __field(u8, status) + __field(u32, syndrome) + __field(int, err) + ), + TP_fast_assign(__assign_str(command_str, command_str); + __entry->opcode = opcode; + __entry->op_mod = op_mod; + __assign_str(status_str, status_str); + __entry->status = status; + __entry->syndrome = syndrome; + __entry->err = err; + ), + TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)", + __get_str(command_str), __entry->opcode, __entry->op_mod, + __get_str(status_str), __entry->status, __entry->syndrome, + __entry->err) +); + +#endif /* _MLX5_CMD_TP_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cmd_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 978a2bb8e122..21831386b26e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -638,7 +638,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); else - trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) | + trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 5aff97914367..ff73d25bc6eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -224,15 +224,16 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) continue; - spec = &flow->attr->parse_attr->spec; - - /* update from encap rule to slow path rule */ - rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; /* mark the flow's encap dest as non-valid */ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + + /* update from encap rule to slow path rule */ + spec = &flow->attr->parse_attr->spec; + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -251,6 +252,7 @@ void 
mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, /* we know that the encap is valid */ e->flags &= ~MLX5_ENCAP_ENTRY_VALID; mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; } static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, @@ -762,8 +764,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid) + struct net_device **encap_dev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -878,9 +879,8 @@ attach_flow: if (e->flags & MLX5_ENCAP_ENTRY_VALID) { attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; - *encap_valid = true; } else { - *encap_valid = false; + flow_flag_set(flow, SLOW); } mutex_unlock(&esw->offloads.encap_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index d542b8476491..8ad273dde40e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -17,8 +17,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid); + struct net_device **encap_dev); int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 285d32d2fd08..d7c020f72401 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -365,7 +365,7 @@ void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); } @@ -397,7 +397,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) err_destroy_tables: while (--i >= 0) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 2ef36cb9555a..f900709639f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -368,15 +368,15 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - if (is_tx) { - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci); - key = &ctx->sa.tx_sa->key; - } else { - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - key = &ctx->sa.rx_sa->key; + key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; + + if (sa->epn_state.epn_enabled) { + obj_attrs.ssci = (is_tx) ? 
cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : + cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); + + memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); } - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); obj_attrs.replay_window = ctx->secy->replay_window; obj_attrs.replay_protect = ctx->secy->replay_protect; @@ -427,15 +427,15 @@ mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) return NULL; } -static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *rx_sa, - bool active) +static int macsec_rx_sa_active_update(struct macsec_context *ctx, + struct mlx5e_macsec_sa *rx_sa, + bool active) { - struct mlx5_core_dev *mdev = macsec->mdev; - struct mlx5_macsec_obj_attrs attrs = {}; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; int err = 0; - if (rx_sa->active != active) + if (rx_sa->active == active) return 0; rx_sa->active = active; @@ -444,13 +444,11 @@ static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, return 0; } - attrs.sci = cpu_to_be64((__force u64)rx_sa->sci); - attrs.enc_key_id = rx_sa->enc_key_id; - err = mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) - return err; + rx_sa->active = false; - return 0; + return err; } static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) @@ -476,6 +474,11 @@ static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) return false; } + if (!ctx->secy->tx_sc.encrypt) { + netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n"); + return false; + } + return true; } @@ -620,6 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) if (tx_sa->active == ctx_tx_sa->active) goto out; + tx_sa->active = ctx_tx_sa->active; if (tx_sa->assoc_num != tx_sc->encoding_sa) goto out; @@ -635,8 +639,6 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); } - - tx_sa->active = ctx_tx_sa->active; out: mutex_unlock(&macsec->lock); @@ -736,9 +738,14 @@ static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) sc_xarray_element->rx_sc = rx_sc; err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element, - XA_LIMIT(1, USHRT_MAX), GFP_KERNEL); - if (err) + XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL); + if (err) { + if (err == -EBUSY) + netdev_err(ctx->netdev, + "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n", + MLX5_MACEC_RX_FS_ID_MAX); goto destroy_sc_xarray_elemenet; + } rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); if (!rx_sc->md_dst) { @@ -798,16 +805,16 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) goto out; } - rx_sc->active = ctx_rx_sc->active; if (rx_sc->active == ctx_rx_sc->active) goto out; + rx_sc->active = ctx_rx_sc->active; for (i = 0; i < MACSEC_NUM_AN; ++i) { rx_sa = rx_sc->rx_sa[i]; if (!rx_sa) continue; - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active); if (err) goto out; } @@ -818,16 +825,43 @@ out: return err; } +static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc) +{ + struct mlx5e_macsec_sa *rx_sa; + int i; + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + 
mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + + /* At this point the relevant MACsec offload Rx rule already removed at + * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current + * Rx related data propagating using xa_erase which uses rcu to sync, + * once fs_id is erased then this rx_sc is hidden from datapath. + */ + list_del_rcu(&rx_sc->rx_sc_list_element); + xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); + metadata_dst_free(rx_sc->md_dst); + kfree(rx_sc->sc_xarray_element); + kfree_rcu(rx_sc); +} + static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) { struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec *macsec; struct list_head *list; int err = 0; - int i; mutex_lock(&priv->macsec->lock); @@ -849,31 +883,7 @@ static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) goto out; } - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - -/* - * At this point the relevant MACsec offload Rx rule already removed at - * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current - * Rx related data propagating using xa_erase which uses rcu to sync, - * once fs_id is erased then this rx_sc is hidden from datapath. - */ - list_del_rcu(&rx_sc->rx_sc_list_element); - xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); - metadata_dst_free(rx_sc->md_dst); - kfree(rx_sc->sc_xarray_element); - - kfree_rcu(rx_sc); - + macsec_del_rxsc_ctx(macsec, rx_sc); out: mutex_unlock(&macsec->lock); @@ -1015,7 +1025,7 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) goto out; } - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active); out: mutex_unlock(&macsec->lock); @@ -1155,7 +1165,7 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) goto out; } @@ -1234,7 +1244,6 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc, *tmp; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec_sa *tx_sa; struct mlx5e_macsec *macsec; struct list_head *list; @@ -1263,28 +1272,15 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) } list = &macsec_device->macsec_rx_sc_list_head; - list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - - list_del_rcu(&rx_sc->rx_sc_list_element); - - kfree_rcu(rx_sc); - } + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) + macsec_del_rxsc_ctx(macsec, rx_sc); kfree(macsec_device->dev_addr); macsec_device->dev_addr = NULL; list_del_rcu(&macsec_device->macsec_device_list_element); --macsec->num_of_devices; + kfree(macsec_device); out: mutex_unlock(&macsec->lock); @@ -1536,6 
+1532,8 @@ static void macsec_async_event(struct work_struct *work) async_work = container_of(work, struct mlx5e_macsec_async_work, work); macsec = async_work->macsec; + mutex_lock(&macsec->lock); + mdev = async_work->mdev; obj_id = async_work->obj_id; macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id); @@ -1557,6 +1555,7 @@ static void macsec_async_event(struct work_struct *work) out_async_work: kfree(async_work); + mutex_unlock(&macsec->lock); } static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data) @@ -1745,7 +1744,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, if (!macsec) return; - fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data); + fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data); rcu_read_lock(); sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h index d580b4a91253..347380a2cd9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -10,9 +10,11 @@ #include <net/macsec.h> #include <net/dst_metadata.h> -/* Bit31 - 30: MACsec marker, Bit3-0: MACsec id */ +/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ +#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ +#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX #define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) -#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0)) +#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) struct mlx5e_priv; struct mlx5e_macsec; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 1ac0cf04e811..5b658a5588c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -250,7 +250,7 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); if (!ns) @@ -261,8 +261,10 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto out_spec; + } tx_tables = &tx_fs->tables; ft_crypto = &tx_tables->ft_crypto; @@ -898,7 +900,7 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); if (!ns) @@ -909,8 +911,10 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto free_spec; + } rx_tables = &rx_fs->tables; ft_crypto = &rx_tables->ft_crypto; @@ -1142,10 +1146,10 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, ft_crypto = &rx_tables->ft_crypto; /* Set bit[31 - 30] macsec marker - 0x01 */ - /* Set bit[3-0] fs id */ + /* Set bit[15-0] fs id */ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action, field, 
MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(set_action_in, action, data, fs_id | BIT(30)); + MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30)); MLX5_SET(set_action_in, action, offset, 0); MLX5_SET(set_action_in, action, length, 32); @@ -1205,6 +1209,7 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, rx_rule->rule[1] = rule; } + kvfree(spec); return macsec_rule; err: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 24aa25da482b..1728e197558d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -35,7 +35,6 @@ #include "en.h" #include "en/port.h" #include "en/params.h" -#include "en/xsk/pool.h" #include "en/ptp.h" #include "lib/clock.h" #include "en/fs_ethtool.h" @@ -412,15 +411,8 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { mutex_lock(&priv->state_lock); - ch->max_combined = priv->max_nch; ch->combined_count = priv->channels.params.num_channels; - if (priv->xsk.refcnt) { - /* The upper half are XSK queues. */ - ch->max_combined *= 2; - ch->combined_count *= 2; - } - mutex_unlock(&priv->state_lock); } @@ -454,16 +446,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); - /* Don't allow changing the number of channels if there is an active - * XSK, because the numeration of the XSK and regular RQs will change. - */ - if (priv->xsk.refcnt) { - err = -EINVAL; - netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", - __func__); - goto out; - } - /* Don't allow changing the number of channels if HTB offload is active, * because the numeration of the QoS SQs will change, while per-queue * qdiscs are attached. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e3a4f01bcceb..5e41dfdf79c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -206,10 +206,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) { u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u32 sz; - WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD); + sz = ALIGN(entries * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT); - return entries * umr_entry_size / MLX5_OCTWORD; + return sz / MLX5_OCTWORD; } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5a6aa61ec82a..bd9936af4582 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1634,7 +1634,6 @@ set_encap_dests(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, struct netlink_ext_ack *extack, - bool *encap_valid, bool *vf_tun) { struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -1651,7 +1650,6 @@ set_encap_dests(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; *vf_tun = false; - *encap_valid = true; for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { struct net_device *out_dev; @@ -1668,7 +1666,7 @@ set_encap_dests(struct mlx5e_priv *priv, goto out; } err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, - extack, &encap_dev, encap_valid); + extack, &encap_dev); dev_put(out_dev); if (err) goto out; @@ -1732,8 +1730,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - bool vf_tun, encap_valid; u32 max_prio, max_chain; + bool vf_tun; int err = 0; parse_attr = attr->parse_attr; @@ -1823,7 +1821,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->int_port = int_port; } - err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun); + err = set_encap_dests(priv, flow, attr, extack, &vf_tun); if (err) goto err_out; @@ -1853,7 +1851,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * (1) there's no error * (2) there's an encap action and we don't have valid neigh */ - if (!encap_valid || flow_flag_test(flow, SLOW)) + if (flow_flag_test(flow, SLOW)) flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); @@ -3759,7 +3757,7 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) struct mlx5e_post_act *post_act = get_post_action(flow->priv); struct mlx5_flow_attr *attr, *next_attr = NULL; struct mlx5e_post_act_handle *handle; - bool vf_tun, encap_valid = true; + bool vf_tun; int err; /* This is going in reverse order as needed. 
@@ -3781,13 +3779,10 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) if (list_is_last(&attr->list, &flow->attrs)) break; - err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun); + err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); if (err) goto out_free; - if (!encap_valid) - flow_flag_set(flow, SLOW); - err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); if (err) goto out_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2169486c4bfb..374e3fbdc2cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1362,6 +1362,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) devl_rate_nodes_destroy(devlink); } + /* Destroy legacy fdb when disabling sriov in legacy mode. */ + if (esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_disable_locked(esw); esw->esw_funcs.num_vfs = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f68dc2d0dbe6..3029bc1c0dd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -736,6 +736,14 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); +static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->esw_funcs.num_vfs; + + return 0; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 728ca9f2bb9d..8c6c9bcb3dc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -433,7 +433,7 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f mlx5_lag_mpesw_is_activated(esw->dev)) dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; } - if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; @@ -3387,6 +3387,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, int err; esw->mode = MLX5_ESWITCH_LEGACY; + + /* If changing from switchdev to legacy mode without sriov enabled, + * no need to create legacy fdb. 
+ */ + if (!mlx5_sriov_is_enabled(esw->dev)) + return 0; + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 108a3503f413..edd910258314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -312,6 +312,8 @@ revert_changes: for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + attr->dests[curr_dest].termtbl = NULL; + /* search for the destination associated with the * current term table */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 9d908a0ccfef..1e46f9afa40e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -9,7 +9,8 @@ enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, MLX5_FW_RESET_FLAGS_PENDING_COMP, - MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED }; struct mlx5_fw_reset { @@ -406,7 +407,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) err = mlx5_pci_link_toggle(dev); if (err) { mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); - goto done; + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); } mlx5_enter_error_state(dev, true); @@ -482,6 +483,10 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) goto out; } err = fw_reset->ret; + if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { + mlx5_unload_one_devl_locked(dev); + mlx5_load_one_devl_locked(dev, false); + } out: clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index a9f4ede4a9bf..32c3e0a649a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -228,9 +228,8 @@ static void mlx5_ldev_free(struct kref *ref) if (ldev->nb.notifier_call) unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); - mlx5_lag_mpesw_cleanup(ldev); - cancel_work_sync(&ldev->mpesw_work); destroy_workqueue(ldev->wq); + mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); kfree(ldev); } @@ -701,10 +700,13 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; #ifdef CONFIG_MLX5_ESWITCH - dev = ldev->pf[MLX5_LAG_P1].dev; - if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev)) - return false; + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) + return false; + } + dev = ldev->pf[MLX5_LAG_P1].dev; mode = mlx5_eswitch_mode(dev); for (i = 0; i < ldev->ports; i++) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index ce2ce8ccbd70..f30ac2de639f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -50,6 +50,19 @@ struct lag_tracker { enum 
netdev_lag_hash hash_type; }; +enum mpesw_op { + MLX5_MPESW_OP_ENABLE, + MLX5_MPESW_OP_DISABLE, +}; + +struct mlx5_mpesw_work_st { + struct work_struct work; + struct mlx5_lag *lag; + enum mpesw_op op; + struct completion comp; + int result; +}; + /* LAG data of a ConnectX card. * It serves both its phys functions. */ @@ -66,7 +79,6 @@ struct mlx5_lag { struct lag_tracker tracker; struct workqueue_struct *wq; struct delayed_work bond_work; - struct work_struct mpesw_work; struct notifier_block nb; struct lag_mp lag_mp; struct mlx5_lag_port_sel port_sel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index f643202b29c6..c17e8f1ec914 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -7,63 +7,95 @@ #include "eswitch.h" #include "lib/mlx5.h" -void mlx5_mpesw_work(struct work_struct *work) +static int add_mpesw_rule(struct mlx5_lag *ldev) { - struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work); + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int err; - mutex_lock(&ldev->lock); - mlx5_disable_lag(ldev); - mutex_unlock(&ldev->lock); -} + if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) + return 0; -static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev = dev->priv.lag; + if (ldev->mode != MLX5_LAG_MODE_NONE) { + err = -EINVAL; + goto out_err; + } - if (!queue_work(ldev->wq, &ldev->mpesw_work)) - mlx5_core_warn(dev, "failed to queue work\n"); + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + if (err) { + mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); + goto out_err; + } + + return 0; + +out_err: + atomic_dec(&ldev->lag_mpesw.mpesw_rule_count); + return err; } -void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +static void del_mpesw_rule(struct mlx5_lag *ldev) { - struct mlx5_lag *ldev = dev->priv.lag; + if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && + ldev->mode == MLX5_LAG_MODE_MPESW) + mlx5_disable_lag(ldev); +} - if (!ldev) - return; +static void mlx5_mpesw_work(struct work_struct *work) +{ + struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); + struct mlx5_lag *ldev = mpesww->lag; mutex_lock(&ldev->lock); - if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && - ldev->mode == MLX5_LAG_MODE_MPESW) - mlx5_lag_disable_mpesw(dev); + if (mpesww->op == MLX5_MPESW_OP_ENABLE) + mpesww->result = add_mpesw_rule(ldev); + else if (mpesww->op == MLX5_MPESW_OP_DISABLE) + del_mpesw_rule(ldev); mutex_unlock(&ldev->lock); + + complete(&mpesww->comp); } -int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev, + enum mpesw_op op) { struct mlx5_lag *ldev = dev->priv.lag; + struct mlx5_mpesw_work_st *work; int err = 0; if (!ldev) return 0; - mutex_lock(&ldev->lock); - if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) - goto out; + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; - if (ldev->mode != MLX5_LAG_MODE_NONE) { + INIT_WORK(&work->work, mlx5_mpesw_work); + init_completion(&work->comp); + work->op = op; + work->lag = ldev; + + if (!queue_work(ldev->wq, &work->work)) { + mlx5_core_warn(dev, "failed to queue mpesw work\n"); err = -EINVAL; goto out; } - - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); - if (err) - mlx5_core_warn(dev, "Failed to create LAG in 
MPESW mode (%d)\n", err); - + wait_for_completion(&work->comp); + err = work->result; out: - mutex_unlock(&ldev->lock); + kfree(work); return err; } +void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +{ + mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); +} + +int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +{ + return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); +} + int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) { struct mlx5_lag *ldev = mdev->priv.lag; @@ -71,12 +103,9 @@ int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) if (!netif_is_bond_master(out_dev) || !ldev) return 0; - mutex_lock(&ldev->lock); - if (ldev->mode == MLX5_LAG_MODE_MPESW) { - mutex_unlock(&ldev->lock); + if (ldev->mode == MLX5_LAG_MODE_MPESW) return -EOPNOTSUPP; - } - mutex_unlock(&ldev->lock); + return 0; } @@ -90,11 +119,10 @@ bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev) void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) { - INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work); atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); } void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) { - cancel_delayed_work_sync(&ldev->bond_work); + WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index be4abcb8fcd5..88e8daffcf92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -12,7 +12,6 @@ struct lag_mpesw { atomic_t mpesw_rule_count; }; -void mlx5_mpesw_work(struct work_struct *work); int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev); bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); #if IS_ENABLED(CONFIG_MLX5_ESWITCH) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 283c4cc28944..e58775a7d955 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1798,7 +1798,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, res = state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; - mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res)); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", + __func__, dev->state, dev->pci_status, res, result2str(res)); return res; } @@ -1837,7 +1838,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - mlx5_pci_trace(dev, "Enter\n"); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n", + __func__, dev->state, dev->pci_status); err = mlx5_pci_enable_device(dev); if (err) { @@ -1859,7 +1861,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) res = PCI_ERS_RESULT_RECOVERED; out: - mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res)); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. 
Exit, err = %d, result = %d, %s\n", + __func__, dev->state, dev->pci_status, err, res, result2str(res)); return res; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index 7da012ff0d41..8e2abbab05f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -18,6 +18,10 @@ struct mlx5_sf_dev_table { phys_addr_t base_address; u64 sf_bar_length; struct notifier_block nb; + struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */ + struct workqueue_struct *active_wq; + struct work_struct work; + u8 stop_active_wq:1; struct mlx5_core_dev *dev; }; @@ -168,6 +172,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ return 0; sf_index = event->function_id - base_id; + mutex_lock(&table->table_lock); sf_dev = xa_load(&table->devices, sf_index); switch (event->new_vhca_state) { case MLX5_VHCA_STATE_INVALID: @@ -191,6 +196,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ default: break; } + mutex_unlock(&table->table_lock); return 0; } @@ -215,6 +221,78 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) return 0; } +static void mlx5_sf_dev_add_active_work(struct work_struct *work) +{ + struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work); + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + u16 sw_func_id; + int err = 0; + u8 state; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = MLX5_CAP_GEN(dev, sf_base_id); + for (i = 0; i < max_functions; i++, function_id++) { + if (table->stop_active_wq) + return; + err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)); + if (err) + /* A failure of specific vhca doesn't mean others will + * fail as well. + */ + continue; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state != MLX5_VHCA_STATE_ACTIVE) + continue; + + sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id); + mutex_lock(&table->table_lock); + /* Don't probe device which is already probe */ + if (!xa_load(&table->devices, i)) + mlx5_sf_dev_add(dev, i, function_id, sw_func_id); + /* There is a race where SF got inactive after the query + * above. e.g.: the query returns that the state of the + * SF is active, and after that the eswitch manager set it to + * inactive. + * This case cannot be managed in SW, since the probing of the + * SF is on one system, and the inactivation is on a different + * system. + * If the inactive is done after the SF perform init_hca(), + * the SF will fully probe and then removed. If it was + * done before init_hca(), the SF probe will fail. + */ + mutex_unlock(&table->table_lock); + } +} + +/* In case SFs are generated externally, probe active SFs */ +static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table) +{ + if (MLX5_CAP_GEN(table->dev, eswitch_manager)) + return 0; /* the table is local */ + + /* Use a workqueue to probe active SFs, which are in large + * quantity and may take up to minutes to probe. 
+ */ + table->active_wq = create_singlethread_workqueue("mlx5_active_sf"); + if (!table->active_wq) + return -ENOMEM; + INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work); + queue_work(table->active_wq, &table->work); + return 0; +} + +static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table) +{ + if (table->active_wq) { + table->stop_active_wq = true; + destroy_workqueue(table->active_wq); + } +} + void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) { struct mlx5_sf_dev_table *table; @@ -240,11 +318,17 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) table->base_address = pci_resource_start(dev->pdev, 2); table->max_sfs = max_sfs; xa_init(&table->devices); + mutex_init(&table->table_lock); dev->priv.sf_dev_table = table; err = mlx5_vhca_event_notifier_register(dev, &table->nb); if (err) goto vhca_err; + + err = mlx5_sf_dev_queue_active_work(table); + if (err) + goto add_active_err; + err = mlx5_sf_dev_vhca_arm_all(table); if (err) goto arm_err; @@ -252,6 +336,8 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) return; arm_err: + mlx5_sf_dev_destroy_active_work(table); +add_active_err: mlx5_vhca_event_notifier_unregister(dev, &table->nb); vhca_err: table->max_sfs = 0; @@ -279,7 +365,9 @@ void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) if (!table) return; + mlx5_sf_dev_destroy_active_work(table); mlx5_vhca_event_notifier_unregister(dev, &table->nb); + mutex_destroy(&table->table_lock); /* Now that event handler is not running, it is safe to destroy * the sf device without race. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 31d443dd8386..f68461b13391 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -46,7 +46,7 @@ static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_action *action) { - int ret; + int ret = -EOPNOTSUPP; if (action && action->action_type != DR_ACTION_TYP_FT) return -EOPNOTSUPP; @@ -67,6 +67,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, goto out; } + if (ret) + goto out; + /* Release old action */ if (tbl->miss_action) refcount_dec(&tbl->miss_action->refcount); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 19516ccad533..d078156581d5 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -104,7 +104,7 @@ static int sparx5_port_open(struct net_device *ndev) err = phylink_of_phy_connect(port->phylink, port->of_node, 0); if (err) { netdev_err(ndev, "Could not attach to PHY\n"); - return err; + goto err_connect; } phylink_start(port->phylink); @@ -116,10 +116,20 @@ static int sparx5_port_open(struct net_device *ndev) err = sparx5_serdes_set(port->sparx5, port, &port->conf); else err = phy_power_on(port->serdes); - if (err) + if (err) { netdev_err(ndev, "%s failed\n", __func__); + goto out_power; + } } + return 0; + +out_power: + phylink_stop(port->phylink); + phylink_disconnect_phy(port->phylink); +err_connect: + sparx5_port_enable(port, false); + return err; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c index e05429c751ee..dc2c3756e3a2 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c +++ 
b/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c @@ -90,13 +90,10 @@ static int sparx5_tc_setup_qdisc_ets(struct net_device *ndev, } } - sparx5_tc_ets_add(port, params); - break; + return sparx5_tc_ets_add(port, params); case TC_ETS_DESTROY: - sparx5_tc_ets_del(port); - - break; + return sparx5_tc_ets_del(port); case TC_ETS_GRAFT: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 405786c00334..cb08d7bf9524 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -341,7 +341,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) return ret; attrs.split = eth_port.is_split; - attrs.splittable = !attrs.split; + attrs.splittable = eth_port.port_lanes > 1 && !attrs.split; attrs.lanes = eth_port.port_lanes; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = eth_port.label_port; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1775997f9c69..991059d6cb32 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1432,6 +1432,9 @@ nfp_port_get_module_info(struct net_device *netdev, u8 data; port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + /* update port state to get latest interface */ set_bit(NFP_PORT_CHANGED, &port->flags); eth_port = nfp_port_get_eth_port(port); diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 19d043b593cc..62320be4de5a 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev) struct sk_buff *skb; int i; - for (i = 0; i < RX_BD_NUM; i++) { - phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - phys); - - dma_unmap_single(ndev->dev.parent, phys_addr, - NIXGE_MAX_JUMBO_FRAME_SIZE, - DMA_FROM_DEVICE); - - skb = (struct sk_buff *)(uintptr_t) - nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - sw_id_offset); - dev_kfree_skb(skb); - } + if (priv->rx_bd_v) { + for (i = 0; i < RX_BD_NUM; i++) { + phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + phys); + + dma_unmap_single(ndev->dev.parent, phys_addr, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); + + skb = (struct sk_buff *)(uintptr_t) + nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + sw_id_offset); + dev_kfree_skb(skb); + } - if (priv->rx_bd_v) dma_free_coherent(ndev->dev.parent, sizeof(*priv->rx_bd_v) * RX_BD_NUM, priv->rx_bd_v, priv->rx_bd_p); + } if (priv->tx_skb) devm_kfree(ndev->dev.parent, priv->tx_skb); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 3f2c30184752..28b7cec485ef 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1143,6 +1143,7 @@ static void pch_gbe_tx_queue(struct pch_gbe_adapter *adapter, buffer_info->dma = 0; buffer_info->time_stamp = 0; tx_ring->next_to_use = ring_num; + dev_kfree_skb_any(skb); return; } buffer_info->mapped = true; @@ -2459,6 +2460,7 @@ static void pch_gbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); pch_gbe_phy_hw_reset(&adapter->hw); + pci_dev_put(adapter->ptp_pdev); free_netdev(netdev); } @@ -2533,7 +2535,7 @@ static int pch_gbe_probe(struct pci_dev *pdev, /* setup the private structure */ ret = 
pch_gbe_sw_init(adapter); if (ret) - goto err_free_netdev; + goto err_put_dev; /* Initialize PHY */ ret = pch_gbe_init_phy(adapter); @@ -2591,6 +2593,8 @@ static int pch_gbe_probe(struct pci_dev *pdev, err_free_adapter: pch_gbe_phy_hw_reset(&adapter->hw); +err_put_dev: + pci_dev_put(adapter->ptp_pdev); err_free_netdev: free_netdev(netdev); return ret; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9fb1fa479d4b..16e6bd466143 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -767,34 +767,34 @@ static int qed_mcp_cancel_load_req(struct qed_hwfn *p_hwfn, return rc; } -#define CONFIG_QEDE_BITMAP_IDX BIT(0) -#define CONFIG_QED_SRIOV_BITMAP_IDX BIT(1) -#define CONFIG_QEDR_BITMAP_IDX BIT(2) -#define CONFIG_QEDF_BITMAP_IDX BIT(4) -#define CONFIG_QEDI_BITMAP_IDX BIT(5) -#define CONFIG_QED_LL2_BITMAP_IDX BIT(6) +#define BITMAP_IDX_FOR_CONFIG_QEDE BIT(0) +#define BITMAP_IDX_FOR_CONFIG_QED_SRIOV BIT(1) +#define BITMAP_IDX_FOR_CONFIG_QEDR BIT(2) +#define BITMAP_IDX_FOR_CONFIG_QEDF BIT(4) +#define BITMAP_IDX_FOR_CONFIG_QEDI BIT(5) +#define BITMAP_IDX_FOR_CONFIG_QED_LL2 BIT(6) static u32 qed_get_config_bitmap(void) { u32 config_bitmap = 0x0; if (IS_ENABLED(CONFIG_QEDE)) - config_bitmap |= CONFIG_QEDE_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDE; if (IS_ENABLED(CONFIG_QED_SRIOV)) - config_bitmap |= CONFIG_QED_SRIOV_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_SRIOV; if (IS_ENABLED(CONFIG_QED_RDMA)) - config_bitmap |= CONFIG_QEDR_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDR; if (IS_ENABLED(CONFIG_QED_FCOE)) - config_bitmap |= CONFIG_QEDF_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDF; if (IS_ENABLED(CONFIG_QED_ISCSI)) - config_bitmap |= CONFIG_QEDI_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDI; if (IS_ENABLED(CONFIG_QED_LL2)) - config_bitmap |= CONFIG_QED_LL2_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_LL2; return config_bitmap; } diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 76072f8c3d2f..0d57ffcedf0c 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2471,6 +2471,7 @@ static netdev_tx_t ql3xxx_send(struct sk_buff *skb, skb_shinfo(skb)->nr_frags); if (tx_cb->seg_count == -1) { netdev_err(ndev, "%s: invalid segment count!\n", __func__); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bd0607680329..2fd5c6fdb500 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2991,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter) QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val); dev_info(&adapter->pdev->dev, "%s: lock recovery initiated\n", __func__); - msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); + mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK); id = ((val >> 2) & 0xF); if (id == adapter->portnum) { @@ -3027,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter) if (status) break; - msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY); + mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY); i++; if (i == 1) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 36324126db6d..6bc923326268 100644 --- 
a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -3020,6 +3020,7 @@ static int __maybe_unused ravb_resume(struct device *dev) ret = ravb_open(ndev); if (ret < 0) return ret; + ravb_set_rx_mode(ndev); netif_device_attach(ndev); } diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 88fa29572e23..ddcc325ed570 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -218,6 +218,7 @@ netdev_tx_t __ef100_hard_start_xmit(struct sk_buff *skb, skb->len, skb->data_len, channel->channel); if (!efx->n_channels || !efx->n_tx_channels || !channel) { netif_stop_queue(net_dev); + dev_kfree_skb_any(skb); goto err; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c25bfecb4a2d..e5cfde1cbd5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -748,6 +748,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, if (fc & FLOW_RX) { pr_debug("\tReceive Flow-Control ON\n"); flow |= GMAC_RX_FLOW_CTRL_RFE; + } else { + pr_debug("\tReceive Flow-Control OFF\n"); } writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6b43da78cdf0..23ec0a9e396c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1061,8 +1061,16 @@ static void stmmac_mac_link_up(struct phylink_config *config, ctrl |= priv->hw->link.duplex; /* Flow Control operation */ - if (tx_pause && rx_pause) - stmmac_mac_flow_ctrl(priv, duplex); + if (rx_pause && tx_pause) + priv->flow_ctrl = FLOW_AUTO; + else if (rx_pause && !tx_pause) + priv->flow_ctrl = FLOW_RX; + else if (!rx_pause && tx_pause) + priv->flow_ctrl = FLOW_TX; + else + priv->flow_ctrl = FLOW_OFF; + + stmmac_mac_flow_ctrl(priv, duplex); if (ctrl != old_ctrl) writel(ctrl, priv->ioaddr + MAC_CTRL_REG); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c50b137f92d7..d04a239c658e 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2082,7 +2082,7 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; - if (port->ndev) + if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED) unregister_netdev(port->ndev); } } diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index de94921cbef9..025e0c19ec25 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -98,6 +98,7 @@ struct ipvl_port { struct sk_buff_head backlog; int count; struct ida ida; + netdevice_tracker dev_tracker; }; struct ipvl_skb_cb { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 54c94a69c2bb..796a38f9d7b2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -83,6 +83,7 @@ static int ipvlan_port_create(struct net_device *dev) if (err) goto err; + netdev_hold(dev, &port->dev_tracker, GFP_KERNEL); return 0; err: @@ -95,6 +96,7 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); struct sk_buff *skb; + netdev_put(dev, &port->dev_tracker); if (port->mode == IPVLAN_MODE_L3S) ipvlan_l3s_unregister(port); 
netdev_rx_handler_unregister(dev); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 14e8d04cb434..2e9742952c4e 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -211,7 +211,7 @@ static __net_init int loopback_net_init(struct net *net) int err; err = -ENOMEM; - dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup); + dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup); if (!dev) goto out; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 85376d2f24ca..f41f67b583db 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3835,7 +3835,6 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; - int ret; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 689e728345ce..eb344f6d4a7b 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -148,7 +148,7 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, /* Associate the fwnode with the device structure so it * can be looked up later. */ - phy->mdio.dev.fwnode = child; + phy->mdio.dev.fwnode = fwnode_handle_get(child); /* All data is now stored in the phy struct, so register it */ rc = phy_device_register(phy); diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 464d88ca8ab0..a4abea921046 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void) rc = ntb_transport_register_client_dev(KBUILD_MODNAME); if (rc) return rc; - return ntb_transport_register_client(&ntb_netdev_client); + + rc = ntb_transport_register_client(&ntb_netdev_client); + if (rc) { + ntb_transport_unregister_client_dev(KBUILD_MODNAME); + return rc; + } + + return 0; } module_init(ntb_netdev_init_module); diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 349b7b1dbbf2..d49965907561 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -870,8 +870,10 @@ static int at803x_probe(struct phy_device *phydev) .wolopts = 0, }; - if (ccr < 0) + if (ccr < 0) { + ret = ccr; goto err; + } mode_cfg = ccr & AT803X_MODE_CFG_MASK; switch (mode_cfg) { diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 57849ac0384e..8cff61dbc4b5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -217,6 +217,7 @@ static void phy_mdio_device_free(struct mdio_device *mdiodev) static void phy_device_release(struct device *dev) { + fwnode_handle_put(dev->fwnode); kfree(to_phy_device(dev)); } @@ -1520,6 +1521,7 @@ error: error_module_put: module_put(d->driver->owner); + d->driver = NULL; error_put_device: put_device(d); if (ndev_owner != bus->owner) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6547b6cc6cbe..2805b04d6402 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1603,19 +1603,29 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, linkmode_copy(supported, phy->supported); linkmode_copy(config.advertising, phy->advertising); - /* Clause 45 PHYs switch their Serdes lane between several different - * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G - * speeds. We really need to know which interface modes the PHY and - * MAC supports to properly work out which linkmodes can be supported. 
+ /* Check whether we would use rate matching for the proposed interface + * mode. */ - if (phy->is_c45 && + config.rate_matching = phy_get_rate_matching(phy, interface); + + /* Clause 45 PHYs may switch their Serdes lane between, e.g. 10GBASE-R, + * 5GBASE-R, 2500BASE-X and SGMII if they are not using rate matching. + * For some interface modes (e.g. RXAUI, XAUI and USXGMII) switching + * their Serdes is either unnecessary or not reasonable. + * + * For these which switch interface modes, we really need to know which + * interface modes the PHY supports to properly work out which ethtool + * linkmodes can be supported. For now, as a work-around, we validate + * against all interface modes, which may lead to more ethtool link + * modes being advertised than are actually supported. + */ + if (phy->is_c45 && config.rate_matching == RATE_MATCH_NONE && interface != PHY_INTERFACE_MODE_RXAUI && interface != PHY_INTERFACE_MODE_XAUI && interface != PHY_INTERFACE_MODE_USXGMII) config.interface = PHY_INTERFACE_MODE_NA; else config.interface = interface; - config.rate_matching = phy_get_rate_matching(phy, config.interface); ret = phylink_validate(pl, supported, &config); if (ret) { diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7a3ab3427369..24001112c323 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -686,7 +686,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (tun) xdp_rxq_info_unreg(&tfile->xdp_rxq); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - sock_put(&tfile->sk); } } @@ -702,6 +701,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) if (dev) netdev_state_change(dev); rtnl_unlock(); + + if (clean) + sock_put(&tfile->sk); } static void tun_detach_all(struct net_device *dev) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8d5cbda33f66..0897fdb6254b 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1915,6 +1915,7 @@ static const struct driver_info cdc_ncm_zlp_info = { .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, + .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as cdc_ncm_info, but with FLAG_WWAN */ diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index afd6faa4c2ec..554d4e2a84a4 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1423,6 +1423,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ + {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ /* 4. 
Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7106932c6f88..86e52454b5b5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3949,12 +3949,11 @@ static int virtnet_probe(struct virtio_device *vdev) return 0; free_unregister_netdev: - virtio_reset_device(vdev); - unregister_netdev(dev); free_failover: net_failover_destroy(vi->failover); free_vqs: + virtio_reset_device(vdev); cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); virtnet_del_vqs(vi); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 9bbfff803357..b545d93c6e37 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -959,30 +959,51 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return; while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; - if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; } if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + u16 elem_size; ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (unsigned int i = 0; i < elem_size;) { + struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } } diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index eb1d1ba3a443..67df8221b5ae 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -482,14 +482,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; - param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + /* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } } if (param->rsn_found) { diff --git 
a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index d41e373f9c0a..d6b166fc5c0e 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -365,7 +365,8 @@ static void ipc_mux_dl_cmd_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) /* Pass the DL packet to the netif layer. */ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, struct iosm_wwan *wwan, u32 offset, - u8 service_class, struct sk_buff *skb) + u8 service_class, struct sk_buff *skb, + u32 pkt_len) { struct sk_buff *dest_skb = skb_clone(skb, GFP_ATOMIC); @@ -373,7 +374,7 @@ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, return -ENOMEM; skb_pull(dest_skb, offset); - skb_set_tail_pointer(dest_skb, dest_skb->len); + skb_trim(dest_skb, pkt_len); /* Pass the packet to the netif layer. */ dest_skb->priority = service_class; @@ -429,7 +430,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) { - u32 pad_len, packet_offset; + u32 pad_len, packet_offset, adgh_len; struct iosm_wwan *wwan; struct mux_adgh *adgh; u8 *block = skb->data; @@ -470,10 +471,12 @@ static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, packet_offset = sizeof(*adgh) + pad_len; if_id += ipc_mux->wwan_q_offset; + adgh_len = le16_to_cpu(adgh->length); /* Pass the packet to the netif layer */ rc = ipc_mux_net_receive(ipc_mux, if_id, wwan, packet_offset, - adgh->service_class, skb); + adgh->service_class, skb, + adgh_len - packet_offset); if (rc) { dev_err(ipc_mux->dev, "mux adgh decoding error"); return; @@ -547,7 +550,7 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, int if_id, int nr_of_dg) { u32 dl_head_pad_len = ipc_mux->session[if_id].dl_head_pad_len; - u32 packet_offset, i, rc; + u32 packet_offset, i, rc, dg_len; for (i = 0; i < nr_of_dg; i++, dg++) { if (le32_to_cpu(dg->datagram_index) @@ -562,11 +565,12 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, packet_offset = le32_to_cpu(dg->datagram_index) + dl_head_pad_len; + dg_len = le16_to_cpu(dg->datagram_length); /* Pass the packet to the netif layer. */ rc = ipc_mux_net_receive(ipc_mux, if_id, ipc_mux->wwan, packet_offset, - dg->service_class, - skb); + dg->service_class, skb, + dg_len - dl_head_pad_len); if (rc) goto dg_error; } @@ -1207,10 +1211,9 @@ static int mux_ul_dg_update_tbl_index(struct iosm_mux *ipc_mux, qlth_n_ql_size, ul_list); ipc_mux_ul_adb_finish(ipc_mux); if (ipc_mux_ul_adb_allocate(ipc_mux, adb, &ipc_mux->size_needed, - IOSM_AGGR_MUX_SIG_ADBH)) { - dev_kfree_skb(src_skb); + IOSM_AGGR_MUX_SIG_ADBH)) return -ENOMEM; - } + ipc_mux->size_needed = le32_to_cpu(adb->adbh->block_length); ipc_mux->size_needed += offsetof(struct mux_adth, dg); @@ -1471,8 +1474,7 @@ void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb) ipc_mux->ul_data_pend_bytes); /* Reset the skb settings. */ - skb->tail = 0; - skb->len = 0; + skb_trim(skb, 0); /* Add the consumed ADB to the free list. 
*/ skb_queue_tail((&ipc_mux->ul_adb.free_list), skb); diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index d3d34d1c4704..5bf5a93937c9 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -249,7 +249,7 @@ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev) if (object->integer.value == 3) sleep_state = IPC_PCIE_D3L2; - kfree(object); + ACPI_FREE(object); default_ret: return sleep_state; diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h index 9b3a6d86ece7..289397c4ea6c 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol.h +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h @@ -122,7 +122,7 @@ struct iosm_protocol { struct iosm_imem *imem; struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES]; struct device *dev; - phys_addr_t phy_ap_shm; + dma_addr_t phy_ap_shm; u32 old_msg_tail; }; diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c index 3458af31e864..7d0f5e4f0a78 100644 --- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c +++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c @@ -165,6 +165,8 @@ static int t7xx_acpi_reset(struct t7xx_pci_dev *t7xx_dev, char *fn_name) return -EFAULT; } + kfree(buffer.pointer); + #endif return 0; } diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 24436c9e54c9..97600826af69 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -112,8 +112,10 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, struct nfcmrvl_i2c_drv_data *drv_data = priv->drv_data; int ret; - if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) + if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) { + kfree_skb(skb); return -EREMOTEIO; + } ret = i2c_master_send(drv_data->i2c, skb->data, skb->len); diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 580cb6ecffee..66b198663387 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -73,11 +73,15 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct nxp_nci_info *info = nci_get_drvdata(ndev); int r; - if (!info->phy_ops->write) + if (!info->phy_ops->write) { + kfree_skb(skb); return -EOPNOTSUPP; + } - if (info->mode != NXP_NCI_MODE_NCI) + if (info->mode != NXP_NCI_MODE_NCI) { + kfree_skb(skb); return -EINVAL; + } r = info->phy_ops->write(info->phy_id, skb); if (r < 0) { diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 0270e05b68df..aec356880adf 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -105,6 +105,7 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb) mutex_lock(&info->mutex); if (s3fwrn5_get_mode(info) != S3FWRN5_MODE_NCI) { + kfree_skb(skb); mutex_unlock(&info->mutex); return -EINVAL; } diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 7764b1a4c3cf..ec87dd21e054 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -312,6 +312,8 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, int r = 0; struct device *dev = &ndev->nfc_dev->dev; struct nfc_evt_transaction *transaction; + u32 aid_len; + u8 params_len; pr_debug("connectivity gate event: %x\n", event); @@ -325,26 +327,47 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, * Description Tag Length * AID 81 5 to 16 * PARAMETERS 82 0 to 255 + * + * The key differences are aid storage length is variably sized + * in the packet, but fixed in 
nfc_evt_transaction, and that + * the aid_len is u8 in the packet, but u32 in the structure, + * and the tags in the packet are not included in + * nfc_evt_transaction. + * + * size(b): 1 1 5-16 1 1 0-255 + * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4 + * mem name: aid_tag(M) aid_len aid params_tag(M) params_len params + * example: 0x81 5-16 X 0x82 0-255 X */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && - skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; - transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); - if (!transaction) - return -ENOMEM; + aid_len = skb->data[1]; - transaction->aid_len = skb->data[1]; - memcpy(transaction->aid, &skb->data[2], transaction->aid_len); + if (skb->len < aid_len + 4 || + aid_len > sizeof(transaction->aid)) + return -EPROTO; - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) + params_len = skb->data[aid_len + 3]; + + /* Verify PARAMETERS tag is (82), and final check that there is + * enough space in the packet to read everything. + */ + if (skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG || + skb->len < aid_len + 4 + params_len) return -EPROTO; - transaction->params_len = skb->data[transaction->aid_len + 3]; - memcpy(transaction->params, skb->data + - transaction->aid_len + 4, transaction->params_len); + transaction = devm_kzalloc(dev, sizeof(*transaction) + + params_len, GFP_KERNEL); + if (!transaction) + return -ENOMEM; + + transaction->aid_len = aid_len; + transaction->params_len = params_len; + + memcpy(transaction->aid, &skb->data[2], aid_len); + memcpy(transaction->params, &skb->data[aid_len + 4], + params_len); r = nfc_se_transaction(ndev->nfc_dev, host, transaction); break; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da55ce45ac70..69e333922bea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4304,7 +4304,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ - synchronize_rcu(); + synchronize_srcu(&ns->head->srcu); if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 93e2138a8b42..7e025b8948cb 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) struct nvme_ns_head *head = ns->head; sector_t capacity = get_capacity(head->disk); int node; + int srcu_idx; + srcu_idx = srcu_read_lock(&head->srcu); list_for_each_entry_rcu(ns, &head->list, siblings) { if (capacity != get_capacity(ns->disk)) clear_bit(NVME_NS_READY, &ns->flags); } + srcu_read_unlock(&head->srcu, srcu_idx); for_each_node(node) rcu_assign_pointer(head->current_path[node], NULL); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f4335519399d..488ad7dabeb8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -797,6 +797,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); if (bv->bv_len > first_prp_len) cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); + else + cmnd->dptr.prp2 = 0; return BLK_STS_OK; } diff --git a/drivers/nvmem/lan9662-otpc.c b/drivers/nvmem/lan9662-otpc.c index 377bf34c2946..56fc19f092a7 100644 --- a/drivers/nvmem/lan9662-otpc.c +++ 
b/drivers/nvmem/lan9662-otpc.c @@ -36,7 +36,7 @@ struct lan9662_otp { void __iomem *base; }; -static bool lan9662_otp_wait_flag_clear(void __iomem *reg, u32 flag) +static int lan9662_otp_wait_flag_clear(void __iomem *reg, u32 flag) { u32 val; diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c index b11c3c974b3d..80cb187f1481 100644 --- a/drivers/nvmem/rmem.c +++ b/drivers/nvmem/rmem.c @@ -37,9 +37,9 @@ static int rmem_read(void *context, unsigned int offset, * but as of Dec 2020 this isn't possible on arm64. */ addr = memremap(priv->mem->base, available, MEMREMAP_WB); - if (IS_ERR(addr)) { + if (!addr) { dev_err(priv->dev, "Failed to remap memory region\n"); - return PTR_ERR(addr); + return -ENOMEM; } count = memory_read_from_buffer(val, bytes, &off, addr, available); diff --git a/drivers/of/property.c b/drivers/of/property.c index 967f79b59016..134cfc980b70 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -993,8 +993,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, nargs, index, &of_args); if (ret < 0) return ret; - if (!args) + if (!args) { + of_node_put(of_args.np); return 0; + } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 605d68673f92..e55c6095adf0 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -578,169 +578,140 @@ static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, return false; } -static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, - struct opp_table *opp_table) +static u32 *_parse_named_prop(struct dev_pm_opp *opp, struct device *dev, + struct opp_table *opp_table, + const char *prop_type, bool *triplet) { - u32 *microvolt, *microamp = NULL, *microwatt = NULL; - int supplies = opp_table->regulator_count; - int vcount, icount, pcount, ret, i, j; struct property *prop = NULL; char name[NAME_MAX]; + int count, ret; + u32 *out; - /* Search for "opp-microvolt-<name>" */ + /* Search for "opp-<prop_type>-<name>" */ if (opp_table->prop_name) { - snprintf(name, sizeof(name), "opp-microvolt-%s", + snprintf(name, sizeof(name), "opp-%s-%s", prop_type, opp_table->prop_name); prop = of_find_property(opp->np, name, NULL); } if (!prop) { - /* Search for "opp-microvolt" */ - sprintf(name, "opp-microvolt"); + /* Search for "opp-<prop_type>" */ + snprintf(name, sizeof(name), "opp-%s", prop_type); prop = of_find_property(opp->np, name, NULL); - - /* Missing property isn't a problem, but an invalid entry is */ - if (!prop) { - if (unlikely(supplies == -1)) { - /* Initialize regulator_count */ - opp_table->regulator_count = 0; - return 0; - } - - if (!supplies) - return 0; - - dev_err(dev, "%s: opp-microvolt missing although OPP managing regulators\n", - __func__); - return -EINVAL; - } + if (!prop) + return NULL; } - if (unlikely(supplies == -1)) { - /* Initialize regulator_count */ - supplies = opp_table->regulator_count = 1; - } else if (unlikely(!supplies)) { - dev_err(dev, "%s: opp-microvolt wasn't expected\n", __func__); - return -EINVAL; + count = of_property_count_u32_elems(opp->np, name); + if (count < 0) { + dev_err(dev, "%s: Invalid %s property (%d)\n", __func__, name, + count); + return ERR_PTR(count); } - vcount = of_property_count_u32_elems(opp->np, name); - if (vcount < 0) { - dev_err(dev, "%s: Invalid %s property (%d)\n", - __func__, name, vcount); - return vcount; - } - - /* There can be one or three elements per supply */ - if (vcount != supplies && vcount != supplies * 3) { - dev_err(dev, "%s: 
Invalid number of elements in %s property (%d) with supplies (%d)\n", - __func__, name, vcount, supplies); - return -EINVAL; + /* + * Initialize regulator_count, if regulator information isn't provided + * by the platform. Now that one of the properties is available, fix the + * regulator_count to 1. + */ + if (unlikely(opp_table->regulator_count == -1)) + opp_table->regulator_count = 1; + + if (count != opp_table->regulator_count && + (!triplet || count != opp_table->regulator_count * 3)) { + dev_err(dev, "%s: Invalid number of elements in %s property (%u) with supplies (%d)\n", + __func__, prop_type, count, opp_table->regulator_count); + return ERR_PTR(-EINVAL); } - microvolt = kmalloc_array(vcount, sizeof(*microvolt), GFP_KERNEL); - if (!microvolt) - return -ENOMEM; + out = kmalloc_array(count, sizeof(*out), GFP_KERNEL); + if (!out) + return ERR_PTR(-EINVAL); - ret = of_property_read_u32_array(opp->np, name, microvolt, vcount); + ret = of_property_read_u32_array(opp->np, name, out, count); if (ret) { dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret); - ret = -EINVAL; - goto free_microvolt; - } - - /* Search for "opp-microamp-<name>" */ - prop = NULL; - if (opp_table->prop_name) { - snprintf(name, sizeof(name), "opp-microamp-%s", - opp_table->prop_name); - prop = of_find_property(opp->np, name, NULL); + kfree(out); + return ERR_PTR(-EINVAL); } - if (!prop) { - /* Search for "opp-microamp" */ - sprintf(name, "opp-microamp"); - prop = of_find_property(opp->np, name, NULL); - } + if (triplet) + *triplet = count != opp_table->regulator_count; - if (prop) { - icount = of_property_count_u32_elems(opp->np, name); - if (icount < 0) { - dev_err(dev, "%s: Invalid %s property (%d)\n", __func__, - name, icount); - ret = icount; - goto free_microvolt; - } + return out; +} - if (icount != supplies) { - dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n", - __func__, name, icount, supplies); - ret = -EINVAL; - goto free_microvolt; - } +static u32 *opp_parse_microvolt(struct dev_pm_opp *opp, struct device *dev, + struct opp_table *opp_table, bool *triplet) +{ + u32 *microvolt; - microamp = kmalloc_array(icount, sizeof(*microamp), GFP_KERNEL); - if (!microamp) { - ret = -EINVAL; - goto free_microvolt; - } + microvolt = _parse_named_prop(opp, dev, opp_table, "microvolt", triplet); + if (IS_ERR(microvolt)) + return microvolt; - ret = of_property_read_u32_array(opp->np, name, microamp, - icount); - if (ret) { - dev_err(dev, "%s: error parsing %s: %d\n", __func__, - name, ret); - ret = -EINVAL; - goto free_microamp; + if (!microvolt) { + /* + * Missing property isn't a problem, but an invalid + * entry is. This property isn't optional if regulator + * information is provided. Check only for the first OPP, as + * regulator_count may get initialized after that to a valid + * value. 
+ */ + if (list_empty(&opp_table->opp_list) && + opp_table->regulator_count > 0) { + dev_err(dev, "%s: opp-microvolt missing although OPP managing regulators\n", + __func__); + return ERR_PTR(-EINVAL); } } - /* Search for "opp-microwatt" */ - sprintf(name, "opp-microwatt"); - prop = of_find_property(opp->np, name, NULL); - - if (prop) { - pcount = of_property_count_u32_elems(opp->np, name); - if (pcount < 0) { - dev_err(dev, "%s: Invalid %s property (%d)\n", __func__, - name, pcount); - ret = pcount; - goto free_microamp; - } + return microvolt; +} - if (pcount != supplies) { - dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n", - __func__, name, pcount, supplies); - ret = -EINVAL; - goto free_microamp; - } +static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, + struct opp_table *opp_table) +{ + u32 *microvolt, *microamp, *microwatt; + int ret = 0, i, j; + bool triplet; - microwatt = kmalloc_array(pcount, sizeof(*microwatt), - GFP_KERNEL); - if (!microwatt) { - ret = -EINVAL; - goto free_microamp; - } + microvolt = opp_parse_microvolt(opp, dev, opp_table, &triplet); + if (IS_ERR(microvolt)) + return PTR_ERR(microvolt); - ret = of_property_read_u32_array(opp->np, name, microwatt, - pcount); - if (ret) { - dev_err(dev, "%s: error parsing %s: %d\n", __func__, - name, ret); - ret = -EINVAL; - goto free_microwatt; - } + microamp = _parse_named_prop(opp, dev, opp_table, "microamp", NULL); + if (IS_ERR(microamp)) { + ret = PTR_ERR(microamp); + goto free_microvolt; } - for (i = 0, j = 0; i < supplies; i++) { - opp->supplies[i].u_volt = microvolt[j++]; + microwatt = _parse_named_prop(opp, dev, opp_table, "microwatt", NULL); + if (IS_ERR(microwatt)) { + ret = PTR_ERR(microwatt); + goto free_microamp; + } - if (vcount == supplies) { - opp->supplies[i].u_volt_min = opp->supplies[i].u_volt; - opp->supplies[i].u_volt_max = opp->supplies[i].u_volt; - } else { - opp->supplies[i].u_volt_min = microvolt[j++]; - opp->supplies[i].u_volt_max = microvolt[j++]; + /* + * Initialize regulator_count if it is uninitialized and no properties + * are found. 
+ */ + if (unlikely(opp_table->regulator_count == -1)) { + opp_table->regulator_count = 0; + return 0; + } + + for (i = 0, j = 0; i < opp_table->regulator_count; i++) { + if (microvolt) { + opp->supplies[i].u_volt = microvolt[j++]; + + if (triplet) { + opp->supplies[i].u_volt_min = microvolt[j++]; + opp->supplies[i].u_volt_max = microvolt[j++]; + } else { + opp->supplies[i].u_volt_min = opp->supplies[i].u_volt; + opp->supplies[i].u_volt_max = opp->supplies[i].u_volt; + } } if (microamp) @@ -750,7 +721,6 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, opp->supplies[i].u_watt = microwatt[i]; } -free_microwatt: kfree(microwatt); free_microamp: kfree(microamp); diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ba64284eaf9f..f1ec8931dfbc 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1613,7 +1613,7 @@ out: } static u32 hv_compose_msi_req_v1( - struct pci_create_interrupt *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt *int_pkt, u32 slot, u8 vector, u16 vector_count) { int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; @@ -1632,6 +1632,35 @@ static u32 hv_compose_msi_req_v1( } /* + * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and + * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be + * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V + * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is + * not irrelevant because Hyper-V chooses the physical CPU to handle the + * interrupts based on the vCPU specified in the message sent to the vPCI VSP in + * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest, + * but assigning too many vPCI device interrupts to the same pCPU can cause a + * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V + * to spread out the pCPUs that it selects. + * + * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu() + * to always return the same dummy vCPU, because a second call to + * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a + * new pCPU for the interrupt. But for the multi-MSI case, the second call to + * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the + * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that + * the pCPUs are spread out. All interrupts for a multi-MSI device end up using + * the same pCPU, even though the vCPUs will be spread out by later calls + * to hv_irq_unmask(); that is the best we can do now. + * + * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not* + * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an + * enhancement is planned for a future version. With that enhancement, the + * dummy vCPU selection won't matter, and interrupts for the same multi-MSI + * device will be spread across multiple pCPUs. + */ + +/* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten * by subsequent retarget in hv_irq_unmask(). */ @@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity) return cpumask_first_and(affinity, cpu_online_mask); } -static u32 hv_compose_msi_req_v2( - struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity, - u32 slot, u8 vector, u16 vector_count) +/* + * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
+ */ +static int hv_compose_multi_msi_req_get_cpu(void) { + static DEFINE_SPINLOCK(multi_msi_cpu_lock); + + /* -1 means starting with CPU 0 */ + static int cpu_next = -1; + + unsigned long flags; int cpu; + spin_lock_irqsave(&multi_msi_cpu_lock, flags); + + cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids, + false); + cpu = cpu_next; + + spin_unlock_irqrestore(&multi_msi_cpu_lock, flags); + + return cpu; +} + +static u32 hv_compose_msi_req_v2( + struct pci_create_interrupt2 *int_pkt, int cpu, + u32 slot, u8 vector, u16 vector_count) +{ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2( } static u32 hv_compose_msi_req_v3( - struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt3 *int_pkt, int cpu, u32 slot, u32 vector, u16 vector_count) { - int cpu; - int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.reserved = 0; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct pci_create_interrupt3 v3; } int_pkts; } __packed ctxt; + bool multi_msi; u64 trans_id; u32 size; int ret; + int cpu; + + msi_desc = irq_data_get_msi_desc(data); + multi_msi = !msi_desc->pci.msi_attrib.is_msix && + msi_desc->nvec_used > 1; /* Reuse the previous allocation */ - if (data->chip_data) { + if (data->chip_data && multi_msi) { int_desc = data->chip_data; msg->address_hi = int_desc->address >> 32; msg->address_lo = int_desc->address & 0xffffffff; @@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - msi_desc = irq_data_get_msi_desc(data); pdev = msi_desc_to_pci_dev(msi_desc); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; @@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!hpdev) goto return_null_message; + /* Free any previous message that might have already been composed. */ + if (data->chip_data && !multi_msi) { + int_desc = data->chip_data; + data->chip_data = NULL; + hv_int_desc_free(hpdev, int_desc); + } + int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; - if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) { + if (multi_msi) { /* * If this is not the first MSI of Multi MSI, we already have * a mapping. Can exit early. 
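The round-robin pick implemented above by hv_compose_multi_msi_req_get_cpu() reduces, on a machine where every CPU is online, to a plain wrap-around counter. A minimal standalone sketch of that selection logic, assuming dense CPU numbering 0..ncpus-1 (the driver itself uses cpumask_next_wrap() to skip offline CPUs and a spinlock to serialize concurrent callers):

	#include <stdio.h>

	/* Round-robin choice of a "dummy" vCPU; mirrors the cpu_next logic
	 * above under the assumption that all CPUs are online. */
	static int pick_dummy_cpu(int ncpus)
	{
		static int cpu_next = -1;	/* -1 means starting with CPU 0 */

		cpu_next = (cpu_next + 1) % ncpus;
		return cpu_next;
	}

	int main(void)
	{
		for (int i = 0; i < 5; i++)
			printf("cpu %d\n", pick_dummy_cpu(4));	/* 0 1 2 3 0 */
		return 0;
	}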
@@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ vector = 32; vector_count = msi_desc->nvec_used; + cpu = hv_compose_multi_msi_req_get_cpu(); } else { vector = hv_msi_get_int_vector(data); vector_count = 1; + cpu = hv_compose_msi_req_get_cpu(dest); } /* @@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) switch (hbus->protocol_version) { case PCI_PROTOCOL_VERSION_1_1: size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, - dest, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_2: case PCI_PROTOCOL_VERSION_1_3: size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, - dest, + cpu, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_4: size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3, - dest, + cpu, hpdev->desc.win_slot.slot, vector, vector_count); diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 52ecd66ce357..047a8374b4fd 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static int __intel_gpio_get_gpio_mode(u32 value) +{ + return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; +} + static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) { - return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; + return __intel_gpio_get_gpio_mode(readl(padcfg0)); } static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) @@ -1674,6 +1679,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); + u32 value; if (!pd || !intel_pad_usable(pctrl, pin)) return false; @@ -1688,6 +1694,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin))) return true; + /* + * The firmware on some systems may configure GPIO pins to be + * an interrupt source in so-called "direct IRQ" mode. In such + * cases the GPIO controller driver has no idea if those pins + * are being used or not. At the same time, there is a known bug + * in the firmware that doesn't restore the pin settings correctly + * after suspend, i.e. for an unknown reason the Rx value becomes + * inverted. + * + * Hence, let's save and restore the pins that are configured + * as GPIOs in the input mode with the GPIROUTIOXAPIC bit set. + * + * See https://bugzilla.kernel.org/show_bug.cgi?id=214749.
+ */ + value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); + if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + return true; + return false; } diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index 65d312967619..27f0a54e12bf 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -303,12 +303,15 @@ static struct irq_chip mtk_eint_irq_chip = { static unsigned int mtk_eint_hw_init(struct mtk_eint *eint) { - void __iomem *reg = eint->base + eint->regs->dom_en; + void __iomem *dom_en = eint->base + eint->regs->dom_en; + void __iomem *mask_set = eint->base + eint->regs->mask_set; unsigned int i; for (i = 0; i < eint->hw->ap_num; i += 32) { - writel(0xffffffff, reg); - reg += 4; + writel(0xffffffff, dom_en); + writel(0xffffffff, mask_set); + dom_en += 4; + mask_set += 4; } return 0; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 67bec7ea0f8b..414ee6bb8ac9 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs) mux_bytes = pcs->width / BITS_PER_BYTE; - if (pcs->bits_per_mux) { + if (pcs->bits_per_mux && pcs->fmask) { pcs->bits_per_pin = fls(pcs->fmask); nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin; } else { diff --git a/drivers/power/supply/ab8500_btemp.c b/drivers/power/supply/ab8500_btemp.c index 863fabe05bdc..307ee6f71042 100644 --- a/drivers/power/supply/ab8500_btemp.c +++ b/drivers/power/supply/ab8500_btemp.c @@ -725,7 +725,14 @@ static int ab8500_btemp_probe(struct platform_device *pdev) /* Get thermal zone and ADC */ di->tz = thermal_zone_get_zone_by_name("battery-thermal"); if (IS_ERR(di->tz)) { - return dev_err_probe(dev, PTR_ERR(di->tz), + ret = PTR_ERR(di->tz); + /* + * This usually just means we are probing before the thermal + * zone, so just defer. 
+ */ + if (ret == -ENODEV) + ret = -EPROBE_DEFER; + return dev_err_probe(dev, ret, "failed to get battery thermal zone\n"); } di->bat_ctrl = devm_iio_channel_get(dev, "bat_ctrl"); diff --git a/drivers/power/supply/ip5xxx_power.c b/drivers/power/supply/ip5xxx_power.c index 218e8e689a3f..00221e9c0bfc 100644 --- a/drivers/power/supply/ip5xxx_power.c +++ b/drivers/power/supply/ip5xxx_power.c @@ -352,7 +352,7 @@ static int ip5xxx_battery_get_property(struct power_supply *psy, ret = ip5xxx_battery_read_adc(ip5xxx, IP5XXX_BATIADC_DAT0, IP5XXX_BATIADC_DAT1, &raw); - val->intval = DIV_ROUND_CLOSEST(raw * 745985, 1000); + val->intval = DIV_ROUND_CLOSEST(raw * 149197, 200); return 0; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 635f051b0821..f20a6ac584cc 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -121,7 +121,7 @@ struct rk817_charger { #define ADC_TO_CHARGE_UAH(adc_value, res_div) \ (adc_value / 3600 * 172 / res_div) -static u8 rk817_chg_cur_to_reg(u32 chg_cur_ma) +static int rk817_chg_cur_to_reg(u32 chg_cur_ma) { if (chg_cur_ma >= 3500) return CHG_3_5A; @@ -864,8 +864,8 @@ static int rk817_battery_init(struct rk817_charger *charger, { struct rk808 *rk808 = charger->rk808; u32 tmp, max_chg_vol_mv, max_chg_cur_ma; - u8 max_chg_vol_reg, chg_term_i_reg, max_chg_cur_reg; - int ret, chg_term_ma; + u8 max_chg_vol_reg, chg_term_i_reg; + int ret, chg_term_ma, max_chg_cur_reg; u8 bulk_reg[2]; /* Get initial plug state */ @@ -1116,14 +1116,12 @@ static int rk817_charger_probe(struct platform_device *pdev) charger->bat_ps = devm_power_supply_register(&pdev->dev, &rk817_bat_desc, &pscfg); - - charger->chg_ps = devm_power_supply_register(&pdev->dev, - &rk817_chg_desc, &pscfg); - - if (IS_ERR(charger->chg_ps)) + if (IS_ERR(charger->bat_ps)) return dev_err_probe(dev, -EINVAL, "Battery failed to probe\n"); + charger->chg_ps = devm_power_supply_register(&pdev->dev, + &rk817_chg_desc, &pscfg); if (IS_ERR(charger->chg_ps)) return dev_err_probe(dev, -EINVAL, "Charger failed to probe\n"); diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 515e3ceb3393..90d33cd1b670 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -44,6 +44,19 @@ config IDLE_INJECT synchronously on a set of specified CPUs or alternatively on a per CPU basis. +config ARM_SCMI_POWERCAP + tristate "ARM SCMI Powercap driver" + depends on ARM_SCMI_PROTOCOL + help + This enables support for ARM power capping based on the ARM SCMI + Powercap protocol. + + The ARM SCMI Powercap protocol allows power limits to be enforced + and monitored against the SCMI Powercap domains advertised as + available by the SCMI platform firmware. + + When compiled as a module, it will be called arm_scmi_powercap.ko.
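The driver added below (arm_scmi_powercap.c) never passes a requested cap straight through to the firmware: scmi_powercap_normalize_cap() clamps the request into the zone's [min_power_cap, max_power_cap] interval and then rounds it down to a multiple of power_cap_step. A standalone sketch of that arithmetic, with purely illustrative numbers:

	#include <stdint.h>
	#include <stdio.h>

	/* Clamp a requested cap into [min, max], then round it down to a
	 * multiple of step, as scmi_powercap_normalize_cap() does. */
	static uint32_t normalize_cap(uint64_t req, uint32_t min, uint32_t max,
				      uint32_t step)
	{
		uint32_t val;

		if (req < min)
			val = min;
		else if (req > max)
			val = max;
		else
			val = (uint32_t)req;

		return val - val % step;	/* rounddown() */
	}

	int main(void)
	{
		/* e.g. a 1537 mW request against a 100..3000 mW zone, 50 mW step */
		printf("%u\n", normalize_cap(1537, 100, 3000, 50));	/* 1500 */
		return 0;
	}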
+ config DTPM bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)" depends on OF diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile index 494617cdad88..4474201b4aa7 100644 --- a/drivers/powercap/Makefile +++ b/drivers/powercap/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_POWERCAP) += powercap_sys.o obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o obj-$(CONFIG_IDLE_INJECT) += idle_inject.o +obj-$(CONFIG_ARM_SCMI_POWERCAP) += arm_scmi_powercap.o diff --git a/drivers/powercap/arm_scmi_powercap.c b/drivers/powercap/arm_scmi_powercap.c new file mode 100644 index 000000000000..05d0e516176a --- /dev/null +++ b/drivers/powercap/arm_scmi_powercap.c @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SCMI Powercap support. + * + * Copyright (C) 2022 ARM Ltd. + */ + +#include <linux/device.h> +#include <linux/math.h> +#include <linux/limits.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/powercap.h> +#include <linux/scmi_protocol.h> + +#define to_scmi_powercap_zone(z) \ + container_of(z, struct scmi_powercap_zone, zone) + +static const struct scmi_powercap_proto_ops *powercap_ops; + +struct scmi_powercap_zone { + unsigned int height; + struct device *dev; + struct scmi_protocol_handle *ph; + const struct scmi_powercap_info *info; + struct scmi_powercap_zone *spzones; + struct powercap_zone zone; + struct list_head node; +}; + +struct scmi_powercap_root { + unsigned int num_zones; + struct scmi_powercap_zone *spzones; + struct list_head *registered_zones; +}; + +static struct powercap_control_type *scmi_top_pcntrl; + +static int scmi_powercap_zone_release(struct powercap_zone *pz) +{ + return 0; +} + +static int scmi_powercap_get_max_power_range_uw(struct powercap_zone *pz, + u64 *max_power_range_uw) +{ + *max_power_range_uw = U32_MAX; + return 0; +} + +static int scmi_powercap_get_power_uw(struct powercap_zone *pz, + u64 *power_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + u32 avg_power, pai; + int ret; + + if (!spz->info->powercap_monitoring) + return -EINVAL; + + ret = powercap_ops->measurements_get(spz->ph, spz->info->id, &avg_power, + &pai); + if (ret) + return ret; + + *power_uw = avg_power; + if (spz->info->powercap_scale_mw) + *power_uw *= 1000; + + return 0; +} + +static const struct powercap_zone_ops zone_ops = { + .get_max_power_range_uw = scmi_powercap_get_max_power_range_uw, + .get_power_uw = scmi_powercap_get_power_uw, + .release = scmi_powercap_zone_release, +}; + +static void scmi_powercap_normalize_cap(const struct scmi_powercap_zone *spz, + u64 power_limit_uw, u32 *norm) +{ + bool scale_mw = spz->info->powercap_scale_mw; + u64 val; + + val = scale_mw ? DIV_ROUND_UP_ULL(power_limit_uw, 1000) : power_limit_uw; + /* + * This cast is lossless since here @req_power is certain to be within + * the range [min_power_cap, max_power_cap] whose bounds are assured to + * be two unsigned 32bits quantities. + */ + *norm = clamp_t(u32, val, spz->info->min_power_cap, + spz->info->max_power_cap); + *norm = rounddown(*norm, spz->info->power_cap_step); + + val = (scale_mw) ? 
*norm * 1000 : *norm; + if (power_limit_uw != val) + dev_dbg(spz->dev, + "Normalized %s:CAP - requested:%llu - normalized:%llu\n", + spz->info->name, power_limit_uw, val); +} + +static int scmi_powercap_set_power_limit_uw(struct powercap_zone *pz, int cid, + u64 power_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + u32 norm_power; + + if (!spz->info->powercap_cap_config) + return -EINVAL; + + scmi_powercap_normalize_cap(spz, power_uw, &norm_power); + + return powercap_ops->cap_set(spz->ph, spz->info->id, norm_power, false); +} + +static int scmi_powercap_get_power_limit_uw(struct powercap_zone *pz, int cid, + u64 *power_limit_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + u32 power; + int ret; + + ret = powercap_ops->cap_get(spz->ph, spz->info->id, &power); + if (ret) + return ret; + + *power_limit_uw = power; + if (spz->info->powercap_scale_mw) + *power_limit_uw *= 1000; + + return 0; +} + +static void scmi_powercap_normalize_time(const struct scmi_powercap_zone *spz, + u64 time_us, u32 *norm) +{ + /* + * This cast is lossless since here @time_us is certain to be within the + * range [min_pai, max_pai] whose bounds are assured to be two unsigned + * 32bits quantities. + */ + *norm = clamp_t(u32, time_us, spz->info->min_pai, spz->info->max_pai); + *norm = rounddown(*norm, spz->info->pai_step); + + if (time_us != *norm) + dev_dbg(spz->dev, + "Normalized %s:PAI - requested:%llu - normalized:%u\n", + spz->info->name, time_us, *norm); +} + +static int scmi_powercap_set_time_window_us(struct powercap_zone *pz, int cid, + u64 time_window_us) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + u32 norm_pai; + + if (!spz->info->powercap_pai_config) + return -EINVAL; + + scmi_powercap_normalize_time(spz, time_window_us, &norm_pai); + + return powercap_ops->pai_set(spz->ph, spz->info->id, norm_pai); +} + +static int scmi_powercap_get_time_window_us(struct powercap_zone *pz, int cid, + u64 *time_window_us) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + int ret; + u32 pai; + + ret = powercap_ops->pai_get(spz->ph, spz->info->id, &pai); + if (ret) + return ret; + + *time_window_us = pai; + + return 0; +} + +static int scmi_powercap_get_max_power_uw(struct powercap_zone *pz, int cid, + u64 *max_power_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + + *max_power_uw = spz->info->max_power_cap; + if (spz->info->powercap_scale_mw) + *max_power_uw *= 1000; + + return 0; +} + +static int scmi_powercap_get_min_power_uw(struct powercap_zone *pz, int cid, + u64 *min_power_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + + *min_power_uw = spz->info->min_power_cap; + if (spz->info->powercap_scale_mw) + *min_power_uw *= 1000; + + return 0; +} + +static int scmi_powercap_get_max_time_window_us(struct powercap_zone *pz, + int cid, u64 *time_window_us) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + + *time_window_us = spz->info->max_pai; + + return 0; +} + +static int scmi_powercap_get_min_time_window_us(struct powercap_zone *pz, + int cid, u64 *time_window_us) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + + *time_window_us = (u64)spz->info->min_pai; + + return 0; +} + +static const char *scmi_powercap_get_name(struct powercap_zone *pz, int cid) +{ + return "SCMI power-cap"; +} + +static const struct powercap_zone_constraint_ops constraint_ops = { + .set_power_limit_uw = scmi_powercap_set_power_limit_uw, + .get_power_limit_uw = 
scmi_powercap_get_power_limit_uw, + .set_time_window_us = scmi_powercap_set_time_window_us, + .get_time_window_us = scmi_powercap_get_time_window_us, + .get_max_power_uw = scmi_powercap_get_max_power_uw, + .get_min_power_uw = scmi_powercap_get_min_power_uw, + .get_max_time_window_us = scmi_powercap_get_max_time_window_us, + .get_min_time_window_us = scmi_powercap_get_min_time_window_us, + .get_name = scmi_powercap_get_name, +}; + +static void scmi_powercap_unregister_all_zones(struct scmi_powercap_root *pr) +{ + int i; + + /* Unregister child zones first, starting from the leaves */ + for (i = pr->num_zones - 1; i >= 0; i--) { + if (!list_empty(&pr->registered_zones[i])) { + struct scmi_powercap_zone *spz; + + list_for_each_entry(spz, &pr->registered_zones[i], node) + powercap_unregister_zone(scmi_top_pcntrl, + &spz->zone); + } + } +} + +static inline bool +scmi_powercap_is_zone_registered(struct scmi_powercap_zone *spz) +{ + return !list_empty(&spz->node); +} + +static inline unsigned int +scmi_powercap_get_zone_height(struct scmi_powercap_zone *spz) +{ + if (spz->info->parent_id == SCMI_POWERCAP_ROOT_ZONE_ID) + return 0; + + return spz->spzones[spz->info->parent_id].height + 1; +} + +static inline struct scmi_powercap_zone * +scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz) +{ + if (spz->info->parent_id == SCMI_POWERCAP_ROOT_ZONE_ID) + return NULL; + + return &spz->spzones[spz->info->parent_id]; +} + +/** + * scmi_powercap_register_zone - Register an SCMI powercap zone recursively + * + * @pr: A reference to the root powercap zones descriptors + * @spz: A reference to the SCMI powercap zone to register + * + * When registering SCMI powercap zones with the powercap framework we should + * take care to always register zones starting from the root ones and to + * deregister starting from the leaves. + * + * Unfortunately we cannot assume that the array of available SCMI powercap + * zones provided by the SCMI platform firmware is built to comply with such a + * requirement. + * + * This function, given an SCMI powercap zone to register, takes care to walk + * the SCMI powercap zones tree up to the root looking recursively for + * unregistered parent zones before registering the provided zone; at the same + * time each registered zone height in such a tree is accounted for and each + * zone, once registered, is stored in the @registered_zones array that is + * indexed by zone height: this way it will be trivial, at unregister time, to walk + * the @registered_zones array backward and unregister all the zones starting + * from the leaves, removing child zones before parents. + * + * While doing this, we prune away any zone marked as invalid (like the ones + * sporting an SCMI abstract power scale) as long as they are positioned as + * leaves in the SCMI powercap zones hierarchy: any non-leaf invalid zone causes + * the entire process to fail since we cannot assume the correctness of an SCMI + * powercap zones hierarchy if some of the internal nodes are missing. + * + * Note that the array of SCMI powercap zones as returned by the SCMI platform + * is known to be sane, i.e. zone relationships have been validated at the + * protocol layer.
+ * + * Return: 0 on Success + */ +static int scmi_powercap_register_zone(struct scmi_powercap_root *pr, + struct scmi_powercap_zone *spz) +{ + int ret = 0; + struct scmi_powercap_zone *parent; + + if (!spz->info) + return ret; + + parent = scmi_powercap_get_parent_zone(spz); + if (parent && !scmi_powercap_is_zone_registered(parent)) { + /* + * Bail out if a parent domain was marked as unsupported: + * only domains participating as leaves can be skipped. + */ + if (!parent->info) + return -ENODEV; + + ret = scmi_powercap_register_zone(pr, parent); + if (ret) + return ret; + } + + if (!scmi_powercap_is_zone_registered(spz)) { + struct powercap_zone *z; + + z = powercap_register_zone(&spz->zone, + scmi_top_pcntrl, + spz->info->name, + parent ? &parent->zone : NULL, + &zone_ops, 1, &constraint_ops); + if (!IS_ERR(z)) { + spz->height = scmi_powercap_get_zone_height(spz); + list_add(&spz->node, + &pr->registered_zones[spz->height]); + dev_dbg(spz->dev, + "Registered node %s - parent %s - height:%d\n", + spz->info->name, + parent ? parent->info->name : "ROOT", + spz->height); + ret = 0; + } else { + ret = PTR_ERR(z); + dev_err(spz->dev, + "Error registering node:%s - parent:%s - h:%d - ret:%d\n", + spz->info->name, + parent ? parent->info->name : "ROOT", + spz->height, ret); + } + } + + return ret; +} + +static int scmi_powercap_probe(struct scmi_device *sdev) +{ + int ret, i; + struct scmi_powercap_root *pr; + struct scmi_powercap_zone *spz; + struct scmi_protocol_handle *ph; + struct device *dev = &sdev->dev; + + if (!sdev->handle) + return -ENODEV; + + powercap_ops = sdev->handle->devm_protocol_get(sdev, + SCMI_PROTOCOL_POWERCAP, + &ph); + if (IS_ERR(powercap_ops)) + return PTR_ERR(powercap_ops); + + pr = devm_kzalloc(dev, sizeof(*pr), GFP_KERNEL); + if (!pr) + return -ENOMEM; + + ret = powercap_ops->num_domains_get(ph); + if (ret < 0) { + dev_err(dev, "number of powercap domains not found\n"); + return ret; + } + pr->num_zones = ret; + + pr->spzones = devm_kcalloc(dev, pr->num_zones, + sizeof(*pr->spzones), GFP_KERNEL); + if (!pr->spzones) + return -ENOMEM; + + /* Allocate for worst possible scenario of maximum tree height. */ + pr->registered_zones = devm_kcalloc(dev, pr->num_zones, + sizeof(*pr->registered_zones), + GFP_KERNEL); + if (!pr->registered_zones) + return -ENOMEM; + + for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) { + /* + * Powercap domains are validated by the protocol layer, i.e. + * only non-NULL domains are returned here, whose + * parent_id is assured to point to another valid domain. + */ + spz->info = powercap_ops->info_get(ph, i); + + spz->dev = dev; + spz->ph = ph; + spz->spzones = pr->spzones; + INIT_LIST_HEAD(&spz->node); + INIT_LIST_HEAD(&pr->registered_zones[i]); + + /* + * Forcibly skip powercap domains using an abstract scale. + * Note that only leaf domains can be skipped, so this could + * later lead to a global failure. + */ + if (!spz->info->powercap_scale_uw && + !spz->info->powercap_scale_mw) { + dev_warn(dev, + "Abstract power scale not supported. Skip %s.\n", + spz->info->name); + spz->info = NULL; + continue; + } + } + + /* + * Scan array of retrieved SCMI powercap domains and register them + * recursively starting from the root domains.
+ */ + for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) { + ret = scmi_powercap_register_zone(pr, spz); + if (ret) { + dev_err(dev, + "Failed to register powercap zone %s - ret:%d\n", + spz->info->name, ret); + scmi_powercap_unregister_all_zones(pr); + return ret; + } + } + + dev_set_drvdata(dev, pr); + + dev_info(dev, "Registered %d SCMI Powercap domains !\n", pr->num_zones); + + return ret; +} + +static void scmi_powercap_remove(struct scmi_device *sdev) +{ + struct device *dev = &sdev->dev; + struct scmi_powercap_root *pr = dev_get_drvdata(dev); + + scmi_powercap_unregister_all_zones(pr); +} + +static const struct scmi_device_id scmi_id_table[] = { + { SCMI_PROTOCOL_POWERCAP, "powercap" }, + { }, +}; +MODULE_DEVICE_TABLE(scmi, scmi_id_table); + +static struct scmi_driver scmi_powercap_driver = { + .name = "scmi-powercap", + .probe = scmi_powercap_probe, + .remove = scmi_powercap_remove, + .id_table = scmi_id_table, +}; + +static int __init scmi_powercap_init(void) +{ + int ret; + + scmi_top_pcntrl = powercap_register_control_type(NULL, "arm-scmi", NULL); + if (IS_ERR(scmi_top_pcntrl)) + return PTR_ERR(scmi_top_pcntrl); + + ret = scmi_register(&scmi_powercap_driver); + if (ret) + powercap_unregister_control_type(scmi_top_pcntrl); + + return ret; +} +module_init(scmi_powercap_init); + +static void __exit scmi_powercap_exit(void) +{ + scmi_unregister(&scmi_powercap_driver); + + powercap_unregister_control_type(scmi_top_pcntrl); +} +module_exit(scmi_powercap_exit); + +MODULE_AUTHOR("Cristian Marussi <[email protected]>"); +MODULE_DESCRIPTION("ARM SCMI Powercap driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 999e218d7793..fe86a09e3b67 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -147,6 +147,7 @@ static void idle_inject_fn(unsigned int cpu) /** * idle_inject_set_duration - idle and run duration update helper + * @ii_dev: idle injection control device structure * @run_duration_us: CPU run time to allow in microseconds * @idle_duration_us: CPU idle time to inject in microseconds */ @@ -162,6 +163,7 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev, /** * idle_inject_get_duration - idle and run duration retrieval helper + * @ii_dev: idle injection control device structure * @run_duration_us: memory location to store the current CPU run time * @idle_duration_us: memory location to store the current CPU idle time */ @@ -175,6 +177,7 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, /** * idle_inject_set_latency - set the maximum latency allowed + * @ii_dev: idle injection control device structure * @latency_us: set the latency requirement for the idle state */ void idle_inject_set_latency(struct idle_inject_device *ii_dev, diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index f0654a932b37..1f968353d479 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/err.h> +#include <linux/kstrtox.h> #include <linux/slab.h> #include <linux/powercap.h> @@ -446,7 +447,7 @@ static ssize_t enabled_store(struct device *dev, { bool mode; - if (strtobool(buf, &mode)) + if (kstrtobool(buf, &mode)) return -EINVAL; if (dev->parent) { struct powercap_zone *power_zone = to_powercap_zone(dev); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index bcccad8f7516..e8c00a884f1f 100644 --- a/drivers/regulator/core.c 
+++ b/drivers/regulator/core.c @@ -5154,6 +5154,7 @@ static void regulator_dev_release(struct device *dev) { struct regulator_dev *rdev = dev_get_drvdata(dev); + debugfs_remove_recursive(rdev->debugfs); kfree(rdev->constraints); of_node_put(rdev->dev.of_node); kfree(rdev); @@ -5644,11 +5645,15 @@ wash: mutex_lock(®ulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(®ulator_list_mutex); + put_device(&rdev->dev); + rdev = NULL; clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); + if (rdev && rdev->dev.of_node) + of_node_put(rdev->dev.of_node); + kfree(rdev); kfree(config); - put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); @@ -5677,7 +5682,6 @@ void regulator_unregister(struct regulator_dev *rdev) mutex_lock(®ulator_list_mutex); - debugfs_remove_recursive(rdev->debugfs); WARN_ON(rdev->open_count); regulator_remove_coupling(rdev); unset_regulator_supplies(rdev); diff --git a/drivers/regulator/rt5759-regulator.c b/drivers/regulator/rt5759-regulator.c index 6b96899eb27e..8488417f4b2c 100644 --- a/drivers/regulator/rt5759-regulator.c +++ b/drivers/regulator/rt5759-regulator.c @@ -243,6 +243,7 @@ static int rt5759_regulator_register(struct rt5759_priv *priv) if (priv->chip_type == CHIP_TYPE_RT5759A) reg_desc->uV_step = RT5759A_STEP_UV; + memset(®_cfg, 0, sizeof(reg_cfg)); reg_cfg.dev = priv->dev; reg_cfg.of_node = np; reg_cfg.init_data = of_get_regulator_init_data(priv->dev, np, reg_desc); diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c index 75a941fb3c2b..1b2eee95ad3f 100644 --- a/drivers/regulator/slg51000-regulator.c +++ b/drivers/regulator/slg51000-regulator.c @@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client) chip->cs_gpiod = cs_gpiod; } + usleep_range(10000, 11000); + i2c_set_clientdata(client, chip); chip->chip_irq = client->irq; chip->dev = dev; diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c index 430265c404d6..f3856750944f 100644 --- a/drivers/regulator/twl6030-regulator.c +++ b/drivers/regulator/twl6030-regulator.c @@ -67,6 +67,7 @@ struct twlreg_info { #define TWL6030_CFG_STATE_SLEEP 0x03 #define TWL6030_CFG_STATE_GRP_SHIFT 5 #define TWL6030_CFG_STATE_APP_SHIFT 2 +#define TWL6030_CFG_STATE_MASK 0x03 #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT) #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\ TWL6030_CFG_STATE_APP_SHIFT) @@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev) if (grp < 0) return grp; grp &= P1_GRP_6030; + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val = TWL6030_CFG_STATE_APP(val); } else { + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val &= TWL6030_CFG_STATE_MASK; grp = 1; } - val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - val = TWL6030_CFG_STATE_APP(val); - return grp && (val == TWL6030_CFG_STATE_ON); } @@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev) val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - switch (TWL6030_CFG_STATE_APP(val)) { + if (info->features & TWL6032_SUBCLASS) + val &= TWL6030_CFG_STATE_MASK; + else + val = TWL6030_CFG_STATE_APP(val); + + switch (val) { case TWL6030_CFG_STATE_ON: return REGULATOR_STATUS_NORMAL; @@ -530,6 +537,7 @@ static const struct twlreg_info TWL6030_INFO_##label = { \ #define TWL6032_ADJUSTABLE_LDO(label, offset) \ static const struct twlreg_info TWL6032_INFO_##label = { \ .base = 
offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ @@ -562,6 +570,7 @@ static const struct twlreg_info TWLFIXED_INFO_##label = { \ #define TWL6032_ADJUSTABLE_SMPS(label, offset) \ static const struct twlreg_info TWLSMPS_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index cb83f81da416..df17f0f9cb0f 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1954,7 +1954,7 @@ dasd_copy_pair_show(struct device *dev, break; } } - if (!copy->entry[i].primary) + if (i == DASD_CP_ENTRIES) goto out; /* print all secondary */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 662730f3b027..5d0b9991e91a 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4722,7 +4722,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, struct dasd_device *basedev; struct req_iterator iter; struct dasd_ccw_req *cqr; - unsigned int first_offs; unsigned int trkcount; unsigned long *idaws; unsigned int size; @@ -4756,7 +4755,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) / DASD_RAW_SECTORS_PER_TRACK; trkcount = last_trk - first_trk + 1; - first_offs = 0; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; @@ -4800,13 +4798,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, if (use_prefix) { prefix_LRE(ccw++, data, first_trk, last_trk, cmd, basedev, - startdev, 1, first_offs + 1, trkcount, 0, 0); + startdev, 1, 0, trkcount, 0, 0); } else { define_extent(ccw++, data, first_trk, last_trk, cmd, basedev, 0); ccw[-1].flags |= CCW_FLAG_CC; data += sizeof(struct DE_eckd_data); - locate_record_ext(ccw++, data, first_trk, first_offs + 1, + locate_record_ext(ccw++, data, first_trk, 0, trkcount, cmd, basedev, 0, 0); } @@ -5500,7 +5498,7 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) * Dump the range of CCWs into 'page' buffer * and return number of printed chars. 
*/ -static int +static void dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) { int len, count; @@ -5518,16 +5516,21 @@ dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) else datap = (char *) ((addr_t) from->cda); - /* dump data (max 32 bytes) */ - for (count = 0; count < from->count && count < 32; count++) { - if (count % 8 == 0) len += sprintf(page + len, " "); - if (count % 4 == 0) len += sprintf(page + len, " "); + /* dump data (max 128 bytes) */ + for (count = 0; count < from->count && count < 128; count++) { + if (count % 32 == 0) + len += sprintf(page + len, "\n"); + if (count % 8 == 0) + len += sprintf(page + len, " "); + if (count % 4 == 0) + len += sprintf(page + len, " "); len += sprintf(page + len, "%02x", datap[count]); } len += sprintf(page + len, "\n"); from++; } - return len; + if (len > 0) + printk(KERN_ERR "%s", page); } static void @@ -5619,37 +5622,33 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, if (req) { /* req == NULL for unsolicited interrupts */ /* dump the Channel Program (max 140 Bytes per line) */ - /* Count CCW and print first CCWs (maximum 1024 % 140 = 7) */ + /* Count CCW and print first CCWs (maximum 7) */ first = req->cpaddr; for (last = first; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++); to = min(first + 6, last); - len = sprintf(page, PRINTK_HEADER - " Related CP in req: %p\n", req); - dasd_eckd_dump_ccw_range(first, to, page + len); - printk(KERN_ERR "%s", page); + printk(KERN_ERR PRINTK_HEADER " Related CP in req: %p\n", req); + dasd_eckd_dump_ccw_range(first, to, page); /* print failing CCW area (maximum 4) */ /* scsw->cda is either valid or zero */ - len = 0; from = ++to; fail = (struct ccw1 *)(addr_t) irb->scsw.cmd.cpa; /* failing CCW */ if (from < fail - 2) { from = fail - 2; /* there is a gap - print header */ - len += sprintf(page, PRINTK_HEADER "......\n"); + printk(KERN_ERR PRINTK_HEADER "......\n"); } to = min(fail + 1, last); - len += dasd_eckd_dump_ccw_range(from, to, page + len); + dasd_eckd_dump_ccw_range(from, to, page + len); /* print last CCWs (maximum 2) */ + len = 0; from = max(from, ++to); if (from < last - 1) { from = last - 1; /* there is a gap - print header */ - len += sprintf(page + len, PRINTK_HEADER "......\n"); + printk(KERN_ERR PRINTK_HEADER "......\n"); } - len += dasd_eckd_dump_ccw_range(from, last, page + len); - if (len > 0) - printk(KERN_ERR "%s", page); + dasd_eckd_dump_ccw_range(from, last, page + len); } free_page((unsigned long) page); } diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index d0ddf2cc9786..9327dcdd6e5e 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -401,7 +401,7 @@ dasd_ioctl_copy_pair_swap(struct block_device *bdev, void __user *argp) return -EFAULT; } if (memchr_inv(data.reserved, 0, sizeof(data.reserved))) { - pr_warn("%s: Ivalid swap data specified.\n", + pr_warn("%s: Invalid swap data specified\n", dev_name(&device->cdev->dev)); dasd_put_device(device); return DASD_COPYPAIRSWAP_INVALID; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 59ac98f2bd27..b02c631f3b71 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -233,8 +233,11 @@ static void __init ap_init_qci_info(void) if (!ap_qci_info) return; ap_qci_info_old = kzalloc(sizeof(*ap_qci_info_old), GFP_KERNEL); - if (!ap_qci_info_old) + if (!ap_qci_info_old) { + kfree(ap_qci_info); + ap_qci_info = NULL; return; + } if 
(ap_fetch_qci_info(ap_qci_info) != 0) { kfree(ap_qci_info); kfree(ap_qci_info_old); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index bc46721aa01c..3c5b7e4227b2 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -303,16 +303,21 @@ enum storvsc_request_type { }; /* - * SRB status codes and masks; a subset of the codes used here. + * SRB status codes and masks. In the 8-bit field, the two high order bits + * are flags, while the remaining 6 bits are an integer status code. The + * definitions here include only the subset of the integer status codes that + * are tested for in this driver. */ - #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_QUEUE_FROZEN 0x40 -#define SRB_STATUS_INVALID_LUN 0x20 -#define SRB_STATUS_SUCCESS 0x01 -#define SRB_STATUS_ABORTED 0x02 -#define SRB_STATUS_ERROR 0x04 -#define SRB_STATUS_DATA_OVERRUN 0x12 + +/* SRB status integer codes */ +#define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 +#define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_INVALID_REQUEST 0x06 +#define SRB_STATUS_DATA_OVERRUN 0x12 +#define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, void (*process_err_fn)(struct work_struct *work); struct hv_host_device *host_dev = shost_priv(host); - /* - * In some situations, Hyper-V sets multiple bits in the - * srb_status, such as ABORTED and ERROR. So process them - * individually, with the most specific bits first. - */ - - if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) { - set_host_byte(scmnd, DID_NO_CONNECT); - process_err_fn = storvsc_remove_lun; - goto do_work; - } + switch (SRB_STATUS(vm_srb->srb_status)) { + case SRB_STATUS_ERROR: + case SRB_STATUS_ABORTED: + case SRB_STATUS_INVALID_REQUEST: + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { + /* Check for capacity change */ + if ((asc == 0x2a) && (ascq == 0x9)) { + process_err_fn = storvsc_device_scan; + /* Retry the I/O that triggered this. */ + set_host_byte(scmnd, DID_REQUEUE); + goto do_work; + } - if (vm_srb->srb_status & SRB_STATUS_ABORTED) { - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && - /* Capacity data has changed */ - (asc == 0x2a) && (ascq == 0x9)) { - process_err_fn = storvsc_device_scan; /* - * Retry the I/O that triggered this. + * Otherwise, let upper layer deal with the + * error when sense message is present */ - set_host_byte(scmnd, DID_REQUEUE); - goto do_work; - } - } - - if (vm_srb->srb_status & SRB_STATUS_ERROR) { - /* - * Let upper layer deal with error when - * sense message is present. 
- */ - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) return; + } /* * If there is an error; offline the device since all @@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, default: set_host_byte(scmnd, DID_ERROR); } + return; + + case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); + process_err_fn = storvsc_remove_lun; + goto do_work; + } return; diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c index 1322b8cce5b7..ababb910b391 100644 --- a/drivers/spi/spi-dw-dma.c +++ b/drivers/spi/spi-dw-dma.c @@ -128,12 +128,15 @@ static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws) dw_spi_dma_sg_burst_init(dws); + pci_dev_put(dma_dev); + return 0; free_rxchan: dma_release_channel(dws->rxchan); dws->rxchan = NULL; err_exit: + pci_dev_put(dma_dev); return -EBUSY; } diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 30d82cc7300b..d209930069cf 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -444,8 +444,7 @@ static unsigned int mx51_ecspi_clkdiv(struct spi_imx_data *spi_imx, unsigned int pre, post; unsigned int fin = spi_imx->spi_clk; - if (unlikely(fspi > fin)) - return 0; + fspi = min(fspi, fin); post = fls(fin) - fls(fspi); if (fin > fspi << post) @@ -1608,6 +1607,13 @@ static int spi_imx_transfer_one(struct spi_controller *controller, return spi_imx_pio_transfer_slave(spi, transfer); /* + * If we decided in spi_imx_can_dma() that we want to do a DMA + * transfer, the SPI transfer has already been mapped, so we + * have to do the DMA transfer here. + */ + if (spi_imx->usedma) + return spi_imx_dma_transfer(spi_imx, transfer); + /* * Calculate the estimated time in us the transfer runs. Find * the number of Hz per byte per polling limit. */ @@ -1618,9 +1624,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller, if (transfer->len < byte_limit) return spi_imx_poll_transfer(spi, transfer); - if (spi_imx->usedma) - return spi_imx_dma_transfer(spi_imx, transfer); - return spi_imx_pio_transfer(spi, transfer); } diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index a33c9a3de395..d6aff909fc36 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -1273,8 +1273,11 @@ static int mtk_spi_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct mtk_spi *mdata = spi_master_get_devdata(master); + int ret; - pm_runtime_disable(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) + return ret; mtk_spi_reset(mdata); @@ -1283,6 +1286,9 @@ static int mtk_spi_remove(struct platform_device *pdev) clk_unprepare(mdata->spi_hclk); } + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; } diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 10f0c5a6e0dc..9f356612ba7e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -924,8 +924,9 @@ static int tegra_qspi_start_transfer_one(struct spi_device *spi, static struct tegra_qspi_client_data *tegra_qspi_parse_cdata_dt(struct spi_device *spi) { struct tegra_qspi_client_data *cdata; + struct tegra_qspi *tqspi = spi_master_get_devdata(spi->master); - cdata = devm_kzalloc(&spi->dev, sizeof(*cdata), GFP_KERNEL); + cdata = devm_kzalloc(tqspi->dev, sizeof(*cdata), GFP_KERNEL); if (!cdata) return NULL; diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index f3947be13e2e..64f0e047c23d 100644 --- a/drivers/tee/optee/device.c +++ 
b/drivers/tee/optee/device.c @@ -80,7 +80,7 @@ static int optee_register_device(const uuid_t *device_uuid) rc = device_register(&optee_device->dev); if (rc) { pr_err("device registration failed, err: %d\n", rc); - kfree(optee_device); + put_device(&optee_device->dev); } return rc; diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c67715f6f756..f9aa50ff14d4 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -600,11 +600,11 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, trace_cdnsp_ep_halt(value ? "Set" : "Clear"); - if (value) { - ret = cdnsp_cmd_stop_ep(pdev, pep); - if (ret) - return ret; + ret = cdnsp_cmd_stop_ep(pdev, pep); + if (ret) + return ret; + if (value) { if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_STOPPED) { cdnsp_queue_halt_endpoint(pdev, pep->idx); cdnsp_ring_cmd_db(pdev); @@ -613,10 +613,6 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, pep->ep_state |= EP_HALTED; } else { - /* - * In device mode driver can call reset endpoint command - * from any endpoint state. - */ cdnsp_queue_reset_ep(pdev, pep->idx); cdnsp_ring_cmd_db(pdev); ret = cdnsp_wait_for_cmd_compl(pdev); diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 794e413800ae..2f29431f612e 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1763,10 +1763,15 @@ static u32 cdnsp_td_remainder(struct cdnsp_device *pdev, int trb_buff_len, unsigned int td_total_len, struct cdnsp_request *preq, - bool more_trbs_coming) + bool more_trbs_coming, + bool zlp) { u32 maxp, total_packet_count; + /* Before a ZLP, the driver needs to set TD_SIZE = 1. */ + if (zlp) + return 1; + /* One TRB with a zero-length data packet. */ if (!more_trbs_coming || (transferred == 0 && trb_buff_len == 0) || trb_buff_len == td_total_len) @@ -1960,7 +1965,8 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) /* Set the TRB length, TD size, and interrupter fields. */ remainder = cdnsp_td_remainder(pdev, enqd_len, trb_buff_len, full_len, preq, - more_trbs_coming); + more_trbs_coming, + zero_len_trb); length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -2025,7 +2031,7 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) if (preq->request.length > 0) { remainder = cdnsp_td_remainder(pdev, 0, preq->request.length, - preq->request.length, preq, 1); + preq->request.length, preq, 1, 0); length_field = TRB_LEN(preq->request.length) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -2076,7 +2082,8 @@ int cdnsp_cmd_stop_ep(struct cdnsp_device *pdev, struct cdnsp_ep *pep) u32 ep_state = GET_EP_CTX_STATE(pep->out_ctx); int ret = 0; - if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED) { + if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED || + ep_state == EP_STATE_HALTED) { trace_cdnsp_ep_stopped_or_disabled(pep->out_ctx); goto ep_stopped; } @@ -2225,7 +2232,7 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev, /* Set the TRB length, TD size, & interrupter fields.
*/ remainder = cdnsp_td_remainder(pdev, running_total, trb_buff_len, td_len, preq, - more_trbs_coming); + more_trbs_coming, 0); length_field = TRB_LEN(trb_buff_len) | TRB_INTR_TARGET(0); diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index 0ecf20eeceee..4be6a873bd07 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -37,15 +37,6 @@ struct dwc3_exynos { struct regulator *vdd10; }; -static int dwc3_exynos_remove_child(struct device *dev, void *unused) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - - return 0; -} - static int dwc3_exynos_probe(struct platform_device *pdev) { struct dwc3_exynos *exynos; @@ -142,7 +133,7 @@ static int dwc3_exynos_remove(struct platform_device *pdev) struct dwc3_exynos *exynos = platform_get_drvdata(pdev); int i; - device_for_each_child(&pdev->dev, NULL, dwc3_exynos_remove_child); + of_platform_depopulate(&pdev->dev); for (i = exynos->num_clks - 1; i >= 0; i--) clk_disable_unprepare(exynos->clks[i]); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 026d4029bda6..6d524fa76443 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, * * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 */ - if (dwc->gadget->speed <= USB_SPEED_HIGH) { + if (dwc->gadget->speed <= USB_SPEED_HIGH || + DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; @@ -1023,12 +1024,6 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) reg &= ~DWC3_DALEPENA_EP(dep->number); dwc3_writel(dwc->regs, DWC3_DALEPENA, reg); - /* Clear out the ep descriptors for non-ep0 */ - if (dep->number > 1) { - dep->endpoint.comp_desc = NULL; - dep->endpoint.desc = NULL; - } - dwc3_remove_requests(dwc, dep, -ESHUTDOWN); dep->stream_capable = false; @@ -1043,6 +1038,12 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) mask |= (DWC3_EP_DELAY_STOP | DWC3_EP_TRANSFER_STARTED); dep->flags &= mask; + /* Clear out the ep descriptors for non-ep0 */ + if (dep->number > 1) { + dep->endpoint.comp_desc = NULL; + dep->endpoint.desc = NULL; + } + return 0; } diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index c4ed48d6b8a4..a189b08bba80 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -199,16 +199,6 @@ uvc_send_response(struct uvc_device *uvc, struct uvc_request_data *data) * V4L2 ioctls */ -struct uvc_format { - u8 bpp; - u32 fcc; -}; - -static struct uvc_format uvc_formats[] = { - { 16, V4L2_PIX_FMT_YUYV }, - { 0, V4L2_PIX_FMT_MJPEG }, -}; - static int uvc_v4l2_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { @@ -243,47 +233,6 @@ uvc_v4l2_get_format(struct file *file, void *fh, struct v4l2_format *fmt) } static int -uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt) -{ - struct video_device *vdev = video_devdata(file); - struct uvc_device *uvc = video_get_drvdata(vdev); - struct uvc_video *video = &uvc->video; - struct uvc_format *format; - unsigned int imagesize; - unsigned int bpl; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(uvc_formats); ++i) { - format = &uvc_formats[i]; - if (format->fcc == fmt->fmt.pix.pixelformat) - break; - } - - if (i == ARRAY_SIZE(uvc_formats)) { - 
uvcg_info(&uvc->func, "Unsupported format 0x%08x.\n", - fmt->fmt.pix.pixelformat); - return -EINVAL; - } - - bpl = format->bpp * fmt->fmt.pix.width / 8; - imagesize = bpl ? bpl * fmt->fmt.pix.height : fmt->fmt.pix.sizeimage; - - video->fcc = format->fcc; - video->bpp = format->bpp; - video->width = fmt->fmt.pix.width; - video->height = fmt->fmt.pix.height; - video->imagesize = imagesize; - - fmt->fmt.pix.field = V4L2_FIELD_NONE; - fmt->fmt.pix.bytesperline = bpl; - fmt->fmt.pix.sizeimage = imagesize; - fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; - fmt->fmt.pix.priv = 0; - - return 0; -} - -static int uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) { struct video_device *vdev = video_devdata(file); @@ -324,6 +273,27 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) } static int +uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt) +{ + struct video_device *vdev = video_devdata(file); + struct uvc_device *uvc = video_get_drvdata(vdev); + struct uvc_video *video = &uvc->video; + int ret; + + ret = uvc_v4l2_try_format(file, fh, fmt); + if (ret) + return ret; + + video->fcc = fmt->fmt.pix.pixelformat; + video->bpp = fmt->fmt.pix.bytesperline * 8 / video->width; + video->width = fmt->fmt.pix.width; + video->height = fmt->fmt.pix.height; + video->imagesize = fmt->fmt.pix.sizeimage; + + return ret; +} + +static int uvc_v4l2_enum_frameintervals(struct file *file, void *fh, struct v4l2_frmivalenum *fival) { diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 098b62f7b701..c0143d38df83 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -577,7 +577,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, if (scr_readw(r) != vc->vc_video_erase_char) break; if (r != q && new_rows >= rows + logo_lines) { - save = kmalloc(array3_size(logo_lines, new_cols, 2), + save = kzalloc(array3_size(logo_lines, new_cols, 2), GFP_KERNEL); if (save) { int i = min(cols, new_cols); diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index f422f9c58ba7..1ea6d2e5b218 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -67,8 +67,27 @@ static bool is_vmpck_empty(struct snp_guest_dev *snp_dev) return true; } +/* + * If an error is received from the host or AMD Secure Processor (ASP) there + * are two options. Either retry the exact same encrypted request or discontinue + * using the VMPCK. + * + * This is because in the current encryption scheme GHCB v2 uses AES-GCM to + * encrypt the requests. The IV for this scheme is the sequence number. GCM + * cannot tolerate IV reuse. + * + * The ASP FW v1.51 only increments the sequence numbers on a successful + * guest<->ASP back and forth and only accepts messages at its exact sequence + * number. + * + * So if the sequence number were to be reused the encryption scheme is + * vulnerable. If the sequence number were incremented for a fresh IV the ASP + * will reject the request. + */ static void snp_disable_vmpck(struct snp_guest_dev *snp_dev) { + dev_alert(snp_dev->dev, "Disabling vmpck_id %d to prevent IV reuse.\n", + vmpck_id); memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN); snp_dev->vmpck = NULL; } @@ -321,34 +340,71 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in if (rc) return rc; - /* Call firmware to process the request */ + /* + * Call firmware to process the request. 
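/*
 * A sketch (plain C, invented types) of the idiom behind the
 * uvc_v4l2_set_format() rewrite above: instead of an open-coded format
 * table walk, S_FMT first runs the TRY_FMT path so both ioctls share one
 * source of truth, then commits the normalized result to driver state.
 */
struct fmt {
	unsigned int width, height, sizeimage;
};

static int try_format(struct fmt *f)
{
	if (f->width == 0 || f->height == 0)
		return -1;			/* shared validation */
	f->sizeimage = f->width * f->height * 2;
	return 0;
}

static int set_format(struct fmt *state, struct fmt *req)
{
	int ret = try_format(req);		/* validate first */

	if (ret)
		return ret;
	*state = *req;				/* commit only what passed */
	return 0;
}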
In this function the encrypted + * message enters shared memory with the host. So after this call the + * sequence number must be incremented or the VMPCK must be deleted to + * prevent reuse of the IV. + */ rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + + /* + * If the extended guest request fails due to having too small of a + * certificate data buffer, retry the same guest request without the + * extended data request in order to increment the sequence number + * and thus avoid IV reuse. + */ + if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST && + err == SNP_GUEST_REQ_INVALID_LEN) { + const unsigned int certs_npages = snp_dev->input.data_npages; + + exit_code = SVM_VMGEXIT_GUEST_REQUEST; + + /* + * If this call to the firmware succeeds, the sequence number can + * be incremented allowing for continued use of the VMPCK. If + * there is an error reflected in the return value, this value + * is checked further down and the result will be the deletion + * of the VMPCK and the error code being propagated back to the + * user as an ioctl() return code. + */ + rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + + /* + * Override the error to inform callers the given extended + * request buffer size was too small and give the caller the + * required buffer size. + */ + err = SNP_GUEST_REQ_INVALID_LEN; + snp_dev->input.data_npages = certs_npages; + } + if (fw_err) *fw_err = err; - if (rc) - return rc; + if (rc) { + dev_alert(snp_dev->dev, + "Detected error from ASP request. rc: %d, fw_err: %llu\n", + rc, *fw_err); + goto disable_vmpck; + } - /* - * The verify_and_dec_payload() will fail only if the hypervisor is - * actively modifying the message header or corrupting the encrypted payload. - * This hints that hypervisor is acting in a bad faith. Disable the VMPCK so that - * the key cannot be used for any communication. The key is disabled to ensure - * that AES-GCM does not use the same IV while encrypting the request payload. - */ rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz); if (rc) { dev_alert(snp_dev->dev, - "Detected unexpected decode failure, disabling the vmpck_id %d\n", - vmpck_id); - snp_disable_vmpck(snp_dev); - return rc; + "Detected unexpected decode failure from ASP. rc: %d\n", + rc); + goto disable_vmpck; } /* Increment to new message sequence after payload decryption was successful. 
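/*
 * A user-space sketch of the policy the comments above describe, with
 * invented names (the real code is in sev-guest.c): the AES-GCM IV is the
 * message sequence number, so once a request has entered shared memory the
 * only safe outcomes are "verified round trip -> advance the counter" or
 * "anything else -> wipe the key".
 */
#include <string.h>

struct vmpck_state {
	unsigned char key[32];
	unsigned long long seqno;	/* doubles as the AES-GCM IV */
	int usable;
};

static void vmpck_disable(struct vmpck_state *s)
{
	/* the kernel uses memzero_explicit() so the wipe is not elided */
	memset(s->key, 0, sizeof(s->key));
	s->usable = 0;
}

/* rc: result of one request/response exchange, 0 on verified success */
static void vmpck_after_exchange(struct vmpck_state *s, int rc)
{
	if (rc == 0)
		s->seqno += 2;		/* request + response each use one IV */
	else
		vmpck_disable(s);	/* never risk IV reuse */
}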
*/ snp_inc_msg_seqno(snp_dev); return 0; + +disable_vmpck: + snp_disable_vmpck(snp_dev); + return rc; } static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c0031a3ab42f..3ac5fcf98d0d 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -167,8 +167,8 @@ responded: clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; diff --git a/fs/afs/server.c b/fs/afs/server.c index 4981baf97835..b5237206eac3 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server, if (!server) return; - a = atomic_inc_return(&server->active); + a = atomic_read(&server->active); zero = __refcount_dec_and_test(&server->ref, &r); trace_afs_server(debug_id, r - 1, a, reason); if (unlikely(zero)) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a9543f01184c..dcb510f38dda 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4663,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, int ret; int i; - ASSERT(!path->nowait); + /* + * The nowait semantics are used only for write paths, where we don't + * use the tree mod log and sequence numbers. + */ + if (time_seq) + ASSERT(!path->nowait); nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) @@ -4683,7 +4688,14 @@ again: if (path->need_commit_sem) { path->need_commit_sem = 0; need_commit_sem = true; - down_read(&fs_info->commit_root_sem); + if (path->nowait) { + if (!down_read_trylock(&fs_info->commit_root_sem)) { + ret = -EAGAIN; + goto done; + } + } else { + down_read(&fs_info->commit_root_sem); + } } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); } @@ -4759,7 +4771,7 @@ again: next = c; ret = read_block_for_search(root, path, &next, level, slot, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4769,6 +4781,10 @@ again: if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && path->nowait) { + ret = -EAGAIN; + goto done; + } if (!ret && time_seq) { /* * If we don't get the lock, we may be racing @@ -4799,7 +4815,7 @@ again: ret = read_block_for_search(root, path, &next, level, 0, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4807,8 +4823,16 @@ again: goto done; } - if (!path->skip_locking) - btrfs_tree_read_lock(next); + if (!path->skip_locking) { + if (path->nowait) { + if (!btrfs_try_tree_read_lock(next)) { + ret = -EAGAIN; + goto done; + } + } else { + btrfs_tree_read_lock(next); + } + } } ret = 0; done: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d5dd8bed1488..5ba2e810dc6e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp) } } + btrfs_free_path(path); + path = NULL; if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) ret = -EFAULT; @@ -3194,6 +3196,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root, } out: + btrfs_free_path(path); + if (!ret || ret == -EOVERFLOW) { rootrefs->num_items = found; /* update min_treeid for next search */ @@ -3205,7 +3209,6 @@ out: } kfree(rootrefs); - 
btrfs_free_path(path); return ret; } @@ -4231,6 +4234,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) ipath->fspath->val[i] = rel_ptr; } + btrfs_free_path(path); + path = NULL; ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, ipath->fspath, size); if (ret) { @@ -4281,21 +4286,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, size = min_t(u32, loi->size, SZ_16M); } - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - inodes = init_data_container(size); if (IS_ERR(inodes)) { ret = PTR_ERR(inodes); - inodes = NULL; - goto out; + goto out_loi; } + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } ret = iterate_inodes_from_logical(loi->logical, fs_info, path, inodes, ignore_offset); + btrfs_free_path(path); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) @@ -4307,7 +4311,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, ret = -EFAULT; out: - btrfs_free_path(path); kvfree(inodes); out_loi: kfree(loi); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9334c3157c22..b74105a10f16 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, dstgroup->rsv_rfer = inherit->lim.rsv_rfer; dstgroup->rsv_excl = inherit->lim.rsv_excl; - ret = update_qgroup_limit_item(trans, dstgroup); - if (ret) { - qgroup_mark_inconsistent(fs_info); - btrfs_info(fs_info, - "unable to update quota limit for %llu", - dstgroup->qgroupid); - goto unlock; - } + qgroup_dirty(fs_info, dstgroup); } if (srcid) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 145c84b44fd0..1c4b693ee4a3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5702,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, u64 ext_len; u64 clone_len; u64 clone_data_offset; + bool crossed_src_i_size = false; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); @@ -5759,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, if (key.offset >= clone_src_i_size) break; - if (key.offset + ext_len > clone_src_i_size) + if (key.offset + ext_len > clone_src_i_size) { ext_len = clone_src_i_size - key.offset; + crossed_src_i_size = true; + } clone_data_offset = btrfs_file_extent_offset(leaf, ei); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { @@ -5821,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, ret = send_clone(sctx, offset, clone_len, clone_root); } + } else if (crossed_src_i_size && clone_len < len) { + /* + * If we are at i_size of the clone source inode and we + * cannot clone from it, terminate the loop. This is + * to avoid sending two write operations, one with a + * length matching clone_len and the final one after + * this loop with a length of len - clone_len. + * + * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED + * was passed to the send ioctl), this helps avoid + * sending an encoded write for an offset that is not + * sector size aligned, in case the i_size of the source + * inode is not sector size aligned. That will make the + * receiver fall back to decompression of the data and + * writing it using regular buffered IO, therefore while + * not incorrect, it's not optimal due to decompression and + * possible re-compression at the receiver. 
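/*
 * A sketch of the "nowait" pattern the btrfs_next_old_leaf() changes
 * earlier in this series apply: a nowait path must never sleep, so each
 * blocking lock acquisition becomes a trylock that bails out with -EAGAIN
 * for the caller to retry from a context that may block. POSIX rwlocks
 * stand in for the kernel primitives here.
 */
#include <errno.h>
#include <pthread.h>

static pthread_rwlock_t commit_sem = PTHREAD_RWLOCK_INITIALIZER;

static int locked_step(int nowait)
{
	if (nowait) {
		if (pthread_rwlock_tryrdlock(&commit_sem))
			return -EAGAIN;	/* caller retries while blocking */
	} else {
		pthread_rwlock_rdlock(&commit_sem);
	}
	/* ... work under the lock ... */
	pthread_rwlock_unlock(&commit_sem);
	return 0;
}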
+ */ + break; } else { ret = send_extent_data(sctx, dst_path, offset, clone_len); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 699b54b3acaa..74fef1f49c35 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void) #ifdef CONFIG_BTRFS_DEBUG ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group); - if (ret) - goto out2; + if (ret) { + sysfs_unmerge_group(&btrfs_kset->kobj, + &btrfs_static_feature_attr_group); + goto out_remove_group; + } #endif return 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 813986e38258..c3cf3dabe0b1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, u64 *last_old_dentry_offset) { struct btrfs_root *log = inode->root->log_root; - struct extent_buffer *src = path->nodes[0]; - const int nritems = btrfs_header_nritems(src); + struct extent_buffer *src; + const int nritems = btrfs_header_nritems(path->nodes[0]); const u64 ino = btrfs_ino(inode); bool last_found = false; int batch_start = 0; int batch_size = 0; int i; - for (i = path->slots[0]; i < nritems; i++) { + /* + * We need to clone the leaf, release the read lock on it, and use the + * clone before modifying the log tree. See the comment at copy_items() + * about why we need to do this. + */ + src = btrfs_clone_extent_buffer(path->nodes[0]); + if (!src) + return -ENOMEM; + + i = path->slots[0]; + btrfs_release_path(path); + path->nodes[0] = src; + path->slots[0] = i; + + for (; i < nritems; i++) { struct btrfs_dir_item *di; struct btrfs_key key; int ret; @@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, { struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *extent; - struct extent_buffer *src = src_path->nodes[0]; + struct extent_buffer *src; int ret = 0; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM); const u64 i_size = i_size_read(&inode->vfs_inode); + /* + * To keep lockdep happy and avoid deadlocks, clone the source leaf and + * use the clone. This is because otherwise we would be changing the log + * tree, to insert items from the subvolume tree or insert csum items, + * while holding a read lock on a leaf from the subvolume tree, which + * creates a nasty lock dependency when COWing log tree nodes/leaves: + * + * 1) Modifying the log tree triggers an extent buffer allocation while + * holding a write lock on a parent extent buffer from the log tree. + * Allocating the pages for an extent buffer, or the extent buffer + * struct, can trigger inode eviction and finally the inode eviction + * will trigger a release/remove of a delayed node, which requires + * taking the delayed node's mutex; + * + * 2) Allocating a metadata extent for a log tree can trigger the async + * reclaim thread and make us wait for it to release enough space and + * unblock our reservation ticket. The reclaim thread can start + * flushing delayed items, and that in turn results in the need to + * lock delayed node mutexes and in the need to write lock extent + * buffers of a subvolume tree - all this while holding a write lock + * on the parent extent buffer in the log tree. 
+ * + * So one task in scenario 1) running in parallel with another task in + * scenario 2) could lead to a deadlock, one wanting to lock a delayed + * node mutex while having a read lock on a leaf from the subvolume, + * while the other is holding the delayed node's mutex and wants to + * write lock the same subvolume leaf for flushing delayed items. + */ + src = btrfs_clone_extent_buffer(src_path->nodes[0]); + if (!src) + return -ENOMEM; + + i = src_path->slots[0]; + btrfs_release_path(src_path); + src_path->nodes[0] = src; + src_path->slots[0] = i; + ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); if (!ins_data) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 1912abf6d020..c9e2b0c85309 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, super[i] = page_address(page[i]); } - if (super[0]->generation > super[1]->generation) + if (btrfs_super_generation(super[0]) > + btrfs_super_generation(super[1])) sector = zones[1].start; else sector = zones[0].start; @@ -466,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) goto out; } - zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); + zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); if (!zones) { ret = -ENOMEM; goto out; @@ -585,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } - kfree(zones); + kvfree(zones); switch (bdev_zoned_model(bdev)) { case BLK_ZONED_HM: @@ -617,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) return 0; out: - kfree(zones); + kvfree(zones); out_free_zone_info: btrfs_destroy_dev_zone_info(device); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index fe220686bba4..712a43161448 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1281,7 +1281,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, rc = filemap_write_and_wait_range(src_inode->i_mapping, off, off + len - 1); if (rc) - goto out; + goto unlock; /* should we flush first and last page first */ truncate_inode_pages(&target_inode->i_data, 0); @@ -1297,6 +1297,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, * that target is updated on the server */ CIFS_I(target_inode)->time = 0; + +unlock: /* although unlocking in the reverse order from locking is not * strictly necessary here it is a little cleaner to be consistent */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 92e4278ec35d..9e7d9f0baa18 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -302,14 +302,14 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) /* now drop the ref to the current iface */ if (old_iface && iface) { - kref_put(&old_iface->refcount, release_iface); cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); - } else if (old_iface) { kref_put(&old_iface->refcount, release_iface); + } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + kref_put(&old_iface->refcount, release_iface); } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f1956288307f..6c399a8b22b3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5184,6 +5184,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * and it is decreased till we reach start. 
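/*
 * A generic sketch of the idiom behind the cifs_file_copychunk_range()
 * fix above (goto out -> goto unlock): each error exit must jump to the
 * label that undoes exactly what has been set up so far, unwinding in
 * reverse order. Names are invented for illustration.
 */
#include <stdlib.h>

static int do_copy(void)
{
	char *a, *b;
	int ret = -1;

	a = malloc(64);
	if (!a)
		goto out;		/* nothing acquired yet */
	b = malloc(64);
	if (!b)
		goto free_a;		/* undo only what succeeded */

	ret = 0;			/* ... the actual work ... */

	free(b);
free_a:
	free(a);
out:
	return ret;
}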
*/ again: + ret = 0; if (SHIFT == SHIFT_LEFT) iterator = &start; else @@ -5227,14 +5228,21 @@ again: ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - if (le32_to_cpu(extent->ee_block) > 0) + if (le32_to_cpu(extent->ee_block) > start) *iterator = le32_to_cpu(extent->ee_block) - 1; - else - /* Beginning is reached, end of the loop */ + else if (le32_to_cpu(extent->ee_block) == start) iterator = NULL; - /* Update path extent in case we need to stop */ - while (le32_to_cpu(extent->ee_block) < start) + else { + extent = EXT_LAST_EXTENT(path[depth].p_hdr); + while (le32_to_cpu(extent->ee_block) >= start) + extent--; + + if (extent == EXT_LAST_EXTENT(path[depth].p_hdr)) + break; + extent++; + iterator = NULL; + } path[depth].p_ext = extent; } ret = ext4_ext_shift_path_extents(path, shift, inode, diff --git a/fs/file.c b/fs/file.c index 5f9c802a5d8d..c942c89ca4cd 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) struct files_struct *files = current->files; struct file *file; - if (atomic_read(&files->count) == 1) { + /* + * If another thread is concurrently calling close_fd() followed + * by put_files_struct(), we must not observe the old table + * entry combined with the new refcount - otherwise we could + * return a file that is concurrently being freed. + * + * atomic_read_acquire() pairs with atomic_dec_and_test() in + * put_files_struct(). + */ + if (atomic_read_acquire(&files->count) == 1) { file = files_lookup_fd_raw(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 443f83382b9b..9958d4020771 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1712,18 +1712,26 @@ static int writeback_single_inode(struct inode *inode, wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* - * If the inode is now fully clean, then it can be safely removed from - * its writeback list (if any). Otherwise the flusher threads are - * responsible for the writeback lists. + * If the inode is freeing, its i_io_list shouldn't be updated + * as it can be finally deleted at this moment. */ - if (!(inode->i_state & I_DIRTY_ALL)) - inode_cgwb_move_to_attached(inode, wb); - else if (!(inode->i_state & I_SYNC_QUEUED)) { - if ((inode->i_state & I_DIRTY)) - redirty_tail_locked(inode, wb); - else if (inode->i_state & I_DIRTY_TIME) { - inode->dirtied_when = jiffies; - inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); + if (!(inode->i_state & I_FREEING)) { + /* + * If the inode is now fully clean, then it can be safely + * removed from its writeback list (if any). Otherwise the + * flusher threads are responsible for the writeback lists. 
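/*
 * A sketch of the memory ordering the __fget_light() change above relies
 * on: the lockless reader's acquire load of the refcount pairs with the
 * releasing decrement in put_files_struct(), so observing count == 1 also
 * guarantees the reader sees every table update made before the count
 * dropped. C11 atomics model the kernel's atomic_t helpers; names are
 * simplified.
 */
#include <stdatomic.h>
#include <stddef.h>

struct files {
	atomic_int count;
	void *table;		/* updated before the count is released */
};

static void put_files(struct files *f)
{
	/* release: prior table updates happen-before this decrement */
	atomic_fetch_sub_explicit(&f->count, 1, memory_order_release);
}

static void *lookup_fast(struct files *f)
{
	/* acquire pairs with the release in put_files() */
	if (atomic_load_explicit(&f->count, memory_order_acquire) == 1)
		return f->table;	/* single owner: lockless is safe */
	return NULL;			/* take the locked slow path */
}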
+ */ + if (!(inode->i_state & I_DIRTY_ALL)) + inode_cgwb_move_to_attached(inode, wb); + else if (!(inode->i_state & I_SYNC_QUEUED)) { + if ((inode->i_state & I_DIRTY)) + redirty_tail_locked(inode, wb); + else if (inode->i_state & I_DIRTY_TIME) { + inode->dirtied_when = jiffies; + inode_io_list_move_locked(inode, + wb, + &wb->b_dirty_time); + } } } diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index a058e0136bfe..ab8ceddf9efa 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -203,7 +203,11 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, struct fscache_volume *volume; struct fscache_cache *cache; size_t klen, hlen; - char *key; + u8 *key; + + klen = strlen(volume_key); + if (klen > NAME_MAX) + return NULL; if (!coherency_data) coherency_len = 0; @@ -229,7 +233,6 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, /* Stick the length on the front of the key and pad it out to make * hashing easier. */ - klen = strlen(volume_key); hlen = round_up(1 + klen + 1, sizeof(__le32)); key = kzalloc(hlen, GFP_KERNEL); if (!key) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 71bfb663aac5..89f4741728ba 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, .mode = mode }; int err; - bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || - (mode & (FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_ZERO_RANGE)); - - bool block_faults = FUSE_IS_DAX(inode) && lock_inode; + bool block_faults = FUSE_IS_DAX(inode) && + (!(mode & FALLOC_FL_KEEP_SIZE) || + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))); if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) @@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fm->fc->no_fallocate) return -EOPNOTSUPP; - if (lock_inode) { - inode_lock(inode); - if (block_faults) { - filemap_invalidate_lock(inode->i_mapping); - err = fuse_dax_break_layouts(inode, 0, 0); - if (err) - goto out; - } + inode_lock(inode); + if (block_faults) { + filemap_invalidate_lock(inode->i_mapping); + err = fuse_dax_break_layouts(inode, 0, 0); + if (err) + goto out; + } - if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { - loff_t endbyte = offset + length - 1; + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { + loff_t endbyte = offset + length - 1; - err = fuse_writeback_range(inode, offset, endbyte); - if (err) - goto out; - } + err = fuse_writeback_range(inode, offset, endbyte); + if (err) + goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -3039,8 +3035,7 @@ out: if (block_faults) filemap_invalidate_unlock(inode->i_mapping); - if (lock_inode) - inode_unlock(inode); + inode_unlock(inode); fuse_flush_time_update(inode); diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 8de970d6146f..94b8ed4ef870 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_fp->filp, src_off, - dst_fp->filp, dst_off, - len, 0); + ret = vfs_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, len, + COPY_FILE_SPLICE); if (ret < 0) return ret; diff --git a/fs/namei.c b/fs/namei.c index 578c2110df02..9155ecb547ce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace 
*mnt_userns, struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; + int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); @@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, if (error) return error; inode = file_inode(file); - if (!(file->f_flags & O_EXCL)) { + if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f650afedd67f..849a720ab43f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src, src_pos, dst, dst_pos, - count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, + COPY_FILE_SPLICE); return ret; } @@ -871,10 +871,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct svc_rqst *rqstp = sd->u.data; struct page *page = buf->page; // may be a compound one unsigned offset = buf->offset; + struct page *last_page; - page += offset / PAGE_SIZE; - for (int i = sd->len; i > 0; i -= PAGE_SIZE) - svc_rqst_replace_page(rqstp, page++); + last_page = page + (offset + sd->len - 1) / PAGE_SIZE; + for (page += offset / PAGE_SIZE; page <= last_page; page++) + svc_rqst_replace_page(rqstp, page); if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 3b55e239705f..9930fa901039 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 77ff8e95421f..dc359b56fdfa 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; + void *kaddr; + struct nilfs_segment_usage *su; int ret; + down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (!ret) { mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr); brelse(bh); } + up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5101131e6047..440960110a42 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif show_val_kb(m, "PageTables: ", global_node_page_state(NR_PAGETABLE)); - show_val_kb(m, "SecPageTables: ", + show_val_kb(m, "SecPageTables: ", global_node_page_state(NR_SECONDARY_PAGETABLE)); show_val_kb(m, "NFS_Unstable: ", 0); diff --git a/fs/read_write.c b/fs/read_write.c index 328ce8cf9a85..24b9668d6377 100644 --- a/fs/read_write.c +++ 
b/fs/read_write.c @@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + lockdep_assert(sb_write_started(file_inode(file_out)->i_sb)); + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); } @@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. */ - if (file_out->f_op->copy_file_range) { + if (flags & COPY_FILE_SPLICE) { + /* cross sb splice is allowed */ + } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) return -EXDEV; @@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags) { ssize_t ret; + bool splice = flags & COPY_FILE_SPLICE; - if (flags != 0) + if (flags & ~COPY_FILE_SPLICE) return -EINVAL; ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, @@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. */ - if (file_out->f_op->copy_file_range) { + if (!splice && file_out->f_op->copy_file_range) { ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); goto done; } - if (file_in->f_op->remap_file_range && + if (!splice && file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, @@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * consistent story about which filesystems support copy_file_range() * and which filesystems do not, that will allow userspace tools to * make consistent decisions w.r.t. using copy_file_range(). + * + * We also get here if the caller (e.g. nfsd) requested COPY_FILE_SPLICE. */ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); @@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, pos_out = f_out.file->f_pos; } + ret = -EINVAL; + if (flags != 0) + goto out; + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, flags); if (ret > 0) { diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index abc9a85106f2..2c53fbb8d918 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -41,6 +41,13 @@ static void zonefs_account_active(struct inode *inode) return; /* + * For zones that transitioned to the offline or readonly condition, + * we only need to clear the active state. + */ + if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) + goto out; + + /* * If the zone is active, that is, if it is explicitly open or * partially written, check if it was already accounted as active. */ @@ -53,6 +60,7 @@ static void zonefs_account_active(struct inode *inode) return; } +out: /* The zone is not active. 
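/*
 * A sketch of the flag split the read_write.c changes above introduce:
 * COPY_FILE_SPLICE is meaningful only inside the kernel, so the syscall
 * boundary rejects every user-supplied flag while the internal entry
 * point masks for the bits it understands. Function names are invented.
 */
#include <errno.h>

#define COPY_FILE_SPLICE (1 << 0)	/* in-kernel only */

static long vfs_copy(unsigned int flags)
{
	if (flags & ~COPY_FILE_SPLICE)
		return -EINVAL;		/* unknown internal flag */
	/* if COPY_FILE_SPLICE is set: skip clone/copy ops, splice instead */
	return 0;
}

static long sys_copy(unsigned int flags)
{
	if (flags != 0)
		return -EINVAL;		/* user space gets no flags at all */
	return vfs_copy(flags);
}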
If it was, update the active count */ if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; @@ -324,6 +332,7 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, inode->i_flags |= S_IMMUTABLE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; case BLK_ZONE_COND_READONLY: /* @@ -342,8 +351,10 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, zone->cond = BLK_ZONE_COND_OFFLINE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; } + zi->i_flags |= ZONEFS_ZONE_READONLY; inode->i_mode &= ~0222; return i_size_read(inode); case BLK_ZONE_COND_FULL: @@ -1922,18 +1933,18 @@ static int __init zonefs_init(void) if (ret) return ret; - ret = register_filesystem(&zonefs_type); + ret = zonefs_sysfs_init(); if (ret) goto destroy_inodecache; - ret = zonefs_sysfs_init(); + ret = register_filesystem(&zonefs_type); if (ret) - goto unregister_fs; + goto sysfs_exit; return 0; -unregister_fs: - unregister_filesystem(&zonefs_type); +sysfs_exit: + zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); @@ -1942,9 +1953,9 @@ destroy_inodecache: static void __exit zonefs_exit(void) { + unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); - unregister_filesystem(&zonefs_type); } MODULE_AUTHOR("Damien Le Moal"); diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 4b3de66c3233..1dbe78119ff1 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -39,8 +39,10 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) return ZONEFS_ZTYPE_SEQ; } -#define ZONEFS_ZONE_OPEN (1 << 0) -#define ZONEFS_ZONE_ACTIVE (1 << 1) +#define ZONEFS_ZONE_OPEN (1U << 0) +#define ZONEFS_ZONE_ACTIVE (1U << 1) +#define ZONEFS_ZONE_OFFLINE (1U << 2) +#define ZONEFS_ZONE_READONLY (1U << 3) /* * In-memory inode data. 
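/*
 * A sketch of the module init/exit rule the zonefs_init()/zonefs_exit()
 * reorder above enforces: publish the filesystem (register_filesystem())
 * only as the very last init step, remove visibility first on exit, and
 * unwind in exact reverse order on init failure. The stub helpers are
 * invented for illustration.
 */
static int setup_cache(void) { return 0; }
static void destroy_cache(void) { }
static int setup_sysfs(void) { return 0; }
static void destroy_sysfs(void) { }
static int publish_fs(void) { return 0; }	/* users may race in now */
static void unpublish_fs(void) { }

static int mod_init(void)
{
	int ret;

	ret = setup_cache();
	if (ret)
		return ret;
	ret = setup_sysfs();
	if (ret)
		goto out_cache;
	ret = publish_fs();		/* last: everything is ready */
	if (ret)
		goto out_sysfs;
	return 0;

out_sysfs:
	destroy_sysfs();
out_cache:
	destroy_cache();
	return ret;
}

static void mod_exit(void)
{
	unpublish_fs();			/* first: stop new users */
	destroy_sysfs();
	destroy_cache();
}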
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 492dce43236e..cab7cfebf40b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #define tlb_needs_table_invalidate() (true) #endif +void tlb_remove_table_sync_one(void); + #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif +static inline void tlb_remove_table_sync_one(void) { } + #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 5755ae5a4712..6a869682c120 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -14,7 +14,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 -#define OCR_MODE_MASK 0x07 +#define OCR_MODE_MASK 0x03 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d5595d57f4e5..6a94a6eaad27 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1110,10 +1110,10 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int parse_perf_domain(int cpu, const char *list_name, - const char *cell_name) + const char *cell_name, + struct of_phandle_args *args) { struct device_node *cpu_np; - struct of_phandle_args args; int ret; cpu_np = of_cpu_device_node_get(cpu); @@ -1121,41 +1121,44 @@ static inline int parse_perf_domain(int cpu, const char *list_name, return -ENODEV; ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0, - &args); + args); if (ret < 0) return ret; of_node_put(cpu_np); - return args.args[0]; + return 0; } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { - int target_idx; int cpu, ret; + struct of_phandle_args args; - ret = parse_perf_domain(pcpu, list_name, cell_name); + ret = parse_perf_domain(pcpu, list_name, cell_name, pargs); if (ret < 0) return ret; - target_idx = ret; cpumask_set_cpu(pcpu, cpumask); for_each_possible_cpu(cpu) { if (cpu == pcpu) continue; - ret = parse_perf_domain(cpu, list_name, cell_name); + ret = parse_perf_domain(cpu, list_name, cell_name, &args); if (ret < 0) continue; - if (target_idx == ret) + if (pargs->np == args.np && pargs->args_count == args.args_count && + !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count)) cpumask_set_cpu(cpu, cpumask); + + of_node_put(args.np); } - return target_idx; + return 0; } #else static inline int cpufreq_boost_trigger_state(int state) @@ -1185,7 +1188,8 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { return -EOPNOTSUPP; } diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct 
fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ } #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/include/linux/fs.h b/include/linux/fs.h index e654435f1651..59ae95ddb679 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2089,6 +2089,14 @@ struct dir_context { */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) +/* + * These flags control the behavior of vfs_copy_file_range(). + * They are not available to the user via syscall. + * + * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops + */ +#define COPY_FILE_SPLICE (1 << 0) + struct iov_iter; struct io_uring_cmd; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 36e5dd84cf59..8e312c8323a8 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -75,7 +75,7 @@ struct fscache_volume { atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; unsigned int key_hash; /* Hash of key string */ - char *key; /* Volume ID, eg. "[email protected]@1234" */ + u8 *key; /* Volume ID, eg. "[email protected]@1234" */ struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ struct hlist_bl_node hash_link; /* Link in hash table */ struct work_struct work; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ef4aea3b356e..65a78773dcca 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } +static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) +{ + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); + + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) + return; + + if (node_online(this_node)) + return; + + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); + dump_stack(); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). 
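/*
 * A simplified model of the fault-injection refactor above: replacing the
 * sticky attr->no_warn field with a per-call flags argument lets one
 * shared fault_attr stay verbose for most callers while individual call
 * sites opt out of the warning. The real should_fail_ex() also takes a
 * size argument; this sketch drops it for brevity.
 */
#include <stdbool.h>
#include <stdio.h>

enum fault_flags { FAULT_NOWARN = 1 << 0 };

struct fault_attr { int times; };

static bool should_fail_ex(struct fault_attr *attr, int flags)
{
	if (attr->times <= 0)
		return false;
	attr->times--;
	if (!(flags & FAULT_NOWARN))
		fprintf(stderr, "fault injected\n");
	return true;
}

static bool should_fail(struct fault_attr *attr)
{
	return should_fail_ex(attr, 0);	/* legacy callers stay noisy */
}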
@@ -218,7 +232,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } @@ -227,7 +241,7 @@ static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 18592bdf4c1b..637a60607c7d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -776,6 +776,7 @@ struct kvm { struct srcu_struct srcu; struct srcu_struct irq_srcu; pid_t userspace_pid; + bool override_halt_poll_ns; unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool vm_bugged; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af2ceb4160bc..06cbad166225 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -981,6 +981,7 @@ struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ + u16 op_mod; /* cmd op_mod */ void *out; /* pointer to the cmd output buffer */ }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..974ccca609d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1852,6 +1852,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. + */ +struct zap_details { + struct folio *single_folio; /* Locked folio to be unmapped */ + bool even_cows; /* Zap COWed private pages too? */ + zap_flags_t zap_flags; /* Extra flags for zapping */ +}; + +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) +/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ +#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else @@ -1869,6 +1888,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details); void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -3467,12 +3488,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -/* - * Whether to drop the pte markers, for example, the uffd-wp information for - * file-backed memory. This should only be specified when we will completely - * drop the page in the mm, either by truncation or unmapping of the vma. By - * default, the flag is not set. 
- */ -#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) - #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9c50bc40f8ff..6f7993803ee7 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..5f0d7d0b9471 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3af1e927247d..69174093078f 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -281,7 +281,8 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's * rcv_saddr field should already have been updated when this is called. 
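/*
 * A sketch of the idiom behind the pmd_young() hunk above: generic
 * headers provide a conservative stub unless the architecture defines the
 * symbol first, so common code can call it unconditionally.
 */
#include <stdio.h>

/* an architecture would do: #define pmd_young arch_pmd_young */
#ifndef pmd_young
static inline int pmd_young(unsigned long pmd)
{
	(void)pmd;
	return 0;	/* no hardware accessed bit: never "young" */
}
#endif

int main(void)
{
	printf("%d\n", pmd_young(0));
	return 0;
}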
*/ -int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk); +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family); +void inet_bhash2_reset_saddr(struct sock *sk); void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 20745cf7ae1a..2f2a6023fb0e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -83,7 +83,7 @@ struct neigh_parms { struct rcu_head rcu_head; int reachable_time; - int qlen; + u32 qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 01a70b27e026..65058faea4db 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -26,6 +26,8 @@ struct sctp_sched_ops { int (*init)(struct sctp_stream *stream); /* Init a stream */ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* free a stream */ + void (*free_sid)(struct sctp_stream *stream, __u16 sid); /* Frees the entire thing */ void (*free)(struct sctp_stream *stream); diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 83fd81c82e4c..9fbd3832bcdc 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -84,8 +84,8 @@ enum sof_ipc_dai_type { SOF_DAI_AMD_BT, /**< AMD ACP BT*/ SOF_DAI_AMD_SP, /**< AMD ACP SP */ SOF_DAI_AMD_DMIC, /**< AMD ACP DMIC */ - SOF_DAI_AMD_HS, /**< Amd HS */ SOF_DAI_MEDIATEK_AFE, /**< Mediatek AFE */ + SOF_DAI_AMD_HS, /**< Amd HS */ }; /* general purpose DAI configuration */ diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 935af4947917..760455dfa860 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -171,15 +171,15 @@ TRACE_EVENT(mm_collapse_huge_page_swapin, TRACE_EVENT(mm_khugepaged_scan_file, - TP_PROTO(struct mm_struct *mm, struct page *page, const char *filename, + TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file, int present, int swap, int result), - TP_ARGS(mm, page, filename, present, swap, result), + TP_ARGS(mm, page, file, present, swap, result), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, pfn) - __string(filename, filename) + __string(filename, file->f_path.dentry->d_iname) __field(int, present) __field(int, swap) __field(int, result) @@ -188,7 +188,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, TP_fast_assign( __entry->mm = mm; __entry->pfn = page ? page_to_pfn(page) : -1; - __assign_str(filename, filename); + __assign_str(filename, file->f_path.dentry->d_iname); __entry->present = present; __entry->swap = swap; __entry->result = result; diff --git a/init/Kconfig b/init/Kconfig index abf65098f1b6..94125d3b6893 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -87,7 +87,7 @@ config CC_HAS_ASM_GOTO_OUTPUT config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. 
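/*
 * A note on the sof/dai.h hunk above: these enum values travel in
 * firmware IPC messages, so the fix restores the original numbering.
 * New entries may only be appended, never inserted, or every later value
 * silently renumbers. Illustration with invented names:
 */
enum ipc_dai_type {
	DAI_EXISTING_A,		/* 0: frozen by the wire protocol */
	DAI_EXISTING_B,		/* 1: must keep its value */
	DAI_NEW_TYPE,		/* 2: additions go at the end */
};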
- def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .\n": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) + def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 7b473259f3f4..68dfc6936aa7 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -101,8 +101,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); - if (ret) - fput(file); return ret; } diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index cef5ff924e63..50bc3af44953 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -238,9 +238,14 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) static inline int io_run_task_work(void) { + /* + * Always check-and-clear the task_work notification signal. With how + * signaling works for task_work, we can find it set with nothing to + * run. We need to clear it for that case, like get_signal() does. + */ + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + clear_notify_signal(); if (task_work_pending(current)) { - if (test_thread_flag(TIF_NOTIFY_SIGNAL)) - clear_notify_signal(); __set_current_state(TASK_RUNNING); task_work_run(); return 1; diff --git a/io_uring/poll.c b/io_uring/poll.c index 055632e9092a..d9bf1767867e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -40,7 +40,14 @@ struct io_poll_table { }; #define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK GENMASK(30, 0) +#define IO_POLL_RETRY_FLAG BIT(30) +#define IO_POLL_REF_MASK GENMASK(29, 0) + +/* + * We usually have 1-2 refs taken, 128 is more than enough and we want to + * maximise the margin between this amount and the moment when it overflows. + */ +#define IO_POLL_REF_BIAS 128 #define IO_WQE_F_DOUBLE 1 @@ -58,6 +65,21 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) return priv & IO_WQE_F_DOUBLE; } +static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) +{ + int v; + + /* + * poll_refs are already elevated and we don't have much hope for + * grabbing the ownership. Instead of incrementing set a retry flag + * to notify the loop that there might have been some change. + */ + v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); + if (v & IO_POLL_REF_MASK) + return false; + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + /* * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can * bump it and acquire ownership. It's disallowed to modify requests while not @@ -66,6 +88,8 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) */ static inline bool io_poll_get_ownership(struct io_kiocb *req) { + if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + return io_poll_get_ownership_slowpath(req); return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); } @@ -235,6 +259,16 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) */ if ((v & IO_POLL_REF_MASK) != 1) req->cqe.res = 0; + if (v & IO_POLL_RETRY_FLAG) { + req->cqe.res = 0; + /* + * We won't find new events that came in between + * vfs_poll and the ref put unless we clear the flag + * in advance. 
+ */ + atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); + v &= ~IO_POLL_RETRY_FLAG; + } /* the mask was stashed in __io_poll_execute */ if (!req->cqe.res) { @@ -274,7 +308,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); + } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & + IO_POLL_REF_MASK); return IOU_POLL_NO_ACTION; } @@ -518,7 +553,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; - int v; INIT_HLIST_NODE(&req->hash_node); req->work.cancel_seq = atomic_read(&ctx->cancel_seq); @@ -586,11 +620,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (ipt->owning) { /* - * Release ownership. If someone tried to queue a tw while it was - * locked, kick it off for them. + * Try to release ownership. If we see a change of state, e.g. + * poll was woken up, queue up a tw, it'll deal with it. */ - v = atomic_dec_return(&req->poll_refs); - if (unlikely(v & IO_POLL_REF_MASK)) + if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) __io_poll_execute(req, 0); } return 0; diff --git a/ipc/shm.c b/ipc/shm.c index 7d86f058fb86..bd2fcc4d454e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -275,10 +275,8 @@ static inline void shm_rmid(struct shmid_kernel *s) } -static int __shm_open(struct vm_area_struct *vma) +static int __shm_open(struct shm_file_data *sfd) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); @@ -302,7 +300,15 @@ static int __shm_open(struct vm_area_struct *vma) /* This is called by fork, once for every shm attach. */ static void shm_open(struct vm_area_struct *vma) { - int err = __shm_open(vma); + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + int err; + + /* Always call underlying open if present */ + if (sfd->vm_ops->open) + sfd->vm_ops->open(vma); + + err = __shm_open(sfd); /* * We raced in the idr lookup or with shm_destroy(). * Either way, the ID is busted. @@ -359,10 +365,8 @@ static bool shm_may_destroy(struct shmid_kernel *shp) * The descriptor has already been removed from the current->mm->mmap list * and will later be kfree()d. */ -static void shm_close(struct vm_area_struct *vma) +static void __shm_close(struct shm_file_data *sfd) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; struct ipc_namespace *ns = sfd->ns; @@ -388,6 +392,18 @@ done: up_write(&shm_ids(ns).rwsem); } +static void shm_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + /* Always call underlying close if present */ + if (sfd->vm_ops->close) + sfd->vm_ops->close(vma); + + __shm_close(sfd); +} + /* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_orphaned(int id, void *p, void *data) { @@ -583,13 +599,13 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) * IPC ID that was removed, and possibly even reused by another shm * segment already. Propagate this case as an error to caller. 
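/*
 * A sketch of the poll_refs layout the io_uring fix above sets up: the
 * low 30 bits count references, the top bits carry flags, and a bias well
 * below the flag bits routes crowded counters to a slow path before an
 * increment could ever carry into the flags. C11 atomics model atomic_t;
 * names are illustrative.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define POLL_CANCEL_FLAG	(1u << 31)
#define POLL_RETRY_FLAG		(1u << 30)
#define POLL_REF_MASK		((1u << 30) - 1)
#define POLL_REF_BIAS		128u

static bool poll_get_ownership(atomic_uint *refs)
{
	if (atomic_load(refs) >= POLL_REF_BIAS) {
		/* crowded: record a retry instead of risking overflow */
		if (atomic_fetch_or(refs, POLL_RETRY_FLAG) & POLL_REF_MASK)
			return false;
	}
	/* owner is whoever moves the ref part from 0 to 1 */
	return !(atomic_fetch_add(refs, 1) & POLL_REF_MASK);
}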
*/ - ret = __shm_open(vma); + ret = __shm_open(sfd); if (ret) return ret; ret = call_mmap(sfd->file, vma); if (ret) { - shm_close(vma); + __shm_close(sfd); return ret; } sfd->vm_ops = vma->vm_ops; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 802fc15b0d73..f27fa5ba7d72 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -74,7 +74,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, gfp_flags | __GFP_NOWARN); if (selem) { if (value) - memcpy(SDATA(selem)->data, value, smap->map.value_size); + copy_map_value(&smap->map, SDATA(selem)->data, value); return selem; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 884871427a94..7f04f995c975 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2291,6 +2291,7 @@ event_sched_out(struct perf_event *event, !event->pending_work) { event->pending_work = 1; dec = false; + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); task_work_add(current, &event->pending_task, TWA_RESUME); } if (dec) @@ -2336,6 +2337,7 @@ group_sched_out(struct perf_event *group_event, #define DETACH_GROUP 0x01UL #define DETACH_CHILD 0x02UL +#define DETACH_DEAD 0x04UL /* * Cross CPU call to remove a performance event @@ -2356,12 +2358,20 @@ __perf_remove_from_context(struct perf_event *event, update_cgrp_time_from_cpuctx(cpuctx, false); } + /* + * Ensure event_sched_out() switches to OFF; at the very least + * this avoids raising perf_pending_task() at this time. + */ + if (flags & DETACH_DEAD) + event->pending_disable = 1; event_sched_out(event, cpuctx, ctx); if (flags & DETACH_GROUP) perf_group_detach(event); if (flags & DETACH_CHILD) perf_child_detach(event); list_del_event(event, ctx); + if (flags & DETACH_DEAD) + event->state = PERF_EVENT_STATE_DEAD; if (!ctx->nr_events && ctx->is_active) { if (ctx == &cpuctx->ctx) @@ -5121,9 +5131,7 @@ int perf_event_release_kernel(struct perf_event *event) ctx = perf_event_ctx_lock(event); WARN_ON_ONCE(ctx->parent_ctx); - perf_remove_from_context(event, DETACH_GROUP); - raw_spin_lock_irq(&ctx->lock); /* * Mark this event as STATE_DEAD, there is no external reference to it * anymore. @@ -5135,8 +5143,7 @@ int perf_event_release_kernel(struct perf_event *event) * Thus this guarantees that we will in fact observe and kill _ALL_ * child events. */ - event->state = PERF_EVENT_STATE_DEAD; - raw_spin_unlock_irq(&ctx->lock); + perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); perf_event_ctx_unlock(event, ctx); @@ -6577,6 +6584,8 @@ static void perf_pending_task(struct callback_head *head) if (rctx >= 0) perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); + + put_event(event); } #ifdef CONFIG_GUEST_PERF_EVENTS @@ -9030,7 +9039,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, subprog->jited_len, unregister, - prog->aux->ksym.name); + subprog->aux->ksym.name); } } } @@ -9273,6 +9282,19 @@ int perf_event_account_interrupt(struct perf_event *event) return __perf_event_account_interrupt(event, 1); } +static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) +{ + /* + * Due to interrupt latency (AKA "skid"), we may enter the + * kernel before taking an overflow, even if the PMU is only + * counting user events. + */ + if (event->attr.exclude_kernel && !user_mode(regs)) + return false; + + return true; +} + /* * Generic event overflow handling, sampling.
*/ @@ -9306,6 +9328,13 @@ static int __perf_event_overflow(struct perf_event *event, } if (event->attr.sigtrap) { + /* + * The desired behaviour of sigtrap vs invalid samples is a bit + * tricky; on the one hand, one should not lose the SIGTRAP if + * it is the first event; on the other hand, we should also not + * trigger the WARN or override the data address. + */ + bool valid_sample = sample_is_allowed(event, regs); unsigned int pending_id = 1; if (regs) @@ -9313,7 +9342,7 @@ static int __perf_event_overflow(struct perf_event *event, if (!event->pending_sigtrap) { event->pending_sigtrap = pending_id; local_inc(&event->ctx->nr_pending); - } else if (event->attr.exclude_kernel) { + } else if (event->attr.exclude_kernel && valid_sample) { /* * Should not be able to return to user space without * consuming pending_sigtrap; with exceptions: @@ -9328,7 +9357,10 @@ static int __perf_event_overflow(struct perf_event *event, */ WARN_ON_ONCE(event->pending_sigtrap != pending_id); } - event->pending_addr = data->addr; + + event->pending_addr = 0; + if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) + event->pending_addr = data->addr; irq_work_queue(&event->pending_irq); } diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index cbb0bed958ab..7670a811a565 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -280,6 +280,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) for (i = 0; i < sfn_ptr->num_counters; i++) dfn_ptr->counters[i] += sfn_ptr->counters[i]; + + sfn_ptr = list_next_entry(sfn_ptr, head); } } diff --git a/kernel/notifier.c b/kernel/notifier.c index 0d5bd62c480e..ab75637fd904 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -62,7 +62,7 @@ static int notifier_chain_unregister(struct notifier_block **nl, * value of this parameter is -1. * @nr_calls: Records the number of notifications sent. Don't care * value of this field is NULL. - * @returns: notifier_call_chain returns the value returned by the + * Return: notifier_call_chain returns the value returned by the * last notifier function called. */ static int notifier_call_chain(struct notifier_block **nl, @@ -105,13 +105,13 @@ NOKPROBE_SYMBOL(notifier_call_chain); * @val_up: Value passed unmodified to the notifier function * @val_down: Value passed unmodified to the notifier function when recovering * from an error on @val_up - * @v Pointer passed unmodified to the notifier function + * @v: Pointer passed unmodified to the notifier function * * NOTE: It is important the @nl chain doesn't change between the two * invocations of notifier_call_chain() such that we visit the * exact same notifier callbacks; this rules out any RCU usage. * - * Returns: the return value of the @val_up call. + * Return: the return value of the @val_up call. */ static int notifier_call_chain_robust(struct notifier_block **nl, unsigned long val_up, unsigned long val_down, diff --git a/kernel/power/process.c b/kernel/power/process.c index ddd9988327fe..6c1c7e566d35 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -27,6 +27,8 @@ unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC; static int try_to_freeze_tasks(bool user_only) { + const char *what = user_only ?
"user space processes" : + "remaining freezable tasks"; struct task_struct *g, *p; unsigned long end_time; unsigned int todo; @@ -36,6 +38,8 @@ static int try_to_freeze_tasks(bool user_only) bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; + pr_info("Freezing %s\n", what); + start = ktime_get_boottime(); end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs); @@ -82,9 +86,8 @@ static int try_to_freeze_tasks(bool user_only) elapsed_msecs = ktime_to_ms(elapsed); if (todo) { - pr_cont("\n"); - pr_err("Freezing of tasks %s after %d.%03d seconds " - "(%d tasks refusing to freeze, wq_busy=%d):\n", + pr_err("Freezing %s %s after %d.%03d seconds " + "(%d tasks refusing to freeze, wq_busy=%d):\n", what, wakeup ? "aborted" : "failed", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); @@ -101,8 +104,8 @@ static int try_to_freeze_tasks(bool user_only) read_unlock(&tasklist_lock); } } else { - pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, - elapsed_msecs % 1000); + pr_info("Freezing %s completed (elapsed %d.%03d seconds)\n", + what, elapsed_msecs / 1000, elapsed_msecs % 1000); } return todo ? -EBUSY : 0; @@ -130,14 +133,11 @@ int freeze_processes(void) static_branch_inc(&freezer_active); pm_wakeup_clear(0); - pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); - if (!error) { + if (!error) __usermodehelper_set_disable_depth(UMH_DISABLED); - pr_cont("done."); - } - pr_cont("\n"); + BUG_ON(in_atomic()); /* @@ -166,14 +166,9 @@ int freeze_kernel_threads(void) { int error; - pr_info("Freezing remaining freezable tasks ... "); - pm_nosig_freezing = true; error = try_to_freeze_tasks(false); - if (!error) - pr_cont("done."); - pr_cont("\n"); BUG_ON(in_atomic()); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 2a406753af90..cd8b7b35f1e8 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1723,8 +1723,8 @@ static unsigned long minimum_image_size(unsigned long saveable) * /sys/power/reserved_size, respectively). To make this happen, we compute the * total number of available page frames and allocate at least * - * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 - * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) + * ([page frames total] - PAGES_FOR_IO - [metadata pages]) / 2 + * - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) * * of them, which corresponds to the maximum size of a hibernation image. * @@ -2259,10 +2259,14 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) if (unlikely(buf[j] == BM_END_OF_MAP)) break; - if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) + if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) { memory_bm_set_bit(bm, buf[j]); - else + } else { + if (!pfn_valid(buf[j])) + pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n", + (unsigned long long)PFN_PHYS(buf[j])); return -EFAULT; + } } return 0; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9161d1136d01..1207c78f85c1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -25,9 +25,6 @@ struct sugov_policy { unsigned int next_freq; unsigned int cached_raw_freq; - /* max CPU capacity, which is equal for all CPUs in freq. 
domain */ - unsigned long max; - /* The next fields are only needed if fast switch cannot be used: */ struct irq_work irq_work; struct kthread_work work; @@ -51,6 +48,7 @@ struct sugov_cpu { unsigned long util; unsigned long bw_dl; + unsigned long max; /* The field below is for single-CPU policies only: */ #ifdef CONFIG_NO_HZ_COMMON @@ -160,6 +158,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); + sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), FREQUENCY_UTIL, NULL); @@ -254,7 +253,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, */ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long boost; /* No boost currently required */ @@ -282,8 +280,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * sg_cpu->util is already in capacity scale; convert iowait_boost * into the same scale so we can compare. */ - boost = sg_cpu->iowait_boost * sg_policy->max; - boost >>= SCHED_CAPACITY_SHIFT; + boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; @@ -340,7 +337,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, if (!sugov_update_single_common(sg_cpu, time, flags)) return; - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. @@ -376,7 +373,6 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); - struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long prev_util = sg_cpu->util; /* @@ -403,8 +399,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), - map_util_perf(sg_cpu->util), - sg_policy->max); + map_util_perf(sg_cpu->util), sg_cpu->max); sg_cpu->sg_policy->last_freq_update_time = time; } @@ -413,19 +408,25 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util = 0; + unsigned long util = 0, max = 1; unsigned int j; for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); + unsigned long j_util, j_max; sugov_get_util(j_sg_cpu); sugov_iowait_apply(j_sg_cpu, time); + j_util = j_sg_cpu->util; + j_max = j_sg_cpu->max; - util = max(j_sg_cpu->util, util); + if (j_util * max > j_max * util) { + util = j_util; + max = j_max; + } } - return get_next_freq(sg_policy, util, sg_policy->max); + return get_next_freq(sg_policy, util, max); } static void @@ -751,7 +752,7 @@ static int sugov_start(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); - unsigned int cpu = cpumask_first(policy->cpus); + unsigned int cpu; sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->last_freq_update_time = 0; @@ 
-759,7 +760,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->work_in_progress = false; sg_policy->limits_changed = false; sg_policy->cached_raw_freq = 0; - sg_policy->max = arch_scale_cpu_capacity(cpu); sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fe0e115272..5cfc95a52bc3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2180,10 +2180,12 @@ void tracing_reset_online_cpus(struct array_buffer *buf) } /* Must have trace_types_lock held */ -void tracing_reset_all_online_cpus(void) +void tracing_reset_all_online_cpus_unlocked(void) { struct trace_array *tr; + lockdep_assert_held(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->clear_trace) continue; @@ -2195,6 +2197,13 @@ void tracing_reset_all_online_cpus(void) } } +void tracing_reset_all_online_cpus(void) +{ + mutex_lock(&trace_types_lock); + tracing_reset_all_online_cpus_unlocked(); + mutex_unlock(&trace_types_lock); +} + /* * The tgid_map array maps from pid to tgid; i.e. the value stored at index i * is the tgid last observed corresponding to pid=i. diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54ee5711c729..d42e24507152 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -580,6 +580,7 @@ int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); +void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 154996684fb5..4376887e0d8a 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -118,6 +118,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type if (ret) break; } + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); out: argv_free(argv); @@ -214,6 +215,7 @@ int dyn_events_release_all(struct dyn_event_operations *type) break; } out: + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0356cae0cf74..f71ea6e79b3c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2880,7 +2880,10 @@ static int probe_remove_event_call(struct trace_event_call *call) * TRACE_REG_UNREGISTER. */ if (file->flags & EVENT_FILE_FL_ENABLED) - return -EBUSY; + goto busy; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -2893,6 +2896,12 @@ static int probe_remove_event_call(struct trace_event_call *call) __trace_remove_event_call(call); return 0; + busy: + /* No need to clear the trace now */ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + tr->clear_trace = false; + } + return -EBUSY; } /* Remove an event_call */ @@ -2972,7 +2981,7 @@ static void trace_module_remove_events(struct module *mod) * over from this module may be passed to the new module events and * unexpected results may occur. 
*/ - tracing_reset_all_online_cpus(); + tracing_reset_all_online_cpus_unlocked(); } static int trace_module_notify(struct notifier_block *self, diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 48465f7e97b4..1c82478e8dff 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -983,7 +983,7 @@ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, * A trigger can define one or more variables. If any one of them is * currently referenced by any other trigger, this function will * determine that. - + * * Typically used to determine whether or not a trigger can be removed * - if there are any references to a trigger's variables, it cannot. * @@ -3226,7 +3226,7 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, * events. However, for convenience, users are allowed to directly * specify an event field in an action, which will be automatically * converted into a variable on their behalf. - + * * This function creates a field variable with the name var_name on * the hist trigger currently being defined on the target event. If * subsys_name and event_name are specified, this function simply @@ -5143,6 +5143,9 @@ static void event_hist_trigger(struct event_trigger_data *data, void *key = NULL; unsigned int i; + if (unlikely(!rbe)) + return; + memset(compound_key, 0, hist_data->key_size); for_each_hist_key_field(i, hist_data) { diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 29fbfb27c2b2..c3b582d19b62 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1425,7 +1425,6 @@ int synth_event_delete(const char *event_name) mutex_unlock(&event_mutex); if (mod) { - mutex_lock(&trace_types_lock); /* * It is safest to reset the ring buffer if the module * being unloaded registered any events that were @@ -1437,7 +1436,6 @@ int synth_event_delete(const char *event_name) * occur. 
*/ tracing_reset_all_online_cpus(); - mutex_unlock(&trace_types_lock); } return ret; diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index ae78c2d53c8a..539b08ae7020 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1100,8 +1100,10 @@ static int user_event_create(const char *raw_command) group = current_user_event_group(); - if (!group) + if (!group) { + kfree(name); return -ENOENT; + } mutex_lock(&group->reg_mutex); diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 78d536d3ff3d..4300c5dc4e5d 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -917,7 +917,7 @@ void osnoise_trace_irq_entry(int id) void osnoise_trace_irq_exit(int id, const char *desc) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1048,7 +1048,7 @@ static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1144,7 +1144,7 @@ thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) static void thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) { - int duration; + s64 duration; if (!osn_var->sampling) return; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c3c0b077ade3..3638b3424be5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -399,6 +399,7 @@ config FRAME_WARN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help @@ -1874,8 +1875,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injection of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of these functions. This is useful to test error paths of code.
+ + If unsure, say N. config FAULT_INJECTION bool "Fault-injection framework" @@ -2107,6 +2114,7 @@ config KPROBES_SANITY_TEST depends on DEBUG_KERNEL depends on KPROBES depends on KUNIT + select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE default KUNIT_ALL_TESTS help This option provides for testing basic kprobes functionality on diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 96e092de5b72..adb2f9355ee6 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { if (in_task()) { unsigned int fail_nth = READ_ONCE(current->fail_nth); @@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr); if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index c415a685d61b..e814061d6aa0 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set) endif quiet_cmd_vdso_check = VDSOCHK $@ - cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \ + cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ then (echo >&2 "$@: dynamic relocations are not supported"; \ rm -f $@; /bin/false); fi diff --git a/mm/compaction.c b/mm/compaction.c index c51f7f545afe..1f6da31dd9a5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -985,28 +985,28 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, } /* + * Be careful not to clear PageLRU until after we're + * sure the page is not being freed elsewhere -- the + * page release code relies on it. + */ + if (unlikely(!get_page_unless_zero(page))) + goto isolate_fail; + + /* * Migration will fail if an anonymous page is pinned in memory, * so avoid taking lru_lock and isolating it unnecessarily in an * admittedly racy check. */ mapping = page_mapping(page); - if (!mapping && page_count(page) > page_mapcount(page)) - goto isolate_fail; + if (!mapping && (page_count(page) - 1) > total_mapcount(page)) + goto isolate_fail_put; /* * Only allow to migrate anonymous pages in GFP_NOFS context * because those do not depend on fs locks. */ if (!(cc->gfp_mask & __GFP_FS) && mapping) - goto isolate_fail; - - /* - * Be careful not to clear PageLRU until after we're - * sure the page is not being freed elsewhere -- the - * page release code relies on it.
- */ - if (unlikely(!get_page_unless_zero(page))) + goto isolate_fail_put; /* Only take pages on LRU: a check now makes later tests safe */ if (!PageLRU(page)) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 9f1219a67e3f..07e5f1bdf025 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme( &wmarks); } +static void damon_sysfs_update_scheme(struct damos *scheme, + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + scheme->pattern.min_sz_region = access_pattern->sz->min; + scheme->pattern.max_sz_region = access_pattern->sz->max; + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; + scheme->pattern.min_age_region = access_pattern->age->min; + scheme->pattern.max_age_region = access_pattern->age->max; + + scheme->action = sysfs_scheme->action; + + scheme->quota.ms = sysfs_quotas->ms; + scheme->quota.sz = sysfs_quotas->sz; + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; + scheme->quota.weight_sz = sysfs_weights->sz; + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; + scheme->quota.weight_age = sysfs_weights->age; + + scheme->wmarks.metric = sysfs_wmarks->metric; + scheme->wmarks.interval = sysfs_wmarks->interval_us; + scheme->wmarks.high = sysfs_wmarks->high; + scheme->wmarks.mid = sysfs_wmarks->mid; + scheme->wmarks.low = sysfs_wmarks->low; +} + static int damon_sysfs_set_schemes(struct damon_ctx *ctx, struct damon_sysfs_schemes *sysfs_schemes) { - int i; + struct damos *scheme, *next; + int i = 0; - for (i = 0; i < sysfs_schemes->nr; i++) { + damon_for_each_scheme_safe(scheme, next, ctx) { + if (i < sysfs_schemes->nr) + damon_sysfs_update_scheme(scheme, + sysfs_schemes->schemes_arr[i]); + else + damon_destroy_scheme(scheme); + i++; + } + + for (; i < sysfs_schemes->nr; i++) { struct damos *scheme, *next; scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); @@ -2339,6 +2381,10 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) damon_for_each_scheme(scheme, ctx) { struct damon_sysfs_stats *sysfs_stats; + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; sysfs_stats->nr_tried = scheme->stat.nr_tried; sysfs_stats->sz_tried = scheme->stat.sz_tried; diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0; + /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false; + /* + * In some cases, callers expect to specify __GFP_NOWARN + * to avoid printing any information (not just a warning), + * thus avoiding deadlocks. See commit 6b9dbedbe349 for + * details.
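+ * (Unlike the old attr->no_warn, FAULT_NOWARN only suppresses + * the report for this single call and leaves the shared + * fault_attr untouched.)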
+ */ if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); } static int __init setup_failslab(char *str) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e48f8ef45b17..e36ca75311a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1800,6 +1800,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, /* we rely on prep_new_huge_page to set the destructor */ set_compound_order(page, order); + __ClearPageReserved(page); __SetPageHead(page); for (i = 0; i < nr_pages; i++) { p = nth_page(page, i); @@ -1816,7 +1817,8 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, * on the head page when they need know if put_page() is needed * after get_user_pages(). */ - __ClearPageReserved(p); + if (i != 0) /* head page cleared above */ + __ClearPageReserved(p); /* * Subtle and very unlikely * @@ -5204,17 +5206,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); - /* - * Unlock and free the vma lock before releasing i_mmap_rwsem. When - * the vma_lock is freed, this makes the vma ineligible for pmd - * sharing. And, i_mmap_rwsem is required to set up pmd sharing. - * This is important as page tables for this unmapped range will - * be asynchrously deleted. If the page tables are shared, there - * will be issues when accessed by someone else. - */ - __hugetlb_vma_unlock_write_free(vma); - - i_mmap_unlock_write(vma->vm_file->f_mapping); + if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ + /* + * Unlock and free the vma lock before releasing i_mmap_rwsem. + * When the vma_lock is freed, this makes the vma ineligible + * for pmd sharing. And, i_mmap_rwsem is required to set up + * pmd sharing. This is important as page tables for this + * unmapped range will be asynchronously deleted. If the page + * tables are shared, there will be issues when accessed by + * someone else. + */ + __hugetlb_vma_unlock_write_free(vma); + i_mmap_unlock_write(vma->vm_file->f_mapping); + } else { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 7e496856c2eb..46ecea18c4ca 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -75,18 +75,23 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) { /* - * In case of tail calls from any of the below - * to any of the above. + * In case of tail calls from any of the below to any of + * the above, optimized by the compiler such that the + * stack trace would omit the initial entry point below. */ fallback = skipnr + 1; } - /* Also the *_bulk() variants by only checking prefixes. */ + /* + * The below list should only include the initial entry points + * into the slab allocators. Includes the *_bulk() variants by + * checking prefixes.
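+ * (__kmem_cache_free() moved to the tail-call list above: it is + * an internal function that entry points such as kfree() may + * tail-call, not an entry point of its own.)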
+ */ if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") || - str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc")) goto found; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 4734315f7940..3703a56571c1 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -97,8 +97,8 @@ struct collapse_control { /* Num pages scanned per node */ u32 node_load[MAX_NUMNODES]; - /* Last target selected in hpage_collapse_find_target_node() */ - int last_target_node; + /* nodemask for allocation fallback */ + nodemask_t alloc_nmask; }; /** @@ -734,7 +734,6 @@ static void khugepaged_alloc_sleep(void) struct collapse_control khugepaged_collapse_control = { .is_khugepaged = true, - .last_target_node = NUMA_NO_NODE, }; static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc) @@ -783,16 +782,11 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) target_node = nid; } - /* do some balance if several nodes have the same hit record */ - if (target_node <= cc->last_target_node) - for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES; - nid++) - if (max_value == cc->node_load[nid]) { - target_node = nid; - break; - } + for_each_online_node(nid) { + if (max_value == cc->node_load[nid]) + node_set(nid, cc->alloc_nmask); + } - cc->last_target_node = target_node; return target_node; } #else @@ -802,9 +796,10 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif -static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node) +static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node, + nodemask_t *nmask) { - *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER); + *hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask); if (unlikely(!*hpage)) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return false; @@ -955,12 +950,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm, static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, struct collapse_control *cc) { - /* Only allocate from the target node */ gfp_t gfp = (cc->is_khugepaged ? 
alloc_hugepage_khugepaged_gfpmask() : - GFP_TRANSHUGE) | __GFP_THISNODE; + GFP_TRANSHUGE); int node = hpage_collapse_find_target_node(cc); - if (!hpage_collapse_alloc_page(hpage, gfp, node)) + if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask)) return SCAN_ALLOC_HUGE_PAGE_FAIL; if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp))) return SCAN_CGROUP_CHARGE_FAIL; @@ -1057,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(&range); + tlb_remove_table_sync_one(); spin_lock(pte_ptl); result = __collapse_huge_page_isolate(vma, address, pte, cc, @@ -1144,6 +1139,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, goto out; memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); pte = pte_offset_map_lock(mm, pmd, address, &ptl); for (_address = address, _pte = pte; _pte < pte + HPAGE_PMD_NR; _pte++, _address += PAGE_SIZE) { @@ -1384,16 +1380,43 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr, return SCAN_SUCCEED; } +/* + * A note about locking: + * Trying to take the page table spinlocks would be useless here because those + * are only used to synchronize: + * + * - modifying terminal entries (ones that point to a data page, not to another + * page table) + * - installing *new* non-terminal entries + * + * Instead, we need roughly the same kind of protection as free_pgtables() or + * mm_take_all_locks() (but only for a single VMA): + * The mmap lock together with this VMA's rmap locks covers all paths towards + * the page table entries we're messing with here, except for hardware page + * table walks and lockless_pages_from_mm(). + */ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - spinlock_t *ptl; pmd_t pmd; + struct mmu_notifier_range range; mmap_assert_write_locked(mm); - ptl = pmd_lock(vma->vm_mm, pmdp); + if (vma->vm_file) + lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem); + /* + * All anon_vmas attached to the VMA have the same root and are + * therefore locked by the same lock. + */ + if (vma->anon_vma) + lockdep_assert_held_write(&vma->anon_vma->root->rwsem); + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr, + addr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); pmd = pmdp_collapse_flush(vma, addr, pmdp); - spin_unlock(ptl); + tlb_remove_table_sync_one(); + mmu_notifier_invalidate_range_end(&range); mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pmd); pte_free(mm, pmd_pgtable(pmd)); @@ -1444,6 +1467,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; + /* + * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings + * that got written to. Without this, we'd have to also lock the + * anon_vma if one exists. + */ + if (vma->anon_vma) + return SCAN_VMA_CHECK; + /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1477,6 +1508,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto drop_hpage; } + /* + * We need to lock the mapping so that from here on, only GUP-fast and + * hardware page walks can access the parts of the page tables that + * we're operating on. + * See collapse_and_free_pmd(). 
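+ * (The VMA is known to be file-backed at this point: the + * vma->anon_vma case bailed out above with SCAN_VMA_CHECK, so + * dereferencing vma->vm_file->f_mapping is safe.)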
+ */ + i_mmap_lock_write(vma->vm_file->f_mapping); + + /* + * This spinlock should be unnecessary: Nobody else should be accessing + * the page tables under spinlock protection here; only + * lockless_pages_from_mm() and the hardware page walker can access page + * tables while all the high-level locks are held in write mode. + */ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); result = SCAN_FAIL; @@ -1531,6 +1576,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, /* step 4: remove pte entries */ collapse_and_free_pmd(mm, vma, haddr, pmd); + i_mmap_unlock_write(vma->vm_file->f_mapping); + maybe_install_pmd: /* step 5: install pmd entry */ result = install_pmd @@ -1544,6 +1591,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + i_mmap_unlock_write(vma->vm_file->f_mapping); goto drop_hpage; } @@ -1600,7 +1648,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, * An alternative would be drop the check, but check that page * table is clear before calling pmdp_collapse_flush() under * ptl. It has higher chance to recover THP for the VMA, but - * has higher cost too. + * has higher cost too. It would also probably require locking + * the anon_vma. */ if (vma->anon_vma) { result = SCAN_PAGE_ANON; @@ -2077,6 +2126,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, present = 0; swap = 0; memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); rcu_read_lock(); xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) { if (xas_retry(&xas, page)) @@ -2157,8 +2207,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, } } - trace_mm_khugepaged_scan_file(mm, page, file->f_path.dentry->d_iname, - present, swap, result); + trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result); return result; } #else @@ -2576,7 +2625,6 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, if (!cc) return -ENOMEM; cc->is_khugepaged = false; - cc->last_target_node = NUMA_NO_NODE; mmgrab(mm); lru_add_drain_all(); @@ -2602,6 +2650,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, } mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, addr); diff --git a/mm/madvise.c b/mm/madvise.c index c7105ec6d08c..b913ba6efc10 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The - * zap_page_range call sets things up for shrink_active_list to actually free - * these pages later if no one else has touched them in the meantime, + * zap_page_range_single call sets things up for shrink_active_list to actually + * free these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for * shrink_active_list to pick up before reclaiming other pages.
* @@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - zap_page_range(vma, start, end - start); + zap_page_range_single(vma, start, end - start, NULL); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2d8549ae1b30..a1a35c12635e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3026,7 +3026,7 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) { struct obj_cgroup *objcg; - if (!memcg_kmem_enabled() || memcg_kmem_bypass()) + if (!memcg_kmem_enabled()) return NULL; if (PageMemcgKmem(page)) { diff --git a/mm/memory.c b/mm/memory.c index f88c351aecd4..8c8420934d60 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct folio *single_folio; /* Locked folio to be unmapped */ - bool even_cows; /* Zap COWed private pages too? */ - zap_flags_t zap_flags; /* Extra flags for zapping */ -}; - /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { @@ -1720,7 +1711,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, { struct mmu_notifier_range range; struct zap_details details = { - .zap_flags = ZAP_FLAG_DROP_MARKER, + .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, /* Careful - we need to zap private pages too! */ .even_cows = true, }; @@ -1774,19 +1765,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, * * The range must fit into one VMA. */ -static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { + const unsigned long end = address + size; struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, address + size); + address, end); + if (is_vm_hugetlb_page(vma)) + adjust_range_if_pmd_sharing_possible(vma, &range.start, + &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); - unmap_single_vma(&tlb, vma, address, range.end, details); + /* + * unmap 'address-end' not 'range.start-range.end' as range + * could have been expanded for hugetlb pmd sharing. + */ + unmap_single_vma(&tlb, vma, address, end, details); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } @@ -3763,7 +3762,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) */ get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - vmf->page->pgmap->ops->migrate_to_ram(vmf); + ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 6fa682eef7a0..721b2365dbca 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -357,7 +357,8 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page) } /* - * Unmaps pages for migration. Returns number of unmapped pages. + * Unmaps pages for migration. Returns number of source pfns marked as + * migrating. 
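+ * This can exceed the number of pages actually unmapped, because a + * source pfn with no backing page still counts as migrating when + * MIGRATE_PFN_MIGRATE is set.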
*/ static unsigned long migrate_device_unmap(unsigned long *src_pfns, unsigned long npages, @@ -373,8 +374,11 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns, struct page *page = migrate_pfn_to_page(src_pfns[i]); struct folio *folio; - if (!page) + if (!page) { + if (src_pfns[i] & MIGRATE_PFN_MIGRATE) + unmapped++; continue; + } /* ZONE_DEVICE pages are not on LRU */ if (!is_zone_device_page(page)) { diff --git a/mm/mmap.c b/mm/mmap.c index c3c5c1d6103d..a5eb2f175da0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -456,7 +456,7 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas) * vma_mas_szero() - Set a given range to zero. Used when modifying a * vm_area_struct start or end. * - * @mm: The struct_mm + * @mas: The maple tree ma_state * @start: The start address to zero * @end: The end address to zero. */ @@ -1779,9 +1779,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, */ pgoff = 0; get_area = shmem_get_unmapped_area; - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - /* Ensures that larger anonymous mappings are THP aligned. */ - get_area = thp_get_unmapped_area; } addr = get_area(file, addr, len, pgoff, flags); diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index add4244e5790..3a2c3f8cad2f 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg) /* Simply deliver the interrupt */ } -static void tlb_remove_table_sync_one(void) +void tlb_remove_table_sync_one(void) { /* * This isn't an RCU grace period and hence the page-tables cannot be @@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ -static void tlb_remove_table_sync_one(void) { } - static void tlb_remove_table_free(struct mmu_table_batch *batch) { __tlb_remove_table_free(batch); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 218b28ee49ed..6e60657875d3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3887,6 +3887,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0; + if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3897,10 +3899,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; + /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); } #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/page_ext.c b/mm/page_ext.c index affe80243b6d..ddf1968560f0 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -166,7 +166,7 @@ struct page_ext *page_ext_get(struct page *page) /** * page_ext_put() - Working with page extended information is done. - * @page_ext - Page extended information received from page_ext_get(). + * @page_ext: Page extended information received from page_ext_get(). * * The page extended information of the page may not be valid after this * function is called. 
diff --git a/mm/swapfile.c b/mm/swapfile.c index 5fc1237a9f21..72e481aacd5d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -973,23 +973,23 @@ done: scan: spin_unlock(&si->lock); while (++offset <= READ_ONCE(si->highest_bit)) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; } offset = si->lowest_bit; while (offset < scan_base) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; offset++; } spin_lock(&si->lock); diff --git a/mm/vmscan.c b/mm/vmscan.c index 04d8b88e5216..8fcc5fa768c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2514,8 +2514,20 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, * the flushers simply cannot keep up with the allocation * rate. Nudge the flusher threads in case they are asleep. */ - if (stat.nr_unqueued_dirty == nr_taken) + if (stat.nr_unqueued_dirty == nr_taken) { wakeup_flusher_threads(WB_REASON_VMSCAN); + /* + * For cgroupv1, dirty throttling is achieved by waking up + * the kernel flusher here and later waiting on folios + * which are in writeback to finish (see shrink_folio_list()). + * + * The flusher may not be able to issue writeback quickly + * enough for cgroupv1 writeback throttling to work + * on a large system. + */ + if (!writeback_throttling_sane(sc)) + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + } sc->nr.dirty += stat.nr_dirty; sc->nr.congested += stat.nr_congested; @@ -3975,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area goto next; if (!pmd_trans_huge(pmd[i])) { - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; @@ -4073,14 +4085,14 @@ restart: #endif walk->mm_stats[MM_NONLEAF_TOTAL]++; -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (arch_has_hw_nonleaf_pmd_young() && + get_cap(LRU_GEN_NONLEAF_YOUNG)) { if (!pmd_young(val)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); } -#endif + if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) continue; @@ -4971,10 +4983,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap int scanned; int reclaimed; LIST_HEAD(list); + LIST_HEAD(clean); struct folio *folio; + struct folio *next; enum vm_event_item item; struct reclaim_stat stat; struct lru_gen_mm_walk *walk; + bool skip_retry = false; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -4991,20 +5006,37 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap if (list_empty(&list)) return scanned; - +retry: reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false); + sc->nr_reclaimed += reclaimed; - list_for_each_entry(folio, &list, lru) { - /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ - if (folio_test_workingset(folio)) - folio_set_referenced(folio); + list_for_each_entry_safe_reverse(folio, next, &list, lru) { + if (!folio_evictable(folio)) { + list_del(&folio->lru); + folio_putback_lru(folio); + continue; + } - /* don't add rejected pages to the
oldest generation */ if (folio_test_reclaim(folio) && - (folio_test_dirty(folio) || folio_test_writeback(folio))) - folio_clear_active(folio); - else - folio_set_active(folio); + (folio_test_dirty(folio) || folio_test_writeback(folio))) { + /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ + if (folio_test_workingset(folio)) + folio_set_referenced(folio); + continue; + } + + if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) || + folio_mapped(folio) || folio_test_locked(folio) || + folio_test_dirty(folio) || folio_test_writeback(folio)) { + /* don't add rejected folios to the oldest generation */ + set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, + BIT(PG_active)); + continue; + } + + /* retry folios that may have missed folio_rotate_reclaimable() */ + list_move(&folio->lru, &clean); + sc->nr_scanned -= folio_nr_pages(folio); } spin_lock_irq(&lruvec->lru_lock); @@ -5026,7 +5058,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap mem_cgroup_uncharge_list(&list); free_unref_page_list(&list); - sc->nr_reclaimed += reclaimed; + INIT_LIST_HEAD(&list); + list_splice_init(&clean, &list); + + if (!list_empty(&list)) { + skip_retry = true; + goto retry; + } if (need_swapping && type == LRU_GEN_ANON) *need_swapping = true; @@ -5354,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) caps |= BIT(LRU_GEN_MM_WALK); - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); @@ -5844,8 +5882,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) enum lru_list lru; unsigned long nr_reclaimed = 0; unsigned long nr_to_reclaim = sc->nr_to_reclaim; + bool proportional_reclaim; struct blk_plug plug; - bool scan_adjusted; if (lru_gen_enabled()) { lru_gen_shrink_lruvec(lruvec, sc); @@ -5868,8 +5906,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) * abort proportional reclaim if either the file or anon lru has already * dropped to zero at the first pass. 
*/ - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && - sc->priority == DEF_PRIORITY); + proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -5889,7 +5927,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) cond_resched(); - if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + if (nr_reclaimed < nr_to_reclaim || proportional_reclaim) continue; /* @@ -5940,8 +5978,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) nr_scanned = targets[lru] - nr[lru]; nr[lru] = targets[lru] * (100 - percentage) / 100; nr[lru] -= min(nr[lru], nr_scanned); - - scan_adjusted = true; } blk_finish_plug(&plug); sc->nr_reclaimed += nr_reclaimed; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 56a186768750..07db2f436d44 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -120,7 +120,7 @@ struct p9_conn { struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; - char tmp_buf[7]; + char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; @@ -202,9 +202,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } spin_unlock(&m->req_lock); @@ -291,7 +293,7 @@ static void p9_read_work(struct work_struct *work) if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; - m->rc.capacity = 7; /* start by reading header */ + m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); @@ -314,7 +316,7 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ - m->rc.size = 7; + m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, @@ -322,14 +324,6 @@ static void p9_read_work(struct work_struct *work) goto error; } - if (m->rc.size >= m->client->msize) { - p9_debug(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", - m->rc.size); - err = -EIO; - goto error; - } - p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); @@ -342,6 +336,14 @@ static void p9_read_work(struct work_struct *work) goto error; } + if (m->rc.size > m->rreq->rc.capacity) { + p9_debug(P9_DEBUG_ERROR, + "requested packet size too big: %d for tag %d with capacity %zd\n", + m->rc.size, m->rc.tag, m->rreq->rc.capacity); + err = -EIO; + goto error; + } + if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", @@ -860,8 +862,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); - if (!p) + if (!p) { + sock_release(csocket); return -ENOMEM; + } csocket->sk->sk_allocation = GFP_NOIO; file = sock_alloc_file(csocket, 0, NULL); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index b15c64128c3e..aaa5fd364691 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -208,6 +208,14 @@ static void p9_xen_response(struct work_struct *work) continue; } + if (h.size > req->rc.capacity) { + dev_warn(&priv->dev->dev, + "requested packet size too big: %d for tag %d with capacity %zd\n", + h.size, 
h.tag, req->rc.capacity); + req->status = REQ_STATUS_ERROR; + goto recv_error; + } + memcpy(&req->rc, &h, sizeof(h)); req->rc.offset = 0; @@ -217,6 +225,7 @@ static void p9_xen_response(struct work_struct *work) masked_prod, &masked_cons, XEN_9PFS_RING_SIZE(ring)); +recv_error: virt_mb(); cons += h.size; ring->intf->in_cons = cons; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 25cd35f5922e..007730412947 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -296,7 +296,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, key->ct_zone = ct->zone.id; #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - key->ct_mark = ct->mark; + key->ct_mark = READ_ONCE(ct->mark); #endif cl = nf_ct_labels_find(ct); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 6fac2f0ef074..711cd3b4347a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -48,9 +48,11 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "RPL"; case LWTUNNEL_ENCAP_IOAM6: return "IOAM6"; + case LWTUNNEL_ENCAP_XFRM: + /* module autoload not supported for encap type */ + return NULL; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: - case LWTUNNEL_ENCAP_XFRM: case LWTUNNEL_ENCAP_NONE: case __LWTUNNEL_ENCAP_MAX: /* should not have got here */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a77a85e357e0..952a54763358 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -307,7 +307,31 @@ static int neigh_del_timer(struct neighbour *n) return 0; } -static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) +static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, + int family) +{ + switch (family) { + case AF_INET: + return __in_dev_arp_parms_get_rcu(dev); + case AF_INET6: + return __in6_dev_nd_parms_get_rcu(dev); + } + return NULL; +} + +static void neigh_parms_qlen_dec(struct net_device *dev, int family) +{ + struct neigh_parms *p; + + rcu_read_lock(); + p = neigh_get_dev_parms_rcu(dev, family); + if (p) + p->qlen--; + rcu_read_unlock(); +} + +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, + int family) { struct sk_buff_head tmp; unsigned long flags; @@ -321,13 +345,7 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) struct net_device *dev = skb->dev; if (net == NULL || net_eq(dev_net(dev), net)) { - struct in_device *in_dev; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, family); __skb_unlink(skb, list); __skb_queue_tail(&tmp, skb); } @@ -409,7 +427,8 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL); + pneigh_queue_purge(&tbl->proxy_queue, dev ? 
dev_net(dev) : NULL, + tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; @@ -1621,13 +1640,8 @@ static void neigh_proxy_process(struct timer_list *t) if (tdif <= 0) { struct net_device *dev = skb->dev; - struct in_device *in_dev; - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, tbl->family); __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) { @@ -1821,7 +1835,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue, NULL); + pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); @@ -3539,18 +3553,6 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write, return ret; } -static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, - int family) -{ - switch (family) { - case AF_INET: - return __in_dev_arp_parms_get_rcu(dev); - case AF_INET6: - return __in6_dev_nd_parms_get_rcu(dev); - } - return NULL; -} - static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, int index) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 713b7b8dad7e..b780827f5e0a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -45,11 +45,10 @@ static unsigned int dccp_v4_pernet_id __read_mostly; int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -91,26 +90,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) daddr = fl4->daddr; if (inet->inet_saddr == 0) { - if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = - inet_bhashfn_portaddr(&dccp_hashinfo, sk, - sock_net(sk), - inet->inet_num); - prev_sk_rcv_saddr = sk->sk_rcv_saddr; - } - inet->inet_saddr = fl4->saddr; - } - - sk_rcv_saddr_set(sk, inet->inet_saddr); - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); if (err) { - inet->inet_saddr = 0; - sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); ip_rt_put(rt); return err; } + } else { + sk_rcv_saddr_set(sk, inet->inet_saddr); } inet->inet_dport = usin->sin_port; @@ -157,6 +143,7 @@ failure: * This unhashes the socket and releases the local port, if necessary. 
*/ dccp_set_state(sk, DCCP_CLOSED); + inet_bhash2_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e57b43006074..602f3432d80b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -934,26 +934,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (saddr == NULL) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - struct in6_addr prev_v6_rcv_saddr; - - if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo, - sk, sock_net(sk), - inet->inet_num); - prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - } - saddr = &fl6.saddr; - sk->sk_v6_rcv_saddr = *saddr; - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); - if (err) { - sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; - goto failure; - } - } + + err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); + if (err) + goto failure; } /* set the source address */ @@ -985,6 +970,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: dccp_set_state(sk, DCCP_CLOSED); + inet_bhash2_reset_saddr(sk); __sk_dst_reset(sk); failure: inet->inet_dport = 0; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index c548ca3e9b0e..85e35c5e8890 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -279,8 +279,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index a50429a62f74..56bb27d67a2e 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -351,17 +351,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, struct hsr_node *node_src) { bool was_multicast_frame; - int res; + int res, recv_len; was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); hsr_addr_subst_source(node_src, skb); skb_pull(skb, ETH_HLEN); + recv_len = skb->len; res = netif_rx(skb); if (res == NET_RX_DROP) { dev->stats.rx_dropped++; } else { dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += recv_len; if (was_multicast_frame) dev->stats.multicast++; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4728087c42a5..0da679411330 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1230,7 +1230,6 @@ EXPORT_SYMBOL(inet_unregister_protosw); static int inet_sk_reselect_saddr(struct sock *sk) { - struct inet_bind_hashbucket *prev_addr_hashbucket; struct inet_sock *inet = inet_sk(sk); __be32 old_saddr = inet->inet_saddr; __be32 daddr = inet->inet_daddr; @@ -1260,16 +1259,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) return 0; } - prev_addr_hashbucket = - inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk, - sock_net(sk), inet->inet_num); - - inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; - - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET); if (err) { - inet->inet_saddr = old_saddr; - inet->inet_rcv_saddr = old_saddr; ip_rt_put(rt); return err; } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 170152772d33..3969fa805679 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -314,6 +314,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + 
xo->seq.hi++; + esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); ip_hdr(skb)->tot_len = htons(skb->len); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f721c308248b..19a662003eef 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -888,9 +888,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } - /* cannot match on nexthop object attributes */ - if (fi->nh) - return 1; + if (fi->nh) { + if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) + return 1; + return 0; + } if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 452ff177e4da..74d403dbd2b4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -126,7 +126,7 @@ struct key_vector { /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ struct hlist_head leaf; /* This array is valid if (pos | bits) > 0 (TNODE) */ - struct key_vector __rcu *tnode[0]; + DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode); }; }; @@ -1381,8 +1381,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, /* The alias was already inserted, so the node must exist. */ l = l ? l : fib_find_node(t, &tp, key); - if (WARN_ON_ONCE(!l)) + if (WARN_ON_ONCE(!l)) { + err = -ENOENT; goto out_free_new_fa; + } if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 033bf3c2538f..3cec471a2cd2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -858,34 +858,80 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } -int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk) +static void inet_update_saddr(struct sock *sk, void *saddr, int family) +{ + if (family == AF_INET) { + inet_sk(sk)->inet_saddr = *(__be32 *)saddr; + sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else { + sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr; + } +#endif +} + +static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset) { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); - struct inet_bind_hashbucket *head2; int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); + int bhash; + + if (!inet_csk(sk)->icsk_bind2_hash) { + /* Not bind()ed before. */ + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + + return 0; + } /* Allocate a bind2 bucket ahead of time to avoid permanently putting * the bhash2 table in an inconsistent state if a new tb2 bucket * allocation fails. */ new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC); - if (!new_tb2) + if (!new_tb2) { + if (reset) { + /* The (INADDR_ANY, port) bucket might have already + * been freed, then we cannot fixup icsk_bind2_hash, + * so we give up and unlink sk from bhash/bhash2 not + * to leave inconsistency in bhash2. 
+ */ + inet_put_port(sk); + inet_reset_saddr(sk); + } + return -ENOMEM; + } + bhash = inet_bhashfn(net, port, hinfo->bhash_size); + head = &hinfo->bhash[bhash]; head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); - if (prev_saddr) { - spin_lock_bh(&prev_saddr->lock); - __sk_del_bind2_node(sk); - inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, - inet_csk(sk)->icsk_bind2_hash); - spin_unlock_bh(&prev_saddr->lock); - } + /* If we change saddr locklessly, another thread + * iterating over bhash might see corrupted address. + */ + spin_lock_bh(&head->lock); - spin_lock_bh(&head2->lock); + spin_lock(&head2->lock); + __sk_del_bind2_node(sk); + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); + spin_unlock(&head2->lock); + + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + + spin_lock(&head2->lock); tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = new_tb2; @@ -893,15 +939,29 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc } sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; - spin_unlock_bh(&head2->lock); + spin_unlock(&head2->lock); + + spin_unlock_bh(&head->lock); if (tb2 != new_tb2) kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2); return 0; } + +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) +{ + return __inet_bhash2_update_saddr(sk, saddr, family, false); +} EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); +void inet_bhash2_reset_saddr(struct sock *sk) +{ + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + __inet_bhash2_update_saddr(sk, NULL, 0, true); +} +EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr); + /* RFC 6056 3.3.4. 
Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1b512390b3cf..e880ce77322a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -366,6 +366,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, iph->tos, dev); if (unlikely(err)) goto drop_error; + } else { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) + IPCB(skb)->flags |= IPSKB_NOPOLICY; } #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f8e176c77d1c..b3cc416ed292 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -435,7 +435,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (ctinfo) { case IP_CT_NEW: - ct->mark = hash; + WRITE_ONCE(ct->mark, hash); break; case IP_CT_RELATED: case IP_CT_RELATED_REPLY: @@ -452,7 +452,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) #ifdef DEBUG nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); + pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark)); if (!clusterip_responsible(cipinfo->config, hash)) { pr_debug("not responsible\n"); return NF_DROP; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 54836a6b81d6..4f2205756cfe 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3114,8 +3114,7 @@ int tcp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 87d440f47a70..da46357f501b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -199,15 +199,14 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_timewait_death_row *tcp_death_row; - __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct ip_options_rcu *inet_opt; struct net *net = sock_net(sk); __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -251,24 +250,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!inet->inet_saddr) { - if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, - sk, net, inet->inet_num); - prev_sk_rcv_saddr = sk->sk_rcv_saddr; - } - inet->inet_saddr = fl4->saddr; - } - - sk_rcv_saddr_set(sk, inet->inet_saddr); - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); if (err) { - inet->inet_saddr = 0; - sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); ip_rt_put(rt); return err; } + } else { + sk_rcv_saddr_set(sk, inet->inet_saddr); } if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { @@ -343,6 +331,7 @@ failure: * if necessary. 
*/ tcp_set_state(sk, TCP_CLOSE); + inet_bhash2_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 79d43548279c..242f4295940e 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -346,6 +346,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); len = skb->len - sizeof(struct ipv6hdr); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2a3f9296df1e..f0548dbcabd2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -292,24 +292,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - struct in6_addr prev_v6_rcv_saddr; - - if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, - sk, net, inet->inet_num); - prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - } saddr = &fl6.saddr; - sk->sk_v6_rcv_saddr = *saddr; - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); - if (err) { - sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; - goto failure; - } - } + err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); + if (err) + goto failure; } /* set the source address */ @@ -359,6 +346,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: tcp_set_state(sk, TCP_CLOSE); + inet_bhash2_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4a4b0e49ec92..ea435eba3053 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -287,9 +287,13 @@ int __init xfrm6_init(void) if (ret) goto out_state; - register_pernet_subsys(&xfrm6_net_ops); + ret = register_pernet_subsys(&xfrm6_net_ops); + if (ret) + goto out_protocol; out: return ret; +out_protocol: + xfrm6_protocol_fini(); out_state: xfrm6_state_fini(); out_policy: diff --git a/net/key/af_key.c b/net/key/af_key.c index c85df5b958d2..95edcbedf6ef 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2905,7 +2905,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2923,7 +2923,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { @@ -2934,16 +2934,17 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } -static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -2971,13 +2972,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + 
return sz + sizeof(*p); } -static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i, k; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -3019,8 +3024,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) @@ -3150,6 +3158,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; + int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -3161,16 +3170,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) - size += count_ah_combs(t); + alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) - size += count_esp_combs(t); + alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } - skb = alloc_skb(size + 16, GFP_ATOMIC); + skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -3224,10 +3233,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ + alg_size = 0; if (x->id.proto == IPPROTO_AH) - dump_ah_combs(skb, t); + alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) - dump_esp_combs(skb, t); + alg_size = dump_esp_combs(skb, t); + + hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { @@ -3382,7 +3394,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; - hdr->sadb_msg_seq = x->km.seq = get_acqseq(); + hdr->sadb_msg_seq = x->km.seq; hdr->sadb_msg_pid = 0; /* SA */ diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 754fdda8a5f5..9a1415fe3fa7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1474,11 +1474,12 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, } sk = sock->sk; - write_lock(&sk->sk_callback_lock); - + write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); if (ret < 0) - goto err_sock; + goto err_inval_sock; + rcu_assign_sk_user_data(sk, tunnel); + write_unlock_bh(&sk->sk_callback_lock); tunnel->l2tp_net = net; pn = l2tp_pernet(net); @@ -1507,8 +1508,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, }; setup_udp_tunnel_sock(net, sock, &udp_cfg); - } else { - rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1522,16 +1521,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); - write_unlock(&sk->sk_callback_lock); return 0; err_sock: + write_lock_bh(&sk->sk_callback_lock); + rcu_assign_sk_user_data(sk, NULL); +err_inval_sock: + write_unlock_bh(&sk->sk_callback_lock); + if (tunnel->fd < 0) sock_release(sock); else sockfd_put(sock); - - write_unlock(&sk->sk_callback_lock); err: return ret; } diff --git a/net/mac80211/airtime.c 
b/net/mac80211/airtime.c index 2e66598fac79..e8ebd343e2bf 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -452,6 +452,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, (status->encoding == RX_ENC_HE && streams > 8))) return 0; + if (idx >= MCS_GROUP_RATES) + return 0; + duration = airtime_mcs_groups[group].duration[idx]; duration <<= airtime_mcs_groups[group].shift; *overhead = 36 + (streams << 2); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b6dc6e260334..1dbc62537259 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2354,12 +2354,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out; } - /* if we are invoked by the msk cleanup code, the subflow is - * already orphaned - */ - if (ssk->sk_socket) - sock_orphan(ssk); - + sock_orphan(ssk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2940,7 +2935,11 @@ cleanup: if (ssk == msk->first) subflow->fail_tout = 0; - sock_orphan(ssk); + /* detach from the parent socket, but allow data_ready to + * push incoming data into the mptcp stack, to properly ack it + */ + ssk->sk_socket = NULL; + ssk->sk_wq = NULL; unlock_sock_fast(ssk, slow); } sock_orphan(sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 02a54d59697b..2159b5f9988f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1745,16 +1745,16 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) for (msk = head; msk; msk = next) { struct sock *sk = (struct sock *)msk; - bool slow, do_cancel_work; + bool do_cancel_work; sock_hold(sk); - slow = lock_sock_fast_nested(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; msk->first = NULL; msk->dl_next = NULL; do_cancel_work = __mptcp_close(sk, 0); - unlock_sock_fast(sk, slow); + release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); sock_put(sk); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3adc291d9ce1..7499192af586 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -916,7 +916,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_MULTI if (h->bucketsize >= AHASH_MAX_TUNED) goto set_full; - else if (h->bucketsize < multi) + else if (h->bucketsize <= multi) h->bucketsize += AHASH_INIT_SIZE; #endif if (n->size >= AHASH_MAX(h)) { diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index dd30c03d5a23..75d556d71652 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -151,18 +151,16 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) return -ERANGE; - if (retried) { + if (retried) ip = ntohl(h->next.ip); - e.ip = htonl(ip); - } for (; ip <= ip_to;) { + e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ip += hosts; - e.ip = htonl(ip); - if (e.ip == 0) + if (ip == 0) return 0; ret = 0; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f97bda06d2a9..2692139ce417 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1781,7 +1781,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } #ifdef CONFIG_NF_CONNTRACK_MARK - ct->mark = exp->master->mark; + ct->mark = READ_ONCE(exp->master->mark); #endif #ifdef 
CONFIG_NF_CONNTRACK_SECMARK ct->secmark = exp->master->secmark; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7562b215b932..d71150a40fb0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -328,9 +328,9 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark) { - if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; @@ -543,7 +543,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -722,6 +722,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) struct sk_buff *skb; unsigned int type; unsigned int flags = 0, group; + u32 mark; int err; if (events & (1 << IPCT_DESTROY)) { @@ -826,8 +827,9 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((events & (1 << IPCT_MARK) || ct->mark) - && ctnetlink_dump_mark(skb, ct) < 0) + mark = READ_ONCE(ct->mark); + if ((events & (1 << IPCT_MARK) || mark) && + ctnetlink_dump_mark(skb, mark) < 0) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -1154,7 +1156,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((ct->mark & filter->mark.mask) != filter->mark.val) + if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif status = (u32)READ_ONCE(ct->status); @@ -2002,9 +2004,9 @@ static void ctnetlink_change_mark(struct nf_conn *ct, mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); mark = ntohl(nla_get_be32(cda[CTA_MARK])); - newmark = (ct->mark & mask) ^ mark; - if (newmark != ct->mark) - ct->mark = newmark; + newmark = (READ_ONCE(ct->mark) & mask) ^ mark; + if (newmark != READ_ONCE(ct->mark)) + WRITE_ONCE(ct->mark, newmark); } #endif @@ -2669,6 +2671,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; + u32 mark; zone = nf_ct_zone(ct); @@ -2730,7 +2733,8 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) + mark = READ_ONCE(ct->mark); + if (mark && ctnetlink_dump_mark(skb, mark) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4ffe84c5a82c..bca839ab1ae8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -366,7 +366,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #if defined(CONFIG_NF_CONNTRACK_MARK) - seq_printf(s, "mark=%u ", ct->mark); + seq_printf(s, "mark=%u ", READ_ONCE(ct->mark)); #endif ct_show_secctx(s, ct); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index b04645ced89b..00b522890d77 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ 
b/net/netfilter/nf_flow_table_offload.c @@ -1098,6 +1098,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, struct flow_block_cb *block_cb, *next; int err = 0; + down_write(&flowtable->flow_block_lock); switch (cmd) { case FLOW_BLOCK_BIND: list_splice(&bo->cb_list, &flowtable->flow_block.cb_list); @@ -1112,6 +1113,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, WARN_ON_ONCE(1); err = -EOPNOTSUPP; } + up_write(&flowtable->flow_block_lock); return err; } @@ -1168,7 +1170,9 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); + down_write(&flowtable->flow_block_lock); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); + up_write(&flowtable->flow_block_lock); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e7152d599d73..7a09421f19e1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5958,7 +5958,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, &timeout); if (err) return err; - } else if (set->flags & NFT_SET_TIMEOUT) { + } else if (set->flags & NFT_SET_TIMEOUT && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { timeout = set->timeout; } @@ -6024,7 +6025,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EOPNOTSUPP; goto err_set_elem_expr; } - } else if (set->num_exprs > 0) { + } else if (set->num_exprs > 0 && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { err = nft_set_elem_expr_clone(ctx, set, expr_array); if (err < 0) goto err_set_elem_expr_clone; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a3f01f209a53..641dc21f92b4 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -98,7 +98,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - *dest = ct->mark; + *dest = READ_ONCE(ct->mark); return; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -297,8 +297,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr, switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - if (ct->mark != value) { - ct->mark = value; + if (READ_ONCE(ct->mark) != value) { + WRITE_ONCE(ct->mark, value); nf_conntrack_event_cache(IPCT_MARK, ct); } break; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e5ebc0810675..ad3c033db64e 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -30,6 +30,7 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; + u_int32_t oldmark; ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) @@ -37,14 +38,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) switch (info->mode) { case XT_CONNMARK_SET: - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + oldmark = READ_ONCE(ct->mark); + newmark = (oldmark & ~info->ctmask) ^ info->ctmark; if (info->shift_dir == D_SHIFT_RIGHT) newmark >>= info->shift_bits; else newmark <<= info->shift_bits; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; @@ -55,15 +57,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) else new_targetmark <<= info->shift_bits; - newmark = (ct->mark & ~info->ctmask) ^ + newmark = (READ_ONCE(ct->mark) & 
~info->ctmask) ^ new_targetmark; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: - new_targetmark = (ct->mark & info->ctmask); + new_targetmark = (READ_ONCE(ct->mark) & info->ctmask); if (info->shift_dir == D_SHIFT_RIGHT) new_targetmark >>= info->shift_bits; else @@ -126,7 +128,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - return ((ct->mark & info->mask) == info->mark) ^ info->invert; + return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert; } static int connmark_mt_check(const struct xt_mtchk_param *par) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6a193cce2a75..4ffdf2f45c44 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -542,7 +542,7 @@ static int nci_open_device(struct nci_dev *ndev) skb_queue_purge(&ndev->tx_q); ndev->ops->close(ndev); - ndev->flags = 0; + ndev->flags &= BIT(NCI_UNREG); } done: diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index aa5e712adf07..3d36ea5701f0 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -279,8 +279,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_plen(skb->data)); conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); - if (!conn_info) + if (!conn_info) { + kfree_skb(skb); return; + } /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c7b10234cf7c..c8eaf4234b2e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -152,7 +152,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) static u32 ovs_ct_get_mark(const struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - return ct ? ct->mark : 0; + return ct ? 
READ_ONCE(ct->mark) : 0; #else return 0; #endif @@ -340,9 +340,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) u32 new_mark; - new_mark = ct_mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); key->ct.mark = new_mark; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ce8dd19f33c..1ab65f7f2a0a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2293,8 +2293,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (snaplen > res) @@ -3520,8 +3519,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; aux.tp_len = origlen; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1ad0ec5afb50..8499ceb7719c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -399,6 +399,7 @@ enum rxrpc_conn_proto_state { struct rxrpc_bundle { struct rxrpc_conn_parameters params; refcount_t ref; + atomic_t active; /* Number of active users */ unsigned int debug_id; bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 3c9eeb5b750c..bdb335cb2d05 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. 
The @@ -123,6 +125,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, bundle->params = *cp; rxrpc_get_peer(bundle->params.peer); refcount_set(&bundle->ref, 1); + atomic_set(&bundle->active, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); } @@ -149,7 +152,7 @@ void rxrpc_put_bundle(struct rxrpc_bundle *bundle) dead = __refcount_dec_and_test(&bundle->ref, &r); - _debug("PUT B=%x %d", d, r); + _debug("PUT B=%x %d", d, r - 1); if (dead) rxrpc_free_bundle(bundle); } @@ -338,6 +341,7 @@ found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: rxrpc_get_bundle(bundle); + atomic_inc(&bundle->active); spin_unlock(&local->client_bundles_lock); _leave(" = %u [found]", bundle->debug_id); return bundle; @@ -435,6 +439,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) if (old) trace_rxrpc_client(old, -1, rxrpc_client_replace); candidate->bundle_shift = shift; + atomic_inc(&bundle->active); bundle->conns[i] = candidate; for (j = 0; j < RXRPC_MAXCALLS; j++) set_bit(shift + j, &bundle->avail_chans); @@ -725,6 +730,7 @@ granted_channel: smp_rmb(); out_put_bundle: + rxrpc_deactivate_bundle(bundle); rxrpc_put_bundle(bundle); out: _leave(" = %d", ret); @@ -900,9 +906,8 @@ out: static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; - struct rxrpc_local *local = bundle->params.local; unsigned int bindex; - bool need_drop = false, need_put = false; + bool need_drop = false; int i; _enter("C=%x", conn->debug_id); @@ -921,15 +926,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) } spin_unlock(&bundle->channel_lock); - /* If there are no more connections, remove the bundle */ - if (!bundle->avail_chans) { - _debug("maybe unbundle"); - spin_lock(&local->client_bundles_lock); + if (need_drop) { + rxrpc_deactivate_bundle(bundle); + rxrpc_put_connection(conn); + } +} - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) - if (bundle->conns[i]) - break; - if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) { +/* + * Drop the active count on a bundle. + */ +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +{ + struct rxrpc_local *local = bundle->params.local; + bool need_put = false; + + if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { + if (!bundle->params.exclusive) { _debug("erase bundle"); rb_erase(&bundle->local_node, &local->client_bundles); need_put = true; @@ -939,10 +951,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (need_put) rxrpc_put_bundle(bundle); } - - if (need_drop) - rxrpc_put_connection(conn); - _leave(""); } /* diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1e8ab4749c6c..4662a6ce8a7e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -976,7 +976,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE + depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. 
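A side note on the recurring pattern in this series: the conntrack hunks in flow_dissector, CLUSTERIP, nf_conntrack_core, ctnetlink, nf_conntrack_standalone, nft_ct, xt_connmark and openvswitch (and the tc actions that follow) all convert plain ct->mark accesses to the same lockless read-modify-write idiom. Below is a minimal userspace sketch of that idiom; READ_ONCE()/WRITE_ONCE() are modelled here as volatile accesses, and struct conn/conn_set_mark() are illustrative names, not the kernel's real nf_conn layout.

#include <stdint.h>

/* Model of the kernel's READ_ONCE()/WRITE_ONCE() annotations. */
#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, v)	(*(volatile typeof(x) *)&(x) = (v))

struct conn {
	uint32_t mark;	/* read and written without holding a lock */
};

static void conn_set_mark(struct conn *ct, uint32_t value, uint32_t mask)
{
	uint32_t old = READ_ONCE(ct->mark);	/* one racy but untorn load */
	uint32_t new = (old & ~mask) ^ value;	/* same mask/xor form as the hunks */

	if (old != new)		/* skip no-op stores, as the patches do */
		WRITE_ONCE(ct->mark, new);
}

The point of the conversion is not mutual exclusion but data-race hygiene: concurrent readers may legitimately observe either the old or the new mark, and the _ONCE() accessors keep the compiler from tearing, fusing or replaying those accesses.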
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 66b143bb04ac..d41002e4613f 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -61,7 +61,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_get(skb, &ctinfo); if (c) { - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; goto out; @@ -81,7 +81,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_tuplehash_to_ctrack(thash); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); nf_ct_put(c); out: diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b38d91d6b249..4c7f7861ea96 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -178,7 +178,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, entry = tcf_ct_flow_table_flow_action_get_next(action); entry->id = FLOW_ACTION_CT_METADATA; #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - entry->ct_metadata.mark = ct->mark; + entry->ct_metadata.mark = READ_ONCE(ct->mark); #endif ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; @@ -936,9 +936,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) if (!mask) return; - new_mark = mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index d4102f0a9abd..eaa02f098d1c 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -32,7 +32,7 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, { u8 dscp, newdscp; - newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) & + newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ~INET_ECN_MASK; switch (proto) { @@ -72,7 +72,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct sk_buff *skb) { ca->stats_cpmark_set++; - skb->mark = ct->mark & cp->cpmarkmask; + skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, @@ -130,7 +130,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, } if (cp->mode & CTINFO_MODE_DSCP) - if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask)) + if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); if (cp->mode & CTINFO_MODE_CPMARK) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index ef9fceadef8d..ee6514af830f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) } } +static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_sched_ops *sched; + + if (!SCTP_SO(stream, sid)->ext) + return; + + sched = sctp_sched_ops_from_stream(stream); + sched->free_sid(stream, sid); + kfree(SCTP_SO(stream, sid)->ext); + SCTP_SO(stream, sid)->ext = NULL; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. 
@@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, * sctp_stream_update will swap ->out pointers. */ for (i = 0; i < outcnt; i++) { - kfree(SCTP_SO(new, i)->ext); + sctp_stream_free_ext(new, i); SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; SCTP_SO(stream, i)->ext = NULL; } } - for (i = outcnt; i < stream->outcnt; i++) { - kfree(SCTP_SO(stream, i)->ext); - SCTP_SO(stream, i)->ext = NULL; - } + for (i = outcnt; i < stream->outcnt; i++) + sctp_stream_free_ext(stream, i); } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -174,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream) struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; - sched->free(stream); + sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) - kfree(SCTP_SO(stream, i)->ext); + sctp_stream_free_ext(stream, i); genradix_free(&stream->out); genradix_free(&stream->in); } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 1ad565ed5627..7c8f9d89e16a 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -46,6 +46,10 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_fcfs_free(struct sctp_stream *stream) { } @@ -96,6 +100,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, .init_sid = sctp_sched_fcfs_init_sid, + .free_sid = sctp_sched_fcfs_free_sid, .free = sctp_sched_fcfs_free, .enqueue = sctp_sched_fcfs_enqueue, .dequeue = sctp_sched_fcfs_dequeue, diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 80b5a2c4cbc7..4fc9f2923ed1 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -204,6 +204,24 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, return sctp_sched_prio_set(stream, sid, 0, gfp); } +static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head; + int i; + + if (!prio) + return; + + SCTP_SO(stream, sid)->ext->prio_head = NULL; + for (i = 0; i < stream->outcnt; i++) { + if (SCTP_SO(stream, i)->ext && + SCTP_SO(stream, i)->ext->prio_head == prio) + return; + } + + kfree(prio); +} + static void sctp_sched_prio_free(struct sctp_stream *stream) { struct sctp_stream_priorities *prio, *n; @@ -323,6 +341,7 @@ static struct sctp_sched_ops sctp_sched_prio = { .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, .init_sid = sctp_sched_prio_init_sid, + .free_sid = sctp_sched_prio_free_sid, .free = sctp_sched_prio_free, .enqueue = sctp_sched_prio_enqueue, .dequeue = sctp_sched_prio_dequeue, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index ff425aed62c7..cc444fe0d67c 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -90,6 +90,10 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_rr_free(struct sctp_stream *stream) { sctp_sched_rr_unsched_all(stream); @@ -177,6 +181,7 @@ static struct sctp_sched_ops sctp_sched_rr = { .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, .init_sid = sctp_sched_rr_init_sid, + .free_sid = sctp_sched_rr_free_sid, .free = sctp_sched_rr_free, .enqueue = sctp_sched_rr_enqueue, .dequeue = 
sctp_sched_rr_dequeue, diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index f09316a9035f..d67440de011e 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1971,6 +1971,9 @@ rcv: /* Ok, everything's fine, try to synch own keys according to peers' */ tipc_crypto_key_synch(rx, *skb); + /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ + skb_cb = TIPC_SKB_CB(*skb); + /* Mark skb decrypted */ skb_cb->decrypted = 1; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index e8630707901e..e8dcdf267c0c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -211,7 +211,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u32 self; int err; - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } hdr = buf_msg(skb); if (caps & TIPC_NODE_ID128) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index d92ec92f0b71..e3b427a70398 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -176,7 +176,7 @@ static void tipc_conn_close(struct tipc_conn *con) conn_put(con); } -static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) { struct tipc_conn *con; int ret; @@ -202,10 +202,12 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) } con->conid = ret; s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); set_bit(CF_CONNECTED, &con->flags); con->server = s; + con->sock = sock; + conn_get(con); + spin_unlock_bh(&s->idr_lock); return con; } @@ -467,7 +469,7 @@ static void tipc_topsrv_accept(struct work_struct *work) ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) return; - con = tipc_conn_alloc(srv); + con = tipc_conn_alloc(srv, newsock); if (IS_ERR(con)) { ret = PTR_ERR(con); sock_release(newsock); @@ -479,11 +481,11 @@ static void tipc_topsrv_accept(struct work_struct *work) newsk->sk_data_ready = tipc_conn_data_ready; newsk->sk_write_space = tipc_conn_write_space; newsk->sk_user_data = con; - con->sock = newsock; write_unlock_bh(&newsk->sk_callback_lock); /* Wake up receive process in case of 'SYN+' message */ newsk->sk_data_ready(newsk); + conn_put(con); } } @@ -577,17 +579,17 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.filter = filter; *(u64 *)&sub.usr_handle = (u64)port; - con = tipc_conn_alloc(tipc_topsrv(net)); + con = tipc_conn_alloc(tipc_topsrv(net), NULL); if (IS_ERR(con)) return false; *conid = con->conid; - con->sock = NULL; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); - if (rc >= 0) - return true; + if (rc) + conn_put(con); + conn_put(con); - return false; + return !rc; } void tipc_topsrv_kern_unsubscr(struct net *net, int conid) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index da752b0cc752..3d86482e83f5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -330,7 +330,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, * determine if they are the same ie. 
*/ if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { - if (!memcmp(tmp_old + 2, tmp + 2, 5)) { + if (tmp_old[1] >= 5 && tmp[1] >= 5 && + !memcmp(tmp_old + 2, tmp + 2, 5)) { /* same vendor ie, copy from * subelement */ @@ -2526,10 +2527,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const struct cfg80211_bss_ies *ies1, *ies2; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - struct cfg80211_non_tx_bss non_tx_data; + struct cfg80211_non_tx_bss non_tx_data = {}; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); + + /* don't do any further MBSSID handling for S1G */ + if (ieee80211_is_s1g_beacon(mgmt->frame_control)) + return res; + if (!res || !wiphy->support_mbssid || !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f5aafd418af..21269e8f2db4 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -97,6 +97,18 @@ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) } } +static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + __u32 seq = xo->seq.low; + + seq += skb_shinfo(skb)->gso_segs; + if (unlikely(seq < xo->seq.low)) + return true; + + return false; +} + struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; @@ -134,7 +146,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { + if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || + unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 9f4d42eb090f..ce56d659c55a 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -714,7 +714,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff oseq += skb_shinfo(skb)->gso_segs; } - if (unlikely(oseq < replay_esn->oseq)) { + if (unlikely(xo->seq.low < replay_esn->oseq)) { XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; xo->seq.hi = oseq_hi; replay_esn->oseq_hi = oseq_hi; diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 8bbcced67c22..2a90139ecbe1 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -30,8 +30,8 @@ KBUILD_PKG_ROOTCMD ?="fakeroot -u" export KDEB_SOURCENAME # Include only those top-level files that are needed by make, plus the GPL copy TAR_CONTENT := Documentation LICENSES arch block certs crypto drivers fs \ - include init io_uring ipc kernel lib mm net samples scripts \ - security sound tools usr virt \ + include init io_uring ipc kernel lib mm net rust \ + samples scripts security sound tools usr virt \ .config .scmversion Makefile \ Kbuild Kconfig COPYING $(wildcard localversion*) MKSPEC := $(srctree)/scripts/package/mkspec diff --git a/scripts/faddr2line b/scripts/faddr2line index 5514c23f45c2..0e73aca4f908 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -74,7 +74,8 @@ command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | + ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && 
return local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) @@ -178,7 +179,7 @@ __faddr2line() { found=2 break fi - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) if [[ $found = 0 ]]; then warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size" @@ -259,7 +260,7 @@ __faddr2line() { DONE=1 - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') } [[ $# -lt 2 ]] && usage diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index b7aee23fc387..47ef6bc30c0e 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -113,15 +113,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event); * expand the variable length event to linear buffer space. */ -static int seq_copy_in_kernel(char **bufptr, const void *src, int size) +static int seq_copy_in_kernel(void *ptr, void *src, int size) { + char **bufptr = ptr; + memcpy(*bufptr, src, size); *bufptr += size; return 0; } -static int seq_copy_in_user(char __user **bufptr, const void *src, int size) +static int seq_copy_in_user(void *ptr, void *src, int size) { + char __user **bufptr = ptr; + if (copy_to_user(*bufptr, src, size)) return -EFAULT; *bufptr += size; @@ -151,8 +155,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char return newlen; } err = snd_seq_dump_var_event(event, - in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel : - (snd_seq_dump_func_t)seq_copy_in_user, + in_kernel ? seq_copy_in_kernel : seq_copy_in_user, &buf); return err < 0 ? 
err : newlen; } diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index f99e00083141..4c677c8546c7 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate, static int select_clock(struct snd_dice *dice, unsigned int rate) { - __be32 reg; + __be32 reg, new; u32 data; int i; int err; @@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate) if (completion_done(&dice->clock_accepted)) reinit_completion(&dice->clock_accepted); - reg = cpu_to_be32(data); + new = cpu_to_be32(data); err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT, - ®, sizeof(reg)); + &new, sizeof(new)); if (err < 0) return err; if (wait_for_completion_timeout(&dice->clock_accepted, - msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) - return -ETIMEDOUT; + msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) { + if (reg != new) + return -ETIMEDOUT; + } return 0; } diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 51721edd8f53..e88d9ff95cdf 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -143,7 +143,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0xA0, 96, adc_att_tlv), SOC_DOUBLE_R_SX_TLV("PGA Volume", CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, - 0, 0x19, 30, pga_tlv), + 0, 0x1A, 30, pga_tlv), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h index fc19c34ca00e..b65560981abb 100644 --- a/sound/soc/codecs/hdac_hda.h +++ b/sound/soc/codecs/hdac_hda.h @@ -14,7 +14,7 @@ enum { HDAC_HDMI_1_DAI_ID, HDAC_HDMI_2_DAI_ID, HDAC_HDMI_3_DAI_ID, - HDAC_LAST_DAI_ID = HDAC_HDMI_3_DAI_ID, + HDAC_DAI_ID_NUM }; struct hdac_hda_pcm { @@ -24,7 +24,7 @@ struct hdac_hda_pcm { struct hdac_hda_priv { struct hda_codec *codec; - struct hdac_hda_pcm pcm[HDAC_LAST_DAI_ID]; + struct hdac_hda_pcm pcm[HDAC_DAI_ID_NUM]; bool need_display_power; }; diff --git a/sound/soc/codecs/max98373-i2c.c b/sound/soc/codecs/max98373-i2c.c index 3e04c7f0cce4..ec0905df65d1 100644 --- a/sound/soc/codecs/max98373-i2c.c +++ b/sound/soc/codecs/max98373-i2c.c @@ -549,6 +549,10 @@ static int max98373_i2c_probe(struct i2c_client *i2c) max98373->cache = devm_kcalloc(&i2c->dev, max98373->cache_num, sizeof(*max98373->cache), GFP_KERNEL); + if (!max98373->cache) { + ret = -ENOMEM; + return ret; + } for (i = 0; i < max98373->cache_num; i++) max98373->cache[i].reg = max98373_i2c_cache_reg[i]; diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index 4120842fe699..88a8392a58ed 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -230,7 +230,7 @@ static int rt711_sdca_read_prop(struct sdw_slave *slave) } /* set the timeout values */ - prop->clk_stop_timeout = 20; + prop->clk_stop_timeout = 700; /* wake-up event */ prop->wake_capable = 1; diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 4b2135eba74d..a916f4619ea3 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1794,6 +1794,7 @@ static void sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_CLK_CTRL, SGTL5000_CHIP_CLK_CTRL_DEFAULT); 
regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index a969547708d4..52bb55724724 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -14,6 +14,7 @@ #include <dt-bindings/sound/tlv320adc3xxx.h> #include <linux/clk.h> +#include <linux/gpio/consumer.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/io.h> @@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = { static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx) { +#ifdef CONFIG_GPIOLIB gpiochip_remove(&adc3xxx->gpio_chip); +#endif } static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b4b4355c6728..b901e4c65e8a 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2503,6 +2503,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component) snd_soc_component_update_bits(component, WM8962_CLOCKING2, WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA); + /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate + * correct frequency of LRCLK and BCLK. Sometimes the read-only value + * can't be updated timely after enabling SYSCLK. This results in wrong + * calculation values. Delay is introduced here to wait for newest + * value from register. The time of the delay should be at least + * 500~1000us according to test. + */ + usleep_range(500, 1000); dspclk = snd_soc_component_read(component, WM8962_CLOCKING1); if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 79ef4e269bc9..4b86ef82fd93 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -194,6 +194,25 @@ static int fsl_micfil_reset(struct device *dev) if (ret) return ret; + /* + * SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined + * as non-volatile register, so SRES still remain in regmap + * cache after set, that every update of REG_MICFIL_CTRL1, + * software reset happens. so clear it explicitly. + */ + ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1, + MICFIL_CTRL1_SRES); + if (ret) + return ret; + + /* + * Set SRES should clear CHnF flags, But even add delay here + * the CHnF may not be cleared sometimes, so clear CHnF explicitly. 
+ */ + ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF); + if (ret) + return ret; + return 0; } diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index bd88de056358..55b009d3c681 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0]; if (mc->platform_max && val > mc->platform_max) return -EINVAL; - if (val > max - min) + if (val > max) return -EINVAL; val_mask = mask << shift; val = (val + min) & mask; @@ -464,10 +464,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, ret = err; if (snd_soc_volsw_is_stereo(mc)) { - unsigned int val2; + unsigned int val2 = ucontrol->value.integer.value[1]; + + if (mc->platform_max && val2 > mc->platform_max) + return -EINVAL; + if (val2 > max) + return -EINVAL; val_mask = mask << rshift; - val2 = (ucontrol->value.integer.value[1] + min) & mask; + val2 = (val2 + min) & mask; val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index fb87d6d23408..35a16c3f9591 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -822,11 +822,6 @@ static int __soc_pcm_open(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_dai_startup(dai, substream); if (ret < 0) goto err; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - dai->tx_mask = 0; - else - dai->rx_mask = 0; } /* Dynamic PCM DAI links compat checks use dynamic capabilities */ @@ -1252,6 +1247,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, return; be_substream = snd_soc_dpcm_get_substream(be, stream); + if (!be_substream) + return; for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index ce7f6942308f..f3dd9f8e621c 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -1077,7 +1077,7 @@ static int stm32_i2s_parse_dt(struct platform_device *pdev, if (irq < 0) return irq; - ret = devm_request_irq(&pdev->dev, irq, stm32_i2s_isr, IRQF_ONESHOT, + ret = devm_request_irq(&pdev->dev, irq, stm32_i2s_isr, 0, dev_name(&pdev->dev), i2s); if (ret) { dev_err(&pdev->dev, "irq request returned %d\n", ret); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 184ce1684dcd..91b7106a4a73 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11169,7 +11169,7 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } *link = bpf_program__attach_raw_tracepoint(prog, tp_name); - return libbpf_get_error(link); + return libbpf_get_error(*link); } /* Common logic for all BPF program types that attach to a btf_id */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index f3a8e8e74eb8..d504d96adc83 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -234,7 +234,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_USER_RINGBUF: key_size = 0; value_size = 0; - max_entries = 4096; + max_entries = sysconf(_SC_PAGE_SIZE); break; case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index d285171d4b69..6af142953a94 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -77,6 +77,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, __u32 len = sizeof(info); struct epoll_event *e; struct ring *r; + __u64 mmap_sz; void *tmp; int err; 
@@ -115,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, r->mask = info.max_entries - 1; /* Map writable consumer page */ - tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - map_fd, 0); + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", @@ -129,8 +129,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. * */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, - MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); + return libbpf_err(-E2BIG); + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; ringbuf_unmap_ring(rb, r); @@ -348,6 +352,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) { struct bpf_map_info info; __u32 len = sizeof(info); + __u64 mmap_sz; void *tmp; struct epoll_event *rb_epoll; int err; @@ -384,8 +389,13 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) * simple reading and writing of samples that wrap around the end of * the buffer. See the kernel implementation for details. */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, - PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries); + return -E2BIG; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", @@ -476,6 +486,10 @@ void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size) __u64 cons_pos, prod_pos; struct ringbuf_hdr *hdr; + /* The top two bits are used as special flags */ + if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT)) + return errno = E2BIG, NULL; + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in * the kernel. 
*/ diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index cfe343306e08..c60c90f35d18 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -1462,7 +1462,7 @@ class Data: 'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*', 'ABORT' : r'(?i).*\bABORT\b.*', 'IRQ' : r'.*\bgenirq: .*', - 'TASKFAIL': r'.*Freezing of tasks *.*', + 'TASKFAIL': r'.*Freezing .*after *.*', 'ACPI' : r'.*\bACPI *(?P<b>[A-Za-z]*) *Error[: ].*', 'DISKFULL': r'.*\bNo space left on device.*', 'USBERR' : r'.*usb .*device .*, error [0-9-]*', diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index 099eb4dfd4f7..18405c3b7cee 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -458,7 +458,7 @@ static void test_sk_storage_map_basic(void) struct { int cnt; int lock; - } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; + } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value; struct bpf_map_create_opts bad_xattr; int btf_fd, map_fd, sk_fd, err; @@ -483,38 +483,41 @@ static void test_sk_storage_map_basic(void) "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST | BPF_F_LOCK); CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST); CHECK(err, "bpf_map_update_elem(BPF_EXIST)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Update with BPF_NOEXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST | BPF_F_LOCK); CHECK(!err || errno != EEXIST, @@ -526,22 +529,23 @@ static void test_sk_storage_map_basic(void) value.cnt -= 1; err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + 
err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt again and update with map_flags == 0 */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &sk_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index d457a55ff408..a4b4133d39e9 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -358,10 +358,12 @@ static int get_syms(char ***symsp, size_t *cntp) * We attach to almost all kernel functions and some of them * will cause 'suspicious RCU usage' when fprobe is attached * to them. Filter out the current culprits - arch_cpu_idle - * and rcu_* functions. + * default_idle and rcu_* functions. */ if (!strcmp(name, "arch_cpu_idle")) continue; + if (!strcmp(name, "default_idle")) + continue; if (!strncmp(name, "rcu_", 4)) continue; if (!strcmp(name, "bpf_dispatcher_xdp_func")) @@ -400,7 +402,7 @@ error: return err; } -static void test_bench_attach(void) +void serial_test_kprobe_multi_bench_attach(void) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; @@ -468,6 +470,4 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); - if (test__start_subtest("bench_attach")) - test_bench_attach(); } diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2f0d705db9db..05d980fb083d 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -41,6 +41,7 @@ /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test /x86_64/svm_nested_soft_inject_test +/x86_64/svm_nested_shutdown_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/tsc_scaling_sync diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..4a2caef2c939 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -101,6 +101,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e8ca0d8a6a7e..5da0c5e2a7af 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -748,6 +748,19 @@ struct ex_regs { uint64_t rflags; }; +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl 
: 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 39c4409ef56a..41c1c73c464d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1074,19 +1074,6 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) } } -struct idt_entry { - uint16_t offset0; - uint16_t selector; - uint16_t ist : 3; - uint16_t : 5; - uint16_t type : 4; - uint16_t : 1; - uint16_t dpl : 2; - uint16_t p : 1; - uint16_t offset1; - uint32_t offset2; uint32_t reserved; -}; - static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c new file mode 100644 index 000000000000..e73fcdef47bb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_nested_shutdown_test + * + * Copyright (C) 2022, Red Hat, Inc. + * + * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +static void l2_guest_code(struct svm_test_data *svm) +{ + __asm__ __volatile__("ud2"); +} + +static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + idt[6].p = 0; // #UD is intercepted but its injection will cause #NP + idt[11].p = 0; // #NP is not intercepted and will cause another + // #NP that will be converted to #DF + idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here */ + GUEST_ASSERT(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + vm_vaddr_t svm_gva; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vcpu_alloc_svm(vm, &svm_gva); + + vcpu_args_set(vcpu, 2, svm_gva, vm->idt); + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c index 70b44f0b52fe..ead5d878a71c 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -3,6 +3,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -20,10 +21,11 @@ static void l2_guest_code(void) : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); } -void l1_guest_code(struct 
vmx_pages *vmx) -{ #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; +unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + +void l1_guest_code_vmx(struct vmx_pages *vmx) +{ GUEST_ASSERT(vmx->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx)); @@ -38,24 +40,53 @@ void l1_guest_code(struct vmx_pages *vmx) GUEST_DONE(); } +void l1_guest_code_svm(struct svm_test_data *svm) +{ + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* don't intercept shutdown to test the case of SVM allowing to do so */ + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here, L1 should crash */ + GUEST_ASSERT(0); +} + int main(void) { struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vcpu_events events; - vm_vaddr_t vmx_pages_gva; struct ucall uc; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX); + bool has_svm = kvm_cpu_has(X86_FEATURE_SVM); + + TEST_REQUIRE(has_vmx || has_svm); TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); + if (has_vmx) { + vm_vaddr_t vmx_pages_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + } else { + vm_vaddr_t svm_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); + } + + vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); run = vcpu->run; - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -78,13 +109,21 @@ int main(void) "No triple fault pending"); vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } + if (has_svm) { + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + } else { + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } + return 0; } diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 31c3b6ebd388..21ca91473c09 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4196,10 +4196,13 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -which nettest >/dev/null -if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! 
which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit $ksft_skip + fi fi declare -i nfail=0 diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ee5e98204d3d..a47b26ab48f2 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1228,6 +1228,17 @@ ipv4_fcnal() run_cmd "$IP ro add 172.16.101.0/24 nhid 21" run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" log_test $? 2 "Delete multipath route with only nh id based entry" + + run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.102.0/24 nhid 22" + run_cmd "$IP ro del 172.16.102.0/24 dev veth1" + log_test $? 2 "Delete route when specifying only nexthop device" + + run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" + log_test $? 2 "Delete route when specifying only gateway" + + run_cmd "$IP ro del 172.16.102.0/24" + log_test $? 0 "Delete route when not specifying nexthop attributes" } ipv4_grp_fcnal() diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f3dd5f2a0272..2eeaf4aca644 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2152,7 +2152,7 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 speed_10 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -2165,7 +2165,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert @@ -2178,7 +2178,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 0879da915014..80d36f7cfee8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -35,8 +35,9 @@ init() ns1="ns1-$rndh" ns2="ns2-$rndh" + ns_sbox="ns_sbox-$rndh" - for netns in "$ns1" "$ns2";do + for netns in "$ns1" "$ns2" "$ns_sbox";do ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 @@ -73,7 +74,7 @@ init() cleanup() { - for netns in "$ns1" "$ns2"; do + for netns in "$ns1" "$ns2" "$ns_sbox"; do ip netns del $netns done rm -f "$cin" "$cout" @@ -243,7 +244,7 @@ do_mptcp_sockopt_tests() { local lret=0 - ./mptcp_sockopt + ip netns exec "$ns_sbox" ./mptcp_sockopt lret=$? if [ $lret -ne 0 ]; then @@ -252,7 +253,7 @@ do_mptcp_sockopt_tests() return fi - ./mptcp_sockopt -6 + ip netns exec "$ns_sbox" ./mptcp_sockopt -6 lret=$? 
if [ $lret -ne 0 ]; then diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ffa13a957a36..40aeb5a71a2a 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -247,9 +247,10 @@ run_test() tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 - # time is measured in ms, account for transfer size, affegated link speed + # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%) - local time=$((size * 8 * 1000 * 10 / (( $rate1 + $rate2) * 1024 *1024 * 9) )) + # ms byte -> bit 10% mbit -> kbit -> bit 10% + local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) )) # mptcp_connect will do some sleeps to allow the mp_join handshake # completion (see mptcp_connect): 200ms on each side, add some slack diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 736e358dc549..dfe3d287f01d 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -686,10 +686,12 @@ setup_xfrm() { } setup_nettest_xfrm() { - which nettest >/dev/null - if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - return 1 + if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + return 1 + fi fi [ ${1} -eq 6 ] && proto="-6" || proto="" diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 6a443ca3cd3a..0c743752669a 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + # set global exit status, but never reset nonzero one. check_err() { @@ -34,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp } run_one() { @@ -195,8 +197,8 @@ run_all() { return $ret } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 8a1109a545db..894972877e8b 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -34,7 +36,7 @@ run_one() { ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & @@ -80,8 +82,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. 
Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 7fe85ba51075..c9c4b9d65839 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -36,7 +38,7 @@ run_one() { ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action @@ -81,8 +83,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 1bcd82e1f662..c079565add39 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +BPF_FILE="../bpf/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 @@ -46,7 +47,7 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null + ip -n $NS_DST link set veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null } create_vxlan_endpoint() { diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 430895d1a2b6..2d073595c620 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +BPF_FILE="../bpf/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -216,8 +217,8 @@ while getopts "hs:" option; do esac done -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit 1 fi @@ -288,14 +289,14 @@ if [ $CPUS -gt 1 ]; then ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null printf "%-60s" "bad setting: XDP with RX nr less than TX" - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null &&\ echo "fail - set operation successful ?!?" 
|| echo " ok " # the following tests will run with multiple channels active ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 ip netns exec $NS_DST ethtool -L veth$DST rx 2 - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ @@ -311,7 +312,7 @@ if [ $CPUS -gt 2 ]; then chk_channels "setting invalid channels nr" $DST 2 2 fi -ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC off diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 26e193ffd2a2..873a892147e5 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -150,7 +150,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-loss" `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" @@ -159,7 +159,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-size" `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 25d7872b29c1..fab4d3790578 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1198,8 +1198,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) goto out_err_no_arch_destroy_vm; } - kvm->max_halt_poll_ns = halt_poll_ns; - r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_arch_destroy_vm; @@ -3377,9 +3375,6 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) if (val < grow_start) val = grow_start; - if (val > vcpu->kvm->max_halt_poll_ns) - val = vcpu->kvm->max_halt_poll_ns; - vcpu->halt_poll_ns = val; out: trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); @@ -3483,6 +3478,24 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, } } +static unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + if (kvm->override_halt_poll_ns) { + /* + * Ensure kvm->max_halt_poll_ns is not read before + * kvm->override_halt_poll_ns. + * + * Pairs with the smp_wmb() when enabling KVM_CAP_HALT_POLL. + */ + smp_rmb(); + return READ_ONCE(kvm->max_halt_poll_ns); + } + + return READ_ONCE(halt_poll_ns); +} + /* * Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... 
If halt * polling is enabled, busy wait for a short time before blocking to avoid the @@ -3491,12 +3504,18 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, */ void kvm_vcpu_halt(struct kvm_vcpu *vcpu) { + unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); bool halt_poll_allowed = !kvm_arch_no_poll(vcpu); - bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; ktime_t start, cur, poll_end; bool waited = false; + bool do_halt_poll; u64 halt_ns; + if (vcpu->halt_poll_ns > max_halt_poll_ns) + vcpu->halt_poll_ns = max_halt_poll_ns; + + do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; + start = cur = poll_end = ktime_get(); if (do_halt_poll) { ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); @@ -3535,18 +3554,21 @@ out: update_halt_poll_stats(vcpu, start, poll_end, !waited); if (halt_poll_allowed) { + /* Recompute the max halt poll time in case it changed. */ + max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); + if (!vcpu_valid_wakeup(vcpu)) { shrink_halt_poll_ns(vcpu); - } else if (vcpu->kvm->max_halt_poll_ns) { + } else if (max_halt_poll_ns) { if (halt_ns <= vcpu->halt_poll_ns) ; /* we had a long block, shrink polling */ else if (vcpu->halt_poll_ns && - halt_ns > vcpu->kvm->max_halt_poll_ns) + halt_ns > max_halt_poll_ns) shrink_halt_poll_ns(vcpu); /* we had a short halt and our poll time is too small */ - else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns && - halt_ns < vcpu->kvm->max_halt_poll_ns) + else if (vcpu->halt_poll_ns < max_halt_poll_ns && + halt_ns < max_halt_poll_ns) grow_halt_poll_ns(vcpu); } else { vcpu->halt_poll_ns = 0; @@ -4581,6 +4603,16 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, return -EINVAL; kvm->max_halt_poll_ns = cap->args[0]; + + /* + * Ensure kvm->override_halt_poll_ns does not become visible + * before kvm->max_halt_poll_ns. + * + * Pairs with the smp_rmb() in kvm_vcpu_max_halt_poll_ns(). + */ + smp_wmb(); + kvm->override_halt_poll_ns = true; + return 0; } case KVM_CAP_DIRTY_LOG_RING: diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 346e47f15572..7c248193ca26 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -297,7 +297,12 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, if (!gpc->valid || old_uhva != gpc->uhva) { ret = hva_to_pfn_retry(kvm, gpc); } else { - /* If the HVA→PFN mapping was already valid, don't unmap it. */ + /* + * If the HVA→PFN mapping was already valid, don't unmap it. + * But do update gpc->khva because the offset within the page + * may have changed. + */ + gpc->khva = old_khva + page_offset; old_pfn = KVM_PFN_ERR_FAULT; old_khva = NULL; ret = 0; |
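The tools/lib/bpf/ringbuf.c hunks above compute the size of the double-mapped ring region in 64 bits and reject it when it cannot be represented in size_t. The following standalone sketch (not part of the series; the helper name and the sample ring sizes are invented for illustration) shows the same round-trip-cast guard in isolation:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the mmap_sz check added in ringbuf.c. The
 * ring's data area is mapped twice, so the region spans
 * page_size + 2 * max_entries bytes. Computing that in 64 bits and
 * requiring it to survive a round trip through size_t rejects rings
 * that a 32-bit mmap() could only map after silent truncation. */
static int ring_mmap_size_ok(uint32_t page_size, uint32_t max_entries,
			     uint64_t *out_sz)
{
	uint64_t mmap_sz = (uint64_t)page_size + 2 * (uint64_t)max_entries;

	if (mmap_sz != (uint64_t)(size_t)mmap_sz)
		return 0;	/* would not fit in size_t (e.g. 32-bit userspace) */
	*out_sz = mmap_sz;
	return 1;
}

int main(void)
{
	uint64_t sz;

	/* A 256 KiB ring fits everywhere. */
	printf("256 KiB ring: %s\n",
	       ring_mmap_size_ok(4096, 256 * 1024, &sz) ? "ok" : "too big");

	/* A 2 GiB ring doubles to more than 4 GiB: rejected where size_t
	 * is 32 bits, still accepted on LP64 hosts. */
	printf("2 GiB ring:   %s\n",
	       ring_mmap_size_ok(4096, 1u << 31, &sz) ? "ok" : "too big");
	return 0;
}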
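The virt/kvm/kvm_main.c change pairs an smp_wmb() in the KVM_CAP_HALT_POLL handler with an smp_rmb() in kvm_vcpu_max_halt_poll_ns() so that override_halt_poll_ns never becomes visible before the max_halt_poll_ns value it guards. A rough single-threaded userspace analogue, using C11 release/acquire atomics in place of the kernel barriers (the struct and function names are made up for this example), looks like this:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustration of the publish/consume ordering: store the value first,
 * publish the flag with release semantics, and have readers observe the
 * flag with acquire semantics before trusting the value. */
struct halt_poll_cfg {
	unsigned int max_poll_ns;	/* guarded by the override flag */
	atomic_bool override;		/* publication flag */
};

static unsigned int module_default_ns = 500000;

static void enable_override(struct halt_poll_cfg *cfg, unsigned int ns)
{
	cfg->max_poll_ns = ns;
	/* Release store: max_poll_ns may not be reordered after the flag,
	 * mirroring the smp_wmb() in the KVM_CAP_HALT_POLL path. */
	atomic_store_explicit(&cfg->override, true, memory_order_release);
}

static unsigned int effective_poll_ns(struct halt_poll_cfg *cfg)
{
	/* Acquire load: pairs with the release store above, mirroring the
	 * smp_rmb() in kvm_vcpu_max_halt_poll_ns(). */
	if (atomic_load_explicit(&cfg->override, memory_order_acquire))
		return cfg->max_poll_ns;
	return module_default_ns;
}

int main(void)
{
	struct halt_poll_cfg cfg = { .max_poll_ns = 0 };

	printf("before override: %u ns\n", effective_poll_ns(&cfg));
	enable_override(&cfg, 200000);
	printf("after override:  %u ns\n", effective_poll_ns(&cfg));
	return 0;
}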