219 files changed, 2875 insertions, 1200 deletions
@@ -391,6 +391,7 @@
 Sebastian Reichel <[email protected]> <[email protected]>
 Sebastian Reichel <[email protected]> <[email protected]>
 Sedat Dilek <[email protected]> <[email protected]>
 Seth Forshee <[email protected]> <[email protected]>
+Shannon Nelson <[email protected]> <[email protected]>
 Shiraz Hashim <[email protected]> <[email protected]>
 Shuah Khan <[email protected]> <[email protected]>
 Shuah Khan <[email protected]> <[email protected]>
diff --git a/Documentation/cpu-freq/index.rst b/Documentation/cpu-freq/index.rst
index aba7831ab1cb..2fe32dad562a 100644
--- a/Documentation/cpu-freq/index.rst
+++ b/Documentation/cpu-freq/index.rst
@@ -20,18 +20,15 @@ Author: Dominik Brodowski <[email protected]>
 Mailing List
 ------------
-There is a CPU frequency changing CVS commit and general list where
-you can report bugs, problems or submit patches. To post a message,
-send an email to [email protected].
+There is a CPU frequency general list where you can report bugs,
+problems or submit patches. To post a message, send an email to
+[email protected].
 
 Links
 -----
 the FTP archives:
 * ftp://ftp.linux.org.uk/pub/linux/cpufreq/
 
-how to access the CVS repository:
-* http://cvs.arm.linux.org.uk/
-
 the CPUFreq Mailing list:
 * http://vger.kernel.org/vger-lists.html#linux-pm
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
index 2ab4642679c0..55c4f94a14d1 100644
--- a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
@@ -148,7 +148,7 @@ allOf:
         items:
           - const: oscclk
           - const: dout_clkcmu_fsys1_bus
-          - const: dout_clkcmu_fsys1_mmc_card
+          - const: gout_clkcmu_fsys1_mmc_card
           - const: dout_clkcmu_fsys1_usbdrd
 
   - if:
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
index 24fa3d87a40b..903b31129f01 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
@@ -25,6 +25,7 @@ properties:
       - description: v2 of CPUFREQ HW (EPSS)
         items:
           - enum:
+              - qcom,qdu1000-cpufreq-epss
              - qcom,sm6375-cpufreq-epss
              - qcom,sm8250-cpufreq-epss
          - const: qcom,cpufreq-epss
@@ -56,6 +57,9 @@ properties:
   '#freq-domain-cells':
     const: 1
 
+  '#clock-cells':
+    const: 1
+
 required:
   - compatible
   - reg
@@ -83,11 +87,16 @@ examples:
        enable-method = "psci";
        next-level-cache = <&L2_0>;
        qcom,freq-domain = <&cpufreq_hw 0>;
+       clocks = <&cpufreq_hw 0>;
        L2_0: l2-cache {
            compatible = "cache";
+           cache-unified;
+           cache-level = <2>;
            next-level-cache = <&L3_0>;
            L3_0: l3-cache {
                compatible = "cache";
+               cache-unified;
+               cache-level = <3>;
            };
        };
    };
@@ -99,8 +108,11 @@ examples:
        enable-method = "psci";
        next-level-cache = <&L2_100>;
        qcom,freq-domain = <&cpufreq_hw 0>;
+       clocks = <&cpufreq_hw 0>;
        L2_100: l2-cache {
            compatible = "cache";
+           cache-unified;
+           cache-level = <2>;
            next-level-cache = <&L3_0>;
        };
    };
@@ -112,8 +124,11 @@ examples:
        enable-method = "psci";
        next-level-cache = <&L2_200>;
        qcom,freq-domain = <&cpufreq_hw 0>;
+       clocks = <&cpufreq_hw 0>;
        L2_200: l2-cache {
            compatible = "cache";
+           cache-unified;
+           cache-level = <2>;
            next-level-cache = <&L3_0>;
        };
    };
@@ -125,8 +140,11 @@ examples:
        enable-method = "psci";
        next-level-cache = <&L2_300>;
        qcom,freq-domain = <&cpufreq_hw 0>;
+       clocks = <&cpufreq_hw 0>;
        L2_300: l2-cache {
            compatible = "cache";
+           cache-unified;
+           cache-level = <2>;
            next-level-cache = <&L3_0>;
        };
    };
@@ -138,8 +156,11 @@ examples:
        enable-method = "psci";
        next-level-cache = <&L2_400>;
        qcom,freq-domain = <&cpufreq_hw 1>;
+       clocks = <&cpufreq_hw 1>;
        L2_400: l2-cache {
            compatible = "cache";
+           cache-unified;
+           cache-level = <2>;
            next-level-cache = <&L3_0>;
        };
    };
@@ -151,8 +172,11 @@ examples:
        enable-method = "psci";
        next-level-cache = <&L2_500>;
        qcom,freq-domain = <&cpufreq_hw 1>;
+       clocks = <&cpufreq_hw 1>;
        L2_500: l2-cache {
            compatible = "cache";
+           cache-unified;
+           cache-level = <2>;
            next-level-cache = <&L3_0>;
        };
    };
@@ -164,8 +188,11 @@ examples:
        enable-method = "psci";
        next-level-cache = <&L2_600>;
        qcom,freq-domain = <&cpufreq_hw 1>;
+       clocks = <&cpufreq_hw 1>;
        L2_600: l2-cache {
            compatible = "cache";
+           cache-unified;
+           cache-level = <2>;
            next-level-cache = <&L3_0>;
        };
    };
@@ -177,8 +204,11 @@ examples:
        enable-method = "psci";
        next-level-cache = <&L2_700>;
        qcom,freq-domain = <&cpufreq_hw 1>;
+       clocks = <&cpufreq_hw 1>;
        L2_700: l2-cache {
            compatible = "cache";
+           cache-unified;
+           cache-level = <2>;
            next-level-cache = <&L3_0>;
        };
    };
@@ -197,6 +227,7 @@ examples:
        clock-names = "xo", "alternate";
 
        #freq-domain-cells = <1>;
+       #clock-cells = <1>;
    };
 };
...
diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
index 66d0ec763f0b..cf9c2f7bddc2 100644
--- a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
@@ -108,7 +108,7 @@ patternProperties:
           The power for the OPP in micro-Watts.
 
           Entries for multiple regulators shall be provided in the same field
-          separated by angular brackets <>. If current values aren't required
+          separated by angular brackets <>. If power values aren't required
           for a regulator, then it shall be filled with 0. If power values aren't
           required for any of the regulators, then this field is not required.
           The OPP binding doesn't provide any provisions to relate the
@@ -230,9 +230,9 @@ patternProperties:
       minItems: 1
       maxItems: 8   # Should be enough regulators
 
-    '^opp-microwatt':
+    '^opp-microwatt-':
      description:
-        Named opp-microwatt property. Similar to opp-microamp property,
+        Named opp-microwatt property. Similar to opp-microamp-<name> property,
         but for microwatt instead.
      $ref: /schemas/types.yaml#/definitions/uint32-array
      minItems: 1
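A consumer does not parse opp-v2 nodes like the ones above by hand; the OPP library exposes them once a device's table is registered. A minimal sketch of walking such a table (dump_opps is a hypothetical helper, not a kernel API; error handling trimmed):

    #include <linux/pm_opp.h>

    static void dump_opps(struct device *dev)
    {
            unsigned long freq = 0;
            struct dev_pm_opp *opp;

            /* dev_pm_opp_find_freq_ceil() returns the next OPP at or above *freq */
            while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
                    dev_info(dev, "%lu Hz @ %lu uV\n", freq,
                             dev_pm_opp_get_voltage(opp));
                    dev_pm_opp_put(opp);
                    freq++;     /* step past the OPP just returned */
            }
    }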
diff --git a/Documentation/devicetree/bindings/opp/opp-v2.yaml b/Documentation/devicetree/bindings/opp/opp-v2.yaml
index eaf8fba2c691..2f05920fce79 100644
--- a/Documentation/devicetree/bindings/opp/opp-v2.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v2.yaml
@@ -155,7 +155,7 @@ examples:
             opp-hz = /bits/ 64 <1200000000>;
             opp-microvolt = <1025000>;
             opp-microamp = <90000>;
-            lock-latency-ns = <290000>;
+            clock-latency-ns = <290000>;
             turbo-mode;
         };
     };
diff --git a/MAINTAINERS b/MAINTAINERS
index 69565ac0c224..19c7bdfa26fd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5585,8 +5585,6 @@ F:	drivers/scsi/cxgbi/cxgb3i
 
 CXGB4 CRYPTO DRIVER (chcr)
 M:	Ayush Sawal <[email protected]>
-M:	Vinay Kumar Yadav <[email protected]>
-M:	Rohit Maheshwari <[email protected]>
 S:	Supported
 W:	http://www.chelsio.com
@@ -5594,8 +5592,6 @@ F:	drivers/crypto/chelsio
 
 CXGB4 INLINE CRYPTO DRIVER
 M:	Ayush Sawal <[email protected]>
-M:	Vinay Kumar Yadav <[email protected]>
-M:	Rohit Maheshwari <[email protected]>
 S:	Supported
 W:	http://www.chelsio.com
@@ -16149,7 +16145,8 @@ F:	include/linux/peci-cpu.h
 F:	include/linux/peci.h
 
 PENSANDO ETHERNET DRIVERS
-M:	Shannon Nelson <[email protected]>
+M:	Shannon Nelson <[email protected]>
+M:	Brett Creeley <[email protected]>
 S:	Supported
@@ -17485,10 +17482,8 @@ S:	Maintained
 F:	drivers/net/wireless/realtek/rtw89/
 
 REDPINE WIRELESS DRIVER
-M:	Amitkumar Karwar <[email protected]>
-M:	Siva Rebbagondla <[email protected]>
-S:	Maintained
+S:	Orphan
 F:	drivers/net/wireless/rsi/
 
 REGISTER MAP ABSTRACTION
@@ -20009,6 +20004,7 @@ F:	drivers/clk/clk-sc[mp]i.c
 F:	drivers/cpufreq/sc[mp]i-cpufreq.c
 F:	drivers/firmware/arm_scmi/
 F:	drivers/firmware/arm_scpi.c
+F:	drivers/powercap/arm_scmi_powercap.c
 F:	drivers/regulator/scmi-regulator.c
 F:	drivers/reset/reset-scmi.c
 F:	include/linux/sc[mp]i_protocol.h
diff --git a/Makefile b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi
index 7a113325abb9..6f9004ebf424 100644
--- a/arch/arm/boot/dts/at91rm9200.dtsi
+++ b/arch/arm/boot/dts/at91rm9200.dtsi
@@ -666,7 +666,7 @@
                compatible = "atmel,at91rm9200-udc";
                reg = <0xfffb0000 0x4000>;
                interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>;
-               clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>;
+               clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>;
                clock-names = "pclk", "hclk";
                status = "disabled";
            };
diff --git a/arch/arm64/boot/dts/ti/k3-am625-sk.dts b/arch/arm64/boot/dts/ti/k3-am625-sk.dts
index 93a5f0817efc..4620ef5e19bb 100644
--- a/arch/arm64/boot/dts/ti/k3-am625-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am625-sk.dts
@@ -31,6 +31,15 @@
        bootargs = "console=ttyS2,115200n8 earlycon=ns16550a,mmio32,0x02800000";
    };
 
+   opp-table {
+       /* Add 1.4GHz OPP for am625-sk board. Requires VDD_CORE to be at 0.85V */
+       opp-1400000000 {
+           opp-hz = /bits/ 64 <1400000000>;
+           opp-supported-hw = <0x01 0x0004>;
+           clock-latency-ns = <6000000>;
+       };
+   };
+
    memory@80000000 {
        device_type = "memory";
        /* 2G RAM */
diff --git a/arch/arm64/boot/dts/ti/k3-am625.dtsi b/arch/arm64/boot/dts/ti/k3-am625.dtsi
index 887f31c23fef..cea2cc7de5dd 100644
--- a/arch/arm64/boot/dts/ti/k3-am625.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am625.dtsi
@@ -48,6 +48,8 @@
            d-cache-line-size = <64>;
            d-cache-sets = <128>;
            next-level-cache = <&L2_0>;
+           operating-points-v2 = <&a53_opp_table>;
+           clocks = <&k3_clks 135 0>;
        };
 
        cpu1: cpu@1 {
@@ -62,6 +64,8 @@
            d-cache-line-size = <64>;
            d-cache-sets = <128>;
            next-level-cache = <&L2_0>;
+           operating-points-v2 = <&a53_opp_table>;
+           clocks = <&k3_clks 136 0>;
        };
 
        cpu2: cpu@2 {
@@ -76,6 +80,8 @@
            d-cache-line-size = <64>;
            d-cache-sets = <128>;
            next-level-cache = <&L2_0>;
+           operating-points-v2 = <&a53_opp_table>;
+           clocks = <&k3_clks 137 0>;
        };
 
        cpu3: cpu@3 {
@@ -90,6 +96,51 @@
            d-cache-line-size = <64>;
            d-cache-sets = <128>;
            next-level-cache = <&L2_0>;
+           operating-points-v2 = <&a53_opp_table>;
+           clocks = <&k3_clks 138 0>;
+       };
+   };
+
+   a53_opp_table: opp-table {
+       compatible = "operating-points-v2-ti-cpu";
+       opp-shared;
+       syscon = <&wkup_conf>;
+
+       opp-200000000 {
+           opp-hz = /bits/ 64 <200000000>;
+           opp-supported-hw = <0x01 0x0007>;
+           clock-latency-ns = <6000000>;
+       };
+
+       opp-400000000 {
+           opp-hz = /bits/ 64 <400000000>;
+           opp-supported-hw = <0x01 0x0007>;
+           clock-latency-ns = <6000000>;
+       };
+
+       opp-600000000 {
+           opp-hz = /bits/ 64 <600000000>;
+           opp-supported-hw = <0x01 0x0007>;
+           clock-latency-ns = <6000000>;
+       };
+
+       opp-800000000 {
+           opp-hz = /bits/ 64 <800000000>;
+           opp-supported-hw = <0x01 0x0007>;
+           clock-latency-ns = <6000000>;
+       };
+
+       opp-1000000000 {
+           opp-hz = /bits/ 64 <1000000000>;
+           opp-supported-hw = <0x01 0x0006>;
+           clock-latency-ns = <6000000>;
+       };
+
+       opp-1250000000 {
+           opp-hz = /bits/ 64 <1250000000>;
+           opp-supported-hw = <0x01 0x0004>;
+           clock-latency-ns = <6000000>;
+           opp-suspend;
        };
    };
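The opp-supported-hw cells above are what gate the faster OPPs to capable silicon: per the opp-v2-base binding, the platform reads matching version words from the SoC (here via the wkup_conf syscon) and an OPP is usable only if every cell ANDs non-zero against its word. A hedged model of that check, not the actual ti-cpufreq implementation:

    /* true when each OPP mask word matches the SoC's corresponding word */
    static bool opp_hw_supported(const u32 *opp_mask, const u32 *soc_words,
                                 unsigned int count)
    {
            unsigned int i;

            for (i = 0; i < count; i++)
                    if (!(opp_mask[i] & soc_words[i]))
                            return false;
            return true;
    }

On this reading, <0x01 0x0007> plausibly marks OPPs valid for all three speed grades, while <0x01 0x0004> restricts 1.25 GHz (and the SK board's 1.4 GHz) to the fastest grade.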
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index d6cf535d8352..439e2bc5d5d8 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -14,16 +14,8 @@
 
 #ifdef CONFIG_EFI
 extern void efi_init(void);
-
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg);
 #else
 #define efi_init()
-
-static inline
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
-{
-   return false;
-}
 #endif
 
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 67babd5f04c2..75691a2641c1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -6,7 +6,7 @@
 #include <linux/linkage.h>
 
 SYM_FUNC_START(__efi_rt_asm_wrapper)
-   stp x29, x30, [sp, #-112]!
+   stp x29, x30, [sp, #-32]!
    mov x29, sp
 
    /*
@@ -17,20 +17,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
    stp x1, x18, [sp, #16]
 
    /*
-    * Preserve all callee saved registers and record the stack pointer
-    * value in a per-CPU variable so we can recover from synchronous
-    * exceptions occurring while running the firmware routines.
-    */
-   stp x19, x20, [sp, #32]
-   stp x21, x22, [sp, #48]
-   stp x23, x24, [sp, #64]
-   stp x25, x26, [sp, #80]
-   stp x27, x28, [sp, #96]
-
-   adr_this_cpu x8, __efi_rt_asm_recover_sp, x9
-   str x29, [x8]
-
-   /*
     * We are lucky enough that no EFI runtime services take more than
     * 5 arguments, so all are passed in registers rather than via the
     * stack.
@@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
    ldp x1, x2, [sp, #16]
    cmp x2, x18
-   ldp x29, x30, [sp], #112
+   ldp x29, x30, [sp], #32
    b.ne    0f
    ret
 0:
@@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
    mov x18, x2
    b   efi_handle_corrupted_x18    // tail call
 SYM_FUNC_END(__efi_rt_asm_wrapper)
-
-SYM_FUNC_START(__efi_rt_asm_recover)
-   ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9
-   mov sp, x8
-
-   ldp x0, x18, [sp, #16]
-   ldp x19, x20, [sp, #32]
-   ldp x21, x22, [sp, #48]
-   ldp x23, x24, [sp, #64]
-   ldp x25, x26, [sp, #80]
-   ldp x27, x28, [sp, #96]
-   ldp x29, x30, [sp], #112
-
-   b   efi_handle_runtime_exception
-SYM_FUNC_END(__efi_rt_asm_recover)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index ee53f2a0aa03..a908a37f0367 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -9,7 +9,6 @@
 
 #include <linux/efi.h>
 #include <linux/init.h>
-#include <linux/percpu.h>
 
 #include <asm/efi.h>
 
@@ -145,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
    pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f);
    return s;
 }
-
-asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp);
-
-asmlinkage efi_status_t __efi_rt_asm_recover(void);
-
-asmlinkage efi_status_t efi_handle_runtime_exception(const char *f)
-{
-   pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f);
-   clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
-   return EFI_ABORTED;
-}
-
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
-{
-   /* Check whether the exception occurred while running the firmware */
-   if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64)
-       return false;
-
-   pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg);
-   add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
-   dump_stack();
-
-   regs->pc = (u64)__efi_rt_asm_recover;
-   return true;
-}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 3e9cf9826417..5b391490e045 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -30,7 +30,6 @@
 #include <asm/bug.h>
 #include <asm/cmpxchg.h>
 #include <asm/cpufeature.h>
-#include <asm/efi.h>
 #include <asm/exception.h>
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
@@ -392,9 +391,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
        msg = "paging request";
    }
 
-   if (efi_runtime_fixup_exception(regs, msg))
-       return;
-
    die_kernel_fault(msg, addr, esr, regs);
 }
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index aa0e0e0d4ee5..79d5bfd913e0 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -490,6 +490,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
    return pmd;
 }
 
+#define pmd_young pmd_young
 static inline int pmd_young(pmd_t pmd)
 {
    return !!(pmd_val(pmd) & _PAGE_ACCESSED);
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 6caec386ad2f..4678627673df 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
    return pmd;
 }
 
+#define pmd_young pmd_young
 static inline int pmd_young(pmd_t pmd)
 {
    return !!(pmd_val(pmd) & _PAGE_ACCESSED);
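The repeated "#define pmd_young pmd_young" lines look odd but follow a common kernel idiom: defining the symbol to itself lets generic headers detect, at preprocessing time, that the architecture supplies a real implementation. The generic side pairs with it roughly like this (a simplified sketch of the fallback added to include/linux/pgtable.h in the same cycle):

    #ifndef pmd_young
    static inline int pmd_young(pmd_t pmd)
    {
            /* arch exposes no accessed bit on non-leaf entries */
            return 0;
    }
    #endif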
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 4745bb9998bd..6d8492b6e2b8 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs)
 
 /* kernel/traps.c */
 DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
 #ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot);
 DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
 #endif
 DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 43f1c76d48ce..a379b0ce19ff 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
    int i;
 
-   /* First arg comes in as a 32 bits pointer. */
-   EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
-   EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
+   /* Initialize tail_call_cnt, to be skipped if we do tail calls. */
+   EMIT(PPC_RAW_LI(_R4, 0));
+
+#define BPF_TAILCALL_PROLOGUE_SIZE 4
+
    EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
 
-   /*
-    * Initialize tail_call_cnt in stack frame if we do tail calls.
-    * Otherwise, put in NOPs so that it can be skipped when we are
-    * invoked through a tail call.
-    */
    if (ctx->seen & SEEN_TAILCALL)
-       EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1,
-                bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
-   else
-       EMIT(PPC_RAW_NOP());
+       EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
 
-#define BPF_TAILCALL_PROLOGUE_SIZE 16
+   /* First arg comes in as a 32 bits pointer. */
+   EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
+   EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
 
    /*
     * We need a stack frame, but we don't necessarily need to
@@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
    for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
        if (bpf_is_seen_register(ctx, i))
            EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
-}
-
-void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
-{
-   EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
-
-   bpf_jit_emit_common_epilogue(image, ctx);
-
-   /* Tear down our stack frame */
 
    if (ctx->seen & SEEN_FUNC)
        EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
 
+   /* Tear down our stack frame */
    EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
 
    if (ctx->seen & SEEN_FUNC)
        EMIT(PPC_RAW_MTLR(_R0));
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+   EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
+
+   bpf_jit_emit_common_epilogue(image, ctx);
+
    EMIT(PPC_RAW_BLR());
 }
 
@@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
    EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
    EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
    EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
-   EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
 
    /*
     * if (prog == NULL)
@@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
 
    /* goto *(prog->bpf_func + prologue_size); */
    EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
-
-   if (ctx->seen & SEEN_FUNC)
-       EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
-
    EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
-
-   if (ctx->seen & SEEN_FUNC)
-       EMIT(PPC_RAW_MTLR(_R0));
-
    EMIT(PPC_RAW_MTCTR(_R3));
 
    EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
 
+   /* Put tail_call_cnt in r4 */
+   EMIT(PPC_RAW_MR(_R4, _R0));
+
+   /* tear down stack, restore NVRs, ... */
    bpf_jit_emit_common_epilogue(image, ctx);
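The prologue/epilogue reshuffle above keeps tail_call_cnt in r4 rather than on the stack. For orientation, the control flow every BPF JIT must emit for a tail call is, in rough C (a model of the eBPF semantics, not the emitted instructions):

    if (index >= array->map.max_entries)
            goto out;
    if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
            goto out;
    tail_call_cnt++;
    prog = array->ptrs[index];
    if (prog == NULL)
            goto out;
    goto *(prog->bpf_func + prologue_size);

Jumping past BPF_TAILCALL_PROLOGUE_SIZE bytes skips the "li r4, 0" that would otherwise reset the counter, which is why the prologue now initializes tail_call_cnt first.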
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index fa78595a6089..593cf09264d8 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -317,9 +317,9 @@ config SMP
 config NR_CPUS
    int "Maximum number of CPUs (2-512)"
    depends on SMP
-   range 2 512 if !SBI_V01
-   range 2 32 if SBI_V01 && 32BIT
-   range 2 64 if SBI_V01 && 64BIT
+   range 2 512 if !RISCV_SBI_V01
+   range 2 32 if RISCV_SBI_V01 && 32BIT
+   range 2 64 if RISCV_SBI_V01 && 64BIT
    default "32" if 32BIT
    default "64" if 64BIT
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 1b471ff73178..816e753de636 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -23,6 +23,7 @@
 #define REG_L      __REG_SEL(ld, lw)
 #define REG_S      __REG_SEL(sd, sw)
 #define REG_SC     __REG_SEL(sc.d, sc.w)
+#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq)
 #define REG_ASM    __REG_SEL(.dword, .word)
 #define SZREG      __REG_SEL(8, 4)
 #define LGREG      __REG_SEL(3, 2)
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
index f74879a8f1ea..e229d7be4b66 100644
--- a/arch/riscv/include/asm/efi.h
+++ b/arch/riscv/include/asm/efi.h
@@ -10,6 +10,7 @@
 #include <asm/mmu_context.h>
 #include <asm/ptrace.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 
 #ifdef CONFIG_EFI
 extern void efi_init(void);
@@ -20,7 +21,10 @@ extern void efi_init(void);
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
 
-#define arch_efi_call_virt_setup()  efi_virtmap_load()
+#define arch_efi_call_virt_setup()  ({          \
+       sync_kernel_mappings(efi_mm.pgd);    \
+       efi_virtmap_load();          \
+   })
 #define arch_efi_call_virt_teardown()   efi_virtmap_unload()
 
 #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 947f23d7b6af..59dc12b5b7e8 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 #define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
 #endif /* __PAGETABLE_PMD_FOLDED */
 
+static inline void sync_kernel_mappings(pgd_t *pgd)
+{
+   memcpy(pgd + USER_PTRS_PER_PGD,
+          init_mm.pgd + USER_PTRS_PER_PGD,
+          (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
    pgd_t *pgd;
@@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
    if (likely(pgd != NULL)) {
        memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
        /* Copy kernel mappings */
-       memcpy(pgd + USER_PTRS_PER_PGD,
-              init_mm.pgd + USER_PTRS_PER_PGD,
-              (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+       sync_kernel_mappings(pgd);
    }
    return pgd;
 }
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7ec936910a96..92ec2d9d7273 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd)
    return pte_dirty(pmd_pte(pmd));
 }
 
+#define pmd_young pmd_young
 static inline int pmd_young(pmd_t pmd)
 {
    return pte_young(pmd_pte(pmd));
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index d3443be7eedc..3831b638ecab 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
 /* Clear IPI for current CPU */
 void riscv_clear_ipi(void);
 
+/* Check other CPUs stop or not */
+bool smp_crash_stop_failed(void);
+
 /* Secondary hart entry */
 asmlinkage void smp_callin(void);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d..186abd146eaf 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -404,6 +404,19 @@ handle_syscall_trace_exit:
 
 #ifdef CONFIG_VMAP_STACK
 handle_kernel_stack_overflow:
+   /*
+    * Takes the pseudo-spinlock for the shadow stack, in case multiple
+    * harts are concurrently overflowing their kernel stacks.  We could
+    * store any value here, but since we're overflowing the kernel stack
+    * already we only have SP to use as a scratch register.  So we just
+    * swap in the address of the spinlock, as that's definitely non-zero.
+    *
+    * Pairs with a store_release in handle_bad_stack().
+    */
+1: la sp, spin_shadow_stack
+   REG_AMOSWAP_AQ sp, sp, (sp)
+   bnez sp, 1b
+
    la sp, shadow_stack
    addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index ee79e6839b86..2d139b724bc8 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -15,6 +15,8 @@
 #include <linux/compiler.h>    /* For unreachable() */
 #include <linux/cpu.h>     /* For cpu_down() */
 #include <linux/reboot.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
 
 /*
  * kexec_image_info - Print received image details
@@ -138,20 +140,35 @@ void machine_shutdown(void)
 #endif
 }
 
-/* Override the weak function in kernel/panic.c */
-void crash_smp_send_stop(void)
+static void machine_kexec_mask_interrupts(void)
 {
-   static int cpus_stopped;
+   unsigned int i;
+   struct irq_desc *desc;
 
-   /*
-    * This function can be called twice in panic path, but obviously
-    * we execute this only once.
-    */
-   if (cpus_stopped)
-       return;
+   for_each_irq_desc(i, desc) {
+       struct irq_chip *chip;
+       int ret;
+
+       chip = irq_desc_get_chip(desc);
+       if (!chip)
+           continue;
+
+       /*
+        * First try to remove the active state. If this
+        * fails, try to EOI the interrupt.
+        */
+       ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+       if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+           chip->irq_eoi)
+           chip->irq_eoi(&desc->irq_data);
 
-   smp_send_stop();
-   cpus_stopped = 1;
+       if (chip->irq_mask)
+           chip->irq_mask(&desc->irq_data);
+
+       if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+           chip->irq_disable(&desc->irq_data);
+   }
 }
 
 /*
@@ -169,6 +186,8 @@ machine_crash_shutdown(struct pt_regs *regs)
    crash_smp_send_stop();
    crash_save_cpu(regs, smp_processor_id());
 
+   machine_kexec_mask_interrupts();
+
    pr_info("Starting crashdump kernel...\n");
 }
 
@@ -195,6 +214,11 @@ machine_kexec(struct kimage *image)
    void *control_code_buffer = page_address(image->control_code_page);
    riscv_kexec_method kexec_method = NULL;
 
+#ifdef CONFIG_SMP
+   WARN(smp_crash_stop_failed(),
+        "Some CPUs may be stale, kdump will be unreliable.\n");
+#endif
+
    if (image->type != KEXEC_TYPE_CRASH)
        kexec_method = control_code_buffer;
    else
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 67ec1fadcfe2..86acd690d529 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -322,10 +322,11 @@ subsys_initcall(topology_init);
 
 void free_initmem(void)
 {
-   if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
-       set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end),
-                 IS_ENABLED(CONFIG_64BIT) ?
-                 set_memory_rw : set_memory_rw_nx);
+   if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+       set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx);
+       if (IS_ENABLED(CONFIG_64BIT))
+           set_kernel_memory(__init_begin, __init_end, set_memory_nx);
+   }
    free_initmem_default(POISON_FREE_INITMEM);
 }
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 760a64518c58..8c3b59f1f9b8 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -12,6 +12,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/kexec.h>
 #include <linux/profile.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
@@ -22,11 +23,13 @@
 #include <asm/sbi.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
 
 enum ipi_message_type {
    IPI_RESCHEDULE,
    IPI_CALL_FUNC,
    IPI_CPU_STOP,
+   IPI_CPU_CRASH_STOP,
    IPI_IRQ_WORK,
    IPI_TIMER,
    IPI_MAX
@@ -71,6 +74,32 @@ static void ipi_stop(void)
        wait_for_interrupt();
 }
 
+#ifdef CONFIG_KEXEC_CORE
+static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
+
+static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+   crash_save_cpu(regs, cpu);
+
+   atomic_dec(&waiting_for_crash_ipi);
+
+   local_irq_disable();
+
+#ifdef CONFIG_HOTPLUG_CPU
+   if (cpu_has_hotplug(cpu))
+       cpu_ops[cpu]->cpu_stop();
+#endif
+
+   for(;;)
+       wait_for_interrupt();
+}
+#else
+static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+   unreachable();
+}
+#endif
+
 static const struct riscv_ipi_ops *ipi_ops __ro_after_init;
 
 void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
@@ -124,8 +153,9 @@ void arch_irq_work_raise(void)
 
 void handle_IPI(struct pt_regs *regs)
 {
-   unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
-   unsigned long *stats = ipi_data[smp_processor_id()].stats;
+   unsigned int cpu = smp_processor_id();
+   unsigned long *pending_ipis = &ipi_data[cpu].bits;
+   unsigned long *stats = ipi_data[cpu].stats;
 
    riscv_clear_ipi();
 
@@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs)
        ipi_stop();
    }
 
+   if (ops & (1 << IPI_CPU_CRASH_STOP)) {
+       ipi_cpu_crash_stop(cpu, get_irq_regs());
+   }
+
    if (ops & (1 << IPI_IRQ_WORK)) {
        stats[IPI_IRQ_WORK]++;
        irq_work_run();
@@ -176,6 +210,7 @@ static const char * const ipi_names[] = {
    [IPI_RESCHEDULE]    = "Rescheduling interrupts",
    [IPI_CALL_FUNC]     = "Function call interrupts",
    [IPI_CPU_STOP]      = "CPU stop interrupts",
+   [IPI_CPU_CRASH_STOP]    = "CPU stop (for crash dump) interrupts",
    [IPI_IRQ_WORK]      = "IRQ work interrupts",
    [IPI_TIMER]     = "Timer broadcast interrupts",
 };
@@ -235,6 +270,64 @@ void smp_send_stop(void)
               cpumask_pr_args(cpu_online_mask));
 }
 
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * The number of CPUs online, not counting this CPU (which may not be
+ * fully online and so not counted in num_online_cpus()).
+ */
+static inline unsigned int num_other_online_cpus(void)
+{
+   unsigned int this_cpu_online = cpu_online(smp_processor_id());
+
+   return num_online_cpus() - this_cpu_online;
+}
+
+void crash_smp_send_stop(void)
+{
+   static int cpus_stopped;
+   cpumask_t mask;
+   unsigned long timeout;
+
+   /*
+    * This function can be called twice in panic path, but obviously
+    * we execute this only once.
+    */
+   if (cpus_stopped)
+       return;
+
+   cpus_stopped = 1;
+
+   /*
+    * If this cpu is the only one alive at this point in time, online or
+    * not, there are no stop messages to be sent around, so just back out.
+    */
+   if (num_other_online_cpus() == 0)
+       return;
+
+   cpumask_copy(&mask, cpu_online_mask);
+   cpumask_clear_cpu(smp_processor_id(), &mask);
+
+   atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
+
+   pr_crit("SMP: stopping secondary CPUs\n");
+   send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
+
+   /* Wait up to one second for other CPUs to stop */
+   timeout = USEC_PER_SEC;
+   while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
+       udelay(1);
+
+   if (atomic_read(&waiting_for_crash_ipi) > 0)
+       pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+           cpumask_pr_args(&mask));
+}
+
+bool smp_crash_stop_failed(void)
+{
+   return (atomic_read(&waiting_for_crash_ipi) > 0);
+}
+#endif
+
 void smp_send_reschedule(int cpu)
 {
    send_ipi_single(cpu, IPI_RESCHEDULE);
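The crash-stop handshake added here mirrors the arm64 implementation: the panicking CPU counts its peers, fires IPI_CPU_CRASH_STOP, and busy-waits up to a second for them to save their registers and check in, so machine_kexec() can later warn via smp_crash_stop_failed() instead of silently producing a stale dump. The core of the handshake, condensed from the hunk above:

    atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
    send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);   /* peers save regs, decrement, park */

    timeout = USEC_PER_SEC;
    while (atomic_read(&waiting_for_crash_ipi) > 0 && timeout--)
            udelay(1);                          /* bounded wait; panic path must not block */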
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f3e96d60a2ff..7abd8e4c4df6 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -221,11 +221,29 @@ asmlinkage unsigned long get_overflow_stack(void)
        OVERFLOW_STACK_SIZE;
 }
 
+/*
+ * A pseudo spinlock to protect the shadow stack from being used by multiple
+ * harts concurrently. This isn't a real spinlock because the lock side must
+ * be taken without a valid stack and only a single register, it's only taken
+ * while in the process of panicking anyway so the performance and error
+ * checking a proper spinlock gives us doesn't matter.
+ */
+unsigned long spin_shadow_stack;
+
 asmlinkage void handle_bad_stack(struct pt_regs *regs)
 {
    unsigned long tsk_stk = (unsigned long)current->stack;
    unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
 
+   /*
+    * We're done with the shadow stack by this point, as we're on the
+    * overflow stack. Tell any other concurrent overflowing harts that
+    * they can proceed with panicking by releasing the pseudo-spinlock.
+    *
+    * This pairs with an amoswap.aq in handle_kernel_stack_overflow.
+    */
+   smp_store_release(&spin_shadow_stack, 0);
+
    console_verbose();
 
    pr_emerg("Insufficient stack space to handle exception!\n");
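In C terms, the assembly lock in handle_kernel_stack_overflow and the release in handle_bad_stack() pair up as below. This is only a sketch of the semantics, not real kernel code: the actual lock side cannot be written in C because it runs with no usable stack and only sp as scratch, and it swaps in the lock's own (non-zero) address rather than 1:

    static unsigned long spin_shadow_stack;

    static void shadow_stack_lock(void)         /* the REG_AMOSWAP_AQ loop */
    {
            while (xchg_acquire(&spin_shadow_stack, 1))
                    cpu_relax();
    }

    static void shadow_stack_unlock(void)       /* the smp_store_release() above */
    {
            smp_store_release(&spin_shadow_stack, 0);
    }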
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index db6548509bb3..06e6b27f3bcc 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -17,6 +17,7 @@ vdso-syms += flush_icache
 obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
 
 ccflags-y := -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
 
 ifneq ($(c-gettimeofday-y),)
   CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f1cb9391190d..11e901286414 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd)
    return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
 }
 
+#define pmd_young pmd_young
 static inline int pmd_young(pmd_t pmd)
 {
    return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index a779418ceba9..3bc9736bddb1 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd)
    return pte_dirty(pte);
 }
 
+#define pmd_young pmd_young
 static inline unsigned long pmd_young(pmd_t pmd)
 {
    pte_t pte = __pte(pmd_val(pmd));
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c936ce9f0c47..dfdb103ae4f6 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void)
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
 DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
-extern void write_spec_ctrl_current(u64 val, bool force);
+extern void update_spec_ctrl_cond(u64 val);
 extern u64 spec_ctrl_current(void);
 
 /*
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5059799bebe3..286a71810f9e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd)
    return pmd_flags(pmd) & _PAGE_DIRTY;
 }
 
+#define pmd_young pmd_young
 static inline int pmd_young(pmd_t pmd)
 {
    return pmd_flags(pmd) & _PAGE_ACCESSED;
@@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void)
    return true;
 }
 
+#ifdef CONFIG_XEN_PV
+#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
+static inline bool arch_has_hw_nonleaf_pmd_young(void)
+{
+   return !cpu_feature_enabled(X86_FEATURE_XENPV);
+}
+#endif
+
 #ifdef CONFIG_PAGE_TABLE_CHECK
 static inline bool pte_user_accessible_page(pte_t pte)
 {
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3e3230cccaa7..6daf84229548 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
 
 static DEFINE_MUTEX(spec_ctrl_mutex);
 
+/* Update SPEC_CTRL MSR and its cached copy unconditionally */
+static void update_spec_ctrl(u64 val)
+{
+   this_cpu_write(x86_spec_ctrl_current, val);
+   wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
 /*
  * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
  * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
 */
-void write_spec_ctrl_current(u64 val, bool force)
+void update_spec_ctrl_cond(u64 val)
 {
    if (this_cpu_read(x86_spec_ctrl_current) == val)
        return;
@@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force)
     * When KERNEL_IBRS this MSR is written on return-to-user, unless
     * forced the update can be delayed until that time.
     */
-   if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+   if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
        wrmsrl(MSR_IA32_SPEC_CTRL, val);
 }
 
@@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
 
    if (ia32_cap & ARCH_CAP_RRSBA) {
        x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
-       write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       update_spec_ctrl(x86_spec_ctrl_base);
    }
 }
 
@@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void)
 
    if (spectre_v2_in_ibrs_mode(mode)) {
        x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
-       write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       update_spec_ctrl(x86_spec_ctrl_base);
    }
 
    switch (mode) {
@@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void)
 static void update_stibp_msr(void * __unused)
 {
    u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
-   write_spec_ctrl_current(val, true);
+   update_spec_ctrl(val);
 }
 
 /* Update x86_spec_ctrl_base in case SMT state changed. */
@@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
            x86_amd_ssb_disable();
        } else {
            x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
-           write_spec_ctrl_current(x86_spec_ctrl_base, true);
+           update_spec_ctrl(x86_spec_ctrl_base);
        }
    }
 
@@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 void x86_spec_ctrl_setup_ap(void)
 {
    if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
-       write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       update_spec_ctrl(x86_spec_ctrl_base);
 
    if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
        x86_amd_ssb_disable();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c21b7347a26d..e436c9c1ef3b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
    }
 
    if (updmsr)
-       write_spec_ctrl_current(msr, false);
+       update_spec_ctrl_cond(msr);
 }
 
 static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
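The rename splits one function with a bool flag into two clearer tools: update_spec_ctrl() for one-off policy writes that must reach the MSR immediately, and update_spec_ctrl_cond() for the context-switch hot path, where a KERNEL_IBRS kernel rewrites SPEC_CTRL on return to user space anyway and the write can be skipped. Usage shape, condensed from the hunks above:

    /* mitigation selection (boot / sysfs), must hit the MSR now: */
    x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
    update_spec_ctrl(x86_spec_ctrl_base);

    /* __speculation_ctrl_update(), per task switch: */
    if (updmsr)
            update_spec_ctrl_cond(msr);  /* may defer wrmsrl() under KERNEL_IBRS */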
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 23f49a2f4d14..6cceca64a6bc 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -562,17 +562,26 @@ static int initiator_cmp(void *priv, const struct list_head *a,
 {
    struct memory_initiator *ia;
    struct memory_initiator *ib;
-   unsigned long *p_nodes = priv;
 
    ia = list_entry(a, struct memory_initiator, node);
    ib = list_entry(b, struct memory_initiator, node);
 
-   set_bit(ia->processor_pxm, p_nodes);
-   set_bit(ib->processor_pxm, p_nodes);
-
    return ia->processor_pxm - ib->processor_pxm;
 }
 
+static int initiators_to_nodemask(unsigned long *p_nodes)
+{
+   struct memory_initiator *initiator;
+
+   if (list_empty(&initiators))
+       return -ENXIO;
+
+   list_for_each_entry(initiator, &initiators, node)
+       set_bit(initiator->processor_pxm, p_nodes);
+
+   return 0;
+}
+
 static void hmat_register_target_initiators(struct memory_target *target)
 {
    static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
@@ -609,7 +618,10 @@ static void hmat_register_target_initiators(struct memory_target *target)
     * initiators.
     */
    bitmap_zero(p_nodes, MAX_NUMNODES);
-   list_sort(p_nodes, &initiators, initiator_cmp);
+   list_sort(NULL, &initiators, initiator_cmp);
+   if (initiators_to_nodemask(p_nodes) < 0)
+       return;
+
    if (!access0done) {
        for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
            loc = localities_types[i];
@@ -643,8 +655,9 @@ static void hmat_register_target_initiators(struct memory_target *target)
 
    /* Access 1 ignores Generic Initiators */
    bitmap_zero(p_nodes, MAX_NUMNODES);
-   list_sort(p_nodes, &initiators, initiator_cmp);
-   best = 0;
+   if (initiators_to_nodemask(p_nodes) < 0)
+       return;
+
    for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
        loc = localities_types[i];
        if (!loc)
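Beyond the ACPI specifics, the hmat fix encodes a general list_sort() rule: the comparator may be called on elements in any order and is not a reliable place for side effects, so building the nodemask moves into its own pass. A self-contained sketch of the corrected shape (struct item and mylist are illustrative names, not from the patch):

    #include <linux/list.h>
    #include <linux/list_sort.h>

    struct item {
            struct list_head node;
            int key;
    };

    /* pure comparator: defines the order, touches nothing else */
    static int item_cmp(void *priv, const struct list_head *a,
                        const struct list_head *b)
    {
            const struct item *ia = list_entry(a, struct item, node);
            const struct item *ib = list_entry(b, struct item, node);

            return ia->key - ib->key;
    }

    /* ...then any bookkeeping runs as an explicit second pass: */
    list_sort(NULL, &mylist, item_cmp);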
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 6471b559230e..b46aa490b4cd 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -939,8 +939,8 @@ static int genpd_runtime_suspend(struct device *dev)
        return 0;
 
    genpd_lock(genpd);
-   gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
    genpd_power_off(genpd, true, 0);
+   gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
    genpd_unlock(genpd);
 
    return 0;
@@ -978,9 +978,8 @@ static int genpd_runtime_resume(struct device *dev)
        goto out;
 
    genpd_lock(genpd);
+   genpd_restore_performance_state(dev, gpd_data->rpm_pstate);
    ret = genpd_power_on(genpd, 0);
-   if (!ret)
-       genpd_restore_performance_state(dev, gpd_data->rpm_pstate);
    genpd_unlock(genpd);
 
    if (ret)
@@ -1018,8 +1017,8 @@ err_stop:
 err_poweroff:
    if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) {
        genpd_lock(genpd);
-       gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
        genpd_power_off(genpd, true, 0);
+       gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
        genpd_unlock(genpd);
    }
 
@@ -1189,12 +1188,15 @@ static int genpd_prepare(struct device *dev)
  * genpd_finish_suspend - Completion of suspend or hibernation of device in an
  *            I/O pm domain.
  * @dev: Device to suspend.
- * @poweroff: Specifies if this is a poweroff_noirq or suspend_noirq callback.
+ * @suspend_noirq: Generic suspend_noirq callback.
+ * @resume_noirq: Generic resume_noirq callback.
  *
  * Stop the device and remove power from the domain if all devices in it have
  * been stopped.
  */
-static int genpd_finish_suspend(struct device *dev, bool poweroff)
+static int genpd_finish_suspend(struct device *dev,
+               int (*suspend_noirq)(struct device *dev),
+               int (*resume_noirq)(struct device *dev))
 {
    struct generic_pm_domain *genpd;
    int ret = 0;
@@ -1203,10 +1205,7 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff)
    if (IS_ERR(genpd))
        return -EINVAL;
 
-   if (poweroff)
-       ret = pm_generic_poweroff_noirq(dev);
-   else
-       ret = pm_generic_suspend_noirq(dev);
+   ret = suspend_noirq(dev);
    if (ret)
        return ret;
 
@@ -1217,10 +1216,7 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff)
        !pm_runtime_status_suspended(dev)) {
        ret = genpd_stop_dev(genpd, dev);
        if (ret) {
-           if (poweroff)
-               pm_generic_restore_noirq(dev);
-           else
-               pm_generic_resume_noirq(dev);
+           resume_noirq(dev);
            return ret;
        }
    }
@@ -1244,16 +1240,20 @@ static int genpd_suspend_noirq(struct device *dev)
 {
    dev_dbg(dev, "%s()\n", __func__);
 
-   return genpd_finish_suspend(dev, false);
+   return genpd_finish_suspend(dev,
+                   pm_generic_suspend_noirq,
+                   pm_generic_resume_noirq);
 }
 
 /**
- * genpd_resume_noirq - Start of resume of device in an I/O PM domain.
+ * genpd_finish_resume - Completion of resume of device in an I/O PM domain.
  * @dev: Device to resume.
+ * @resume_noirq: Generic resume_noirq callback.
  *
  * Restore power to the device's PM domain, if necessary, and start the device.
  */
-static int genpd_resume_noirq(struct device *dev)
+static int genpd_finish_resume(struct device *dev,
+                  int (*resume_noirq)(struct device *dev))
 {
    struct generic_pm_domain *genpd;
    int ret;
@@ -1265,7 +1265,7 @@ static int genpd_resume_noirq(struct device *dev)
        return -EINVAL;
 
    if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd))
-       return pm_generic_resume_noirq(dev);
+       return resume_noirq(dev);
 
    genpd_lock(genpd);
    genpd_sync_power_on(genpd, true, 0);
@@ -1283,6 +1283,19 @@ static int genpd_resume_noirq(struct device *dev)
 }
 
 /**
+ * genpd_resume_noirq - Start of resume of device in an I/O PM domain.
+ * @dev: Device to resume.
+ *
+ * Restore power to the device's PM domain, if necessary, and start the device.
+ */
+static int genpd_resume_noirq(struct device *dev)
+{
+   dev_dbg(dev, "%s()\n", __func__);
+
+   return genpd_finish_resume(dev, pm_generic_resume_noirq);
+}
+
+/**
  * genpd_freeze_noirq - Completion of freezing a device in an I/O PM domain.
  * @dev: Device to freeze.
  *
@@ -1293,24 +1306,11 @@ static int genpd_resume_noirq(struct device *dev)
  */
 static int genpd_freeze_noirq(struct device *dev)
 {
-   const struct generic_pm_domain *genpd;
-   int ret = 0;
-
    dev_dbg(dev, "%s()\n", __func__);
 
-   genpd = dev_to_genpd(dev);
-   if (IS_ERR(genpd))
-       return -EINVAL;
-
-   ret = pm_generic_freeze_noirq(dev);
-   if (ret)
-       return ret;
-
-   if (genpd->dev_ops.stop && genpd->dev_ops.start &&
-       !pm_runtime_status_suspended(dev))
-       ret = genpd_stop_dev(genpd, dev);
-
-   return ret;
+   return genpd_finish_suspend(dev,
+                   pm_generic_freeze_noirq,
+                   pm_generic_thaw_noirq);
 }
 
 /**
@@ -1322,23 +1322,9 @@ static int genpd_freeze_noirq(struct device *dev)
  */
 static int genpd_thaw_noirq(struct device *dev)
 {
-   const struct generic_pm_domain *genpd;
-   int ret = 0;
-
    dev_dbg(dev, "%s()\n", __func__);
 
-   genpd = dev_to_genpd(dev);
-   if (IS_ERR(genpd))
-       return -EINVAL;
-
-   if (genpd->dev_ops.stop && genpd->dev_ops.start &&
-       !pm_runtime_status_suspended(dev)) {
-       ret = genpd_start_dev(genpd, dev);
-       if (ret)
-           return ret;
-   }
-
-   return pm_generic_thaw_noirq(dev);
+   return genpd_finish_resume(dev, pm_generic_thaw_noirq);
 }
 
 /**
@@ -1353,7 +1339,9 @@ static int genpd_poweroff_noirq(struct device *dev)
 {
    dev_dbg(dev, "%s()\n", __func__);
 
-   return genpd_finish_suspend(dev, true);
+   return genpd_finish_suspend(dev,
+                   pm_generic_poweroff_noirq,
+                   pm_generic_restore_noirq);
 }
 
 /**
@@ -1365,40 +1353,9 @@ static int genpd_restore_noirq(struct device *dev)
 {
-   struct generic_pm_domain *genpd;
-   int ret = 0;
-
    dev_dbg(dev, "%s()\n", __func__);
 
-   genpd = dev_to_genpd(dev);
-   if (IS_ERR(genpd))
-       return -EINVAL;
-
-   /*
-    * At this point suspended_count == 0 means we are being run for the
-    * first time for the given domain in the present cycle.
-    */
-   genpd_lock(genpd);
-   if (genpd->suspended_count++ == 0) {
-       /*
-        * The boot kernel might put the domain into arbitrary state,
-        * so make it appear as powered off to genpd_sync_power_on(),
-        * so that it tries to power it on in case it was really off.
-        */
-       genpd->status = GENPD_STATE_OFF;
-   }
-
-   genpd_sync_power_on(genpd, true, 0);
-   genpd_unlock(genpd);
-
-   if (genpd->dev_ops.stop && genpd->dev_ops.start &&
-       !pm_runtime_status_suspended(dev)) {
-       ret = genpd_start_dev(genpd, dev);
-       if (ret)
-           return ret;
-   }
-
-   return pm_generic_restore_noirq(dev);
+   return genpd_finish_resume(dev, pm_generic_restore_noirq);
 }
 
 /**
@@ -2749,17 +2706,6 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
    dev->pm_domain->detach = genpd_dev_pm_detach;
    dev->pm_domain->sync = genpd_dev_pm_sync;
 
-   if (power_on) {
-       genpd_lock(pd);
-       ret = genpd_power_on(pd, 0);
-       genpd_unlock(pd);
-   }
-
-   if (ret) {
-       genpd_remove_device(pd, dev);
-       return -EPROBE_DEFER;
-   }
-
    /* Set the default performance state */
    pstate = of_get_required_opp_performance_state(dev->of_node, index);
    if (pstate < 0 && pstate != -ENODEV && pstate != -EOPNOTSUPP) {
@@ -2771,6 +2717,24 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
            goto err;
        dev_gpd_data(dev)->default_pstate = pstate;
    }
+
+   if (power_on) {
+       genpd_lock(pd);
+       ret = genpd_power_on(pd, 0);
+       genpd_unlock(pd);
+   }
+
+   if (ret) {
+       /* Drop the default performance state */
+       if (dev_gpd_data(dev)->default_pstate) {
+           dev_pm_genpd_set_performance_state(dev, 0);
+           dev_gpd_data(dev)->default_pstate = 0;
+       }
+
+       genpd_remove_device(pd, dev);
+       return -EPROBE_DEFER;
+   }
+
    return 1;
 
 err:
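The domain.c rework is a textbook de-duplication: a bool that selected between hardwired callback pairs becomes explicit function-pointer parameters, letting suspend/resume, freeze/thaw and poweroff/restore share one body. The shape of the pattern, reduced to a sketch (stop_dev is a hypothetical helper standing in for genpd_stop_dev()):

    static int finish_suspend(struct device *dev,
                              int (*suspend_cb)(struct device *),
                              int (*resume_cb)(struct device *))
    {
            int ret = suspend_cb(dev);

            if (ret)
                    return ret;

            ret = stop_dev(dev);            /* hypothetical helper */
            if (ret)
                    resume_cb(dev);         /* roll back symmetrically on error */
            return ret;
    }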
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b52049098d4e..50e726b6c2cf 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -243,8 +243,7 @@ void pm_runtime_set_memalloc_noio(struct device *dev, bool enable)
         * flag was set by any one of the descendants.
         */
        if (!dev || (!enable &&
-                device_for_each_child(dev, NULL,
-                          dev_memalloc_noio)))
+                device_for_each_child(dev, NULL, dev_memalloc_noio)))
            break;
    }
    mutex_unlock(&dev_hotplug_mutex);
@@ -265,15 +264,13 @@ static int rpm_check_suspend_allowed(struct device *dev)
        retval = -EACCES;
    else if (atomic_read(&dev->power.usage_count))
        retval = -EAGAIN;
-   else if (!dev->power.ignore_children &&
-       atomic_read(&dev->power.child_count))
+   else if (!dev->power.ignore_children && atomic_read(&dev->power.child_count))
        retval = -EBUSY;
 
    /* Pending resume requests take precedence over suspends. */
-   else if ((dev->power.deferred_resume
-           && dev->power.runtime_status == RPM_SUSPENDING)
-       || (dev->power.request_pending
-           && dev->power.request == RPM_REQ_RESUME))
+   else if ((dev->power.deferred_resume &&
+         dev->power.runtime_status == RPM_SUSPENDING) ||
+        (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME))
        retval = -EAGAIN;
    else if (__dev_pm_qos_resume_latency(dev) == 0)
        retval = -EPERM;
@@ -404,9 +401,9 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
         *
         * Do that if resume fails too.
         */
-       if (use_links
-           && ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
-           || (dev->power.runtime_status == RPM_RESUMING && retval))) {
+       if (use_links &&
+           ((dev->power.runtime_status == RPM_SUSPENDING && !retval) ||
+           (dev->power.runtime_status == RPM_RESUMING && retval))) {
            idx = device_links_read_lock();
 
            __rpm_put_suppliers(dev, false);
@@ -422,6 +419,38 @@ fail:
 }
 
 /**
+ * rpm_callback - Run a given runtime PM callback for a given device.
+ * @cb: Runtime PM callback to run.
+ * @dev: Device to run the callback for.
+ */
+static int rpm_callback(int (*cb)(struct device *), struct device *dev)
+{
+   int retval;
+
+   if (dev->power.memalloc_noio) {
+       unsigned int noio_flag;
+
+       /*
+        * Deadlock might be caused if memory allocation with
+        * GFP_KERNEL happens inside runtime_suspend and
+        * runtime_resume callbacks of one block device's
+        * ancestor or the block device itself. Network
+        * device might be thought as part of iSCSI block
+        * device, so network device and its ancestor should
+        * be marked as memalloc_noio too.
+        */
+       noio_flag = memalloc_noio_save();
+       retval = __rpm_callback(cb, dev);
+       memalloc_noio_restore(noio_flag);
+   } else {
+       retval = __rpm_callback(cb, dev);
+   }
+
+   dev->power.runtime_error = retval;
+   return retval != -EACCES ? retval : -EIO;
+}
+
+/**
  * rpm_idle - Notify device bus type if the device can be suspended.
  * @dev: Device to notify the bus type about.
  * @rpmflags: Flag bits.
@@ -459,6 +488,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
    /* Act as though RPM_NOWAIT is always set. */
    else if (dev->power.idle_notification)
        retval = -EINPROGRESS;
+
    if (retval)
        goto out;
 
@@ -484,7 +514,17 @@ static int rpm_idle(struct device *dev, int rpmflags)
    dev->power.idle_notification = true;
 
-   retval = __rpm_callback(callback, dev);
+   if (dev->power.irq_safe)
+       spin_unlock(&dev->power.lock);
+   else
+       spin_unlock_irq(&dev->power.lock);
+
+   retval = callback(dev);
+
+   if (dev->power.irq_safe)
+       spin_lock(&dev->power.lock);
+   else
+       spin_lock_irq(&dev->power.lock);
 
    dev->power.idle_notification = false;
    wake_up_all(&dev->power.wait_queue);
@@ -495,38 +535,6 @@ static int rpm_idle(struct device *dev, int rpmflags)
 }
 
 /**
- * rpm_callback - Run a given runtime PM callback for a given device.
- * @cb: Runtime PM callback to run.
- * @dev: Device to run the callback for.
- */
-static int rpm_callback(int (*cb)(struct device *), struct device *dev)
-{
-   int retval;
-
-   if (dev->power.memalloc_noio) {
-       unsigned int noio_flag;
-
-       /*
-        * Deadlock might be caused if memory allocation with
-        * GFP_KERNEL happens inside runtime_suspend and
-        * runtime_resume callbacks of one block device's
-        * ancestor or the block device itself. Network
-        * device might be thought as part of iSCSI block
-        * device, so network device and its ancestor should
-        * be marked as memalloc_noio too.
-        */
-       noio_flag = memalloc_noio_save();
-       retval = __rpm_callback(cb, dev);
-       memalloc_noio_restore(noio_flag);
-   } else {
-       retval = __rpm_callback(cb, dev);
-   }
-
-   dev->power.runtime_error = retval;
-   return retval != -EACCES ? retval : -EIO;
-}
-
-/**
  * rpm_suspend - Carry out runtime suspend of given device.
  * @dev: Device to suspend.
  * @rpmflags: Flag bits.
@@ -564,12 +572,12 @@ static int rpm_suspend(struct device *dev, int rpmflags)
    /* Synchronous suspends are not allowed in the RPM_RESUMING state. */
    if (dev->power.runtime_status == RPM_RESUMING && !(rpmflags & RPM_ASYNC))
        retval = -EAGAIN;
+
    if (retval)
        goto out;
 
    /* If the autosuspend_delay time hasn't expired yet, reschedule. */
-   if ((rpmflags & RPM_AUTO)
-       && dev->power.runtime_status != RPM_SUSPENDING) {
+   if ((rpmflags & RPM_AUTO) && dev->power.runtime_status != RPM_SUSPENDING) {
        u64 expires = pm_runtime_autosuspend_expiration(dev);
 
        if (expires != 0) {
@@ -584,7 +592,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
             * rest.
             */
            if (!(dev->power.timer_expires &&
-                   dev->power.timer_expires <= expires)) {
+                 dev->power.timer_expires <= expires)) {
                /*
                 * We add a slack of 25% to gather wakeups
                 * without sacrificing the granularity.
@@ -594,9 +602,9 @@ static int rpm_suspend(struct device *dev, int rpmflags)
                dev->power.timer_expires = expires;
                hrtimer_start_range_ns(&dev->power.suspend_timer,
-                       ns_to_ktime(expires),
-                       slack,
-                       HRTIMER_MODE_ABS);
+                              ns_to_ktime(expires),
+                              slack,
+                              HRTIMER_MODE_ABS);
            }
            dev->power.timer_autosuspends = 1;
            goto out;
@@ -787,8 +795,8 @@ static int rpm_resume(struct device *dev, int rpmflags)
        goto out;
    }
 
-   if (dev->power.runtime_status == RPM_RESUMING
-       || dev->power.runtime_status == RPM_SUSPENDING) {
+   if (dev->power.runtime_status == RPM_RESUMING ||
+       dev->power.runtime_status == RPM_SUSPENDING) {
        DEFINE_WAIT(wait);
 
        if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
@@ -815,8 +823,8 @@ static int rpm_resume(struct device *dev, int rpmflags)
        for (;;) {
            prepare_to_wait(&dev->power.wait_queue, &wait,
                    TASK_UNINTERRUPTIBLE);
-           if (dev->power.runtime_status != RPM_RESUMING
-               && dev->power.runtime_status != RPM_SUSPENDING)
+           if (dev->power.runtime_status != RPM_RESUMING &&
+               dev->power.runtime_status != RPM_SUSPENDING)
                break;
 
            spin_unlock_irq(&dev->power.lock);
@@ -836,9 +844,9 @@ static int rpm_resume(struct device *dev, int rpmflags)
     */
    if (dev->power.no_callbacks && !parent && dev->parent) {
        spin_lock_nested(&dev->parent->power.lock, SINGLE_DEPTH_NESTING);
-       if (dev->parent->power.disable_depth > 0
-           || dev->parent->power.ignore_children
-           || dev->parent->power.runtime_status == RPM_ACTIVE) {
+       if (dev->parent->power.disable_depth > 0 ||
+           dev->parent->power.ignore_children ||
+           dev->parent->power.runtime_status == RPM_ACTIVE) {
            atomic_inc(&dev->parent->power.child_count);
            spin_unlock(&dev->parent->power.lock);
            retval = 1;
@@ -867,6 +875,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
        parent = dev->parent;
        if (dev->power.irq_safe)
            goto skip_parent;
+
        spin_unlock(&dev->power.lock);
 
        pm_runtime_get_noresume(parent);
@@ -876,8 +885,8 @@ static int rpm_resume(struct device *dev, int rpmflags)
         * Resume the parent if it has runtime PM enabled and not been
         * set to ignore its children.
         */
-       if (!parent->power.disable_depth
-           && !parent->power.ignore_children) {
+       if (!parent->power.disable_depth &&
+           !parent->power.ignore_children) {
            rpm_resume(parent, 0);
            if (parent->power.runtime_status != RPM_ACTIVE)
                retval = -EBUSY;
@@ -887,6 +896,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
        spin_lock(&dev->power.lock);
        if (retval)
            goto out;
+
        goto repeat;
    }
 skip_parent:
@@ -1291,9 +1301,9 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
         * not active, has runtime PM enabled and the
         * 'power.ignore_children' flag unset.
         */
-       if (!parent->power.disable_depth
-           && !parent->power.ignore_children
-           && parent->power.runtime_status != RPM_ACTIVE) {
+       if (!parent->power.disable_depth &&
+           !parent->power.ignore_children &&
+           parent->power.runtime_status != RPM_ACTIVE) {
            dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n",
                dev_name(dev),
                dev_name(parent));
@@ -1358,9 +1368,9 @@ static void __pm_runtime_barrier(struct device *dev)
        dev->power.request_pending = false;
    }
 
-   if (dev->power.runtime_status == RPM_SUSPENDING
-       || dev->power.runtime_status == RPM_RESUMING
-       || dev->power.idle_notification) {
+   if (dev->power.runtime_status == RPM_SUSPENDING ||
+       dev->power.runtime_status == RPM_RESUMING ||
+       dev->power.idle_notification) {
        DEFINE_WAIT(wait);
 
        /* Suspend, wake-up or idle notification in progress.
         */
@@ -1445,8 +1455,8 @@ void __pm_runtime_disable(struct device *dev, bool check_resume)
     * means there probably is some I/O to process and disabling runtime PM
     * shouldn't prevent the device from processing the I/O.
     */
-   if (check_resume && dev->power.request_pending
-       && dev->power.request == RPM_REQ_RESUME) {
+   if (check_resume && dev->power.request_pending &&
+       dev->power.request == RPM_REQ_RESUME) {
        /*
         * Prevent suspends and idle notifications from being carried
         * out after we have woken up the device.
@@ -1606,6 +1616,7 @@ void pm_runtime_irq_safe(struct device *dev)
 {
    if (dev->parent)
        pm_runtime_get_sync(dev->parent);
+
    spin_lock_irq(&dev->power.lock);
    dev->power.irq_safe = 1;
    spin_unlock_irq(&dev->power.lock);
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 1621ce818705..d69905233aff 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev)
        !pm_suspend_via_firmware())
        goto suspended;
 
-   if (!tpm_chip_start(chip)) {
+   rc = tpm_try_get_ops(chip);
+   if (!rc) {
        if (chip->flags & TPM_CHIP_FLAG_TPM2)
            tpm2_shutdown(chip, TPM2_SU_STATE);
        else
            rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
 
-       tpm_chip_stop(chip);
+       tpm_put_ops(chip);
    }
 
 suspended:
diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c
index b174f727a8ef..16870943a13e 100644
--- a/drivers/clk/at91/at91rm9200.c
+++ b/drivers/clk/at91/at91rm9200.c
@@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = {
 };
 
 static const struct sck at91rm9200_systemck[] = {
-   { .n = "udpck", .p = "usbck",  .id = 2 },
+   { .n = "udpck", .p = "usbck",  .id = 1 },
    { .n = "uhpck", .p = "usbck",  .id = 4 },
    { .n = "pck0",  .p = "prog0",  .id = 8 },
    { .n = "pck1",  .p = "prog1",  .id = 9 },
diff --git a/drivers/clk/qcom/gcc-sc8280xp.c b/drivers/clk/qcom/gcc-sc8280xp.c
index a18ed88f3b82..b3198784e1c3 100644
--- a/drivers/clk/qcom/gcc-sc8280xp.c
+++ b/drivers/clk/qcom/gcc-sc8280xp.c
@@ -5364,6 +5364,8 @@ static struct clk_branch gcc_ufs_1_card_clkref_clk = {
        .enable_mask = BIT(0),
        .hw.init = &(const struct clk_init_data) {
            .name = "gcc_ufs_1_card_clkref_clk",
+           .parent_data = &gcc_parent_data_tcxo,
+           .num_parents = 1,
            .ops = &clk_branch2_ops,
        },
    },
@@ -5432,6 +5434,8 @@ static struct clk_branch gcc_ufs_card_clkref_clk = {
        .enable_mask = BIT(0),
        .hw.init = &(const struct clk_init_data) {
            .name = "gcc_ufs_card_clkref_clk",
+           .parent_data = &gcc_parent_data_tcxo,
+           .num_parents = 1,
            .ops = &clk_branch2_ops,
        },
    },
@@ -5848,6 +5852,8 @@ static struct clk_branch gcc_ufs_ref_clkref_clk = {
        .enable_mask = BIT(0),
        .hw.init = &(const struct clk_init_data) {
            .name = "gcc_ufs_ref_clkref_clk",
+           .parent_data = &gcc_parent_data_tcxo,
+           .num_parents = 1,
            .ops = &clk_branch2_ops,
        },
    },
diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index 7cf5e130e92f..0f21a8a767ac 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/ktime.h>
 #include <linux/pm_domain.h>
-#include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset-controller.h>
@@ -56,22 +55,6 @@ enum gdsc_status {
    GDSC_ON
 };
 
-static int gdsc_pm_runtime_get(struct gdsc *sc)
-{
-   if (!sc->dev)
-       return 0;
-
-   return pm_runtime_resume_and_get(sc->dev);
-}
-
-static int gdsc_pm_runtime_put(struct gdsc *sc)
-{
-   if (!sc->dev)
-       return 0;
-
-   return pm_runtime_put_sync(sc->dev);
-}
-
 /* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */
status, 0 if not, and < 0 on error */ static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status) { @@ -271,8 +254,9 @@ static void gdsc_retain_ff_on(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); } -static int _gdsc_enable(struct gdsc *sc) +static int gdsc_enable(struct generic_pm_domain *domain) { + struct gdsc *sc = domain_to_gdsc(domain); int ret; if (sc->pwrsts == PWRSTS_ON) @@ -328,22 +312,11 @@ static int _gdsc_enable(struct gdsc *sc) return 0; } -static int gdsc_enable(struct generic_pm_domain *domain) +static int gdsc_disable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); int ret; - ret = gdsc_pm_runtime_get(sc); - if (ret) - return ret; - - return _gdsc_enable(sc); -} - -static int _gdsc_disable(struct gdsc *sc) -{ - int ret; - if (sc->pwrsts == PWRSTS_ON) return gdsc_assert_reset(sc); @@ -388,18 +361,6 @@ static int _gdsc_disable(struct gdsc *sc) return 0; } -static int gdsc_disable(struct generic_pm_domain *domain) -{ - struct gdsc *sc = domain_to_gdsc(domain); - int ret; - - ret = _gdsc_disable(sc); - - gdsc_pm_runtime_put(sc); - - return ret; -} - static int gdsc_init(struct gdsc *sc) { u32 mask, val; @@ -447,11 +408,6 @@ static int gdsc_init(struct gdsc *sc) return ret; } - /* ...and the power-domain */ - ret = gdsc_pm_runtime_get(sc); - if (ret) - goto err_disable_supply; - /* * Votable GDSCs can be ON due to Vote from other masters. * If a Votable GDSC is ON, make sure we have a Vote. @@ -459,14 +415,14 @@ static int gdsc_init(struct gdsc *sc) if (sc->flags & VOTABLE) { ret = gdsc_update_collapse_bit(sc, false); if (ret) - goto err_put_rpm; + goto err_disable_supply; } /* Turn on HW trigger mode if supported */ if (sc->flags & HW_CTRL) { ret = gdsc_hwctrl(sc, true); if (ret < 0) - goto err_put_rpm; + goto err_disable_supply; } /* @@ -496,13 +452,10 @@ static int gdsc_init(struct gdsc *sc) ret = pm_genpd_init(&sc->pd, NULL, !on); if (ret) - goto err_put_rpm; + goto err_disable_supply; return 0; -err_put_rpm: - if (on) - gdsc_pm_runtime_put(sc); err_disable_supply: if (on && sc->rsupply) regulator_disable(sc->rsupply); @@ -541,8 +494,6 @@ int gdsc_register(struct gdsc_desc *desc, for (i = 0; i < num; i++) { if (!scs[i]) continue; - if (pm_runtime_enabled(dev)) - scs[i]->dev = dev; scs[i]->regmap = regmap; scs[i]->rcdev = rcdev; ret = gdsc_init(scs[i]); diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 981a12c8502d..803512688336 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -30,7 +30,6 @@ struct reset_controller_dev; * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller - * @dev: the device holding the GDSC, used for pm_runtime calls */ struct gdsc { struct generic_pm_domain pd; @@ -74,7 +73,6 @@ struct gdsc { const char *supply; struct regulator *rsupply; - struct device *dev; }; struct gdsc_desc { diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c index 273f77d54dab..e6d6cbf8c4e6 100644 --- a/drivers/clk/samsung/clk-exynos-clkout.c +++ b/drivers/clk/samsung/clk-exynos-clkout.c @@ -81,17 +81,19 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids); static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask) { const struct exynos_clkout_variant *variant; + const struct of_device_id *match; if (!dev->parent) { dev_err(dev, "not instantiated from MFD\n"); return -EINVAL; } - variant = of_device_get_match_data(dev->parent); - if (!variant) { + match = 
of_match_device(exynos_clkout_ids, dev->parent); + if (!match) { dev_err(dev, "cannot match parent device\n"); return -EINVAL; } + variant = match->data; *mux_mask = variant->mux_mask; diff --git a/drivers/clk/samsung/clk-exynos7885.c b/drivers/clk/samsung/clk-exynos7885.c index 62ce6814f141..0d2a950ed184 100644 --- a/drivers/clk/samsung/clk-exynos7885.c +++ b/drivers/clk/samsung/clk-exynos7885.c @@ -231,7 +231,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED0_DIV2, 0, 1), DIV(CLK_DOUT_SHARED0_DIV3, "dout_shared0_div3", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "fout_shared0_pll", + DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "dout_shared0_div2", CLK_CON_DIV_PLL_SHARED0_DIV4, 0, 1), DIV(CLK_DOUT_SHARED0_DIV5, "dout_shared0_div5", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV5, 0, 3), @@ -239,7 +239,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED1_DIV2, 0, 1), DIV(CLK_DOUT_SHARED1_DIV3, "dout_shared1_div3", "fout_shared1_pll", CLK_CON_DIV_PLL_SHARED1_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "fout_shared1_pll", + DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "dout_shared1_div2", CLK_CON_DIV_PLL_SHARED1_DIV4, 0, 1), /* CORE */ diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 969a552da8d2..a0d66fabf073 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -51,7 +51,7 @@ static int riscv_clock_next_event(unsigned long delta, static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, + .features = CLOCK_EVT_FEAT_ONESHOT, .rating = 100, .set_next_event = riscv_clock_next_event, }; diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 82e5de1f6f8c..0a0352d8fa45 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -41,6 +41,15 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM To compile this driver as a module, choose M here: the module will be called sun50i-cpufreq-nvmem. +config ARM_APPLE_SOC_CPUFREQ + tristate "Apple Silicon SoC CPUFreq support" + depends on ARCH_APPLE || (COMPILE_TEST && 64BIT) + select PM_OPP + default ARCH_APPLE + help + This adds the CPUFreq driver for Apple Silicon machines + (e.g. Apple M1). + config ARM_ARMADA_37XX_CPUFREQ tristate "Armada 37xx CPUFreq support" depends on ARCH_MVEBU && CPUFREQ_DT @@ -340,8 +349,8 @@ config ARM_TEGRA194_CPUFREQ config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" - depends on ARCH_OMAP2PLUS - default ARCH_OMAP2PLUS + depends on ARCH_OMAP2PLUS || ARCH_K3 + default y help This driver enables valid OPPs on the running platform based on values contained within the SoC in use. 
Enable this in order to diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 49b98c62c5af..32a7029e25ed 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o ################################################################################## # ARM SoC drivers +obj-$(CONFIG_ARM_APPLE_SOC_CPUFREQ) += apple-soc-cpufreq.o obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o obj-$(CONFIG_ARM_ARMADA_8K_CPUFREQ) += armada-8k-cpufreq.o obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 1bb2b90ebb21..78adfb2ffff6 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -19,6 +19,7 @@ #include <linux/compiler.h> #include <linux/dmi.h> #include <linux/slab.h> +#include <linux/string_helpers.h> #include <linux/acpi.h> #include <linux/io.h> @@ -135,8 +136,8 @@ static int set_boost(struct cpufreq_policy *policy, int val) { on_each_cpu_mask(policy->cpus, boost_set_msr_each, (void *)(long)val, 1); - pr_debug("CPU %*pbl: Core Boosting %sabled.\n", - cpumask_pr_args(policy->cpus), val ? "en" : "dis"); + pr_debug("CPU %*pbl: Core Boosting %s.\n", + cpumask_pr_args(policy->cpus), str_enabled_disabled(val)); return 0; } @@ -535,15 +536,6 @@ static void free_acpi_perf_data(void) free_percpu(acpi_perf_data); } -static int cpufreq_boost_online(unsigned int cpu) -{ - /* - * On the CPU_UP path we simply keep the boost-disable flag - * in sync with the current global state. - */ - return boost_set_msr(acpi_cpufreq_driver.boost_enabled); -} - static int cpufreq_boost_down_prep(unsigned int cpu) { /* @@ -897,6 +889,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); + if (acpi_cpufreq_driver.set_boost) + set_boost(policy, acpi_cpufreq_driver.boost_enabled); + return result; err_unreg: @@ -916,6 +911,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) pr_debug("%s\n", __func__); + cpufreq_boost_down_prep(policy->cpu); policy->fast_switch_possible = false; policy->driver_data = NULL; acpi_processor_unregister_performance(data->acpi_perf_cpu); @@ -958,12 +954,8 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .attr = acpi_cpufreq_attr, }; -static enum cpuhp_state acpi_cpufreq_online; - static void __init acpi_cpufreq_boost_init(void) { - int ret; - if (!(boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA))) { pr_debug("Boost capabilities not present in the processor\n"); return; @@ -971,24 +963,6 @@ static void __init acpi_cpufreq_boost_init(void) acpi_cpufreq_driver.set_boost = set_boost; acpi_cpufreq_driver.boost_enabled = boost_state(0); - - /* - * This calls the online callback on all online cpu and forces all - * MSRs to the same value. 
- */ - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpufreq/acpi:online", - cpufreq_boost_online, cpufreq_boost_down_prep); - if (ret < 0) { - pr_err("acpi_cpufreq: failed to register hotplug callbacks\n"); - return; - } - acpi_cpufreq_online = ret; -} - -static void acpi_cpufreq_boost_exit(void) -{ - if (acpi_cpufreq_online > 0) - cpuhp_remove_state_nocalls(acpi_cpufreq_online); } static int __init acpi_cpufreq_init(void) @@ -1032,7 +1006,6 @@ static int __init acpi_cpufreq_init(void) ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) { free_acpi_perf_data(); - acpi_cpufreq_boost_exit(); } return ret; } @@ -1041,8 +1014,6 @@ static void __exit acpi_cpufreq_exit(void) { pr_debug("%s\n", __func__); - acpi_cpufreq_boost_exit(); - cpufreq_unregister_driver(&acpi_cpufreq_driver); free_acpi_perf_data(); diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 6448e03bcf48..59b19b9975e8 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -125,6 +125,8 @@ static int __init amd_freq_sensitivity_init(void) if (!pcidev) { if (!boot_cpu_has(X86_FEATURE_PROC_FEEDBACK)) return -ENODEV; + } else { + pci_dev_put(pcidev); } if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val)) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c new file mode 100644 index 000000000000..d1801281cdd9 --- /dev/null +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Apple SoC CPU cluster performance state driver + * + * Copyright The Asahi Linux Contributors + * + * Based on scpi-cpufreq.c + */ + +#include <linux/bitfield.h> +#include <linux/bitops.h> +#include <linux/cpu.h> +#include <linux/cpufreq.h> +#include <linux/cpumask.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/pm_opp.h> +#include <linux/slab.h> + +#define APPLE_DVFS_CMD 0x20 +#define APPLE_DVFS_CMD_BUSY BIT(31) +#define APPLE_DVFS_CMD_SET BIT(25) +#define APPLE_DVFS_CMD_PS2 GENMASK(16, 12) +#define APPLE_DVFS_CMD_PS1 GENMASK(4, 0) + +/* Same timebase as CPU counter (24MHz) */ +#define APPLE_DVFS_LAST_CHG_TIME 0x38 + +/* + * Apple ran out of bits and had to shift this in T8112... + */ +#define APPLE_DVFS_STATUS 0x50 +#define APPLE_DVFS_STATUS_CUR_PS_T8103 GENMASK(7, 4) +#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103 4 +#define APPLE_DVFS_STATUS_TGT_PS_T8103 GENMASK(3, 0) +#define APPLE_DVFS_STATUS_CUR_PS_T8112 GENMASK(9, 5) +#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112 5 +#define APPLE_DVFS_STATUS_TGT_PS_T8112 GENMASK(4, 0) + +/* + * Div is +1, base clock is 12MHz on existing SoCs. + * For documentation purposes. We use the OPP table to + * get the frequency. 
+ */ +#define APPLE_DVFS_PLL_STATUS 0xc0 +#define APPLE_DVFS_PLL_FACTOR 0xc8 +#define APPLE_DVFS_PLL_FACTOR_MULT GENMASK(31, 16) +#define APPLE_DVFS_PLL_FACTOR_DIV GENMASK(15, 0) + +#define APPLE_DVFS_TRANSITION_TIMEOUT 100 + +struct apple_soc_cpufreq_info { + u64 max_pstate; + u64 cur_pstate_mask; + u64 cur_pstate_shift; +}; + +struct apple_cpu_priv { + struct device *cpu_dev; + void __iomem *reg_base; + const struct apple_soc_cpufreq_info *info; +}; + +static struct cpufreq_driver apple_soc_cpufreq_driver; + +static const struct apple_soc_cpufreq_info soc_t8103_info = { + .max_pstate = 15, + .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8103, + .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103, +}; + +static const struct apple_soc_cpufreq_info soc_t8112_info = { + .max_pstate = 31, + .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8112, + .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112, +}; + +static const struct apple_soc_cpufreq_info soc_default_info = { + .max_pstate = 15, + .cur_pstate_mask = 0, /* fallback */ +}; + +static const struct of_device_id apple_soc_cpufreq_of_match[] = { + { + .compatible = "apple,t8103-cluster-cpufreq", + .data = &soc_t8103_info, + }, + { + .compatible = "apple,t8112-cluster-cpufreq", + .data = &soc_t8112_info, + }, + { + .compatible = "apple,cluster-cpufreq", + .data = &soc_default_info, + }, + {} +}; + +static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); + struct apple_cpu_priv *priv = policy->driver_data; + struct cpufreq_frequency_table *p; + unsigned int pstate; + + if (priv->info->cur_pstate_mask) { + u64 reg = readq_relaxed(priv->reg_base + APPLE_DVFS_STATUS); + + pstate = (reg & priv->info->cur_pstate_mask) >> priv->info->cur_pstate_shift; + } else { + /* + * For the fallback case we might not know the layout of DVFS_STATUS, + * so just use the command register value (which ignores boost limitations). 
+ */ + u64 reg = readq_relaxed(priv->reg_base + APPLE_DVFS_CMD); + + pstate = FIELD_GET(APPLE_DVFS_CMD_PS1, reg); + } + + cpufreq_for_each_valid_entry(p, policy->freq_table) + if (p->driver_data == pstate) + return p->frequency; + + dev_err(priv->cpu_dev, "could not find frequency for pstate %d\n", + pstate); + return 0; +} + +static int apple_soc_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int index) +{ + struct apple_cpu_priv *priv = policy->driver_data; + unsigned int pstate = policy->freq_table[index].driver_data; + u64 reg; + + /* Fallback for newer SoCs */ + if (index > priv->info->max_pstate) + index = priv->info->max_pstate; + + if (readq_poll_timeout_atomic(priv->reg_base + APPLE_DVFS_CMD, reg, + !(reg & APPLE_DVFS_CMD_BUSY), 2, + APPLE_DVFS_TRANSITION_TIMEOUT)) { + return -EIO; + } + + reg &= ~(APPLE_DVFS_CMD_PS1 | APPLE_DVFS_CMD_PS2); + reg |= FIELD_PREP(APPLE_DVFS_CMD_PS1, pstate); + reg |= FIELD_PREP(APPLE_DVFS_CMD_PS2, pstate); + reg |= APPLE_DVFS_CMD_SET; + + writeq_relaxed(reg, priv->reg_base + APPLE_DVFS_CMD); + + return 0; +} + +static unsigned int apple_soc_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + if (apple_soc_cpufreq_set_target(policy, policy->cached_resolved_idx) < 0) + return 0; + + return policy->freq_table[policy->cached_resolved_idx].frequency; +} + +static int apple_soc_cpufreq_find_cluster(struct cpufreq_policy *policy, + void __iomem **reg_base, + const struct apple_soc_cpufreq_info **info) +{ + struct of_phandle_args args; + const struct of_device_id *match; + int ret = 0; + + ret = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", + "#performance-domain-cells", + policy->cpus, &args); + if (ret < 0) + return ret; + + match = of_match_node(apple_soc_cpufreq_of_match, args.np); + of_node_put(args.np); + if (!match) + return -ENODEV; + + *info = match->data; + + *reg_base = of_iomap(args.np, 0); + if (IS_ERR(*reg_base)) + return PTR_ERR(*reg_base); + + return 0; +} + +static struct freq_attr *apple_soc_cpufreq_hw_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, /* Filled in below if boost is enabled */ + NULL, +}; + +static int apple_soc_cpufreq_init(struct cpufreq_policy *policy) +{ + int ret, i; + unsigned int transition_latency; + void __iomem *reg_base; + struct device *cpu_dev; + struct apple_cpu_priv *priv; + const struct apple_soc_cpufreq_info *info; + struct cpufreq_frequency_table *freq_table; + + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", policy->cpu); + return -ENODEV; + } + + ret = dev_pm_opp_of_add_table(cpu_dev); + if (ret < 0) { + dev_err(cpu_dev, "%s: failed to add OPP table: %d\n", __func__, ret); + return ret; + } + + ret = apple_soc_cpufreq_find_cluster(policy, ®_base, &info); + if (ret) { + dev_err(cpu_dev, "%s: failed to get cluster info: %d\n", __func__, ret); + return ret; + } + + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); + if (ret) { + dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); + goto out_iounmap; + } + + ret = dev_pm_opp_get_opp_count(cpu_dev); + if (ret <= 0) { + dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); + ret = -EPROBE_DEFER; + goto out_free_opp; + } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto out_free_opp; + } + + ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); + if (ret) { + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); + goto out_free_priv; + } + + 
/* Get OPP levels (p-state indexes) and stash them in driver_data */ + for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + unsigned long rate = freq_table[i].frequency * 1000 + 999; + struct dev_pm_opp *opp = dev_pm_opp_find_freq_floor(cpu_dev, &rate); + + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + goto out_free_cpufreq_table; + } + freq_table[i].driver_data = dev_pm_opp_get_level(opp); + dev_pm_opp_put(opp); + } + + priv->cpu_dev = cpu_dev; + priv->reg_base = reg_base; + priv->info = info; + policy->driver_data = priv; + policy->freq_table = freq_table; + + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); + if (!transition_latency) + transition_latency = CPUFREQ_ETERNAL; + + policy->cpuinfo.transition_latency = transition_latency; + policy->dvfs_possible_from_any_cpu = true; + policy->fast_switch_possible = true; + + if (policy_has_boost_freq(policy)) { + ret = cpufreq_enable_boost_support(); + if (ret) { + dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); + } else { + apple_soc_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; + apple_soc_cpufreq_driver.boost_enabled = true; + } + } + + return 0; + +out_free_cpufreq_table: + dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); +out_free_priv: + kfree(priv); +out_free_opp: + dev_pm_opp_remove_all_dynamic(cpu_dev); +out_iounmap: + iounmap(reg_base); + return ret; +} + +static int apple_soc_cpufreq_exit(struct cpufreq_policy *policy) +{ + struct apple_cpu_priv *priv = policy->driver_data; + + dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); + dev_pm_opp_remove_all_dynamic(priv->cpu_dev); + iounmap(priv->reg_base); + kfree(priv); + + return 0; +} + +static struct cpufreq_driver apple_soc_cpufreq_driver = { + .name = "apple-cpufreq", + .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, + .verify = cpufreq_generic_frequency_table_verify, + .attr = cpufreq_generic_attr, + .get = apple_soc_cpufreq_get_rate, + .init = apple_soc_cpufreq_init, + .exit = apple_soc_cpufreq_exit, + .target_index = apple_soc_cpufreq_set_target, + .fast_switch = apple_soc_cpufreq_fast_switch, + .register_em = cpufreq_register_em_with_opp, + .attr = apple_soc_cpufreq_hw_attr, +}; + +static int __init apple_soc_cpufreq_module_init(void) +{ + if (!of_machine_is_compatible("apple,arm-platform")) + return -ENODEV; + + return cpufreq_register_driver(&apple_soc_cpufreq_driver); +} +module_init(apple_soc_cpufreq_module_init); + +static void __exit apple_soc_cpufreq_module_exit(void) +{ + cpufreq_unregister_driver(&apple_soc_cpufreq_driver); +} +module_exit(apple_soc_cpufreq_module_exit); + +MODULE_DEVICE_TABLE(of, apple_soc_cpufreq_of_match); +MODULE_AUTHOR("Hector Martin <[email protected]>"); +MODULE_DESCRIPTION("Apple SoC CPU cluster DVFS driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 6ac3800db450..8ab672883043 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -103,6 +103,8 @@ static const struct of_device_id allowlist[] __initconst = { static const struct of_device_id blocklist[] __initconst = { { .compatible = "allwinner,sun50i-h6", }, + { .compatible = "apple,arm-platform", }, + { .compatible = "arm,vexpress", }, { .compatible = "calxeda,highbank", }, @@ -160,6 +162,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "ti,am43", }, { .compatible = "ti,dra7", }, { .compatible = "ti,omap3", }, + { 
.compatible = "ti,am625", }, { .compatible = "qcom,ipq8064", }, { .compatible = "qcom,apq8064", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 69b3d61852ac..7e56a42750ea 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1207,6 +1207,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) goto err_free_rcpumask; + init_completion(&policy->kobj_unregister); ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, cpufreq_global_kobject, "policy%u", cpu); if (ret) { @@ -1245,7 +1246,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); init_waitqueue_head(&policy->transition_wait); - init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); policy->cpu = cpu; diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 1570d6f3e75d..55c7ffd37d1c 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -128,25 +128,23 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i, j, count; - len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); - len += scnprintf(buf + len, PAGE_SIZE - len, " : "); + len += sysfs_emit_at(buf, len, " From : To\n"); + len += sysfs_emit_at(buf, len, " : "); for (i = 0; i < stats->state_num; i++) { if (len >= PAGE_SIZE) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ", - stats->freq_table[i]); + len += sysfs_emit_at(buf, len, "%9u ", stats->freq_table[i]); } if (len >= PAGE_SIZE) return PAGE_SIZE; - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); for (i = 0; i < stats->state_num; i++) { if (len >= PAGE_SIZE) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "%9u: ", - stats->freq_table[i]); + len += sysfs_emit_at(buf, len, "%9u: ", stats->freq_table[i]); for (j = 0; j < stats->state_num; j++) { if (len >= PAGE_SIZE) @@ -157,11 +155,11 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) else count = stats->trans_table[i * stats->max_state + j]; - len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ", count); + len += sysfs_emit_at(buf, len, "%9u ", count); } if (len >= PAGE_SIZE) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); } if (len >= PAGE_SIZE) { diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6ff73c30769f..fd73d6d2b808 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -298,6 +298,7 @@ static int hwp_active __read_mostly; static int hwp_mode_bdw __read_mostly; static bool per_cpu_limits __read_mostly; static bool hwp_boost __read_mostly; +static bool hwp_forced __read_mostly; static struct cpufreq_driver *intel_pstate_driver __read_mostly; @@ -1679,12 +1680,12 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) return; /* - * If powerup EPP is something other than chipset default 0x80 and - * - is more performance oriented than 0x80 (default balance_perf EPP) + * If the EPP is set by firmware, which means that firmware enabled HWP + * - Is equal or less than 0x80 (default balance_perf EPP) * - But less performance oriented than performance EPP * then use this as new balance_perf EPP. 
*/ - if (cpudata->epp_default < HWP_EPP_BALANCE_PERFORMANCE && + if (hwp_forced && cpudata->epp_default <= HWP_EPP_BALANCE_PERFORMANCE && cpudata->epp_default > HWP_EPP_PERFORMANCE) { epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default; return; @@ -2378,6 +2379,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(COMETLAKE, core_funcs), X86_MATCH(ICELAKE_X, core_funcs), X86_MATCH(TIGERLAKE, core_funcs), + X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); @@ -3384,7 +3386,7 @@ static int __init intel_pstate_init(void) id = x86_match_cpu(hwp_support_ids); if (id) { - bool hwp_forced = intel_pstate_hwp_is_enabled(); + hwp_forced = intel_pstate_hwp_is_enabled(); if (hwp_forced) pr_info("HWP enabled by BIOS\n"); diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 3e000e1a75c6..4c57c6725c13 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -407,10 +407,10 @@ static int guess_fsb(int mult) { int speed = cpu_khz / 1000; int i; - int speeds[] = { 666, 1000, 1333, 2000 }; + static const int speeds[] = { 666, 1000, 1333, 2000 }; int f_max, f_min; - for (i = 0; i < 4; i++) { + for (i = 0; i < ARRAY_SIZE(speeds); i++) { f_max = ((speeds[i] * mult) + 50) / 100; f_max += (ROUNDING / 2); f_min = f_max - ROUNDING; diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index f0e0a35c7f21..f80339779084 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -160,6 +160,7 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, struct mtk_cpufreq_data *data; struct device *dev = &pdev->dev; struct resource *res; + struct of_phandle_args args; void __iomem *base; int ret, i; int index; @@ -168,11 +169,14 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, if (!data) return -ENOMEM; - index = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", - "#performance-domain-cells", - policy->cpus); - if (index < 0) - return index; + ret = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", + "#performance-domain-cells", + policy->cpus, &args); + if (ret < 0) + return ret; + + index = args.args[0]; + of_node_put(args.np); res = platform_get_resource(pdev, IORESOURCE_MEM, index); if (!res) { diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 833589bc95e4..340fed35e45d 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -4,6 +4,7 @@ */ #include <linux/bitfield.h> +#include <linux/clk-provider.h> #include <linux/cpufreq.h> #include <linux/init.h> #include <linux/interconnect.h> @@ -43,7 +44,6 @@ struct qcom_cpufreq_soc_data { struct qcom_cpufreq_data { void __iomem *base; struct resource *res; - const struct qcom_cpufreq_soc_data *soc_data; /* * Mutex to synchronize between de-init sequence and re-starting LMh @@ -55,12 +55,18 @@ struct qcom_cpufreq_data { bool cancel_throttle; struct delayed_work throttle_work; struct cpufreq_policy *policy; + struct clk_hw cpu_clk; bool per_core_dcvs; struct freq_qos_request throttle_freq_req; }; +static struct { + struct qcom_cpufreq_data *data; + const struct qcom_cpufreq_soc_data *soc_data; +} qcom_cpufreq; + static unsigned long cpu_hw_rate, xo_rate; static bool icc_scaling_enabled; @@ -109,7 +115,7 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, unsigned int index) { struct qcom_cpufreq_data *data = 
policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; unsigned long freq = policy->freq_table[index].frequency; unsigned int i; @@ -125,9 +131,37 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, return 0; } +static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) +{ + unsigned int lval; + + if (qcom_cpufreq.soc_data->reg_current_vote) + lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_current_vote) & 0x3ff; + else + lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_domain_state) & 0xff; + + return lval * xo_rate; +} + +/* Get the current frequency of the CPU (after throttling) */ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) { struct qcom_cpufreq_data *data; + struct cpufreq_policy *policy; + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) + return 0; + + data = policy->driver_data; + + return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ; +} + +/* Get the frequency requested by the cpufreq core for the CPU */ +static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) +{ + struct qcom_cpufreq_data *data; const struct qcom_cpufreq_soc_data *soc_data; struct cpufreq_policy *policy; unsigned int index; @@ -137,7 +171,7 @@ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) return 0; data = policy->driver_data; - soc_data = data->soc_data; + soc_data = qcom_cpufreq.soc_data; index = readl_relaxed(data->base + soc_data->reg_perf_state); index = min(index, LUT_MAX_ENTRIES - 1); @@ -149,7 +183,7 @@ static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { struct qcom_cpufreq_data *data = policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; unsigned int index; unsigned int i; @@ -173,7 +207,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, unsigned long rate; int ret; struct qcom_cpufreq_data *drv_data = policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = drv_data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL); if (!table) @@ -193,6 +227,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, } } else if (ret != -ENODEV) { dev_err(cpu_dev, "Invalid opp table in device tree\n"); + kfree(table); return ret; } else { policy->fast_switch_possible = true; @@ -286,18 +321,6 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } -static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) -{ - unsigned int lval; - - if (data->soc_data->reg_current_vote) - lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff; - else - lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff; - - return lval * xo_rate; -} - static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) { struct cpufreq_policy *policy = data->policy; @@ -341,7 +364,7 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) * If h/w throttled frequency is higher than what cpufreq has requested * for, then stop polling and switch back to interrupt mechanism. 
*/ - if (throttled_freq >= qcom_cpufreq_hw_get(cpu)) + if (throttled_freq >= qcom_cpufreq_get_freq(cpu)) enable_irq(data->throttle_irq); else mod_delayed_work(system_highpri_wq, &data->throttle_work, @@ -367,9 +390,9 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) disable_irq_nosync(c_data->throttle_irq); schedule_delayed_work(&c_data->throttle_work, 0); - if (c_data->soc_data->reg_intr_clr) + if (qcom_cpufreq.soc_data->reg_intr_clr) writel_relaxed(GT_IRQ_STATUS, - c_data->base + c_data->soc_data->reg_intr_clr); + c_data->base + qcom_cpufreq.soc_data->reg_intr_clr); return IRQ_HANDLED; } @@ -503,8 +526,6 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) struct of_phandle_args args; struct device_node *cpu_np; struct device *cpu_dev; - struct resource *res; - void __iomem *base; struct qcom_cpufreq_data *data; int ret, index; @@ -526,51 +547,18 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) return ret; index = args.args[0]; - - res = platform_get_resource(pdev, IORESOURCE_MEM, index); - if (!res) { - dev_err(dev, "failed to get mem resource %d\n", index); - return -ENODEV; - } - - if (!request_mem_region(res->start, resource_size(res), res->name)) { - dev_err(dev, "failed to request resource %pR\n", res); - return -EBUSY; - } - - base = ioremap(res->start, resource_size(res)); - if (!base) { - dev_err(dev, "failed to map resource %pR\n", res); - ret = -ENOMEM; - goto release_region; - } - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) { - ret = -ENOMEM; - goto unmap_base; - } - - data->soc_data = of_device_get_match_data(&pdev->dev); - data->base = base; - data->res = res; + data = &qcom_cpufreq.data[index]; /* HW should be in enabled state to proceed */ - if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) { + if (!(readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_enable) & 0x1)) { dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index); - ret = -ENODEV; - goto error; + return -ENODEV; } - if (readl_relaxed(base + data->soc_data->reg_dcvs_ctrl) & 0x1) + if (readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_dcvs_ctrl) & 0x1) data->per_core_dcvs = true; qcom_get_related_cpus(index, policy->cpus); - if (cpumask_empty(policy->cpus)) { - dev_err(dev, "Domain-%d failed to get related CPUs\n", index); - ret = -ENOENT; - goto error; - } policy->driver_data = data; policy->dvfs_possible_from_any_cpu = true; @@ -578,14 +566,13 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy); if (ret) { dev_err(dev, "Domain-%d failed to read LUT\n", index); - goto error; + return ret; } ret = dev_pm_opp_get_opp_count(cpu_dev); if (ret <= 0) { dev_err(cpu_dev, "Failed to add OPPs\n"); - ret = -ENODEV; - goto error; + return -ENODEV; } if (policy_has_boost_freq(policy)) { @@ -594,18 +581,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); } - ret = qcom_cpufreq_hw_lmh_init(policy, index); - if (ret) - goto error; - - return 0; -error: - kfree(data); -unmap_base: - iounmap(base); -release_region: - release_mem_region(res->start, resource_size(res)); - return ret; + return qcom_cpufreq_hw_lmh_init(policy, index); } static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) @@ -658,20 +634,33 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .ready = qcom_cpufreq_ready, }; +static unsigned long qcom_cpufreq_hw_recalc_rate(struct clk_hw *hw, unsigned long 
parent_rate) +{ + struct qcom_cpufreq_data *data = container_of(hw, struct qcom_cpufreq_data, cpu_clk); + + return qcom_lmh_get_throttle_freq(data); +} + +static const struct clk_ops qcom_cpufreq_hw_clk_ops = { + .recalc_rate = qcom_cpufreq_hw_recalc_rate, +}; + static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { + struct clk_hw_onecell_data *clk_data; + struct device *dev = &pdev->dev; struct device *cpu_dev; struct clk *clk; - int ret; + int ret, i, num_domains; - clk = clk_get(&pdev->dev, "xo"); + clk = clk_get(dev, "xo"); if (IS_ERR(clk)) return PTR_ERR(clk); xo_rate = clk_get_rate(clk); clk_put(clk); - clk = clk_get(&pdev->dev, "alternate"); + clk = clk_get(dev, "alternate"); if (IS_ERR(clk)) return PTR_ERR(clk); @@ -689,11 +678,70 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) if (ret) return ret; + /* Allocate qcom_cpufreq_data based on the available frequency domains in DT */ + num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * 4); + if (num_domains <= 0) + return num_domains; + + qcom_cpufreq.data = devm_kzalloc(dev, sizeof(struct qcom_cpufreq_data) * num_domains, + GFP_KERNEL); + if (!qcom_cpufreq.data) + return -ENOMEM; + + qcom_cpufreq.soc_data = of_device_get_match_data(dev); + + clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_domains), GFP_KERNEL); + if (!clk_data) + return -ENOMEM; + + clk_data->num = num_domains; + + for (i = 0; i < num_domains; i++) { + struct qcom_cpufreq_data *data = &qcom_cpufreq.data[i]; + struct clk_init_data clk_init = {}; + struct resource *res; + void __iomem *base; + + base = devm_platform_get_and_ioremap_resource(pdev, i, &res); + if (IS_ERR(base)) { + dev_err(dev, "Failed to map resource %pR\n", res); + return PTR_ERR(base); + } + + data->base = base; + data->res = res; + + /* Register CPU clock for each frequency domain */ + clk_init.name = kasprintf(GFP_KERNEL, "qcom_cpufreq%d", i); + if (!clk_init.name) + return -ENOMEM; + + clk_init.flags = CLK_GET_RATE_NOCACHE; + clk_init.ops = &qcom_cpufreq_hw_clk_ops; + data->cpu_clk.init = &clk_init; + + ret = devm_clk_hw_register(dev, &data->cpu_clk); + if (ret < 0) { + dev_err(dev, "Failed to register clock %d: %d\n", i, ret); + kfree(clk_init.name); + return ret; + } + + clk_data->hws[i] = &data->cpu_clk; + kfree(clk_init.name); + } + + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, clk_data); + if (ret < 0) { + dev_err(dev, "Failed to add clock provider\n"); + return ret; + } + ret = cpufreq_register_driver(&cpufreq_qcom_hw_driver); if (ret) - dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n"); + dev_err(dev, "CPUFreq HW driver failed to register\n"); else - dev_dbg(&pdev->dev, "QCOM CPUFreq HW driver initialized\n"); + dev_dbg(dev, "QCOM CPUFreq HW driver initialized\n"); return ret; } diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 7d0d62a06bf3..c6fdf019dbde 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -39,7 +39,7 @@ static struct clk *spear1340_cpu_get_possible_parent(unsigned long newfreq) * In SPEAr1340, cpu clk's parent sys clk can take input from * following sources */ - const char *sys_clk_src[] = { + static const char * const sys_clk_src[] = { "sys_syn_clk", "pll1_clk", "pll2_clk", diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 6c88827f4e62..f98f53bf1011 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -65,8 +65,8 @@ 
struct tegra186_cpufreq_cluster { struct tegra186_cpufreq_data { void __iomem *regs; - struct tegra186_cpufreq_cluster *clusters; const struct tegra186_cpufreq_cpu *cpus; + struct tegra186_cpufreq_cluster clusters[]; }; static int tegra186_cpufreq_init(struct cpufreq_policy *policy) @@ -221,15 +221,12 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev) struct tegra_bpmp *bpmp; unsigned int i = 0, err; - data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(&pdev->dev, + struct_size(data, clusters, TEGRA186_NUM_CLUSTERS), + GFP_KERNEL); if (!data) return -ENOMEM; - data->clusters = devm_kcalloc(&pdev->dev, TEGRA186_NUM_CLUSTERS, - sizeof(*data->clusters), GFP_KERNEL); - if (!data->clusters) - return -ENOMEM; - data->cpus = tegra186_cpus; bpmp = tegra_bpmp_get(&pdev->dev); diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index f64180dd2005..be4209d97cb3 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -39,6 +39,14 @@ #define OMAP34xx_ProdID_SKUID 0x4830A20C #define OMAP3_SYSCON_BASE (0x48000000 + 0x2000 + 0x270) +#define AM625_EFUSE_K_MPU_OPP 11 +#define AM625_EFUSE_S_MPU_OPP 19 +#define AM625_EFUSE_T_MPU_OPP 20 + +#define AM625_SUPPORT_K_MPU_OPP BIT(0) +#define AM625_SUPPORT_S_MPU_OPP BIT(1) +#define AM625_SUPPORT_T_MPU_OPP BIT(2) + #define VERSION_COUNT 2 struct ti_cpufreq_data; @@ -104,6 +112,25 @@ static unsigned long omap3_efuse_xlate(struct ti_cpufreq_data *opp_data, return BIT(efuse); } +static unsigned long am625_efuse_xlate(struct ti_cpufreq_data *opp_data, + unsigned long efuse) +{ + unsigned long calculated_efuse = AM625_SUPPORT_K_MPU_OPP; + + switch (efuse) { + case AM625_EFUSE_T_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_T_MPU_OPP; + fallthrough; + case AM625_EFUSE_S_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_S_MPU_OPP; + fallthrough; + case AM625_EFUSE_K_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_K_MPU_OPP; + } + + return calculated_efuse; +} + static struct ti_cpufreq_soc_data am3x_soc_data = { .efuse_xlate = amx3_efuse_xlate, .efuse_fallback = AM33XX_800M_ARM_MPU_MAX_FREQ, @@ -198,6 +225,14 @@ static struct ti_cpufreq_soc_data am3517_soc_data = { .multi_regulator = false, }; +static struct ti_cpufreq_soc_data am625_soc_data = { + .efuse_xlate = am625_efuse_xlate, + .efuse_offset = 0x0018, + .efuse_mask = 0x07c0, + .efuse_shift = 0x6, + .rev_offset = 0x0014, + .multi_regulator = false, +}; /** * ti_cpufreq_get_efuse() - Parse and return efuse value present on SoC @@ -301,6 +336,7 @@ static const struct of_device_id ti_cpufreq_of_match[] = { { .compatible = "ti,dra7", .data = &dra7_soc_data }, { .compatible = "ti,omap34xx", .data = &omap34xx_soc_data, }, { .compatible = "ti,omap36xx", .data = &omap36xx_soc_data, }, + { .compatible = "ti,am625", .data = &am625_soc_data, }, /* legacy */ { .compatible = "ti,omap3430", .data = &omap34xx_soc_data, }, { .compatible = "ti,omap3630", .data = &omap36xx_soc_data, }, diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 821984947ed9..c80cf9ddabd8 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -181,7 +181,8 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) if (ret) goto remove_pd; - pr_info("Initialized CPU PM domain topology\n"); + pr_info("Initialized CPU PM domain topology using %s mode\n", + use_osi ? 
"OSI" : "PC"); return 0; put_node: diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c index 252f2a9686a6..7ca3d7d9b5ea 100644 --- a/drivers/cpuidle/dt_idle_states.c +++ b/drivers/cpuidle/dt_idle_states.c @@ -211,18 +211,15 @@ int dt_init_idle_driver(struct cpuidle_driver *drv, of_node_put(cpu_node); if (err) return err; - /* - * Update the driver state count only if some valid DT idle states - * were detected - */ - if (i) - drv->state_count = state_idx; + + /* Set the number of total supported idle states. */ + drv->state_count = state_idx; /* * Return the number of present and valid DT idle states, which can * also be 0 on platforms with missing DT idle states or legacy DT * configuration predating the DT idle states bindings. */ - return i; + return state_idx - start_idx; } EXPORT_SYMBOL_GPL(dt_init_idle_driver); diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 97086fab698e..903325aac991 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,13 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +static struct resource hmem_active = { + .name = "HMEM devices", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; + void hmem_register_device(int target_nid, struct resource *r) { /* define a clean / non-busy resource for the platform device */ @@ -41,6 +48,12 @@ void hmem_register_device(int target_nid, struct resource *r) goto out_pdev; } + if (!__request_region(&hmem_active, res.start, resource_size(&res), + dev_name(&pdev->dev), 0)) { + dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res); + goto out_active; + } + pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, @@ -66,6 +79,8 @@ void hmem_register_device(int target_nid, struct resource *r) return; out_resource: + __release_region(&hmem_active, res.start, resource_size(&res)); +out_active: platform_device_put(pdev); out_pdev: memregion_free(id); @@ -73,15 +88,6 @@ out_pdev: static __init int hmem_register_one(struct resource *res, void *data) { - /* - * If the resource is not a top-level resource it was already - * assigned to a device by the HMAT parsing. - */ - if (res->parent != &iomem_resource) { - pr_info("HMEM: skip %pr, already claimed\n", res); - return 0; - } - hmem_register_device(phys_to_target_node(res->start), res); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 0b52af415b28..ce64ca1c6e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 2): fw_name = FIRMWARE_VANGOGH; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case IP_VERSION(3, 0, 16): fw_name = FIRMWARE_DIMGREY_CAVEFISH; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 6925e0280dbe..f4f3d2665a6b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) help @@ -12,6 +13,12 @@ config DRM_AMD_DC support for AMDGPU. 
This adds required support for Vega and Raven ASICs. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. + config DRM_AMD_DC_DCN def_bool n help diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 461c62c88413..de77054195c6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3723,12 +3723,16 @@ out: static u8 bigjoiner_pipes(struct drm_i915_private *i915) { + u8 pipes; + if (DISPLAY_VER(i915) >= 12) - return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); + pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); else if (DISPLAY_VER(i915) >= 11) - return BIT(PIPE_B) | BIT(PIPE_C); + pipes = BIT(PIPE_B) | BIT(PIPE_C); else - return 0; + pipes = 0; + + return pipes & RUNTIME_INFO(i915)->pipe_mask; } static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d0b03a928b9a..7caa3412a244 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -625,8 +625,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) @@ -1017,6 +1022,11 @@ static void mmio_invalidate_full(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; + if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS)) + rb.bit = _MASKED_BIT_ENABLE(rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); awake |= engine->mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? 
timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 2403ccd52c74..bba8cb6e8ae4 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915) u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL); struct dram_info *dram_info = &i915->dram_info; - val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val); - switch (val) { + switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) { case 0: dram_info->type = INTEL_DRAM_DDR4; break; diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 81e688975c6a..a901e4e33d81 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -938,6 +938,8 @@ static int asus_ec_probe(struct platform_device *pdev) ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors); ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, sizeof(struct ec_sensor), GFP_KERNEL); + if (!ec_data->sensors) + return -ENOMEM; status = setup_lock_data(dev); if (status) { diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 8bf32c6c85d9..9bee4d33fbdf 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) */ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) { for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) { - if (host_bridge->device == tjmax_pci_table[i].device) + if (host_bridge->device == tjmax_pci_table[i].device) { + pci_dev_put(host_bridge); return tjmax_pci_table[i].tjmax; + } } } + pci_dev_put(host_bridge); for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) { if (strstr(c->x86_model_id, tjmax_table[i].id)) @@ -533,6 +536,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) { struct temp_data *tdata = pdata->core_data[indx]; + /* if we errored on add then this is already gone */ + if (!tdata) + return; + /* Remove the sysfs attributes */ sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c index 05f68e9c9477..23b9f94fe0a9 100644 --- a/drivers/hwmon/i5500_temp.c +++ b/drivers/hwmon/i5500_temp.c @@ -117,7 +117,7 @@ static int i5500_temp_probe(struct pci_dev *pdev, u32 tstimer; s8 tsfsc; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Failed to enable device\n"); return err; diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index f6ec165c0fa8..1837cccd993c 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev) return; out_register: + list_del(&data->list); hwmon_device_unregister(data->hwmon_dev); out_user: ipmi_destroy_user(data->user); diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index 2a57f4b60c29..e06186986444 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, * Shunt Voltage Sum register has 14-bit value with 1-bit shift * Other Shunt Voltage registers have 12 bits with 3-bit shift */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) *val = sign_extend32(regval >> 1, 14); else *val = sign_extend32(regval >> 3, 12); @@ -465,7 +465,7 @@ static 
int ina3221_write_curr(struct device *dev, u32 attr, * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe; else regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c index 7404e974762f..2dbbbac9de09 100644 --- a/drivers/hwmon/ltc2947-core.c +++ b/drivers/hwmon/ltc2947-core.c @@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val, return ret; /* in milidegrees celcius, temp is given by: */ - *val = (__val * 204) + 550; + *val = (__val * 204) + 5500; return 0; } diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index fe0cd205502d..f58943cb1341 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -852,7 +852,8 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, CDNS_I2C_POLL_US, CDNS_I2C_TIMEOUT_US); if (ret) { ret = -EAGAIN; - i2c_recover_bus(adap); + if (id->adap.bus_recovery_info) + i2c_recover_bus(adap); goto out; } @@ -1263,8 +1264,13 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->rinfo.pinctrl = devm_pinctrl_get(&pdev->dev); if (IS_ERR(id->rinfo.pinctrl)) { + int err = PTR_ERR(id->rinfo.pinctrl); + dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n"); - return PTR_ERR(id->rinfo.pinctrl); + if (err != -ENODEV) + return err; + } else { + id->adap.bus_recovery_info = &id->rinfo; } id->membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r_mem); @@ -1283,7 +1289,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->adap.retries = 3; /* Default retry value. 
*/ id->adap.algo_data = id; id->adap.dev.parent = &pdev->dev; - id->adap.bus_recovery_info = &id->rinfo; init_completion(&id->xfer_done); snprintf(id->adap.name, sizeof(id->adap.name), "Cadence I2C at %08lx", (unsigned long)r_mem->start); diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 3082183bd66a..fc70920c4dda 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1132,7 +1132,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, int i, result; unsigned int temp; int block_data = msgs->flags & I2C_M_RECV_LEN; - int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; + int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE && + msgs->len >= DMA_THRESHOLD && !block_data; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -1298,7 +1299,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter, result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic); } else { if (!atomic && - i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD) + i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD && + msgs[i].flags & I2C_M_DMA_SAFE) result = i2c_imx_dma_write(i2c_imx, &msgs[i]); else result = i2c_imx_write(i2c_imx, &msgs[i], atomic); diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 0c365b57d957..83457359ec45 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2393,8 +2393,17 @@ static struct platform_driver npcm_i2c_bus_driver = { static int __init npcm_i2c_init(void) { + int ret; + npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - return platform_driver_register(&npcm_i2c_bus_driver); + + ret = platform_driver_register(&npcm_i2c_bus_driver); + if (ret) { + debugfs_remove_recursive(npcm_i2c_debugfs_dir); + return ret; + } + + return 0; } module_init(npcm_i2c_init); diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 84a77512614d..8fce98bb77ff 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -626,7 +626,6 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i dev_err(gi2c->se.dev, "I2C timeout gpi flags:%d addr:0x%x\n", gi2c->cur->flags, gi2c->cur->addr); gi2c->err = -ETIMEDOUT; - goto err; } if (gi2c->err) { diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index b4edf10e8fd0..7539b0740351 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -467,6 +467,7 @@ static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_driver *driver; + bool do_power_on; int status; if (!client) @@ -545,8 +546,8 @@ static int i2c_device_probe(struct device *dev) if (status < 0) goto err_clear_wakeup_irq; - status = dev_pm_domain_attach(&client->dev, - !i2c_acpi_waive_d0_probe(dev)); + do_power_on = !i2c_acpi_waive_d0_probe(dev); + status = dev_pm_domain_attach(&client->dev, do_power_on); if (status) goto err_clear_wakeup_irq; @@ -585,7 +586,7 @@ static int i2c_device_probe(struct device *dev) err_release_driver_resources: devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, do_power_on); err_clear_wakeup_irq: dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -610,7 +611,7 @@ static void i2c_device_remove(struct device *dev) 
devres_release_group(&client->dev, client->devres_group_id); - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, true); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 3a4952935366..3d9c5758d8a4 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -211,12 +211,14 @@ static int raydium_i2c_send(struct i2c_client *client, error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer)); if (likely(!error)) - return 0; + goto out; msleep(RM_RETRY_DELAY_MS); } while (++tries < RM_MAX_RETRIES); dev_err(&client->dev, "%s failed: %d\n", __func__, error); +out: + kfree(tx_buf); return error; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 5a8f780e7ffd..bc94059a5b87 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void) info = dmar_alloc_pci_notify_info(dev, BUS_NOTIFY_ADD_DEVICE); if (!info) { + pci_dev_put(dev); return dmar_dev_scope_status; } else { dmar_pci_bus_add_dev(info); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 996a8b5ee5ee..5287efe247b1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1396,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain) spin_unlock_irqrestore(&domain->lock, flags); } +/* + * The extra devTLB flush quirk impacts those QAT devices with PCI device + * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() + * check because it applies only to the built-in QAT devices and it doesn't + * grant additional privileges. 
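The raydium_i2c_send() fix above closes a leak of the DMA-safe bounce buffer by routing the success path through the same kfree() as the failure path. A minimal sketch of that single-exit pattern, with hypothetical names (send_with_bounce_buffer is illustrative, not the driver's real helper):

#include <linux/i2c.h>
#include <linux/slab.h>
#include <linux/string.h>

/* Hypothetical helper: copy caller data into a freshly allocated,
 * DMA-safe buffer, send it, and free the buffer on every path. */
static int send_with_bounce_buffer(struct i2c_client *client,
				   const void *data, size_t len)
{
	u8 *tx_buf;
	int error;

	tx_buf = kmemdup(data, len, GFP_KERNEL);	/* DMA-safe copy */
	if (!tx_buf)
		return -ENOMEM;

	error = i2c_master_send(client, tx_buf, len);

	kfree(tx_buf);		/* freed on success and failure alike */
	return error < 0 ? error : 0;
}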
+ */ +#define BUGGY_QAT_DEVID_MASK 0x4940 +static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) +{ + if (pdev->vendor != PCI_VENDOR_ID_INTEL) + return false; + + if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) + return false; + + return true; +} + static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -1478,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); + quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -3854,8 +3873,10 @@ static inline bool has_external_pci(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->external_facing) + if (pdev->external_facing) { + pci_dev_put(pdev); return true; + } return false; } @@ -4490,9 +4511,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (dev_is_pci(dev)) { if (ecap_dev_iotlb_support(iommu->ecap) && pci_ats_supported(pdev) && - dmar_ats_supported(pdev, iommu)) + dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; - + info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); @@ -4931,3 +4953,48 @@ static void __init check_tylersburg_isoch(void) pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", vtisochctrl); } + +/* + * Here we deal with a device TLB defect where device may inadvertently issue ATS + * invalidation completion before posted writes initiated with translated address + * that utilized translations matching the invalidation address range, violating + * the invalidation completion ordering. + * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is + * vulnerable to this defect. In other words, any dTLB invalidation initiated not + * under the control of the trusted/privileged host device driver must use this + * quirk. + * Device TLBs are invalidated under the following six conditions: + * 1. Device driver does DMA API unmap IOVA + * 2. Device driver unbind a PASID from a process, sva_unbind_device() + * 3. PASID is torn down, after PASID cache is flushed. e.g. process + * exit_mmap() due to crash + * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where + * VM has to free pages that were unmapped + * 5. Userspace driver unmaps a DMA buffer + * 6. Cache invalidation in vSVA usage (upcoming) + * + * For #1 and #2, device drivers are responsible for stopping DMA traffic + * before unmap/unbind. For #3, iommu driver gets mmu_notifier to + * invalidate TLB the same way as normal user unmap which will use this quirk. + * The dTLB invalidation after PASID cache flush does not need this quirk. + * + * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
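The masked compare in dev_needs_extra_dtlb_flush() is compact but easy to misread: ANDing the device ID with 0xfffc clears the two low bits, so a single equality test covers exactly the four consecutive IDs 0x4940-0x4943 named in the comment. A standalone sketch (function name is illustrative):

#include <linux/types.h>

static bool is_buggy_qat_devid(u16 devid)
{
	/* 0x4940, 0x4941, 0x4942 and 0x4943 all mask down to 0x4940 */
	return (devid & 0xfffc) == 0x4940;
}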
+ */ +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long mask, + u32 pasid, u16 qdep) +{ + u16 sid; + + if (likely(!info->dtlb_extra_inval)) + return; + + sid = PCI_DEVID(info->bus, info->devfn); + if (pasid == PASID_RID2PASID) { + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, address, mask); + } else { + qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid, + pasid, qdep, address, mask); + } +} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513..db9df7c3790c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -623,6 +623,7 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; + u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ @@ -728,6 +729,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order); +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long pages, + u32 pasid, u16 qdep); void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, u32 pasid); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7d08eb034f2d..03b25358946c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm, return; qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); - if (info->ats_enabled) + if (info->ats_enabled) { qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, svm->pasid, sdev->qdep, address, order_base_2(pages)); + quirk_extra_dev_tlb_flush(info, address, order_base_2(pages), + svm->pasid, sdev->qdep); + } } static void intel_flush_svm_range_dev(struct intel_svm *svm, @@ -745,12 +748,16 @@ bad_req: * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. 
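Note that the qi_flush_dev_iotlb() helpers used by quirk_extra_dev_tlb_flush() take a size order (an address-mask bit count), not a byte or page count; the SVM path converts a page count with order_base_2(), which rounds up to the next power of two. A small sketch of that conversion (the helper name is hypothetical):

#include <linux/log2.h>

static unsigned int pages_to_inv_order(unsigned long pages)
{
	/* 1 page -> order 0; 3 pages -> order 2 (flushed as 4 pages);
	 * 512 pages -> order 9 */
	return order_base_2(pages);
}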
*/ - if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req)) - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + if (!pdev) + goto bad_req; - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], - iommu->prq_seq_number++); + if (intel_svm_prq_report(iommu, &pdev->dev, req)) + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + else + trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + iommu->prq_seq_number++); + pci_dev_put(pdev); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 542dde9d2609..144027035892 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -35,11 +35,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, struct frame_vector *vec) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int ret_pin_user_pages_fast = 0; - int ret = 0; - int err; + int ret; if (nr_frames == 0) return 0; @@ -52,57 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = pin_user_pages_fast(start, nr_frames, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, (struct page **)(vec->ptrs)); - if (ret > 0) { - vec->got_ref = true; - vec->is_pfns = false; - goto out_unlocked; - } - ret_pin_user_pages_fast = ret; - - mmap_read_lock(mm); - vec->got_ref = false; - vec->is_pfns = true; - ret = 0; - do { - unsigned long *nums = frame_vector_pfns(vec); - - vma = vma_lookup(mm, start); - if (!vma) - break; - - while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { - err = follow_pfn(vma, start, &nums[ret]); - if (err) { - if (ret) - goto out; - // If follow_pfn() returns -EINVAL, then this - // is not an IO mapping or a raw PFN mapping. - // In that case, return the original error from - // pin_user_pages_fast(). Otherwise this - // function would return -EINVAL when - // pin_user_pages_fast() returned -ENOMEM, - // which makes debugging hard. - if (err == -EINVAL && ret_pin_user_pages_fast) - ret = ret_pin_user_pages_fast; - else - ret = err; - goto out; - } - start += PAGE_SIZE; - ret++; - } - /* Bail out if VMA doesn't completely cover the tail page. */ - if (start < vma->vm_end) - break; - } while (ret < nr_frames); -out: - mmap_read_unlock(mm); -out_unlocked: - if (!ret) - ret = -EFAULT; - if (ret > 0) - vec->nr_frames = ret; - return ret; + vec->got_ref = true; + vec->is_pfns = false; + vec->nr_frames = ret; + + if (likely(ret > 0)) + return ret; + + /* This used to (racily) return non-refcounted pfns. Let people know */ + WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); + vec->nr_frames = 0; + return ret ? 
ret : -EFAULT; } EXPORT_SYMBOL(get_vaddr_frames); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index c5de202f530a..de1cc9e1ae57 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1484,6 +1484,11 @@ void mmc_init_erase(struct mmc_card *card) card->pref_erase = 0; } +static bool is_trim_arg(unsigned int arg) +{ + return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; +} + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, unsigned int arg, unsigned int qty) { @@ -1766,7 +1771,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) return -EOPNOTSUPP; - if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && + if (mmc_card_mmc(card) && is_trim_arg(arg) && !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) return -EOPNOTSUPP; @@ -1796,7 +1801,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, * identified by the card->eg_boundary flag. */ rem = card->erase_size - (from % card->erase_size); - if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { + if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { err = mmc_do_erase(card, from, from + rem - 1, arg); from += rem; if ((err) || (to <= from)) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8d9bceeff986..155ce2bdfe62 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3179,7 +3179,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card, struct mmc_test_dbgfs_file *df; if (card->debugfs_root) - debugfs_create_file(name, mode, card->debugfs_root, card, fops); + file = debugfs_create_file(name, mode, card->debugfs_root, + card, fops); df = kmalloc(sizeof(*df), GFP_KERNEL); if (!df) { diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index df941438aef5..26bc59b5a7cc 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2588,13 +2588,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev, return PTR_ERR(host->src_clk_cg); } - host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg"); + /* If present, always enable for this clock gate */ + host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg"); if (IS_ERR(host->sys_clk_cg)) host->sys_clk_cg = NULL; - /* If present, always enable for this clock gate */ - clk_prepare_enable(host->sys_clk_cg); - host->bulk_clks[0].id = "pclk_cg"; host->bulk_clks[1].id = "axi_cg"; host->bulk_clks[2].id = "ahb_cg"; diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 31ea0a2fce35..ffeb5759830f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1512,7 +1512,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc) * system resume back. 
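The CQHCI halt check in the sdhci-esdhc-imx hunk below turns on a one-character distinction: '&&' tests truthiness, so 'readl(...) && CQHCI_HALT' is true for any nonzero register value, while '&' actually isolates the halt bit. A minimal sketch, assuming CQHCI_HALT is bit 0 as defined in cqhci.h:

#include <linux/types.h>

static bool cqe_halted(u32 ctl)
{
	/* (ctl && 0x1) would be true whenever ctl != 0;
	 * the bitwise form tests only the halt bit itself. */
	return ctl & 0x00000001;	/* CQHCI_HALT */
}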
*/ cqhci_writel(cq_host, 0, CQHCI_CTL); - if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) dev_err(mmc_dev(host->mmc), "failed to exit halt state when enable CQE\n"); diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index b92a408f138d..bec3f9e3cd3f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -470,7 +470,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) } if (IS_ERR(sprd_host->pinctrl)) - return 0; + goto reset; switch (ios->signal_voltage) { case MMC_SIGNAL_VOLTAGE_180: @@ -498,6 +498,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) /* Wait for 300 ~ 500 us for pin state stable */ usleep_range(300, 500); + +reset: sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index fef03de85b99..c7ad32a75b57 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -373,6 +373,7 @@ static void sdhci_init(struct sdhci_host *host, int soft) if (soft) { /* force clock reconfiguration */ host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } } @@ -2293,11 +2294,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) } EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); +static bool sdhci_timing_has_preset(unsigned char timing) +{ + switch (timing) { + case MMC_TIMING_UHS_SDR12: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + return true; + }; + return false; +} + +static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) +{ + return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + sdhci_timing_has_preset(timing); +} + +static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) +{ + /* + * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK + * Frequency. Check if preset values need to be enabled, or the Driver + * Strength needs updating. Note, clock changes are handled separately. + */ + return !host->preset_enabled && + (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); +} + void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); + bool reinit_uhs = host->reinit_uhs; + bool turning_on_clk = false; u8 ctrl; + host->reinit_uhs = false; + if (ios->power_mode == MMC_POWER_UNDEFINED) return; @@ -2323,6 +2359,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { + turning_on_clk = ios->clock && !host->clock; + host->ops->set_clock(host, ios->clock); host->clock = ios->clock; @@ -2349,6 +2387,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_bus_width(host, ios->bus_width); + /* + * Special case to avoid multiple clock changes during voltage + * switching. 
+ */ + if (!reinit_uhs && + turning_on_clk && + host->timing == ios->timing && + host->version >= SDHCI_SPEC_300 && + !sdhci_presetable_values_change(host, ios)) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { @@ -2392,6 +2441,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + host->drv_type = ios->drv_type; } else { /* * According to SDHC Spec v3.00, if the Preset Value @@ -2419,19 +2469,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_uhs_signaling(host, ios->timing); host->timing = ios->timing; - if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && - ((ios->timing == MMC_TIMING_UHS_SDR12) || - (ios->timing == MMC_TIMING_UHS_SDR25) || - (ios->timing == MMC_TIMING_UHS_SDR50) || - (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50) || - (ios->timing == MMC_TIMING_MMC_DDR52))) { + if (sdhci_preset_needed(host, ios->timing)) { u16 preset; sdhci_enable_preset_value(host, true); preset = sdhci_get_preset_value(host); ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, preset); + host->drv_type = ios->drv_type; } /* Re-enable SD Clock */ @@ -3768,6 +3813,7 @@ int sdhci_resume_host(struct sdhci_host *host) sdhci_init(host, 0); host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER)); @@ -3830,6 +3876,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) /* Force clock and power re-program */ host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d750c464bd1e..87a3aaa07438 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -524,6 +524,8 @@ struct sdhci_host { unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ + u8 drv_type; /* Current UHS-I driver type */ + bool reinit_uhs; /* Force UHS-related re-initialization */ bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index 094197780776..ed3d0b8989a0 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -263,8 +263,10 @@ static void can327_feed_frame_to_netdev(struct can327 *elm, struct sk_buff *skb) { lockdep_assert_held(&elm->lock); - if (!netif_running(elm->dev)) + if (!netif_running(elm->dev)) { + kfree_skb(skb); return; + } /* Queue for NAPI pickup. * rx-offload will update stats and LEDs for us. 
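The can327 change above illustrates a general rule for receive handlers: once a function owns an skb, every early return must either hand the buffer off or free it, otherwise it leaks. A minimal sketch of that ownership pattern (rx_deliver is an illustrative name, not the driver's real function):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static void rx_deliver(struct net_device *dev, struct sk_buff *skb)
{
	if (!netif_running(dev)) {
		kfree_skb(skb);		/* dropping: release ownership */
		return;
	}

	netif_rx(skb);			/* hand off to the stack */
}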
diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c index 194c86e0f340..8f6dccd5a587 100644 --- a/drivers/net/can/cc770/cc770_isa.c +++ b/drivers/net/can/cc770/cc770_isa.c @@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "couldn't register device (err=%d)\n", err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n", priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_cc770dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 00d11e95fd98..e5575d2755e4 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1909,7 +1909,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev) cdev->hclk = devm_clk_get(cdev->dev, "hclk"); cdev->cclk = devm_clk_get(cdev->dev, "cclk"); - if (IS_ERR(cdev->cclk)) { + if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) { dev_err(cdev->dev, "no clock found\n"); ret = -ENODEV; } diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 8f184a852a0a..f2219aa2824b 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -120,7 +120,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) - return ret; + goto err_free_dev; mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); @@ -132,7 +132,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = m_can_class_register(mcan_class); if (ret) - goto err; + goto err_free_irq; /* Enable interrupt control at CAN wrapper IP */ writel(0x1, base + CTL_CSR_INT_CTL_OFFSET); @@ -144,8 +144,10 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) return 0; -err: +err_free_irq: pci_free_irq_vectors(pci); +err_free_dev: + m_can_class_free_dev(mcan_class->net); return ret; } @@ -161,6 +163,7 @@ static void m_can_pci_remove(struct pci_dev *pci) writel(0x0, priv->base + CTL_CSR_INT_CTL_OFFSET); m_can_class_unregister(mcan_class); + m_can_class_free_dev(mcan_class->net); pci_free_irq_vectors(pci); } diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c index d513fac50718..db3e767d5320 100644 --- a/drivers/net/can/sja1000/sja1000_isa.c +++ b/drivers/net/can/sja1000/sja1000_isa.c @@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", DRV_NAME, err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n", DRV_NAME, priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_sja1000dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 25f863b4f5f0..ddb7c5735c9a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -2091,8 +2091,11 @@ static int es58x_init_netdev(struct es58x_device 
*es58x_dev, int channel_idx) netdev->dev_port = channel_idx; ret = register_candev(netdev); - if (ret) + if (ret) { + es58x_dev->netdev[channel_idx] = NULL; + free_candev(netdev); return ret; + } netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0), es58x_dev->param->dql_min_limit); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 218b098b261d..47619e9cb005 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -47,6 +47,10 @@ #define MCBA_VER_REQ_USB 1 #define MCBA_VER_REQ_CAN 2 +/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */ +#define MCBA_VER_TERMINATION_ON 0 +#define MCBA_VER_TERMINATION_OFF 1 + #define MCBA_SIDL_EXID_MASK 0x8 #define MCBA_DLC_MASK 0xf #define MCBA_DLC_RTR_MASK 0x40 @@ -463,7 +467,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, priv->usb_ka_first_pass = false; } - if (msg->termination_state) + if (msg->termination_state == MCBA_VER_TERMINATION_ON) priv->can.termination = MCBA_TERMINATION_ENABLED; else priv->can.termination = MCBA_TERMINATION_DISABLED; @@ -785,9 +789,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term) }; if (term == MCBA_TERMINATION_ENABLED) - usb_msg.termination = 1; + usb_msg.termination = MCBA_VER_TERMINATION_ON; else - usb_msg.termination = 0; + usb_msg.termination = MCBA_VER_TERMINATION_OFF; mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg); diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 438e46af03e9..80f07bd20593 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -961,7 +961,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = { { .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", }, { .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", }, { .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", }, - { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", }, + { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", }, { .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", }, { .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", }, { .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", }, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a08f221e30d4..ac4ea93bd8dd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -13,6 +13,7 @@ #include "aq_ptp.h" #include "aq_filters.h" #include "aq_macsec.h" +#include "aq_main.h" #include <linux/ptp_clock_kernel.h> @@ -858,7 +859,7 @@ static int aq_set_ringparam(struct net_device *ndev, if (netif_running(ndev)) { ndev_running = true; - dev_close(ndev); + aq_ndev_close(ndev); } cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min); @@ -874,7 +875,7 @@ static int aq_set_ringparam(struct net_device *ndev, goto err_exit; if (ndev_running) - err = dev_open(ndev, NULL); + err = aq_ndev_open(ndev); err_exit: return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 8a0af371e7dc..77609dc0a08d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -58,7 +58,7 @@ struct net_device *aq_ndev_alloc(void) return ndev; } -static int aq_ndev_open(struct net_device *ndev) +int aq_ndev_open(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; @@ -88,7 +88,7 @@ err_exit: return 
err; } -static int aq_ndev_close(struct net_device *ndev) +int aq_ndev_close(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h index 99870865f66d..a78c1a168d8e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h @@ -16,5 +16,7 @@ DECLARE_STATIC_KEY_FALSE(aq_xdp_locking_key); void aq_ndev_schedule_work(struct work_struct *work); struct net_device *aq_ndev_alloc(void); +int aq_ndev_open(struct net_device *ndev); +int aq_ndev_close(struct net_device *ndev); #endif /* AQ_MAIN_H */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f623c12eaf95..2ca2b61b451f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -74,7 +74,7 @@ #include "fec.h" static void set_multicast_list(struct net_device *ndev); -static void fec_enet_itr_coal_init(struct net_device *ndev); +static void fec_enet_itr_coal_set(struct net_device *ndev); #define DRIVER_NAME "fec" @@ -1220,8 +1220,7 @@ fec_restart(struct net_device *ndev) writel(0, fep->hwp + FEC_IMASK); /* Init the interrupt coalescing */ - fec_enet_itr_coal_init(ndev); - + fec_enet_itr_coal_set(ndev); } static int fec_enet_ipc_handle_init(struct fec_enet_private *fep) @@ -2856,19 +2855,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev, return 0; } -static void fec_enet_itr_coal_init(struct net_device *ndev) -{ - struct ethtool_coalesce ec; - - ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - fec_enet_set_coalesce(ndev, &ec, NULL, NULL); -} - static int fec_enet_get_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, void *data) @@ -3623,6 +3609,10 @@ static int fec_enet_init(struct net_device *ndev) fep->rx_align = 0x3; fep->tx_align = 0x3; #endif + fep->rx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->tx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->rx_time_itr = FEC_ITR_ICTT_DEFAULT; + fep->tx_time_itr = FEC_ITR_ICTT_DEFAULT; /* Check mask of the streaming and coherent API */ ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32)); diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 560d1d442232..d3fdc290937f 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1741,11 +1741,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); /* If we can't map the skb, have the upper layer try later */ - if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { - dev_kfree_skb_any(skb); - skb = NULL; + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) return -ENOMEM; - } /* * Use the last 4 bytes of the SKB payload packet as the CRC, used for diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 4a6630586ec9..fc373472e4e1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue; **/ static int __init fm10k_init_module(void) { + int ret; + pr_info("%s\n", fm10k_driver_string); pr_info("%s\n", fm10k_copyright); @@ -43,7 +45,13 @@ static int __init 
fm10k_init_module(void) fm10k_dbg_init(); - return fm10k_register_pci_driver(); + ret = fm10k_register_pci_driver(); + if (ret) { + fm10k_dbg_exit(); + destroy_workqueue(fm10k_workqueue); + } + + return ret; } module_init(fm10k_init_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b5dcd15ced36..b3cb587a2032 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16644,6 +16644,8 @@ static struct pci_driver i40e_driver = { **/ static int __init i40e_init_module(void) { + int err; + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); @@ -16661,7 +16663,14 @@ static int __init i40e_init_module(void) } i40e_dbg_init(); - return pci_register_driver(&i40e_driver); + err = pci_register_driver(&i40e_driver); + if (err) { + destroy_workqueue(i40e_wq); + i40e_dbg_exit(); + return err; + } + + return 0; } module_init(i40e_init_module); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index d7465296f650..f71e132ede09 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5196,6 +5196,8 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { + int ret; + pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -5206,7 +5208,12 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - return pci_register_driver(&iavf_driver); + + ret = pci_register_driver(&iavf_driver); + if (ret) + destroy_workqueue(iavf_wq); + + return ret; } module_init(iavf_init_module); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 99933e89717a..e338fa572793 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4869,6 +4869,8 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { + int err; + pr_info("%s\n", ixgbevf_driver_string); pr_info("%s\n", ixgbevf_copyright); ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); @@ -4877,7 +4879,13 @@ static int __init ixgbevf_init_module(void) return -ENOMEM; } - return pci_register_driver(&ixgbevf_driver); + err = pci_register_driver(&ixgbevf_driver); + if (err) { + destroy_workqueue(ixgbevf_wq); + return err; + } + + return 0; } module_init(ixgbevf_init_module); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 282db6fe3b08..67aa02bb2b85 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -884,7 +884,7 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx) { #ifdef CONFIG_DCB - if (pfvf->pfc_alloc_status[qidx]) + if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx]) return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx]; #endif diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 4046be0e86ff..a9a1028cb17b 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -457,7 +457,7 @@ 
prestera_kern_neigh_cache_find(struct prestera_switch *sw, n_cache = rhashtable_lookup_fast(&sw->router->kern_neigh_cache_ht, key, __prestera_kern_neigh_cache_ht_params); - return IS_ERR(n_cache) ? NULL : n_cache; + return n_cache; } static void diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c index aa080dc57ff0..02faaea2aefa 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c @@ -330,7 +330,7 @@ prestera_nh_neigh_find(struct prestera_switch *sw, nh_neigh = rhashtable_lookup_fast(&sw->router->nh_neigh_ht, key, __prestera_nh_neigh_ht_params); - return IS_ERR(nh_neigh) ? NULL : nh_neigh; + return nh_neigh; } struct prestera_nh_neigh * @@ -484,7 +484,7 @@ __prestera_nexthop_group_find(struct prestera_switch *sw, nh_grp = rhashtable_lookup_fast(&sw->router->nexthop_group_ht, key, __prestera_nexthop_group_ht_params); - return IS_ERR(nh_grp) ? NULL : nh_grp; + return nh_grp; } static struct prestera_nexthop_group * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 74bd05e5dda2..e7a894ba5c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1497,8 +1497,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, return -EFAULT; err = sscanf(outlen_str, "%d", &outlen); - if (err < 0) - return err; + if (err != 1) + return -EINVAL; ptr = kzalloc(outlen, GFP_KERNEL); if (!ptr) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 285d32d2fd08..d7c020f72401 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -365,7 +365,7 @@ void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); } @@ -397,7 +397,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) err_destroy_tables: while (--i >= 0) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 3dc6c987b8da..f900709639f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -427,15 +427,15 @@ mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) return NULL; } -static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *rx_sa, - bool active) +static int macsec_rx_sa_active_update(struct macsec_context *ctx, + struct mlx5e_macsec_sa *rx_sa, + bool active) { - struct mlx5_core_dev *mdev = macsec->mdev; - struct mlx5_macsec_obj_attrs attrs = {}; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; int err = 0; - if (rx_sa->active != active) + if (rx_sa->active == active) return 0; rx_sa->active = active; @@ -444,13 +444,11 @@ static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, return 0; } - attrs.sci = cpu_to_be64((__force u64)rx_sa->sci); - attrs.enc_key_id = rx_sa->enc_key_id; - err = 
mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) - return err; + rx_sa->active = false; - return 0; + return err; } static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) @@ -476,6 +474,11 @@ static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) return false; } + if (!ctx->secy->tx_sc.encrypt) { + netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n"); + return false; + } + return true; } @@ -620,6 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) if (tx_sa->active == ctx_tx_sa->active) goto out; + tx_sa->active = ctx_tx_sa->active; if (tx_sa->assoc_num != tx_sc->encoding_sa) goto out; @@ -635,8 +639,6 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); } - - tx_sa->active = ctx_tx_sa->active; out: mutex_unlock(&macsec->lock); @@ -736,9 +738,14 @@ static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) sc_xarray_element->rx_sc = rx_sc; err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element, - XA_LIMIT(1, USHRT_MAX), GFP_KERNEL); - if (err) + XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL); + if (err) { + if (err == -EBUSY) + netdev_err(ctx->netdev, + "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n", + MLX5_MACEC_RX_FS_ID_MAX); goto destroy_sc_xarray_elemenet; + } rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); if (!rx_sc->md_dst) { @@ -798,16 +805,16 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) goto out; } - rx_sc->active = ctx_rx_sc->active; if (rx_sc->active == ctx_rx_sc->active) goto out; + rx_sc->active = ctx_rx_sc->active; for (i = 0; i < MACSEC_NUM_AN; ++i) { rx_sa = rx_sc->rx_sa[i]; if (!rx_sa) continue; - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active); if (err) goto out; } @@ -818,16 +825,43 @@ out: return err; } +static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc) +{ + struct mlx5e_macsec_sa *rx_sa; + int i; + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + + /* At this point the relevant MACsec offload Rx rule already removed at + * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current + * Rx related data propagating using xa_erase which uses rcu to sync, + * once fs_id is erased then this rx_sc is hidden from datapath. 
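The xarray allocation in the add_rxsc hunk above bounds fs_id so it always fits the 16-bit metadata field, and -EBUSY now gets a diagnostic instead of failing silently. A sketch of bounded ID allocation with xa_alloc(), with illustrative names; the xarray is assumed to have been initialized with DEFINE_XARRAY_ALLOC():

#include <linux/xarray.h>
#include <linux/printk.h>

static int alloc_rx_fs_id(struct xarray *xa, void *entry, u32 *fs_id)
{
	int err;

	/* IDs 1..USHRT_MAX: 0 is reserved, and anything wider would
	 * not survive the 16-bit metadata round trip */
	err = xa_alloc(xa, fs_id, entry, XA_LIMIT(1, USHRT_MAX),
		       GFP_KERNEL);
	if (err == -EBUSY)
		pr_err("all %u fs_id values are in use\n", USHRT_MAX);

	return err;
}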
+ */ + list_del_rcu(&rx_sc->rx_sc_list_element); + xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); + metadata_dst_free(rx_sc->md_dst); + kfree(rx_sc->sc_xarray_element); + kfree_rcu(rx_sc); +} + static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) { struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec *macsec; struct list_head *list; int err = 0; - int i; mutex_lock(&priv->macsec->lock); @@ -849,31 +883,7 @@ static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) goto out; } - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - -/* - * At this point the relevant MACsec offload Rx rule already removed at - * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current - * Rx related data propagating using xa_erase which uses rcu to sync, - * once fs_id is erased then this rx_sc is hidden from datapath. - */ - list_del_rcu(&rx_sc->rx_sc_list_element); - xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); - metadata_dst_free(rx_sc->md_dst); - kfree(rx_sc->sc_xarray_element); - - kfree_rcu(rx_sc); - + macsec_del_rxsc_ctx(macsec, rx_sc); out: mutex_unlock(&macsec->lock); @@ -1015,7 +1025,7 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) goto out; } - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active); out: mutex_unlock(&macsec->lock); @@ -1234,7 +1244,6 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc, *tmp; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec_sa *tx_sa; struct mlx5e_macsec *macsec; struct list_head *list; @@ -1263,28 +1272,15 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) } list = &macsec_device->macsec_rx_sc_list_head; - list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - - list_del_rcu(&rx_sc->rx_sc_list_element); - - kfree_rcu(rx_sc); - } + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) + macsec_del_rxsc_ctx(macsec, rx_sc); kfree(macsec_device->dev_addr); macsec_device->dev_addr = NULL; list_del_rcu(&macsec_device->macsec_device_list_element); --macsec->num_of_devices; + kfree(macsec_device); out: mutex_unlock(&macsec->lock); @@ -1748,7 +1744,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, if (!macsec) return; - fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data); + fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data); rcu_read_lock(); sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h index d580b4a91253..347380a2cd9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -10,9 +10,11 @@ #include <net/macsec.h> #include 
<net/dst_metadata.h> -/* Bit31 - 30: MACsec marker, Bit3-0: MACsec id */ +/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ +#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ +#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX #define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) -#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0)) +#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) struct mlx5e_priv; struct mlx5e_macsec; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 1ac0cf04e811..5b658a5588c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -250,7 +250,7 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); if (!ns) @@ -261,8 +261,10 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto out_spec; + } tx_tables = &tx_fs->tables; ft_crypto = &tx_tables->ft_crypto; @@ -898,7 +900,7 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); if (!ns) @@ -909,8 +911,10 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto free_spec; + } rx_tables = &rx_fs->tables; ft_crypto = &rx_tables->ft_crypto; @@ -1142,10 +1146,10 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, ft_crypto = &rx_tables->ft_crypto; /* Set bit[31 - 30] macsec marker - 0x01 */ - /* Set bit[3-0] fs id */ + /* Set bit[15-0] fs id */ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(set_action_in, action, data, fs_id | BIT(30)); + MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30)); MLX5_SET(set_action_in, action, offset, 0); MLX5_SET(set_action_in, action, length, 32); @@ -1205,6 +1209,7 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, rx_rule->rule[1] = rule; } + kvfree(spec); return macsec_rule; err: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2169486c4bfb..374e3fbdc2cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1362,6 +1362,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) devl_rate_nodes_destroy(devlink); } + /* Destroy legacy fdb when disabling sriov in legacy mode. 
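The macsec.h and macsec_fs.c hunks above widen the Rx fs_id from 4 to 16 bits while keeping the 2-bit MACsec marker (value 0x1) in bits 31-30 of the metadata register. The resulting layout, as a pair of illustrative encode/decode helpers:

#include <linux/bits.h>
#include <linux/types.h>

static u32 macsec_meta_encode(u16 fs_id)
{
	return BIT(30) | fs_id;		/* marker 0x1 in bits 31-30 */
}

static u16 macsec_meta_decode_fs_id(u32 metadata)
{
	return metadata & GENMASK(15, 0);
}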
*/ + if (esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_disable_locked(esw); esw->esw_funcs.num_vfs = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f68dc2d0dbe6..3029bc1c0dd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -736,6 +736,14 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); +static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->esw_funcs.num_vfs; + + return 0; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3fda75fe168c..8c6c9bcb3dc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3387,6 +3387,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, int err; esw->mode = MLX5_ESWITCH_LEGACY; + + /* If changing from switchdev to legacy mode without sriov enabled, + * no need to create legacy fdb. + */ + if (!mlx5_sriov_is_enabled(esw->dev)) + return 0; + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 108a3503f413..edd910258314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -312,6 +312,8 @@ revert_changes: for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + attr->dests[curr_dest].termtbl = NULL; + /* search for the destination associated with the * current term table */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index be1307a63e6d..32c3e0a649a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -700,10 +700,13 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; #ifdef CONFIG_MLX5_ESWITCH - dev = ldev->pf[MLX5_LAG_P1].dev; - if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev)) - return false; + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) + return false; + } + dev = ldev->pf[MLX5_LAG_P1].dev; mode = mlx5_eswitch_mode(dev); for (i = 0; i < ldev->ports; i++) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 31d443dd8386..f68461b13391 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -46,7 +46,7 @@ static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_action *action) { - int ret; + int ret = -EOPNOTSUPP; if (action && 
action->action_type != DR_ACTION_TYP_FT) return -EOPNOTSUPP; @@ -67,6 +67,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, goto out; } + if (ret) + goto out; + /* Release old action */ if (tbl->miss_action) refcount_dec(&tbl->miss_action->refcount); diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 19d043b593cc..62320be4de5a 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev) struct sk_buff *skb; int i; - for (i = 0; i < RX_BD_NUM; i++) { - phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - phys); - - dma_unmap_single(ndev->dev.parent, phys_addr, - NIXGE_MAX_JUMBO_FRAME_SIZE, - DMA_FROM_DEVICE); - - skb = (struct sk_buff *)(uintptr_t) - nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - sw_id_offset); - dev_kfree_skb(skb); - } + if (priv->rx_bd_v) { + for (i = 0; i < RX_BD_NUM; i++) { + phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + phys); + + dma_unmap_single(ndev->dev.parent, phys_addr, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); + + skb = (struct sk_buff *)(uintptr_t) + nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + sw_id_offset); + dev_kfree_skb(skb); + } - if (priv->rx_bd_v) dma_free_coherent(ndev->dev.parent, sizeof(*priv->rx_bd_v) * RX_BD_NUM, priv->rx_bd_v, priv->rx_bd_p); + } if (priv->tx_skb) devm_kfree(ndev->dev.parent, priv->tx_skb); diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9fb1fa479d4b..16e6bd466143 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -767,34 +767,34 @@ static int qed_mcp_cancel_load_req(struct qed_hwfn *p_hwfn, return rc; } -#define CONFIG_QEDE_BITMAP_IDX BIT(0) -#define CONFIG_QED_SRIOV_BITMAP_IDX BIT(1) -#define CONFIG_QEDR_BITMAP_IDX BIT(2) -#define CONFIG_QEDF_BITMAP_IDX BIT(4) -#define CONFIG_QEDI_BITMAP_IDX BIT(5) -#define CONFIG_QED_LL2_BITMAP_IDX BIT(6) +#define BITMAP_IDX_FOR_CONFIG_QEDE BIT(0) +#define BITMAP_IDX_FOR_CONFIG_QED_SRIOV BIT(1) +#define BITMAP_IDX_FOR_CONFIG_QEDR BIT(2) +#define BITMAP_IDX_FOR_CONFIG_QEDF BIT(4) +#define BITMAP_IDX_FOR_CONFIG_QEDI BIT(5) +#define BITMAP_IDX_FOR_CONFIG_QED_LL2 BIT(6) static u32 qed_get_config_bitmap(void) { u32 config_bitmap = 0x0; if (IS_ENABLED(CONFIG_QEDE)) - config_bitmap |= CONFIG_QEDE_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDE; if (IS_ENABLED(CONFIG_QED_SRIOV)) - config_bitmap |= CONFIG_QED_SRIOV_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_SRIOV; if (IS_ENABLED(CONFIG_QED_RDMA)) - config_bitmap |= CONFIG_QEDR_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDR; if (IS_ENABLED(CONFIG_QED_FCOE)) - config_bitmap |= CONFIG_QEDF_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDF; if (IS_ENABLED(CONFIG_QED_ISCSI)) - config_bitmap |= CONFIG_QEDI_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDI; if (IS_ENABLED(CONFIG_QED_LL2)) - config_bitmap |= CONFIG_QED_LL2_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_LL2; return config_bitmap; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bd0607680329..2fd5c6fdb500 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2991,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter) QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val); 
dev_info(&adapter->pdev->dev, "%s: lock recovery initiated\n", __func__); - msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); + mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK); id = ((val >> 2) & 0xF); if (id == adapter->portnum) { @@ -3027,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter) if (status) break; - msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY); + mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY); i++; if (i == 1) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 36324126db6d..6bc923326268 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -3020,6 +3020,7 @@ static int __maybe_unused ravb_resume(struct device *dev) ret = ravb_open(ndev); if (ret < 0) return ret; + ravb_set_rx_mode(ndev); netif_device_attach(ndev); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c25bfecb4a2d..e5cfde1cbd5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -748,6 +748,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, if (fc & FLOW_RX) { pr_debug("\tReceive Flow-Control ON\n"); flow |= GMAC_RX_FLOW_CTRL_RFE; + } else { + pr_debug("\tReceive Flow-Control OFF\n"); } writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6b43da78cdf0..23ec0a9e396c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1061,8 +1061,16 @@ static void stmmac_mac_link_up(struct phylink_config *config, ctrl |= priv->hw->link.duplex; /* Flow Control operation */ - if (tx_pause && rx_pause) - stmmac_mac_flow_ctrl(priv, duplex); + if (rx_pause && tx_pause) + priv->flow_ctrl = FLOW_AUTO; + else if (rx_pause && !tx_pause) + priv->flow_ctrl = FLOW_RX; + else if (!rx_pause && tx_pause) + priv->flow_ctrl = FLOW_TX; + else + priv->flow_ctrl = FLOW_OFF; + + stmmac_mac_flow_ctrl(priv, duplex); if (ctrl != old_ctrl) writel(ctrl, priv->ioaddr + MAC_CTRL_REG); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c50b137f92d7..d04a239c658e 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2082,7 +2082,7 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; - if (port->ndev) + if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED) unregister_netdev(port->ndev); } } diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 14e8d04cb434..2e9742952c4e 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -211,7 +211,7 @@ static __net_init int loopback_net_init(struct net *net) int err; err = -ENOMEM; - dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup); + dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup); if (!dev) goto out; diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 689e728345ce..eb344f6d4a7b 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -148,7 +148,7 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, /* Associate the fwnode with the device structure so it * can be looked up later. 
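The fwnode_mdio and phy_device hunks that follow pair a fwnode_handle_get() at registration time with a fwnode_handle_put() in the release callback, so the fwnode's refcount tracks the lifetime of the device that points at it. A sketch of the pairing, assuming the fwnode is stored in dev->fwnode (function names are illustrative):

#include <linux/device.h>
#include <linux/property.h>

static void attach_fwnode(struct device *dev, struct fwnode_handle *fwnode)
{
	dev->fwnode = fwnode_handle_get(fwnode);	/* hold a reference */
}

static void example_device_release(struct device *dev)
{
	fwnode_handle_put(dev->fwnode);			/* drop it on free */
}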
*/ - phy->mdio.dev.fwnode = child; + phy->mdio.dev.fwnode = fwnode_handle_get(child); /* All data is now stored in the phy struct, so register it */ rc = phy_device_register(phy); diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 464d88ca8ab0..a4abea921046 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void) rc = ntb_transport_register_client_dev(KBUILD_MODNAME); if (rc) return rc; - return ntb_transport_register_client(&ntb_netdev_client); + + rc = ntb_transport_register_client(&ntb_netdev_client); + if (rc) { + ntb_transport_unregister_client_dev(KBUILD_MODNAME); + return rc; + } + + return 0; } module_init(ntb_netdev_init_module); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 57849ac0384e..8cff61dbc4b5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -217,6 +217,7 @@ static void phy_mdio_device_free(struct mdio_device *mdiodev) static void phy_device_release(struct device *dev) { + fwnode_handle_put(dev->fwnode); kfree(to_phy_device(dev)); } @@ -1520,6 +1521,7 @@ error: error_module_put: module_put(d->driver->owner); + d->driver = NULL; error_put_device: put_device(d); if (ndev_owner != bus->owner) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6547b6cc6cbe..2805b04d6402 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1603,19 +1603,29 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, linkmode_copy(supported, phy->supported); linkmode_copy(config.advertising, phy->advertising); - /* Clause 45 PHYs switch their Serdes lane between several different - * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G - * speeds. We really need to know which interface modes the PHY and - * MAC supports to properly work out which linkmodes can be supported. + /* Check whether we would use rate matching for the proposed interface + * mode. */ - if (phy->is_c45 && + config.rate_matching = phy_get_rate_matching(phy, interface); + + /* Clause 45 PHYs may switch their Serdes lane between, e.g. 10GBASE-R, + * 5GBASE-R, 2500BASE-X and SGMII if they are not using rate matching. + * For some interface modes (e.g. RXAUI, XAUI and USXGMII) switching + * their Serdes is either unnecessary or not reasonable. + * + * For these which switch interface modes, we really need to know which + * interface modes the PHY supports to properly work out which ethtool + * linkmodes can be supported. For now, as a work-around, we validate + * against all interface modes, which may lead to more ethtool link + * modes being advertised than are actually supported. 
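The ntb_netdev change above, like the fm10k/i40e/iavf/ixgbevf module-init fixes earlier in this series, applies the same unwind discipline: when a later registration step fails, every earlier step is undone in reverse order. A generic sketch with hypothetical step names (register_step_a/b stand in for whatever the module registers):

#include <linux/init.h>
#include <linux/module.h>

/* Hypothetical registration steps, declared here for illustration. */
int register_step_a(void);
void unregister_step_a(void);
int register_step_b(void);
void unregister_step_b(void);

static int __init example_init(void)
{
	int rc;

	rc = register_step_a();
	if (rc)
		return rc;

	rc = register_step_b();
	if (rc) {
		unregister_step_a();	/* unwind the earlier step */
		return rc;
	}

	return 0;
}
module_init(example_init);

static void __exit example_exit(void)
{
	unregister_step_b();		/* teardown mirrors init, reversed */
	unregister_step_a();
}
module_exit(example_exit);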
+ */ + if (phy->is_c45 && config.rate_matching == RATE_MATCH_NONE && interface != PHY_INTERFACE_MODE_RXAUI && interface != PHY_INTERFACE_MODE_XAUI && interface != PHY_INTERFACE_MODE_USXGMII) config.interface = PHY_INTERFACE_MODE_NA; else config.interface = interface; - config.rate_matching = phy_get_rate_matching(phy, config.interface); ret = phylink_validate(pl, supported, &config); if (ret) { diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7a3ab3427369..24001112c323 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -686,7 +686,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (tun) xdp_rxq_info_unreg(&tfile->xdp_rxq); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - sock_put(&tfile->sk); } } @@ -702,6 +701,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) if (dev) netdev_state_change(dev); rtnl_unlock(); + + if (clean) + sock_put(&tfile->sk); } static void tun_detach_all(struct net_device *dev) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 9bbfff803357..b545d93c6e37 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -959,30 +959,51 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return; while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; - if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; } if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + u16 elem_size; ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (unsigned int i = 0; i < elem_size;) { + struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } } diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index eb1d1ba3a443..67df8221b5ae 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -482,14 +482,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; - param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + 
/* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } } if (param->rsn_found) { diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index d41e373f9c0a..d6b166fc5c0e 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -365,7 +365,8 @@ static void ipc_mux_dl_cmd_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) /* Pass the DL packet to the netif layer. */ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, struct iosm_wwan *wwan, u32 offset, - u8 service_class, struct sk_buff *skb) + u8 service_class, struct sk_buff *skb, + u32 pkt_len) { struct sk_buff *dest_skb = skb_clone(skb, GFP_ATOMIC); @@ -373,7 +374,7 @@ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, return -ENOMEM; skb_pull(dest_skb, offset); - skb_set_tail_pointer(dest_skb, dest_skb->len); + skb_trim(dest_skb, pkt_len); /* Pass the packet to the netif layer. */ dest_skb->priority = service_class; @@ -429,7 +430,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) { - u32 pad_len, packet_offset; + u32 pad_len, packet_offset, adgh_len; struct iosm_wwan *wwan; struct mux_adgh *adgh; u8 *block = skb->data; @@ -470,10 +471,12 @@ static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, packet_offset = sizeof(*adgh) + pad_len; if_id += ipc_mux->wwan_q_offset; + adgh_len = le16_to_cpu(adgh->length); /* Pass the packet to the netif layer */ rc = ipc_mux_net_receive(ipc_mux, if_id, wwan, packet_offset, - adgh->service_class, skb); + adgh->service_class, skb, + adgh_len - packet_offset); if (rc) { dev_err(ipc_mux->dev, "mux adgh decoding error"); return; @@ -547,7 +550,7 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, int if_id, int nr_of_dg) { u32 dl_head_pad_len = ipc_mux->session[if_id].dl_head_pad_len; - u32 packet_offset, i, rc; + u32 packet_offset, i, rc, dg_len; for (i = 0; i < nr_of_dg; i++, dg++) { if (le32_to_cpu(dg->datagram_index) @@ -562,11 +565,12 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, packet_offset = le32_to_cpu(dg->datagram_index) + dl_head_pad_len; + dg_len = le16_to_cpu(dg->datagram_length); /* Pass the packet to the netif layer. */ rc = ipc_mux_net_receive(ipc_mux, if_id, ipc_mux->wwan, packet_offset, - dg->service_class, - skb); + dg->service_class, skb, + dg_len - dl_head_pad_len); if (rc) goto dg_error; } @@ -1207,10 +1211,9 @@ static int mux_ul_dg_update_tbl_index(struct iosm_mux *ipc_mux, qlth_n_ql_size, ul_list); ipc_mux_ul_adb_finish(ipc_mux); if (ipc_mux_ul_adb_allocate(ipc_mux, adb, &ipc_mux->size_needed, - IOSM_AGGR_MUX_SIG_ADBH)) { - dev_kfree_skb(src_skb); + IOSM_AGGR_MUX_SIG_ADBH)) return -ENOMEM; - } + ipc_mux->size_needed = le32_to_cpu(adb->adbh->block_length); ipc_mux->size_needed += offsetof(struct mux_adth, dg); @@ -1471,8 +1474,7 @@ void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb) ipc_mux->ul_data_pend_bytes); /* Reset the skb settings. */ - skb->tail = 0; - skb->len = 0; + skb_trim(skb, 0); /* Add the consumed ADB to the free list. 
*/ skb_queue_tail((&ipc_mux->ul_adb.free_list), skb); diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h index 9b3a6d86ece7..289397c4ea6c 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol.h +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h @@ -122,7 +122,7 @@ struct iosm_protocol { struct iosm_imem *imem; struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES]; struct device *dev; - phys_addr_t phy_ap_shm; + dma_addr_t phy_ap_shm; u32 old_msg_tail; }; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da55ce45ac70..69e333922bea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4304,7 +4304,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ - synchronize_rcu(); + synchronize_srcu(&ns->head->srcu); if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 93e2138a8b42..7e025b8948cb 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) struct nvme_ns_head *head = ns->head; sector_t capacity = get_capacity(head->disk); int node; + int srcu_idx; + srcu_idx = srcu_read_lock(&head->srcu); list_for_each_entry_rcu(ns, &head->list, siblings) { if (capacity != get_capacity(ns->disk)) clear_bit(NVME_NS_READY, &ns->flags); } + srcu_read_unlock(&head->srcu, srcu_idx); for_each_node(node) rcu_assign_pointer(head->current_path[node], NULL); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f4335519399d..488ad7dabeb8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -797,6 +797,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); if (bv->bv_len > first_prp_len) cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); + else + cmnd->dptr.prp2 = 0; return BLK_STS_OK; } diff --git a/drivers/of/property.c b/drivers/of/property.c index 967f79b59016..134cfc980b70 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -993,8 +993,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, nargs, index, &of_args); if (ret < 0) return ret; - if (!args) + if (!args) { + of_node_put(of_args.np); return 0; + } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 605d68673f92..e55c6095adf0 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -578,169 +578,140 @@ static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, return false; } -static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, - struct opp_table *opp_table) +static u32 *_parse_named_prop(struct dev_pm_opp *opp, struct device *dev, + struct opp_table *opp_table, + const char *prop_type, bool *triplet) { - u32 *microvolt, *microamp = NULL, *microwatt = NULL; - int supplies = opp_table->regulator_count; - int vcount, icount, pcount, ret, i, j; struct property *prop = NULL; char name[NAME_MAX]; + int count, ret; + u32 *out; - /* Search for "opp-microvolt-<name>" */ + /* Search for "opp-<prop_type>-<name>" */ if (opp_table->prop_name) { - snprintf(name, sizeof(name), "opp-microvolt-%s", + snprintf(name, sizeof(name), "opp-%s-%s", prop_type, opp_table->prop_name); prop = of_find_property(opp->np, name, NULL); } if (!prop) { - /* Search for 
"opp-microvolt" */ - sprintf(name, "opp-microvolt"); + /* Search for "opp-<prop_type>" */ + snprintf(name, sizeof(name), "opp-%s", prop_type); prop = of_find_property(opp->np, name, NULL); - - /* Missing property isn't a problem, but an invalid entry is */ - if (!prop) { - if (unlikely(supplies == -1)) { - /* Initialize regulator_count */ - opp_table->regulator_count = 0; - return 0; - } - - if (!supplies) - return 0; - - dev_err(dev, "%s: opp-microvolt missing although OPP managing regulators\n", - __func__); - return -EINVAL; - } + if (!prop) + return NULL; } - if (unlikely(supplies == -1)) { - /* Initialize regulator_count */ - supplies = opp_table->regulator_count = 1; - } else if (unlikely(!supplies)) { - dev_err(dev, "%s: opp-microvolt wasn't expected\n", __func__); - return -EINVAL; + count = of_property_count_u32_elems(opp->np, name); + if (count < 0) { + dev_err(dev, "%s: Invalid %s property (%d)\n", __func__, name, + count); + return ERR_PTR(count); } - vcount = of_property_count_u32_elems(opp->np, name); - if (vcount < 0) { - dev_err(dev, "%s: Invalid %s property (%d)\n", - __func__, name, vcount); - return vcount; - } - - /* There can be one or three elements per supply */ - if (vcount != supplies && vcount != supplies * 3) { - dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n", - __func__, name, vcount, supplies); - return -EINVAL; + /* + * Initialize regulator_count, if regulator information isn't provided + * by the platform. Now that one of the properties is available, fix the + * regulator_count to 1. + */ + if (unlikely(opp_table->regulator_count == -1)) + opp_table->regulator_count = 1; + + if (count != opp_table->regulator_count && + (!triplet || count != opp_table->regulator_count * 3)) { + dev_err(dev, "%s: Invalid number of elements in %s property (%u) with supplies (%d)\n", + __func__, prop_type, count, opp_table->regulator_count); + return ERR_PTR(-EINVAL); } - microvolt = kmalloc_array(vcount, sizeof(*microvolt), GFP_KERNEL); - if (!microvolt) - return -ENOMEM; + out = kmalloc_array(count, sizeof(*out), GFP_KERNEL); + if (!out) + return ERR_PTR(-EINVAL); - ret = of_property_read_u32_array(opp->np, name, microvolt, vcount); + ret = of_property_read_u32_array(opp->np, name, out, count); if (ret) { dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret); - ret = -EINVAL; - goto free_microvolt; - } - - /* Search for "opp-microamp-<name>" */ - prop = NULL; - if (opp_table->prop_name) { - snprintf(name, sizeof(name), "opp-microamp-%s", - opp_table->prop_name); - prop = of_find_property(opp->np, name, NULL); + kfree(out); + return ERR_PTR(-EINVAL); } - if (!prop) { - /* Search for "opp-microamp" */ - sprintf(name, "opp-microamp"); - prop = of_find_property(opp->np, name, NULL); - } + if (triplet) + *triplet = count != opp_table->regulator_count; - if (prop) { - icount = of_property_count_u32_elems(opp->np, name); - if (icount < 0) { - dev_err(dev, "%s: Invalid %s property (%d)\n", __func__, - name, icount); - ret = icount; - goto free_microvolt; - } + return out; +} - if (icount != supplies) { - dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n", - __func__, name, icount, supplies); - ret = -EINVAL; - goto free_microvolt; - } +static u32 *opp_parse_microvolt(struct dev_pm_opp *opp, struct device *dev, + struct opp_table *opp_table, bool *triplet) +{ + u32 *microvolt; - microamp = kmalloc_array(icount, sizeof(*microamp), GFP_KERNEL); - if (!microamp) { - ret = -EINVAL; - goto free_microvolt; - 
} + microvolt = _parse_named_prop(opp, dev, opp_table, "microvolt", triplet); + if (IS_ERR(microvolt)) + return microvolt; - ret = of_property_read_u32_array(opp->np, name, microamp, - icount); - if (ret) { - dev_err(dev, "%s: error parsing %s: %d\n", __func__, - name, ret); - ret = -EINVAL; - goto free_microamp; + if (!microvolt) { + /* + * Missing property isn't a problem, but an invalid + * entry is. This property isn't optional if regulator + * information is provided. Check only for the first OPP, as + * regulator_count may get initialized after that to a valid + * value. + */ + if (list_empty(&opp_table->opp_list) && + opp_table->regulator_count > 0) { + dev_err(dev, "%s: opp-microvolt missing although OPP managing regulators\n", + __func__); + return ERR_PTR(-EINVAL); } } - /* Search for "opp-microwatt" */ - sprintf(name, "opp-microwatt"); - prop = of_find_property(opp->np, name, NULL); - - if (prop) { - pcount = of_property_count_u32_elems(opp->np, name); - if (pcount < 0) { - dev_err(dev, "%s: Invalid %s property (%d)\n", __func__, - name, pcount); - ret = pcount; - goto free_microamp; - } + return microvolt; +} - if (pcount != supplies) { - dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n", - __func__, name, pcount, supplies); - ret = -EINVAL; - goto free_microamp; - } +static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, + struct opp_table *opp_table) +{ + u32 *microvolt, *microamp, *microwatt; + int ret = 0, i, j; + bool triplet; - microwatt = kmalloc_array(pcount, sizeof(*microwatt), - GFP_KERNEL); - if (!microwatt) { - ret = -EINVAL; - goto free_microamp; - } + microvolt = opp_parse_microvolt(opp, dev, opp_table, &triplet); + if (IS_ERR(microvolt)) + return PTR_ERR(microvolt); - ret = of_property_read_u32_array(opp->np, name, microwatt, - pcount); - if (ret) { - dev_err(dev, "%s: error parsing %s: %d\n", __func__, - name, ret); - ret = -EINVAL; - goto free_microwatt; - } + microamp = _parse_named_prop(opp, dev, opp_table, "microamp", NULL); + if (IS_ERR(microamp)) { + ret = PTR_ERR(microamp); + goto free_microvolt; } - for (i = 0, j = 0; i < supplies; i++) { - opp->supplies[i].u_volt = microvolt[j++]; + microwatt = _parse_named_prop(opp, dev, opp_table, "microwatt", NULL); + if (IS_ERR(microwatt)) { + ret = PTR_ERR(microwatt); + goto free_microamp; + } - if (vcount == supplies) { - opp->supplies[i].u_volt_min = opp->supplies[i].u_volt; - opp->supplies[i].u_volt_max = opp->supplies[i].u_volt; - } else { - opp->supplies[i].u_volt_min = microvolt[j++]; - opp->supplies[i].u_volt_max = microvolt[j++]; + /* + * Initialize regulator_count if it is uninitialized and no properties + * are found. 
+ */ + if (unlikely(opp_table->regulator_count == -1)) { + opp_table->regulator_count = 0; + return 0; + } + + for (i = 0, j = 0; i < opp_table->regulator_count; i++) { + if (microvolt) { + opp->supplies[i].u_volt = microvolt[j++]; + + if (triplet) { + opp->supplies[i].u_volt_min = microvolt[j++]; + opp->supplies[i].u_volt_max = microvolt[j++]; + } else { + opp->supplies[i].u_volt_min = opp->supplies[i].u_volt; + opp->supplies[i].u_volt_max = opp->supplies[i].u_volt; + } } if (microamp) @@ -750,7 +721,6 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, opp->supplies[i].u_watt = microwatt[i]; } -free_microwatt: kfree(microwatt); free_microamp: kfree(microamp); diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 52ecd66ce357..047a8374b4fd 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static int __intel_gpio_get_gpio_mode(u32 value) +{ + return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; +} + static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) { - return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; + return __intel_gpio_get_gpio_mode(readl(padcfg0)); } static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) @@ -1674,6 +1679,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); + u32 value; if (!pd || !intel_pad_usable(pctrl, pin)) return false; @@ -1688,6 +1694,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin))) return true; + /* + * The firmware on some systems may configure GPIO pins to be + * an interrupt source in so-called "direct IRQ" mode. In such + * cases the GPIO controller driver has no idea if those pins + * are being used or not. At the same time, there is a known bug + * in such firmware: it doesn't restore the pin settings correctly + * after suspend, i.e. for an unknown reason the Rx value becomes + * inverted. + * + * Hence, let's save and restore the pins that are configured + * as GPIOs in the input mode with the GPIROUTIOXAPIC bit set. + * + * See https://bugzilla.kernel.org/show_bug.cgi?id=214749.
+ */ + value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); + if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + return true; + return false; } diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index 65d312967619..27f0a54e12bf 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -303,12 +303,15 @@ static struct irq_chip mtk_eint_irq_chip = { static unsigned int mtk_eint_hw_init(struct mtk_eint *eint) { - void __iomem *reg = eint->base + eint->regs->dom_en; + void __iomem *dom_en = eint->base + eint->regs->dom_en; + void __iomem *mask_set = eint->base + eint->regs->mask_set; unsigned int i; for (i = 0; i < eint->hw->ap_num; i += 32) { - writel(0xffffffff, reg); - reg += 4; + writel(0xffffffff, dom_en); + writel(0xffffffff, mask_set); + dom_en += 4; + mask_set += 4; } return 0; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 67bec7ea0f8b..414ee6bb8ac9 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs) mux_bytes = pcs->width / BITS_PER_BYTE; - if (pcs->bits_per_mux) { + if (pcs->bits_per_mux && pcs->fmask) { pcs->bits_per_pin = fls(pcs->fmask); nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin; } else { diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index 8f9c571d7257..00ac7e381441 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -203,6 +203,7 @@ static const struct x86_cpu_id intel_uncore_cpu_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_cpu_ids); diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 515e3ceb3393..90d33cd1b670 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -44,6 +44,19 @@ config IDLE_INJECT synchronously on a set of specified CPUs or alternatively on a per CPU basis. +config ARM_SCMI_POWERCAP + tristate "ARM SCMI Powercap driver" + depends on ARM_SCMI_PROTOCOL + help + This enables support for ARM powercapping based on the ARM SCMI + Powercap protocol. + + The ARM SCMI Powercap protocol allows power limits to be enforced + and monitored against the SCMI Powercap domains advertised as + available by the SCMI platform firmware. + + When compiled as a module it will be called arm_scmi_powercap.
+ config DTPM bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)" depends on OF diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile index 494617cdad88..4474201b4aa7 100644 --- a/drivers/powercap/Makefile +++ b/drivers/powercap/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_POWERCAP) += powercap_sys.o obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o obj-$(CONFIG_IDLE_INJECT) += idle_inject.o +obj-$(CONFIG_ARM_SCMI_POWERCAP) += arm_scmi_powercap.o diff --git a/drivers/powercap/arm_scmi_powercap.c b/drivers/powercap/arm_scmi_powercap.c new file mode 100644 index 000000000000..05d0e516176a --- /dev/null +++ b/drivers/powercap/arm_scmi_powercap.c @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SCMI Powercap support. + * + * Copyright (C) 2022 ARM Ltd. + */ + +#include <linux/device.h> +#include <linux/math.h> +#include <linux/limits.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/powercap.h> +#include <linux/scmi_protocol.h> + +#define to_scmi_powercap_zone(z) \ + container_of(z, struct scmi_powercap_zone, zone) + +static const struct scmi_powercap_proto_ops *powercap_ops; + +struct scmi_powercap_zone { + unsigned int height; + struct device *dev; + struct scmi_protocol_handle *ph; + const struct scmi_powercap_info *info; + struct scmi_powercap_zone *spzones; + struct powercap_zone zone; + struct list_head node; +}; + +struct scmi_powercap_root { + unsigned int num_zones; + struct scmi_powercap_zone *spzones; + struct list_head *registered_zones; +}; + +static struct powercap_control_type *scmi_top_pcntrl; + +static int scmi_powercap_zone_release(struct powercap_zone *pz) +{ + return 0; +} + +static int scmi_powercap_get_max_power_range_uw(struct powercap_zone *pz, + u64 *max_power_range_uw) +{ + *max_power_range_uw = U32_MAX; + return 0; +} + +static int scmi_powercap_get_power_uw(struct powercap_zone *pz, + u64 *power_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + u32 avg_power, pai; + int ret; + + if (!spz->info->powercap_monitoring) + return -EINVAL; + + ret = powercap_ops->measurements_get(spz->ph, spz->info->id, &avg_power, + &pai); + if (ret) + return ret; + + *power_uw = avg_power; + if (spz->info->powercap_scale_mw) + *power_uw *= 1000; + + return 0; +} + +static const struct powercap_zone_ops zone_ops = { + .get_max_power_range_uw = scmi_powercap_get_max_power_range_uw, + .get_power_uw = scmi_powercap_get_power_uw, + .release = scmi_powercap_zone_release, +}; + +static void scmi_powercap_normalize_cap(const struct scmi_powercap_zone *spz, + u64 power_limit_uw, u32 *norm) +{ + bool scale_mw = spz->info->powercap_scale_mw; + u64 val; + + val = scale_mw ? DIV_ROUND_UP_ULL(power_limit_uw, 1000) : power_limit_uw; + /* + * This cast is lossless since here @power_limit_uw is certain to be + * within the range [min_power_cap, max_power_cap] whose bounds are + * assured to be two unsigned 32bits quantities. + */ + *norm = clamp_t(u32, val, spz->info->min_power_cap, + spz->info->max_power_cap); + *norm = rounddown(*norm, spz->info->power_cap_step); + + val = (scale_mw) ?
*norm * 1000 : *norm; + if (power_limit_uw != val) + dev_dbg(spz->dev, + "Normalized %s:CAP - requested:%llu - normalized:%llu\n", + spz->info->name, power_limit_uw, val); +} + +static int scmi_powercap_set_power_limit_uw(struct powercap_zone *pz, int cid, + u64 power_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + u32 norm_power; + + if (!spz->info->powercap_cap_config) + return -EINVAL; + + scmi_powercap_normalize_cap(spz, power_uw, &norm_power); + + return powercap_ops->cap_set(spz->ph, spz->info->id, norm_power, false); +} + +static int scmi_powercap_get_power_limit_uw(struct powercap_zone *pz, int cid, + u64 *power_limit_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + u32 power; + int ret; + + ret = powercap_ops->cap_get(spz->ph, spz->info->id, &power); + if (ret) + return ret; + + *power_limit_uw = power; + if (spz->info->powercap_scale_mw) + *power_limit_uw *= 1000; + + return 0; +} + +static void scmi_powercap_normalize_time(const struct scmi_powercap_zone *spz, + u64 time_us, u32 *norm) +{ + /* + * This cast is lossless since here @time_us is certain to be within the + * range [min_pai, max_pai] whose bounds are assured to be two unsigned + * 32bits quantities. + */ + *norm = clamp_t(u32, time_us, spz->info->min_pai, spz->info->max_pai); + *norm = rounddown(*norm, spz->info->pai_step); + + if (time_us != *norm) + dev_dbg(spz->dev, + "Normalized %s:PAI - requested:%llu - normalized:%u\n", + spz->info->name, time_us, *norm); +} + +static int scmi_powercap_set_time_window_us(struct powercap_zone *pz, int cid, + u64 time_window_us) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + u32 norm_pai; + + if (!spz->info->powercap_pai_config) + return -EINVAL; + + scmi_powercap_normalize_time(spz, time_window_us, &norm_pai); + + return powercap_ops->pai_set(spz->ph, spz->info->id, norm_pai); +} + +static int scmi_powercap_get_time_window_us(struct powercap_zone *pz, int cid, + u64 *time_window_us) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + int ret; + u32 pai; + + ret = powercap_ops->pai_get(spz->ph, spz->info->id, &pai); + if (ret) + return ret; + + *time_window_us = pai; + + return 0; +} + +static int scmi_powercap_get_max_power_uw(struct powercap_zone *pz, int cid, + u64 *max_power_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + + *max_power_uw = spz->info->max_power_cap; + if (spz->info->powercap_scale_mw) + *max_power_uw *= 1000; + + return 0; +} + +static int scmi_powercap_get_min_power_uw(struct powercap_zone *pz, int cid, + u64 *min_power_uw) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + + *min_power_uw = spz->info->min_power_cap; + if (spz->info->powercap_scale_mw) + *min_power_uw *= 1000; + + return 0; +} + +static int scmi_powercap_get_max_time_window_us(struct powercap_zone *pz, + int cid, u64 *time_window_us) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + + *time_window_us = spz->info->max_pai; + + return 0; +} + +static int scmi_powercap_get_min_time_window_us(struct powercap_zone *pz, + int cid, u64 *time_window_us) +{ + struct scmi_powercap_zone *spz = to_scmi_powercap_zone(pz); + + *time_window_us = (u64)spz->info->min_pai; + + return 0; +} + +static const char *scmi_powercap_get_name(struct powercap_zone *pz, int cid) +{ + return "SCMI power-cap"; +} + +static const struct powercap_zone_constraint_ops constraint_ops = { + .set_power_limit_uw = scmi_powercap_set_power_limit_uw, + .get_power_limit_uw = 
scmi_powercap_get_power_limit_uw, + .set_time_window_us = scmi_powercap_set_time_window_us, + .get_time_window_us = scmi_powercap_get_time_window_us, + .get_max_power_uw = scmi_powercap_get_max_power_uw, + .get_min_power_uw = scmi_powercap_get_min_power_uw, + .get_max_time_window_us = scmi_powercap_get_max_time_window_us, + .get_min_time_window_us = scmi_powercap_get_min_time_window_us, + .get_name = scmi_powercap_get_name, +}; + +static void scmi_powercap_unregister_all_zones(struct scmi_powercap_root *pr) +{ + int i; + + /* Unregister child zones first, starting from the leaves */ + for (i = pr->num_zones - 1; i >= 0; i--) { + if (!list_empty(&pr->registered_zones[i])) { + struct scmi_powercap_zone *spz; + + list_for_each_entry(spz, &pr->registered_zones[i], node) + powercap_unregister_zone(scmi_top_pcntrl, + &spz->zone); + } + } +} + +static inline bool +scmi_powercap_is_zone_registered(struct scmi_powercap_zone *spz) +{ + return !list_empty(&spz->node); +} + +static inline unsigned int +scmi_powercap_get_zone_height(struct scmi_powercap_zone *spz) +{ + if (spz->info->parent_id == SCMI_POWERCAP_ROOT_ZONE_ID) + return 0; + + return spz->spzones[spz->info->parent_id].height + 1; +} + +static inline struct scmi_powercap_zone * +scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz) +{ + if (spz->info->parent_id == SCMI_POWERCAP_ROOT_ZONE_ID) + return NULL; + + return &spz->spzones[spz->info->parent_id]; +} + +/** + * scmi_powercap_register_zone - Register an SCMI powercap zone recursively + * + * @pr: A reference to the root powercap zones descriptors + * @spz: A reference to the SCMI powercap zone to register + * + * When registering SCMI powercap zones with the powercap framework we should + * take care to always register zones starting from the root ones and to + * deregister starting from the leaves. + * + * Unfortunately we cannot assume that the array of available SCMI powercap + * zones provided by the SCMI platform firmware is built to comply with such + * a requirement. + * + * This function, given an SCMI powercap zone to register, takes care to walk + * the SCMI powercap zones tree up to the root looking recursively for + * unregistered parent zones before registering the provided zone; at the same + * time each registered zone height in such a tree is accounted for and each + * zone, once registered, is stored in the @registered_zones array that is + * indexed by zone height: this way it will be trivial, at unregister time, to + * walk the @registered_zones array backward and unregister all the zones + * starting from the leaves, removing child zones before parents. + * + * While doing this, we prune away any zone marked as invalid (like the ones + * sporting an SCMI abstract power scale) as long as they are positioned as + * leaves in the SCMI powercap zones hierarchy: any non-leaf invalid zone causes + * the entire process to fail since we cannot assume the correctness of an SCMI + * powercap zones hierarchy if some of the internal nodes are missing. + * + * Note that the array of SCMI powercap zones as returned by the SCMI platform + * is known to be sane, i.e. zone relationships have been validated at the + * protocol layer.
+ * + * Return: 0 on success + */ +static int scmi_powercap_register_zone(struct scmi_powercap_root *pr, + struct scmi_powercap_zone *spz) +{ + int ret = 0; + struct scmi_powercap_zone *parent; + + if (!spz->info) + return ret; + + parent = scmi_powercap_get_parent_zone(spz); + if (parent && !scmi_powercap_is_zone_registered(parent)) { + /* + * Bail out if a parent domain was marked as unsupported: + * only domains participating as leaves can be skipped. + */ + if (!parent->info) + return -ENODEV; + + ret = scmi_powercap_register_zone(pr, parent); + if (ret) + return ret; + } + + if (!scmi_powercap_is_zone_registered(spz)) { + struct powercap_zone *z; + + z = powercap_register_zone(&spz->zone, + scmi_top_pcntrl, + spz->info->name, + parent ? &parent->zone : NULL, + &zone_ops, 1, &constraint_ops); + if (!IS_ERR(z)) { + spz->height = scmi_powercap_get_zone_height(spz); + list_add(&spz->node, + &pr->registered_zones[spz->height]); + dev_dbg(spz->dev, + "Registered node %s - parent %s - height:%d\n", + spz->info->name, + parent ? parent->info->name : "ROOT", + spz->height); + ret = 0; + } else { + ret = PTR_ERR(z); + dev_err(spz->dev, + "Error registering node:%s - parent:%s - h:%d - ret:%d\n", + spz->info->name, + parent ? parent->info->name : "ROOT", + spz->height, ret); + } + } + + return ret; +} + +static int scmi_powercap_probe(struct scmi_device *sdev) +{ + int ret, i; + struct scmi_powercap_root *pr; + struct scmi_powercap_zone *spz; + struct scmi_protocol_handle *ph; + struct device *dev = &sdev->dev; + + if (!sdev->handle) + return -ENODEV; + + powercap_ops = sdev->handle->devm_protocol_get(sdev, + SCMI_PROTOCOL_POWERCAP, + &ph); + if (IS_ERR(powercap_ops)) + return PTR_ERR(powercap_ops); + + pr = devm_kzalloc(dev, sizeof(*pr), GFP_KERNEL); + if (!pr) + return -ENOMEM; + + ret = powercap_ops->num_domains_get(ph); + if (ret < 0) { + dev_err(dev, "number of powercap domains not found\n"); + return ret; + } + pr->num_zones = ret; + + pr->spzones = devm_kcalloc(dev, pr->num_zones, + sizeof(*pr->spzones), GFP_KERNEL); + if (!pr->spzones) + return -ENOMEM; + + /* Allocate for worst possible scenario of maximum tree height. */ + pr->registered_zones = devm_kcalloc(dev, pr->num_zones, + sizeof(*pr->registered_zones), + GFP_KERNEL); + if (!pr->registered_zones) + return -ENOMEM; + + for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) { + /* + * Powercap domains are validated by the protocol layer, i.e. + * only non-NULL domains are returned here, and their + * parent_id is assured to point to another valid domain. + */ + spz->info = powercap_ops->info_get(ph, i); + + spz->dev = dev; + spz->ph = ph; + spz->spzones = pr->spzones; + INIT_LIST_HEAD(&spz->node); + INIT_LIST_HEAD(&pr->registered_zones[i]); + + /* + * Forcibly skip powercap domains using an abstract scale. + * Note that only leaf domains can be skipped, so this could + * later lead to a global failure. + */ + if (!spz->info->powercap_scale_uw && + !spz->info->powercap_scale_mw) { + dev_warn(dev, + "Abstract power scale not supported. Skip %s.\n", + spz->info->name); + spz->info = NULL; + continue; + } + } + + /* + * Scan array of retrieved SCMI powercap domains and register them + * recursively starting from the root domains.
+ */ + for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) { + ret = scmi_powercap_register_zone(pr, spz); + if (ret) { + dev_err(dev, + "Failed to register powercap zone %s - ret:%d\n", + spz->info->name, ret); + scmi_powercap_unregister_all_zones(pr); + return ret; + } + } + + dev_set_drvdata(dev, pr); + + dev_info(dev, "Registered %d SCMI Powercap domains!\n", pr->num_zones); + + return ret; +} + +static void scmi_powercap_remove(struct scmi_device *sdev) +{ + struct device *dev = &sdev->dev; + struct scmi_powercap_root *pr = dev_get_drvdata(dev); + + scmi_powercap_unregister_all_zones(pr); +} + +static const struct scmi_device_id scmi_id_table[] = { + { SCMI_PROTOCOL_POWERCAP, "powercap" }, + { }, +}; +MODULE_DEVICE_TABLE(scmi, scmi_id_table); + +static struct scmi_driver scmi_powercap_driver = { + .name = "scmi-powercap", + .probe = scmi_powercap_probe, + .remove = scmi_powercap_remove, + .id_table = scmi_id_table, +}; + +static int __init scmi_powercap_init(void) +{ + int ret; + + scmi_top_pcntrl = powercap_register_control_type(NULL, "arm-scmi", NULL); + if (IS_ERR(scmi_top_pcntrl)) + return PTR_ERR(scmi_top_pcntrl); + + ret = scmi_register(&scmi_powercap_driver); + if (ret) + powercap_unregister_control_type(scmi_top_pcntrl); + + return ret; +} +module_init(scmi_powercap_init); + +static void __exit scmi_powercap_exit(void) +{ + scmi_unregister(&scmi_powercap_driver); + + powercap_unregister_control_type(scmi_top_pcntrl); +} +module_exit(scmi_powercap_exit); + +MODULE_AUTHOR("Cristian Marussi <[email protected]>"); +MODULE_DESCRIPTION("ARM SCMI Powercap driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 999e218d7793..fe86a09e3b67 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -147,6 +147,7 @@ static void idle_inject_fn(unsigned int cpu) /** * idle_inject_set_duration - idle and run duration update helper + * @ii_dev: idle injection control device structure * @run_duration_us: CPU run time to allow in microseconds * @idle_duration_us: CPU idle time to inject in microseconds */ @@ -162,6 +163,7 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev, /** * idle_inject_get_duration - idle and run duration retrieval helper + * @ii_dev: idle injection control device structure * @run_duration_us: memory location to store the current CPU run time * @idle_duration_us: memory location to store the current CPU idle time */ @@ -175,6 +177,7 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, /** * idle_inject_set_latency - set the maximum latency allowed + * @ii_dev: idle injection control device structure * @latency_us: set the latency requirement for the idle state */ void idle_inject_set_latency(struct idle_inject_device *ii_dev, diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index f0654a932b37..1f968353d479 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/err.h> +#include <linux/kstrtox.h> #include <linux/slab.h> #include <linux/powercap.h> @@ -446,7 +447,7 @@ static ssize_t enabled_store(struct device *dev, { bool mode; - if (strtobool(buf, &mode)) + if (kstrtobool(buf, &mode)) return -EINVAL; if (dev->parent) { struct powercap_zone *power_zone = to_powercap_zone(dev); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c0031a3ab42f..3ac5fcf98d0d 100644 --- a/fs/afs/fs_probe.c +++
b/fs/afs/fs_probe.c @@ -167,8 +167,8 @@ responded: clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; diff --git a/fs/afs/server.c b/fs/afs/server.c index 4981baf97835..b5237206eac3 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server, if (!server) return; - a = atomic_inc_return(&server->active); + a = atomic_read(&server->active); zero = __refcount_dec_and_test(&server->ref, &r); trace_afs_server(debug_id, r - 1, a, reason); if (unlikely(zero)) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 71bfb663aac5..89f4741728ba 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, .mode = mode }; int err; - bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || - (mode & (FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_ZERO_RANGE)); - - bool block_faults = FUSE_IS_DAX(inode) && lock_inode; + bool block_faults = FUSE_IS_DAX(inode) && + (!(mode & FALLOC_FL_KEEP_SIZE) || + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))); if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) @@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fm->fc->no_fallocate) return -EOPNOTSUPP; - if (lock_inode) { - inode_lock(inode); - if (block_faults) { - filemap_invalidate_lock(inode->i_mapping); - err = fuse_dax_break_layouts(inode, 0, 0); - if (err) - goto out; - } + inode_lock(inode); + if (block_faults) { + filemap_invalidate_lock(inode->i_mapping); + err = fuse_dax_break_layouts(inode, 0, 0); + if (err) + goto out; + } - if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { - loff_t endbyte = offset + length - 1; + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { + loff_t endbyte = offset + length - 1; - err = fuse_writeback_range(inode, offset, endbyte); - if (err) - goto out; - } + err = fuse_writeback_range(inode, offset, endbyte); + if (err) + goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -3039,8 +3035,7 @@ out: if (block_faults) filemap_invalidate_unlock(inode->i_mapping); - if (lock_inode) - inode_unlock(inode); + inode_unlock(inode); fuse_flush_time_update(inode); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 3b55e239705f..9930fa901039 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 492dce43236e..cab7cfebf40b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #define tlb_needs_table_invalidate() (true) #endif +void tlb_remove_table_sync_one(void); + #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif +static inline 
void tlb_remove_table_sync_one(void) { } + #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 5755ae5a4712..6a869682c120 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -14,7 +14,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 -#define OCR_MODE_MASK 0x07 +#define OCR_MODE_MASK 0x03 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d5595d57f4e5..6a94a6eaad27 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1110,10 +1110,10 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int parse_perf_domain(int cpu, const char *list_name, - const char *cell_name) + const char *cell_name, + struct of_phandle_args *args) { struct device_node *cpu_np; - struct of_phandle_args args; int ret; cpu_np = of_cpu_device_node_get(cpu); @@ -1121,41 +1121,44 @@ static inline int parse_perf_domain(int cpu, const char *list_name, return -ENODEV; ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0, - &args); + args); if (ret < 0) return ret; of_node_put(cpu_np); - return args.args[0]; + return 0; } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { - int target_idx; int cpu, ret; + struct of_phandle_args args; - ret = parse_perf_domain(pcpu, list_name, cell_name); + ret = parse_perf_domain(pcpu, list_name, cell_name, pargs); if (ret < 0) return ret; - target_idx = ret; cpumask_set_cpu(pcpu, cpumask); for_each_possible_cpu(cpu) { if (cpu == pcpu) continue; - ret = parse_perf_domain(cpu, list_name, cell_name); + ret = parse_perf_domain(cpu, list_name, cell_name, &args); if (ret < 0) continue; - if (target_idx == ret) + if (pargs->np == args.np && pargs->args_count == args.args_count && + !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count)) cpumask_set_cpu(cpu, cpumask); + + of_node_put(args.np); } - return target_idx; + return 0; } #else static inline int cpufreq_boost_trigger_state(int state) @@ -1185,7 +1188,8 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { return -EOPNOTSUPP; } diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ef4aea3b356e..65a78773dcca 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } +static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) +{ + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); + + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) + return; + + if (node_online(this_node)) + return; + + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); + dump_stack(); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). 
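The warn_if_node_offline() helper added in the gfp.h hunk above fires only for allocations that set both __GFP_THISNODE and __GFP_NOWARN, i.e. requests that would otherwise fail silently when the target node is offline. A minimal sketch of such a caller; the function name grab_page_on_node is hypothetical and not part of the patch:

#include <linux/gfp.h>

/*
 * Hypothetical caller: with __GFP_THISNODE | __GFP_NOWARN, requesting
 * memory from an offline node now logs "allocation from offline node"
 * together with a stack dump, instead of failing without a trace.
 */
static struct page *grab_page_on_node(int nid)
{
	return __alloc_pages_node(nid,
				  GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN,
				  0);
}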
@@ -218,7 +232,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } @@ -227,7 +241,7 @@ static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } diff --git a/include/linux/license.h b/include/linux/license.h index ad937f57f2cb..7cce390f120b 100644 --- a/include/linux/license.h +++ b/include/linux/license.h @@ -2,8 +2,6 @@ #ifndef __LICENSE_H #define __LICENSE_H -#include <linux/string.h> - static inline int license_is_gpl_compatible(const char *license) { return (strcmp(license, "GPL") == 0 diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..974ccca609d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1852,6 +1852,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. + */ +struct zap_details { + struct folio *single_folio; /* Locked folio to be unmapped */ + bool even_cows; /* Zap COWed private pages too? */ + zap_flags_t zap_flags; /* Extra flags for zapping */ +}; + +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) +/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ +#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else @@ -1869,6 +1888,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details); void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -3467,12 +3488,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -/* - * Whether to drop the pte markers, for example, the uffd-wp information for - * file-backed memory. This should only be specified when we will completely - * drop the page in the mm, either by truncation or unmapping of the vma. By - * default, the flag is not set. 
- */ -#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) - #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9c50bc40f8ff..6f7993803ee7 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..5f0d7d0b9471 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 01a70b27e026..65058faea4db 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -26,6 +26,8 @@ struct sctp_sched_ops { int (*init)(struct sctp_stream *stream); /* Init a stream */ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* free a stream */ + void (*free_sid)(struct sctp_stream *stream, __u16 sid); /* Frees the entire thing */ void (*free)(struct sctp_stream *stream); diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 802fc15b0d73..f27fa5ba7d72 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -74,7 +74,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, gfp_flags | __GFP_NOWARN); if (selem) { if (value) - memcpy(SDATA(selem)->data, value, smap->map.value_size); + copy_map_value(&smap->map, SDATA(selem)->data, value); return selem; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 9d15d2d96119..7f04f995c975 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2291,6 +2291,7 @@ event_sched_out(struct perf_event *event, !event->pending_work) { event->pending_work = 1; dec = false; + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); task_work_add(current, &event->pending_task, TWA_RESUME); } if (dec) @@ -2336,6 +2337,7 @@ group_sched_out(struct perf_event *group_event, #define DETACH_GROUP 0x01UL #define DETACH_CHILD 0x02UL +#define DETACH_DEAD 0x04UL /* * Cross CPU call to remove a performance event @@ -2356,12 +2358,20 @@ __perf_remove_from_context(struct perf_event *event, update_cgrp_time_from_cpuctx(cpuctx, false); } + /* + * Ensure event_sched_out() switches to OFF, at the very least + * this avoids raising perf_pending_task() at this time. 
+ */ + if (flags & DETACH_DEAD) + event->pending_disable = 1; event_sched_out(event, cpuctx, ctx); if (flags & DETACH_GROUP) perf_group_detach(event); if (flags & DETACH_CHILD) perf_child_detach(event); list_del_event(event, ctx); + if (flags & DETACH_DEAD) + event->state = PERF_EVENT_STATE_DEAD; if (!ctx->nr_events && ctx->is_active) { if (ctx == &cpuctx->ctx) @@ -5121,9 +5131,7 @@ int perf_event_release_kernel(struct perf_event *event) ctx = perf_event_ctx_lock(event); WARN_ON_ONCE(ctx->parent_ctx); - perf_remove_from_context(event, DETACH_GROUP); - raw_spin_lock_irq(&ctx->lock); /* * Mark this event as STATE_DEAD, there is no external reference to it * anymore. @@ -5135,8 +5143,7 @@ int perf_event_release_kernel(struct perf_event *event) * Thus this guarantees that we will in fact observe and kill _ALL_ * child events. */ - event->state = PERF_EVENT_STATE_DEAD; - raw_spin_unlock_irq(&ctx->lock); + perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); perf_event_ctx_unlock(event, ctx); @@ -6577,6 +6584,8 @@ static void perf_pending_task(struct callback_head *head) if (rctx >= 0) perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); + + put_event(event); } #ifdef CONFIG_GUEST_PERF_EVENTS @@ -9030,7 +9039,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, subprog->jited_len, unregister, - prog->aux->ksym.name); + subprog->aux->ksym.name); } } } diff --git a/kernel/power/process.c b/kernel/power/process.c index ddd9988327fe..6c1c7e566d35 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -27,6 +27,8 @@ unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC; static int try_to_freeze_tasks(bool user_only) { + const char *what = user_only ? "user space processes" : + "remaining freezable tasks"; struct task_struct *g, *p; unsigned long end_time; unsigned int todo; @@ -36,6 +38,8 @@ static int try_to_freeze_tasks(bool user_only) bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; + pr_info("Freezing %s\n", what); + start = ktime_get_boottime(); end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs); @@ -82,9 +86,8 @@ static int try_to_freeze_tasks(bool user_only) elapsed_msecs = ktime_to_ms(elapsed); if (todo) { - pr_cont("\n"); - pr_err("Freezing of tasks %s after %d.%03d seconds " - "(%d tasks refusing to freeze, wq_busy=%d):\n", + pr_err("Freezing %s %s after %d.%03d seconds " + "(%d tasks refusing to freeze, wq_busy=%d):\n", what, wakeup ? "aborted" : "failed", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); @@ -101,8 +104,8 @@ static int try_to_freeze_tasks(bool user_only) read_unlock(&tasklist_lock); } } else { - pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, - elapsed_msecs % 1000); + pr_info("Freezing %s completed (elapsed %d.%03d seconds)\n", + what, elapsed_msecs / 1000, elapsed_msecs % 1000); } return todo ? -EBUSY : 0; @@ -130,14 +133,11 @@ int freeze_processes(void) static_branch_inc(&freezer_active); pm_wakeup_clear(0); - pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); - if (!error) { + if (!error) __usermodehelper_set_disable_depth(UMH_DISABLED); - pr_cont("done."); - } - pr_cont("\n"); + BUG_ON(in_atomic()); /* @@ -166,14 +166,9 @@ int freeze_kernel_threads(void) { int error; - pr_info("Freezing remaining freezable tasks ... 
"); - pm_nosig_freezing = true; error = try_to_freeze_tasks(false); - if (!error) - pr_cont("done."); - pr_cont("\n"); BUG_ON(in_atomic()); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 2a406753af90..cd8b7b35f1e8 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1723,8 +1723,8 @@ static unsigned long minimum_image_size(unsigned long saveable) * /sys/power/reserved_size, respectively). To make this happen, we compute the * total number of available page frames and allocate at least * - * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 - * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) + * ([page frames total] - PAGES_FOR_IO - [metadata pages]) / 2 + * - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) * * of them, which corresponds to the maximum size of a hibernation image. * @@ -2259,10 +2259,14 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) if (unlikely(buf[j] == BM_END_OF_MAP)) break; - if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) + if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) { memory_bm_set_bit(bm, buf[j]); - else + } else { + if (!pfn_valid(buf[j])) + pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n", + (unsigned long long)PFN_PHYS(buf[j])); return -EFAULT; + } } return 0; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fe0e115272..5cfc95a52bc3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2180,10 +2180,12 @@ void tracing_reset_online_cpus(struct array_buffer *buf) } /* Must have trace_types_lock held */ -void tracing_reset_all_online_cpus(void) +void tracing_reset_all_online_cpus_unlocked(void) { struct trace_array *tr; + lockdep_assert_held(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->clear_trace) continue; @@ -2195,6 +2197,13 @@ void tracing_reset_all_online_cpus(void) } } +void tracing_reset_all_online_cpus(void) +{ + mutex_lock(&trace_types_lock); + tracing_reset_all_online_cpus_unlocked(); + mutex_unlock(&trace_types_lock); +} + /* * The tgid_map array maps from pid to tgid; i.e. the value stored at index i * is the tgid last observed corresponding to pid=i. 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54ee5711c729..d42e24507152 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -580,6 +580,7 @@ int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); +void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 154996684fb5..4376887e0d8a 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -118,6 +118,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type if (ret) break; } + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); out: argv_free(argv); @@ -214,6 +215,7 @@ int dyn_events_release_all(struct dyn_event_operations *type) break; } out: + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0356cae0cf74..f71ea6e79b3c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2880,7 +2880,10 @@ static int probe_remove_event_call(struct trace_event_call *call) * TRACE_REG_UNREGISTER. */ if (file->flags & EVENT_FILE_FL_ENABLED) - return -EBUSY; + goto busy; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -2893,6 +2896,12 @@ static int probe_remove_event_call(struct trace_event_call *call) __trace_remove_event_call(call); return 0; + busy: + /* No need to clear the trace now */ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + tr->clear_trace = false; + } + return -EBUSY; } /* Remove an event_call */ @@ -2972,7 +2981,7 @@ static void trace_module_remove_events(struct module *mod) * over from this module may be passed to the new module events and * unexpected results may occur. */ - tracing_reset_all_online_cpus(); + tracing_reset_all_online_cpus_unlocked(); } static int trace_module_notify(struct notifier_block *self, diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 48465f7e97b4..1c82478e8dff 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -983,7 +983,7 @@ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, * A trigger can define one or more variables. If any one of them is * currently referenced by any other trigger, this function will * determine that. - + * * Typically used to determine whether or not a trigger can be removed * - if there are any references to a trigger's variables, it cannot. * @@ -3226,7 +3226,7 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, * events. However, for convenience, users are allowed to directly * specify an event field in an action, which will be automatically * converted into a variable on their behalf. - + * * This function creates a field variable with the name var_name on * the hist trigger currently being defined on the target event. 
If * subsys_name and event_name are specified, this function simply @@ -5143,6 +5143,9 @@ static void event_hist_trigger(struct event_trigger_data *data, void *key = NULL; unsigned int i; + if (unlikely(!rbe)) + return; + memset(compound_key, 0, hist_data->key_size); for_each_hist_key_field(i, hist_data) { diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 29fbfb27c2b2..c3b582d19b62 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1425,7 +1425,6 @@ int synth_event_delete(const char *event_name) mutex_unlock(&event_mutex); if (mod) { - mutex_lock(&trace_types_lock); /* * It is safest to reset the ring buffer if the module * being unloaded registered any events that were @@ -1437,7 +1436,6 @@ int synth_event_delete(const char *event_name) * occur. */ tracing_reset_all_online_cpus(); - mutex_unlock(&trace_types_lock); } return ret; diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index ae78c2d53c8a..539b08ae7020 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1100,8 +1100,10 @@ static int user_event_create(const char *raw_command) group = current_user_event_group(); - if (!group) + if (!group) { + kfree(name); return -ENOENT; + } mutex_lock(&group->reg_mutex); diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 78d536d3ff3d..4300c5dc4e5d 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -917,7 +917,7 @@ void osnoise_trace_irq_entry(int id) void osnoise_trace_irq_exit(int id, const char *desc) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1048,7 +1048,7 @@ static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1144,7 +1144,7 @@ thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) static void thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) { - int duration; + s64 duration; if (!osn_var->sampling) return; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a1005415f0f4..3638b3424be5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -399,6 +399,7 @@ config FRAME_WARN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help @@ -1874,8 +1875,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injection of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of these functions. This is useful for testing error paths of code. + + If unsure, say N. config FAULT_INJECTION bool "Fault-injection framework" diff --git a/mm/compaction.c b/mm/compaction.c index c51f7f545afe..1f6da31dd9a5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -985,28 +985,28 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, } /* + * Be careful not to clear PageLRU until after we're + * sure the page is not being freed elsewhere -- the + * page release code relies on it. 
+ */ + if (unlikely(!get_page_unless_zero(page))) + goto isolate_fail; + + /* * Migration will fail if an anonymous page is pinned in memory, * so avoid taking lru_lock and isolating it unnecessarily in an * admittedly racy check. */ mapping = page_mapping(page); - if (!mapping && page_count(page) > page_mapcount(page)) - goto isolate_fail; + if (!mapping && (page_count(page) - 1) > total_mapcount(page)) + goto isolate_fail_put; /* * Only allow to migrate anonymous pages in GFP_NOFS context * because those do not depend on fs locks. */ if (!(cc->gfp_mask & __GFP_FS) && mapping) - goto isolate_fail; - - /* - * Be careful not to clear PageLRU until after we're - * sure the page is not being freed elsewhere -- the - * page release code relies on it. - */ - if (unlikely(!get_page_unless_zero(page))) - goto isolate_fail; + goto isolate_fail_put; /* Only take pages on LRU: a check now makes later tests safe */ if (!PageLRU(page)) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 5ce403378c20..07e5f1bdf025 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme( &wmarks); } +static void damon_sysfs_update_scheme(struct damos *scheme, + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + scheme->pattern.min_sz_region = access_pattern->sz->min; + scheme->pattern.max_sz_region = access_pattern->sz->max; + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; + scheme->pattern.min_age_region = access_pattern->age->min; + scheme->pattern.max_age_region = access_pattern->age->max; + + scheme->action = sysfs_scheme->action; + + scheme->quota.ms = sysfs_quotas->ms; + scheme->quota.sz = sysfs_quotas->sz; + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; + scheme->quota.weight_sz = sysfs_weights->sz; + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; + scheme->quota.weight_age = sysfs_weights->age; + + scheme->wmarks.metric = sysfs_wmarks->metric; + scheme->wmarks.interval = sysfs_wmarks->interval_us; + scheme->wmarks.high = sysfs_wmarks->high; + scheme->wmarks.mid = sysfs_wmarks->mid; + scheme->wmarks.low = sysfs_wmarks->low; +} + static int damon_sysfs_set_schemes(struct damon_ctx *ctx, struct damon_sysfs_schemes *sysfs_schemes) { - int i; + struct damos *scheme, *next; + int i = 0; + + damon_for_each_scheme_safe(scheme, next, ctx) { + if (i < sysfs_schemes->nr) + damon_sysfs_update_scheme(scheme, + sysfs_schemes->schemes_arr[i]); + else + damon_destroy_scheme(scheme); + i++; + } - for (i = 0; i < sysfs_schemes->nr; i++) { + for (; i < sysfs_schemes->nr; i++) { struct damos *scheme, *next; scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f1385c3b6c96..e36ca75311a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5206,17 +5206,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); - /* - * Unlock and free the vma lock before releasing i_mmap_rwsem. When - * the vma_lock is freed, this makes the vma ineligible for pmd - * sharing. And, i_mmap_rwsem is required to set up pmd sharing. 
- * This is important as page tables for this unmapped range will - * be asynchrously deleted. If the page tables are shared, there - * will be issues when accessed by someone else. - */ - __hugetlb_vma_unlock_write_free(vma); - - i_mmap_unlock_write(vma->vm_file->f_mapping); + if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ + /* + * Unlock and free the vma lock before releasing i_mmap_rwsem. + * When the vma_lock is freed, this makes the vma ineligible + * for pmd sharing. And, i_mmap_rwsem is required to set up + * pmd sharing. This is important as page tables for this + * unmapped range will be asynchronously deleted. If the page + * tables are shared, there will be issues when accessed by + * someone else. + */ + __hugetlb_vma_unlock_write_free(vma); + i_mmap_unlock_write(vma->vm_file->f_mapping); + } else { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a8d5ef2a77d2..3703a56571c1 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1051,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(&range); + tlb_remove_table_sync_one(); spin_lock(pte_ptl); result = __collapse_huge_page_isolate(vma, address, pte, cc, @@ -1379,16 +1380,43 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr, return SCAN_SUCCEED; } +/* + * A note about locking: + * Trying to take the page table spinlocks would be useless here because those + * are only used to synchronize: + * + * - modifying terminal entries (ones that point to a data page, not to another + * page table) + * - installing *new* non-terminal entries + * + * Instead, we need roughly the same kind of protection as free_pgtables() or + * mm_take_all_locks() (but only for a single VMA): + * The mmap lock together with this VMA's rmap locks covers all paths towards + * the page table entries we're messing with here, except for hardware page + * table walks and lockless_pages_from_mm(). + */ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - spinlock_t *ptl; pmd_t pmd; + struct mmu_notifier_range range; mmap_assert_write_locked(mm); - ptl = pmd_lock(vma->vm_mm, pmdp); + if (vma->vm_file) + lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem); + /* + * All anon_vmas attached to the VMA have the same root and are + * therefore locked by the same lock. + */ + if (vma->anon_vma) + lockdep_assert_held_write(&vma->anon_vma->root->rwsem); + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr, + addr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); pmd = pmdp_collapse_flush(vma, addr, pmdp); - spin_unlock(ptl); + tlb_remove_table_sync_one(); + mmu_notifier_invalidate_range_end(&range); mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pmd); pte_free(mm, pmd_pgtable(pmd)); @@ -1439,6 +1467,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; + /* + * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings + * that got written to. Without this, we'd have to also lock the + * anon_vma if one exists. 
+ */ + if (vma->anon_vma) + return SCAN_VMA_CHECK; + /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1472,6 +1508,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto drop_hpage; } + /* + * We need to lock the mapping so that from here on, only GUP-fast and + * hardware page walks can access the parts of the page tables that + * we're operating on. + * See collapse_and_free_pmd(). + */ + i_mmap_lock_write(vma->vm_file->f_mapping); + + /* + * This spinlock should be unnecessary: Nobody else should be accessing + * the page tables under spinlock protection here, only + * lockless_pages_from_mm() and the hardware page walker can access page + * tables while all the high-level locks are held in write mode. + */ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); result = SCAN_FAIL; @@ -1526,6 +1576,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, /* step 4: remove pte entries */ collapse_and_free_pmd(mm, vma, haddr, pmd); + i_mmap_unlock_write(vma->vm_file->f_mapping); + maybe_install_pmd: /* step 5: install pmd entry */ result = install_pmd @@ -1539,6 +1591,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + i_mmap_unlock_write(vma->vm_file->f_mapping); goto drop_hpage; } @@ -1595,7 +1648,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, * An alternative would be drop the check, but check that page * table is clear before calling pmdp_collapse_flush() under * ptl. It has higher chance to recover THP for the VMA, but - * has higher cost too. + * has higher cost too. It would also probably require locking + * the anon_vma. */ if (vma->anon_vma) { result = SCAN_PAGE_ANON; diff --git a/mm/madvise.c b/mm/madvise.c index c7105ec6d08c..b913ba6efc10 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The - * zap_page_range call sets things up for shrink_active_list to actually free - * these pages later if no one else has touched them in the meantime, + * zap_page_range_single call sets things up for shrink_active_list to actually + * free these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for * shrink_active_list to pick up before reclaiming other pages. * @@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - zap_page_range(vma, start, end - start); + zap_page_range_single(vma, start, end - start, NULL); return 0; } diff --git a/mm/memory.c b/mm/memory.c index 8a6d5c823f91..8c8420934d60 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct folio *single_folio; /* Locked folio to be unmapped */ - bool even_cows; /* Zap COWed private pages too? 
*/ - zap_flags_t zap_flags; /* Extra flags for zapping */ -}; - /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { @@ -1720,7 +1711,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, { struct mmu_notifier_range range; struct zap_details details = { - .zap_flags = ZAP_FLAG_DROP_MARKER, + .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, /* Careful - we need to zap private pages too! */ .even_cows = true, }; @@ -1774,19 +1765,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, * * The range must fit into one VMA. */ -static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { + const unsigned long end = address + size; struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, address + size); + address, end); + if (is_vm_hugetlb_page(vma)) + adjust_range_if_pmd_sharing_possible(vma, &range.start, + &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); - unmap_single_vma(&tlb, vma, address, range.end, details); + /* + * unmap 'address-end' not 'range.start-range.end' as range + * could have been expanded for hugetlb pmd sharing. + */ + unmap_single_vma(&tlb, vma, address, end, details); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } diff --git a/mm/mmap.c b/mm/mmap.c index 74a84eb33b90..a5eb2f175da0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1779,9 +1779,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, */ pgoff = 0; get_area = shmem_get_unmapped_area; - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - /* Ensures that larger anonymous mappings are THP aligned. 
*/ - get_area = thp_get_unmapped_area; } addr = get_area(file, addr, len, pgoff, flags); diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index add4244e5790..3a2c3f8cad2f 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg) /* Simply deliver the interrupt */ } -static void tlb_remove_table_sync_one(void) +void tlb_remove_table_sync_one(void) { /* * This isn't an RCU grace period and hence the page-tables cannot be @@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ -static void tlb_remove_table_sync_one(void) { } - static void tlb_remove_table_free(struct mmu_table_batch *batch) { __tlb_remove_table_free(batch); diff --git a/mm/vmscan.c b/mm/vmscan.c index 026199c047e0..8fcc5fa768c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3987,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area goto next; if (!pmd_trans_huge(pmd[i])) { - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; @@ -4085,14 +4085,14 @@ restart: #endif walk->mm_stats[MM_NONLEAF_TOTAL]++; -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (arch_has_hw_nonleaf_pmd_young() && + get_cap(LRU_GEN_NONLEAF_YOUNG)) { if (!pmd_young(val)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); } -#endif + if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) continue; @@ -5392,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) caps |= BIT(LRU_GEN_MM_WALK); - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index eeea0a6a75b6..07db2f436d44 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -862,8 +862,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); - if (!p) + if (!p) { + sock_release(csocket); return -ENOMEM; + } csocket->sk->sk_allocation = GFP_NOIO; file = sock_alloc_file(csocket, 0, NULL); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index a50429a62f74..56bb27d67a2e 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -351,17 +351,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, struct hsr_node *node_src) { bool was_multicast_frame; - int res; + int res, recv_len; was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); hsr_addr_subst_source(node_src, skb); skb_pull(skb, ETH_HLEN); + recv_len = skb->len; res = netif_rx(skb); if (res == NET_RX_DROP) { dev->stats.rx_dropped++; } else { dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += recv_len; if (was_multicast_frame) dev->stats.multicast++; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f721c308248b..19a662003eef 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -888,9 +888,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } - /* cannot match on nexthop 
object attributes */ - if (fi->nh) - return 1; + if (fi->nh) { + if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) + return 1; + return 0; + } if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 2e66598fac79..e8ebd343e2bf 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -452,6 +452,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, (status->encoding == RX_ENC_HE && streams > 8))) return 0; + if (idx >= MCS_GROUP_RATES) + return 0; + duration = airtime_mcs_groups[group].duration[idx]; duration <<= airtime_mcs_groups[group].shift; *overhead = 36 + (streams << 2); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b6dc6e260334..1dbc62537259 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2354,12 +2354,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out; } - /* if we are invoked by the msk cleanup code, the subflow is - * already orphaned - */ - if (ssk->sk_socket) - sock_orphan(ssk); - + sock_orphan(ssk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2940,7 +2935,11 @@ cleanup: if (ssk == msk->first) subflow->fail_tout = 0; - sock_orphan(ssk); + /* detach from the parent socket, but allow data_ready to + * push incoming data into the mptcp stack, to properly ack it + */ + ssk->sk_socket = NULL; + ssk->sk_wq = NULL; unlock_sock_fast(ssk, slow); } sock_orphan(sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 02a54d59697b..2159b5f9988f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1745,16 +1745,16 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) for (msk = head; msk; msk = next) { struct sock *sk = (struct sock *)msk; - bool slow, do_cancel_work; + bool do_cancel_work; sock_hold(sk); - slow = lock_sock_fast_nested(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; msk->first = NULL; msk->dl_next = NULL; do_cancel_work = __mptcp_close(sk, 0); - unlock_sock_fast(sk, slow); + release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); sock_put(sk); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ce8dd19f33c..1ab65f7f2a0a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2293,8 +2293,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (snaplen > res) @@ -3520,8 +3519,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; aux.tp_len = origlen; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index ef9fceadef8d..ee6514af830f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) } } +static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_sched_ops *sched; + + if (!SCTP_SO(stream, sid)->ext) + return; + + sched = sctp_sched_ops_from_stream(stream); + sched->free_sid(stream, sid); + 
kfree(SCTP_SO(stream, sid)->ext); + SCTP_SO(stream, sid)->ext = NULL; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. @@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, * sctp_stream_update will swap ->out pointers. */ for (i = 0; i < outcnt; i++) { - kfree(SCTP_SO(new, i)->ext); + sctp_stream_free_ext(new, i); SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; SCTP_SO(stream, i)->ext = NULL; } } - for (i = outcnt; i < stream->outcnt; i++) { - kfree(SCTP_SO(stream, i)->ext); - SCTP_SO(stream, i)->ext = NULL; - } + for (i = outcnt; i < stream->outcnt; i++) + sctp_stream_free_ext(stream, i); } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -174,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream) struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; - sched->free(stream); + sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) - kfree(SCTP_SO(stream, i)->ext); + sctp_stream_free_ext(stream, i); genradix_free(&stream->out); genradix_free(&stream->in); } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 1ad565ed5627..7c8f9d89e16a 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -46,6 +46,10 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_fcfs_free(struct sctp_stream *stream) { } @@ -96,6 +100,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, .init_sid = sctp_sched_fcfs_init_sid, + .free_sid = sctp_sched_fcfs_free_sid, .free = sctp_sched_fcfs_free, .enqueue = sctp_sched_fcfs_enqueue, .dequeue = sctp_sched_fcfs_dequeue, diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 80b5a2c4cbc7..4fc9f2923ed1 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -204,6 +204,24 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, return sctp_sched_prio_set(stream, sid, 0, gfp); } +static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head; + int i; + + if (!prio) + return; + + SCTP_SO(stream, sid)->ext->prio_head = NULL; + for (i = 0; i < stream->outcnt; i++) { + if (SCTP_SO(stream, i)->ext && + SCTP_SO(stream, i)->ext->prio_head == prio) + return; + } + + kfree(prio); +} + static void sctp_sched_prio_free(struct sctp_stream *stream) { struct sctp_stream_priorities *prio, *n; @@ -323,6 +341,7 @@ static struct sctp_sched_ops sctp_sched_prio = { .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, .init_sid = sctp_sched_prio_init_sid, + .free_sid = sctp_sched_prio_free_sid, .free = sctp_sched_prio_free, .enqueue = sctp_sched_prio_enqueue, .dequeue = sctp_sched_prio_dequeue, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index ff425aed62c7..cc444fe0d67c 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -90,6 +90,10 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_rr_free(struct sctp_stream *stream) { sctp_sched_rr_unsched_all(stream); @@ -177,6 +181,7 @@ static struct 
sctp_sched_ops sctp_sched_rr = { .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, .init_sid = sctp_sched_rr_init_sid, + .free_sid = sctp_sched_rr_free_sid, .free = sctp_sched_rr_free, .enqueue = sctp_sched_rr_enqueue, .dequeue = sctp_sched_rr_dequeue, diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index f09316a9035f..d67440de011e 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1971,6 +1971,9 @@ rcv: /* Ok, everything's fine, try to synch own keys according to peers' */ tipc_crypto_key_synch(rx, *skb); + /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ + skb_cb = TIPC_SKB_CB(*skb); + /* Mark skb decrypted */ skb_cb->decrypted = 1; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index da752b0cc752..3d86482e83f5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -330,7 +330,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, * determine if they are the same ie. */ if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { - if (!memcmp(tmp_old + 2, tmp + 2, 5)) { + if (tmp_old[1] >= 5 && tmp[1] >= 5 && + !memcmp(tmp_old + 2, tmp + 2, 5)) { /* same vendor ie, copy from * subelement */ @@ -2526,10 +2527,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const struct cfg80211_bss_ies *ies1, *ies2; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - struct cfg80211_non_tx_bss non_tx_data; + struct cfg80211_non_tx_bss non_tx_data = {}; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); + + /* don't do any further MBSSID handling for S1G */ + if (ieee80211_is_s1g_beacon(mgmt->frame_control)) + return res; + if (!res || !wiphy->support_mbssid || !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index f99e00083141..4c677c8546c7 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate, static int select_clock(struct snd_dice *dice, unsigned int rate) { - __be32 reg; + __be32 reg, new; u32 data; int i; int err; @@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate) if (completion_done(&dice->clock_accepted)) reinit_completion(&dice->clock_accepted); - reg = cpu_to_be32(data); + new = cpu_to_be32(data); err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT, - ®, sizeof(reg)); + &new, sizeof(new)); if (err < 0) return err; if (wait_for_completion_timeout(&dice->clock_accepted, - msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) - return -ETIMEDOUT; + msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) { + if (reg != new) + return -ETIMEDOUT; + } return 0; } diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 51721edd8f53..e88d9ff95cdf 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -143,7 +143,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0xA0, 96, adc_att_tlv), SOC_DOUBLE_R_SX_TLV("PGA Volume", CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, - 0, 0x19, 30, pga_tlv), + 0, 0x1A, 30, pga_tlv), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index a969547708d4..52bb55724724 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ 
b/sound/soc/codecs/tlv320adc3xxx.c @@ -14,6 +14,7 @@ #include <dt-bindings/sound/tlv320adc3xxx.h> #include <linux/clk.h> +#include <linux/gpio/consumer.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/io.h> @@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = { static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx) { +#ifdef CONFIG_GPIOLIB gpiochip_remove(&adc3xxx->gpio_chip); +#endif } static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 79ef4e269bc9..4b86ef82fd93 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -194,6 +194,25 @@ static int fsl_micfil_reset(struct device *dev) if (ret) return ret; + /* + * SRES is a self-clearing bit, but REG_MICFIL_CTRL1 is defined + * as a non-volatile register, so SRES remains set in the regmap + * cache after being set; every later update of REG_MICFIL_CTRL1 + * would then trigger another software reset. Clear it explicitly. + */ + ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1, + MICFIL_CTRL1_SRES); + if (ret) + return ret; + + /* + * Setting SRES should clear the CHnF flags, but even with an added + * delay here they are sometimes not cleared, so clear CHnF explicitly. + */ + ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF); + if (ret) + return ret; + return 0; } diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index bd88de056358..55b009d3c681 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0]; if (mc->platform_max && val > mc->platform_max) return -EINVAL; - if (val > max - min) + if (val > max) return -EINVAL; val_mask = mask << shift; val = (val + min) & mask; @@ -464,10 +464,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, ret = err; if (snd_soc_volsw_is_stereo(mc)) { - unsigned int val2; + unsigned int val2 = ucontrol->value.integer.value[1]; + + if (mc->platform_max && val2 > mc->platform_max) + return -EINVAL; + if (val2 > max) + return -EINVAL; val_mask = mask << rshift; - val2 = (ucontrol->value.integer.value[1] + min) & mask; + val2 = (val2 + min) & mask; val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 184ce1684dcd..91b7106a4a73 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11169,7 +11169,7 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } *link = bpf_program__attach_raw_tracepoint(prog, tp_name); - return libbpf_get_error(link); + return libbpf_get_error(*link); } /* Common logic for all BPF program types that attach to a btf_id */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index f3a8e8e74eb8..d504d96adc83 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -234,7 +234,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_USER_RINGBUF: key_size = 0; value_size = 0; - max_entries = 4096; + max_entries = sysconf(_SC_PAGE_SIZE); break; case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index d285171d4b69..6af142953a94 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -77,6 +77,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, __u32 len = sizeof(info); struct epoll_event *e; struct 
ring *r; + __u64 mmap_sz; void *tmp; int err; @@ -115,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, r->mask = info.max_entries - 1; /* Map writable consumer page */ - tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - map_fd, 0); + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", @@ -129,8 +129,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. * */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, - MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); + return libbpf_err(-E2BIG); + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; ringbuf_unmap_ring(rb, r); @@ -348,6 +352,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) { struct bpf_map_info info; __u32 len = sizeof(info); + __u64 mmap_sz; void *tmp; struct epoll_event *rb_epoll; int err; @@ -384,8 +389,13 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) * simple reading and writing of samples that wrap around the end of * the buffer. See the kernel implementation for details. */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, - PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries); + return -E2BIG; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", @@ -476,6 +486,10 @@ void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size) __u64 cons_pos, prod_pos; struct ringbuf_hdr *hdr; + /* The top two bits are used as special flags */ + if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT)) + return errno = E2BIG, NULL; + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in * the kernel. 
*/ diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index cfe343306e08..c60c90f35d18 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -1462,7 +1462,7 @@ class Data: 'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*', 'ABORT' : r'(?i).*\bABORT\b.*', 'IRQ' : r'.*\bgenirq: .*', - 'TASKFAIL': r'.*Freezing of tasks *.*', + 'TASKFAIL': r'.*Freezing .*after *.*', 'ACPI' : r'.*\bACPI *(?P<b>[A-Za-z]*) *Error[: ].*', 'DISKFULL': r'.*\bNo space left on device.*', 'USBERR' : r'.*usb .*device .*, error [0-9-]*', diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index 099eb4dfd4f7..18405c3b7cee 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -458,7 +458,7 @@ static void test_sk_storage_map_basic(void) struct { int cnt; int lock; - } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; + } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value; struct bpf_map_create_opts bad_xattr; int btf_fd, map_fd, sk_fd, err; @@ -483,38 +483,41 @@ static void test_sk_storage_map_basic(void) "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST | BPF_F_LOCK); CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST); CHECK(err, "bpf_map_update_elem(BPF_EXIST)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Update with BPF_NOEXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST | BPF_F_LOCK); CHECK(!err || errno != EEXIST, @@ -526,22 +529,23 @@ static void test_sk_storage_map_basic(void) value.cnt -= 1; err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + 
err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt again and update with map_flags == 0 */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &sk_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index d457a55ff408..a4b4133d39e9 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -358,10 +358,12 @@ static int get_syms(char ***symsp, size_t *cntp) * We attach to almost all kernel functions and some of them * will cause 'suspicious RCU usage' when fprobe is attached * to them. Filter out the current culprits - arch_cpu_idle - * and rcu_* functions. + * default_idle and rcu_* functions. */ if (!strcmp(name, "arch_cpu_idle")) continue; + if (!strcmp(name, "default_idle")) + continue; if (!strncmp(name, "rcu_", 4)) continue; if (!strcmp(name, "bpf_dispatcher_xdp_func")) @@ -400,7 +402,7 @@ error: return err; } -static void test_bench_attach(void) +void serial_test_kprobe_multi_bench_attach(void) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; @@ -468,6 +470,4 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); - if (test__start_subtest("bench_attach")) - test_bench_attach(); } diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ee5e98204d3d..a47b26ab48f2 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1228,6 +1228,17 @@ ipv4_fcnal() run_cmd "$IP ro add 172.16.101.0/24 nhid 21" run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" log_test $? 2 "Delete multipath route with only nh id based entry" + + run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.102.0/24 nhid 22" + run_cmd "$IP ro del 172.16.102.0/24 dev veth1" + log_test $? 2 "Delete route when specifying only nexthop device" + + run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" + log_test $? 2 "Delete route when specifying only gateway" + + run_cmd "$IP ro del 172.16.102.0/24" + log_test $? 0 "Delete route when not specifying nexthop attributes" } ipv4_grp_fcnal() diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 26e193ffd2a2..873a892147e5 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -150,7 +150,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-loss" `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` if [ $? 
-eq 0 ]; then do_slabs_plotting "$out" @@ -159,7 +159,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-size" `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" |